Merge branch 'release' of github.com:ALRhub/fancy_gym

This commit is contained in:
Dominik Moritz Roth 2024-03-20 10:29:25 +01:00
commit 319578da15
42 changed files with 523 additions and 516 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 28ec069496fc0ad05c8b9641549626a6
config: 36919d67c12a677d3f16f60d980b0313
tags: 645f666f9bcd5a90fca523b33c5a78b7

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.envs.registry &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.envs.registry &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="../../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Overview: module code &mdash; Fancy Gym 0.2 documentation</title>
<title>Overview: module code &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -32,7 +32,7 @@ since they are not available on PyPI yet. Install metaworld via
.. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
Installation from master
~~~~~~~~~~~~~~~~~~~~~~~~
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
.. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld

View File

@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
VERSION: '0.2',
VERSION: '0.3.0',
LANGUAGE: 'en',
COLLAPSE_INDEX: false,
BUILDER: 'html',

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>API &mdash; Fancy Gym 0.2 documentation</title>
<title>API &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DeepMind Control (DMC) &mdash; Fancy Gym 0.2 documentation</title>
<title>DeepMind Control (DMC) &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AirHockey &mdash; Fancy Gym 0.2 documentation</title>
<title>AirHockey &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Classic Control &mdash; Fancy Gym 0.2 documentation</title>
<title>Classic Control &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Fancy &mdash; Fancy Gym 0.2 documentation</title>
<title>Fancy &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Mujoco &mdash; Fancy Gym 0.2 documentation</title>
<title>Mujoco &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Metaworld &mdash; Fancy Gym 0.2 documentation</title>
<title>Metaworld &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Gymnasium &mdash; Fancy Gym 0.2 documentation</title>
<title>Gymnasium &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DeepMind Control Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>DeepMind Control Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -126,7 +126,7 @@
<span class="linenos"> 17</span><span class="sd"> Returns:</span>
<span class="linenos"> 18</span>
<span class="linenos"> 19</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 21</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 22</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 23</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -135,7 +135,7 @@
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 27</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 30</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 31</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 32</span>
@ -193,58 +193,68 @@
<span class="linenos"> 84</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;rbf&#39;,</span>
<span class="linenos"> 85</span> <span class="c1"># &#39;num_basis&#39;: 5</span>
<span class="linenos"> 86</span> <span class="c1"># }</span>
<span class="linenos"> 87</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 88</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 89</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 90</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 91</span>
<span class="linenos"> 92</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 93</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 94</span> <span class="c1"># Resetting to no rendering can be achieved by render(mode=None).</span>
<span class="linenos"> 95</span> <span class="c1"># It is also possible to change the mode multiple times when</span>
<span class="linenos"> 96</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 97</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 98</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos"> 99</span>
<span class="linenos">100</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">101</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">102</span>
<span class="linenos">103</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">104</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">105</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">106</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">108</span>
<span class="linenos">109</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">110</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">111</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">112</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">113</span>
<span class="linenos">114</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">115</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">116</span>
<span class="linenos"> 87</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 88</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 89</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 90</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 91</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 92</span>
<span class="linenos"> 93</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 94</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 95</span> <span class="c1"># Resetting to no rendering can be achieved by render(mode=None).</span>
<span class="linenos"> 96</span> <span class="c1"># It is also possible to change the mode multiple times when</span>
<span class="linenos"> 97</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 98</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 99</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">100</span>
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">103</span>
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">109</span>
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">114</span>
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">117</span>
<span class="linenos">118</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">119</span> <span class="c1"># Disclaimer: DMC environments require the seed to be specified in the beginning.</span>
<span class="linenos">120</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.</span>
<span class="linenos">121</span>
<span class="linenos">122</span> <span class="c1"># For rendering DMC</span>
<span class="linenos">123</span> <span class="c1"># export MUJOCO_GL=&quot;osmesa&quot;</span>
<span class="linenos">124</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
<span class="linenos">125</span>
<span class="linenos">126</span> <span class="c1"># # Standard DMC Suite tasks</span>
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">128</span> <span class="c1">#</span>
<span class="linenos">129</span> <span class="c1"># # Manipulation tasks</span>
<span class="linenos">130</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
<span class="linenos">131</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/manipulation-reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span> <span class="c1">#</span>
<span class="linenos">133</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">135</span>
<span class="linenos">136</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">137</span> <span class="c1"># already registered above</span>
<span class="linenos">138</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">118</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="linenos">119</span> <span class="c1"># # Standard DMC Suite tasks</span>
<span class="linenos">120</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">121</span> <span class="c1">#</span>
<span class="linenos">122</span> <span class="c1"># # Manipulation tasks</span>
<span class="linenos">123</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
<span class="linenos">124</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">125</span> <span class="c1">#</span>
<span class="linenos">126</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">128</span>
<span class="linenos">129</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">130</span> <span class="c1"># already registered above</span>
<span class="linenos">131</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span>
<span class="linenos">133</span> <span class="c1"># # Standard DMC Suite tasks</span>
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">135</span> <span class="c1">#</span>
<span class="linenos">136</span> <span class="c1"># # Manipulation tasks</span>
<span class="linenos">137</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
<span class="linenos">138</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">139</span> <span class="c1">#</span>
<span class="linenos">140</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">141</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">142</span>
<span class="linenos">143</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">144</span> <span class="c1"># already registered above</span>
<span class="linenos">145</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">146</span>
<span class="linenos">147</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">148</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>General Usage Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>General Usage Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -130,7 +130,7 @@
<span class="linenos"> 21</span>
<span class="linenos"> 22</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 23</span>
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 25</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 26</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 27</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Observation shape: &quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -194,21 +194,21 @@
<span class="linenos"> 85</span> <span class="c1"># do not return values above threshold</span>
<span class="linenos"> 86</span> <span class="k">return</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">v</span><span class="p">)[:</span><span class="n">n_samples</span><span class="p">],</span> <span class="n">buffer</span><span class="o">.</span><span class="n">values</span><span class="p">()),</span>
<span class="linenos"> 87</span>
<span class="linenos"> 88</span>
<span class="linenos"> 89</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos"> 90</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
<span class="linenos"> 88</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="linenos"> 89</span> <span class="c1"># Basic gym task</span>
<span class="linenos"> 90</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;Pendulum-v1&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 91</span>
<span class="linenos"> 92</span> <span class="c1"># Basic gym task</span>
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;Pendulum-v1&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 92</span> <span class="c1"># Mujoco task from framework</span>
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;fancy/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 94</span>
<span class="linenos"> 95</span> <span class="c1"># Mujoco task from framework</span>
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;fancy/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 95</span> <span class="c1"># # OpenAI Mujoco task</span>
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;HalfCheetah-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 97</span>
<span class="linenos"> 98</span> <span class="c1"># # OpenAI Mujoco task</span>
<span class="linenos"> 99</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;HalfCheetah-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 98</span> <span class="c1"># Vectorized multiprocessing environments</span>
<span class="linenos"> 99</span> <span class="c1"># example_async(env_id=&quot;HoleReacher-v0&quot;, n_cpu=2, seed=int(&#39;533D&#39;, 16), n_samples=2 * 200)</span>
<span class="linenos">100</span>
<span class="linenos">101</span> <span class="c1"># Vectorized multiprocessing environments</span>
<span class="linenos">102</span> <span class="c1"># example_async(env_id=&quot;HoleReacher-v0&quot;, n_cpu=2, seed=int(&#39;533D&#39;, 16), n_samples=2 * 200)</span>
<span class="linenos">101</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">102</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Metaworld Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>Metaworld Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -111,7 +111,7 @@
<span class="linenos"> 2</span><span class="kn">import</span> <span class="nn">fancy_gym</span>
<span class="linenos"> 3</span>
<span class="linenos"> 4</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">&quot;fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 6</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 7</span><span class="sd"> Example for running a MetaWorld based env in the step based setting.</span>
<span class="linenos"> 8</span><span class="sd"> The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always</span>
@ -127,7 +127,7 @@
<span class="linenos"> 18</span><span class="sd"> Returns:</span>
<span class="linenos"> 19</span>
<span class="linenos"> 20</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 22</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 23</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 24</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -136,111 +136,104 @@
<span class="linenos"> 27</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 28</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 29</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 30</span> <span class="c1"># THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM</span>
<span class="linenos"> 31</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
<span class="linenos"> 32</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="linenos"> 33</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 34</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 35</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 36</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos"> 37</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 38</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 39</span>
<span class="linenos"> 40</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 41</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos"> 42</span>
<span class="linenos"> 43</span>
<span class="linenos"> 44</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 45</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 46</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos"> 47</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 48</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 49</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span>
<span class="linenos"> 50</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
<span class="linenos"> 51</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 52</span><span class="sd"> Args:</span>
<span class="linenos"> 53</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 54</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 55</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 30</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 31</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 32</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 33</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 34</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos"> 35</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 36</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
<span class="linenos"> 37</span>
<span class="linenos"> 38</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 39</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos"> 40</span>
<span class="linenos"> 41</span>
<span class="linenos"> 42</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 43</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 44</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos"> 45</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 46</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 47</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span>
<span class="linenos"> 48</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
<span class="linenos"> 49</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 50</span><span class="sd"> Args:</span>
<span class="linenos"> 51</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 52</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 53</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 54</span>
<span class="linenos"> 55</span><span class="sd"> Returns:</span>
<span class="linenos"> 56</span>
<span class="linenos"> 57</span><span class="sd"> Returns:</span>
<span class="linenos"> 57</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 58</span>
<span class="linenos"> 59</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 60</span>
<span class="linenos"> 61</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
<span class="linenos"> 62</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;metaworld/button-press-v2&quot;</span>
<span class="linenos"> 63</span>
<span class="linenos"> 64</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
<span class="linenos"> 65</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
<span class="linenos"> 66</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos"> 67</span> <span class="c1"># # For a ProMP</span>
<span class="linenos"> 68</span> <span class="c1"># trajectory_generator_kwargs = {&#39;trajectory_generator_type&#39;: &#39;promp&#39;}</span>
<span class="linenos"> 69</span> <span class="c1"># phase_generator_kwargs = {&#39;phase_generator_type&#39;: &#39;linear&#39;}</span>
<span class="linenos"> 70</span> <span class="c1"># controller_kwargs = {&#39;controller_type&#39;: &#39;metaworld&#39;}</span>
<span class="linenos"> 71</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;zero_rbf&#39;,</span>
<span class="linenos"> 72</span> <span class="c1"># &#39;num_basis&#39;: 5,</span>
<span class="linenos"> 73</span> <span class="c1"># &#39;num_basis_zero_start&#39;: 1</span>
<span class="linenos"> 74</span> <span class="c1"># }</span>
<span class="linenos"> 75</span>
<span class="linenos"> 76</span> <span class="c1"># For a DMP</span>
<span class="linenos"> 77</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">}</span>
<span class="linenos"> 78</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos"> 79</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
<span class="linenos"> 80</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;metaworld&#39;</span><span class="p">}</span>
<span class="linenos"> 81</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos"> 82</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos"> 83</span> <span class="p">}</span>
<span class="linenos"> 84</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 85</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 86</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 87</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 88</span>
<span class="linenos"> 89</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 90</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 91</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 92</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
<span class="linenos"> 93</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 94</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 95</span> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Metaworld render interface bug does not allow to render() fixes its interface. &quot;</span>
<span class="linenos"> 96</span> <span class="s2">&quot;A temporary workaround is to alter their code in MujocoEnv render() from &quot;</span>
<span class="linenos"> 97</span> <span class="s2">&quot;`if not offscreen` to `if not offscreen or offscreen == &#39;human&#39;`.&quot;</span><span class="p">)</span>
<span class="linenos"> 98</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
<span class="linenos"> 99</span> <span class="c1"># env.render(mode=&quot;human&quot;)</span>
<span class="linenos">100</span>
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">103</span>
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 59</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
<span class="linenos"> 60</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;metaworld/button-press-v2&quot;</span>
<span class="linenos"> 61</span>
<span class="linenos"> 62</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
<span class="linenos"> 63</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
<span class="linenos"> 64</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos"> 65</span> <span class="c1"># # For a ProMP</span>
<span class="linenos"> 66</span> <span class="c1"># trajectory_generator_kwargs = {&#39;trajectory_generator_type&#39;: &#39;promp&#39;}</span>
<span class="linenos"> 67</span> <span class="c1"># phase_generator_kwargs = {&#39;phase_generator_type&#39;: &#39;linear&#39;}</span>
<span class="linenos"> 68</span> <span class="c1"># controller_kwargs = {&#39;controller_type&#39;: &#39;metaworld&#39;}</span>
<span class="linenos"> 69</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;zero_rbf&#39;,</span>
<span class="linenos"> 70</span> <span class="c1"># &#39;num_basis&#39;: 5,</span>
<span class="linenos"> 71</span> <span class="c1"># &#39;num_basis_zero_start&#39;: 1</span>
<span class="linenos"> 72</span> <span class="c1"># }</span>
<span class="linenos"> 73</span>
<span class="linenos"> 74</span> <span class="c1"># For a DMP</span>
<span class="linenos"> 75</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">}</span>
<span class="linenos"> 76</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos"> 77</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
<span class="linenos"> 78</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;metaworld&#39;</span><span class="p">}</span>
<span class="linenos"> 79</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos"> 80</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos"> 81</span> <span class="p">}</span>
<span class="linenos"> 82</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 83</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 84</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 85</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 86</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 87</span>
<span class="linenos"> 88</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 89</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 90</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 91</span> <span class="c1"># It is also possible to change the mode multiple times when</span>
<span class="linenos"> 92</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 93</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 94</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 95</span>
<span class="linenos"> 96</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 97</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 98</span>
<span class="linenos"> 99</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">100</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">101</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">102</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">103</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">104</span>
<span class="linenos">105</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">106</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">108</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
<span class="linenos">109</span>
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">114</span>
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">117</span>
<span class="linenos">118</span>
<span class="linenos">119</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">120</span> <span class="c1"># Disclaimer: MetaWorld environments require the seed to be specified in the beginning.</span>
<span class="linenos">121</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.</span>
<span class="linenos">122</span>
<span class="linenos">123</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
<span class="linenos">124</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
<span class="linenos">125</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">126</span>
<span class="linenos">127</span> <span class="c1"># # Standard Meta world tasks</span>
<span class="linenos">128</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">129</span>
<span class="linenos">130</span> <span class="c1"># # MP + MetaWorld hybrid task provided in our framework</span>
<span class="linenos">131</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld_ProMP/ButtonPress-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span> <span class="c1">#</span>
<span class="linenos">133</span> <span class="c1"># # Custom MetaWorld task</span>
<span class="linenos">134</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">110</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">111</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">112</span>
<span class="linenos">113</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="linenos">114</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
<span class="linenos">115</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
<span class="linenos">116</span>
<span class="linenos">117</span> <span class="c1"># # Standard Meta world tasks</span>
<span class="linenos">118</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">119</span>
<span class="linenos">120</span> <span class="c1"># # MP + MetaWorld hybrid task provided in our framework</span>
<span class="linenos">121</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld_ProMP/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">122</span> <span class="c1">#</span>
<span class="linenos">123</span> <span class="c1"># # Custom MetaWorld task</span>
<span class="linenos">124</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">125</span>
<span class="linenos">126</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">127</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Movement Primitives Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>Movement Primitives Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -135,252 +135,253 @@
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 27</span>
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">1</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 30</span>
<span class="linenos"> 31</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
<span class="linenos"> 32</span> <span class="c1"># such as a ProMP</span>
<span class="linenos"> 33</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 34</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
<span class="linenos"> 35</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The &#39;reward&#39; is equal</span>
<span class="linenos"> 36</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
<span class="linenos"> 37</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 38</span> <span class="c1"># Aggregated returns</span>
<span class="linenos"> 39</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 40</span>
<span class="linenos"> 41</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 42</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 43</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 44</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 45</span>
<span class="linenos"> 46</span>
<span class="linenos"> 47</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 48</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 49</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
<span class="linenos"> 50</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 51</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 52</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos"> 53</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos"> 54</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 55</span><span class="sd"> Args:</span>
<span class="linenos"> 56</span><span class="sd"> seed: seed</span>
<span class="linenos"> 57</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 58</span><span class="sd"> render: Render the episode</span>
<span class="linenos"> 59</span>
<span class="linenos"> 60</span><span class="sd"> Returns:</span>
<span class="linenos"> 29</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 30</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 31</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 32</span>
<span class="linenos"> 33</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
<span class="linenos"> 34</span> <span class="c1"># such as a ProMP</span>
<span class="linenos"> 35</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 36</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
<span class="linenos"> 37</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The &#39;reward&#39; is equal</span>
<span class="linenos"> 38</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
<span class="linenos"> 39</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 40</span> <span class="c1"># Aggregated returns</span>
<span class="linenos"> 41</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 42</span>
<span class="linenos"> 43</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 44</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 45</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 46</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 47</span>
<span class="linenos"> 48</span>
<span class="linenos"> 49</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 50</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 51</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
<span class="linenos"> 52</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 53</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 54</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos"> 55</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos"> 56</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 57</span><span class="sd"> Args:</span>
<span class="linenos"> 58</span><span class="sd"> seed: seed</span>
<span class="linenos"> 59</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 60</span><span class="sd"> render: Render the episode</span>
<span class="linenos"> 61</span>
<span class="linenos"> 62</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 63</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
<span class="linenos"> 64</span> <span class="c1"># E.g. here for way too many basis functions</span>
<span class="linenos"> 65</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 66</span>
<span class="linenos"> 67</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 68</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 69</span>
<span class="linenos"> 70</span> <span class="c1"># This time rendering every trajectory</span>
<span class="linenos"> 71</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 72</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 73</span>
<span class="linenos"> 74</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos"> 75</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 76</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 77</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 78</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 79</span>
<span class="linenos"> 80</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 81</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 82</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 83</span>
<span class="linenos"> 84</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 85</span> <span class="k">return</span> <span class="n">obs</span>
<span class="linenos"> 86</span>
<span class="linenos"> 87</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
<span class="linenos"> 88</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
<span class="linenos"> 89</span> <span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 90</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 91</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos"> 92</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos"> 93</span> <span class="p">},</span>
<span class="linenos"> 94</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 95</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos"> 96</span> <span class="p">},</span>
<span class="linenos"> 97</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 98</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos"> 99</span> <span class="p">},</span>
<span class="linenos">100</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">101</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">102</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">103</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">104</span> <span class="p">}</span>
<span class="linenos">105</span> <span class="p">},</span>
<span class="linenos">106</span> <span class="s1">&#39;DMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">107</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">108</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">,</span>
<span class="linenos">109</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">500</span>
<span class="linenos">110</span> <span class="p">},</span>
<span class="linenos">111</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">112</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos">113</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mf">2.5</span>
<span class="linenos">114</span> <span class="p">},</span>
<span class="linenos">115</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">116</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">117</span> <span class="p">},</span>
<span class="linenos">118</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">119</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos">120</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos">121</span> <span class="p">}</span>
<span class="linenos">122</span> <span class="p">}</span>
<span class="linenos">123</span> <span class="p">}</span>
<span class="linenos">124</span>
<span class="linenos">125</span>
<span class="linenos">126</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">127</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">128</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
<span class="linenos">129</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos">130</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos">131</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos">132</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos">133</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos">134</span><span class="sd"> Args:</span>
<span class="linenos">135</span><span class="sd"> seed: seed</span>
<span class="linenos">136</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">137</span><span class="sd"> render: Render the episode</span>
<span class="linenos">138</span>
<span class="linenos">139</span><span class="sd"> Returns:</span>
<span class="linenos"> 62</span><span class="sd"> Returns:</span>
<span class="linenos"> 63</span>
<span class="linenos"> 64</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 65</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
<span class="linenos"> 66</span> <span class="c1"># E.g. here for way too many basis functions</span>
<span class="linenos"> 67</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 68</span>
<span class="linenos"> 69</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 70</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 71</span>
<span class="linenos"> 72</span> <span class="c1"># This time rendering every trajectory</span>
<span class="linenos"> 73</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 74</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 75</span>
<span class="linenos"> 76</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos"> 77</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 78</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 79</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 80</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 81</span>
<span class="linenos"> 82</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 83</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 84</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 85</span>
<span class="linenos"> 86</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 87</span> <span class="k">return</span> <span class="n">obs</span>
<span class="linenos"> 88</span>
<span class="linenos"> 89</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
<span class="linenos"> 90</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
<span class="linenos"> 91</span> <span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 92</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 93</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos"> 94</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos"> 95</span> <span class="p">},</span>
<span class="linenos"> 96</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 97</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos"> 98</span> <span class="p">},</span>
<span class="linenos"> 99</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">100</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">101</span> <span class="p">},</span>
<span class="linenos">102</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">103</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">104</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">105</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">106</span> <span class="p">}</span>
<span class="linenos">107</span> <span class="p">},</span>
<span class="linenos">108</span> <span class="s1">&#39;DMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">109</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">110</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">,</span>
<span class="linenos">111</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">500</span>
<span class="linenos">112</span> <span class="p">},</span>
<span class="linenos">113</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">114</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos">115</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mf">2.5</span>
<span class="linenos">116</span> <span class="p">},</span>
<span class="linenos">117</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">118</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">119</span> <span class="p">},</span>
<span class="linenos">120</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">121</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos">122</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos">123</span> <span class="p">}</span>
<span class="linenos">124</span> <span class="p">}</span>
<span class="linenos">125</span> <span class="p">}</span>
<span class="linenos">126</span>
<span class="linenos">127</span>
<span class="linenos">128</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">129</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">130</span><span class="sd"> Example for running custom movement primitive based environments.</span>
<span class="linenos">131</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos">132</span><span class="sd"> Hence, this also allows adjusting hyperparameters of the movement primitives.</span>
<span class="linenos">133</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos">134</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos">135</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos">136</span><span class="sd"> Args:</span>
<span class="linenos">137</span><span class="sd"> seed: seed</span>
<span class="linenos">138</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">139</span><span class="sd"> render: Render the episode</span>
<span class="linenos">140</span>
<span class="linenos">141</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">141</span><span class="sd"> Returns:</span>
<span class="linenos">142</span>
<span class="linenos">143</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">144</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">145</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_DMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">146</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">147</span>
<span class="linenos">148</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">,</span> <span class="s1">&#39;DMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
<span class="linenos">143</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">144</span>
<span class="linenos">145</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">146</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">147</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_DMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">148</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">149</span>
<span class="linenos">150</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">150</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">,</span> <span class="s1">&#39;DMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
<span class="linenos">151</span>
<span class="linenos">152</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">153</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">154</span>
<span class="linenos">155</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">156</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">157</span>
<span class="linenos">158</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">159</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">160</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">161</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">162</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">163</span>
<span class="linenos">164</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">165</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">166</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">167</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">168</span>
<span class="linenos">169</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correctly implement .close</span>
<span class="linenos">170</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">171</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">172</span> <span class="k">pass</span>
<span class="linenos">173</span>
<span class="linenos">174</span>
<span class="linenos">175</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">176</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">177</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
<span class="linenos">178</span><span class="sd"> Args:</span>
<span class="linenos">179</span><span class="sd"> seed: seed</span>
<span class="linenos">180</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">181</span><span class="sd"> render: Render the episode</span>
<span class="linenos">182</span>
<span class="linenos">183</span><span class="sd"> Returns:</span>
<span class="linenos">152</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">153</span>
<span class="linenos">154</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">155</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">156</span>
<span class="linenos">157</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">158</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">159</span>
<span class="linenos">160</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">161</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">162</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">163</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">164</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">165</span>
<span class="linenos">166</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">167</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">168</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">169</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">170</span>
<span class="linenos">171</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correctly implement .close</span>
<span class="linenos">172</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">173</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">174</span> <span class="k">pass</span>
<span class="linenos">175</span>
<span class="linenos">176</span>
<span class="linenos">177</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">178</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">179</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
<span class="linenos">180</span><span class="sd"> Args:</span>
<span class="linenos">181</span><span class="sd"> seed: seed</span>
<span class="linenos">182</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">183</span><span class="sd"> render: Render the episode</span>
<span class="linenos">184</span>
<span class="linenos">185</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">185</span><span class="sd"> Returns:</span>
<span class="linenos">186</span>
<span class="linenos">187</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">188</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">189</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">190</span>
<span class="linenos">191</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">192</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">193</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos">194</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos">195</span> <span class="p">},</span>
<span class="linenos">196</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">197</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos">198</span> <span class="p">},</span>
<span class="linenos">199</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">200</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">201</span> <span class="p">},</span>
<span class="linenos">202</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">203</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">204</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">205</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">206</span> <span class="p">}</span>
<span class="linenos">207</span> <span class="p">}})</span>
<span class="linenos">208</span>
<span class="linenos">209</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">187</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">188</span>
<span class="linenos">189</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">190</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">191</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">192</span>
<span class="linenos">193</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">194</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">195</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos">196</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos">197</span> <span class="p">},</span>
<span class="linenos">198</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">199</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos">200</span> <span class="p">},</span>
<span class="linenos">201</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">202</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">203</span> <span class="p">},</span>
<span class="linenos">204</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">205</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">206</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">207</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">208</span> <span class="p">}</span>
<span class="linenos">209</span> <span class="p">}})</span>
<span class="linenos">210</span>
<span class="linenos">211</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">212</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">213</span>
<span class="linenos">214</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">215</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">216</span>
<span class="linenos">217</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">218</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">219</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">220</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">221</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">222</span>
<span class="linenos">223</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">224</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">225</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">226</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">227</span>
<span class="linenos">228</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">229</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">230</span>
<span class="linenos">231</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">232</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">233</span>
<span class="linenos">234</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">235</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">236</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">237</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">238</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">239</span>
<span class="linenos">240</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">241</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">242</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">243</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">244</span>
<span class="linenos">245</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correlcty implement .close</span>
<span class="linenos">246</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">247</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">248</span> <span class="k">pass</span>
<span class="linenos">249</span>
<span class="linenos">250</span>
<span class="linenos">251</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
<span class="linenos">252</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">253</span> <span class="c1"># DMP</span>
<span class="linenos">254</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_DMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">255</span>
<span class="linenos">256</span> <span class="c1"># ProMP</span>
<span class="linenos">257</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/BoxPushingTemporalSparse-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/TableTennis4D-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">260</span>
<span class="linenos">261</span> <span class="c1"># ProDMP with Replanning</span>
<span class="linenos">262</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennis4DReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennisWindReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">265</span>
<span class="linenos">266</span> <span class="c1"># Altered basis functions</span>
<span class="linenos">267</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">268</span>
<span class="linenos">269</span> <span class="c1"># Custom MP</span>
<span class="linenos">270</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">271</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">272</span>
<span class="linenos">273</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">274</span> <span class="n">main</span><span class="p">()</span>
<span class="linenos">211</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">212</span>
<span class="linenos">213</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">214</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">215</span>
<span class="linenos">216</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">217</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">218</span>
<span class="linenos">219</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">220</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">221</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">222</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">223</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">224</span>
<span class="linenos">225</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">226</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">227</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">228</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">229</span>
<span class="linenos">230</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">231</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">232</span>
<span class="linenos">233</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">234</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">235</span>
<span class="linenos">236</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">237</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">238</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">239</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">240</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">241</span>
<span class="linenos">242</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">243</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">244</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">245</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">246</span>
<span class="linenos">247</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correlcty implement .close</span>
<span class="linenos">248</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">249</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">250</span> <span class="k">pass</span>
<span class="linenos">251</span>
<span class="linenos">252</span>
<span class="linenos">253</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="linenos">254</span> <span class="c1"># DMP</span>
<span class="linenos">255</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_DMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">256</span>
<span class="linenos">257</span> <span class="c1"># ProMP</span>
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/BoxPushingTemporalSparse-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">260</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/TableTennis4D-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">261</span>
<span class="linenos">262</span> <span class="c1"># ProDMP with Replanning</span>
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennis4DReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">265</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennisWindReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">266</span>
<span class="linenos">267</span> <span class="c1"># Altered basis functions</span>
<span class="linenos">268</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">269</span>
<span class="linenos">270</span> <span class="c1"># Custom MP</span>
<span class="linenos">271</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">272</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">273</span>
<span class="linenos">274</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">275</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MP Params Tuning Example &mdash; Fancy Gym 0.2 documentation</title>
<title>MP Params Tuning Example &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>OpenAI Envs Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>OpenAI Envs Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -122,27 +122,27 @@
<span class="linenos">13</span><span class="sd"> Returns:</span>
<span class="linenos">14</span>
<span class="linenos">15</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">17</span>
<span class="linenos">18</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">19</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">20</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">21</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
<span class="linenos">22</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">24</span> <span class="k">else</span><span class="p">:</span>
<span class="linenos">25</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">26</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">27</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">28</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">29</span>
<span class="linenos">30</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">31</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
<span class="linenos">32</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">33</span>
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">24</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">25</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">26</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">27</span>
<span class="linenos">28</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">29</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
<span class="linenos">30</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">31</span>
<span class="linenos">32</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">33</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;gym_ProMP/Reacher-v2&quot;</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">34</span>
<span class="linenos">35</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">36</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;gym_ProMP/Reacher-v2&quot;</span><span class="p">)</span>
<span class="linenos">36</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>PD Control Gain Tuning Example &mdash; Fancy Gym 0.2 documentation</title>
<title>PD Control Gain Tuning Example &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Replanning Example &mdash; Fancy Gym 0.2 documentation</title>
<title>Replanning Example &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -112,24 +112,24 @@
<span class="linenos"> 3</span>
<span class="linenos"> 4</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 7</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 8</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 9</span> <span class="n">done</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">10</span> <span class="k">while</span> <span class="n">done</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
<span class="linenos">11</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">12</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">13</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">14</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">15</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">16</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 9</span> <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="linenos">10</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">11</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">12</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">13</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">14</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">15</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">16</span> <span class="k">break</span>
<span class="linenos">17</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">18</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">19</span>
<span class="linenos">20</span>
<span class="linenos">21</span><span class="k">def</span> <span class="nf">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">22</span> <span class="c1"># id for a step-based environment</span>
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;BoxPushingDense-v0&quot;</span>
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/BoxPushingDense-v0&quot;</span>
<span class="linenos">24</span>
<span class="linenos">25</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">box_pushing</span><span class="o">.</span><span class="n">mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos">26</span>
@ -147,31 +147,34 @@
<span class="linenos">38</span> <span class="s1">&#39;replanning_schedule&#39;</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">pos</span><span class="p">,</span> <span class="n">vel</span><span class="p">,</span> <span class="n">obs</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="n">t</span> <span class="o">%</span> <span class="mi">25</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span>
<span class="linenos">39</span> <span class="s1">&#39;condition_on_desired&#39;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span>
<span class="linenos">40</span>
<span class="linenos">41</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
<span class="linenos">42</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos">43</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos">44</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">45</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">46</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">47</span>
<span class="linenos">48</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">49</span>
<span class="linenos">50</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
<span class="linenos">51</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">52</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">53</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">54</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">55</span>
<span class="linenos">56</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">57</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">58</span>
<span class="linenos">41</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">42</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
<span class="linenos">43</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos">44</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos">45</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">46</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">47</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">48</span>
<span class="linenos">49</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">50</span>
<span class="linenos">51</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
<span class="linenos">52</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">53</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">54</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">55</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">56</span>
<span class="linenos">57</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">58</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">59</span>
<span class="linenos">60</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="linenos">60</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="linenos">61</span> <span class="c1"># run a registered replanning environment</span>
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">63</span>
<span class="linenos">64</span> <span class="c1"># run a custom replanning environment</span>
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">66</span>
<span class="linenos">67</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="linenos">68</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.envs &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.envs &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -39,7 +39,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.register &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.register &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.upgrade &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.upgrade &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -40,7 +40,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Index &mdash; Fancy Gym 0.2 documentation</title>
<title>Index &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Basic Usage &mdash; Fancy Gym 0.2 documentation</title>
<title>Basic Usage &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>What is Episodic RL? &mdash; Fancy Gym 0.2 documentation</title>
<title>What is Episodic RL? &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Installation &mdash; Fancy Gym 0.2 documentation</title>
<title>Installation &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -135,7 +135,7 @@ pip<span class="w"> </span>install<span class="w"> </span><span class="s1">&#39;
</div>
<p>Pip cannot automatically install up-to-date versions of metaworld,
since they are not available on PyPI yet. Install metaworld via</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
</pre></div>
</div>
</section>
@ -169,7 +169,7 @@ pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </s
</pre></div>
</div>
<p>Metaworld has to be installed manually with</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Creating new MP Environments &mdash; Fancy Gym 0.2 documentation</title>
<title>Creating new MP Environments &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Fancy Gym &mdash; Fancy Gym 0.2 documentation</title>
<title>Fancy Gym &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -40,7 +40,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

Binary file not shown.

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Python Module Index &mdash; Fancy Gym 0.2 documentation</title>
<title>Python Module Index &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Search &mdash; Fancy Gym 0.2 documentation</title>
<title>Search &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="#" method="get">

File diff suppressed because one or more lines are too long