Merge branch 'release' of github.com:ALRhub/fancy_gym
This commit is contained in:
commit
319578da15
BIN
docs/build/doctrees/environment.pickle
vendored
BIN
docs/build/doctrees/environment.pickle
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/dmc.doctree
vendored
BIN
docs/build/doctrees/examples/dmc.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/general.doctree
vendored
BIN
docs/build/doctrees/examples/general.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/metaworld.doctree
vendored
BIN
docs/build/doctrees/examples/metaworld.doctree
vendored
Binary file not shown.
Binary file not shown.
BIN
docs/build/doctrees/examples/open_ai.doctree
vendored
BIN
docs/build/doctrees/examples/open_ai.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/replanning_envs.doctree
vendored
BIN
docs/build/doctrees/examples/replanning_envs.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/guide/installation.doctree
vendored
BIN
docs/build/doctrees/guide/installation.doctree
vendored
Binary file not shown.
2
docs/build/html/.buildinfo
vendored
2
docs/build/html/.buildinfo
vendored
@ -1,4 +1,4 @@
|
||||
# Sphinx build info version 1
|
||||
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
|
||||
config: 28ec069496fc0ad05c8b9641549626a6
|
||||
config: 36919d67c12a677d3f16f60d980b0313
|
||||
tags: 645f666f9bcd5a90fca523b33c5a78b7
|
||||
|
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.envs.registry — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.envs.registry — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../_static/style.css" type="text/css" />
|
||||
@ -38,7 +38,7 @@
|
||||
<img src="../../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
|
4
docs/build/html/_modules/index.html
vendored
4
docs/build/html/_modules/index.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Overview: module code — Fancy Gym 0.2 documentation</title>
|
||||
<title>Overview: module code — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -38,7 +38,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
@ -32,7 +32,7 @@ since they are not avaible on PyPI yet. Install metaworld via
|
||||
|
||||
.. code:: bash
|
||||
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
|
||||
|
||||
Installation from master
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
|
||||
|
||||
.. code:: bash
|
||||
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
|
||||
|
@ -1,6 +1,6 @@
|
||||
var DOCUMENTATION_OPTIONS = {
|
||||
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
|
||||
VERSION: '0.2',
|
||||
VERSION: '0.3.0',
|
||||
LANGUAGE: 'en',
|
||||
COLLAPSE_INDEX: false,
|
||||
BUILDER: 'html',
|
||||
|
4
docs/build/html/api.html
vendored
4
docs/build/html/api.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>API — Fancy Gym 0.2 documentation</title>
|
||||
<title>API — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
4
docs/build/html/envs/dmc.html
vendored
4
docs/build/html/envs/dmc.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DeepMind Control (DMC) — Fancy Gym 0.2 documentation</title>
|
||||
<title>DeepMind Control (DMC) — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/envs/fancy/airhockey.html
vendored
4
docs/build/html/envs/fancy/airhockey.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>AirHockey — Fancy Gym 0.2 documentation</title>
|
||||
<title>AirHockey — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Classic Control — Fancy Gym 0.2 documentation</title>
|
||||
<title>Classic Control — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
4
docs/build/html/envs/fancy/index.html
vendored
4
docs/build/html/envs/fancy/index.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Fancy — Fancy Gym 0.2 documentation</title>
|
||||
<title>Fancy — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
4
docs/build/html/envs/fancy/mujoco.html
vendored
4
docs/build/html/envs/fancy/mujoco.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Mujoco — Fancy Gym 0.2 documentation</title>
|
||||
<title>Mujoco — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
4
docs/build/html/envs/meta.html
vendored
4
docs/build/html/envs/meta.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Metaworld — Fancy Gym 0.2 documentation</title>
|
||||
<title>Metaworld — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/envs/open_ai.html
vendored
4
docs/build/html/envs/open_ai.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Gymnasium — Fancy Gym 0.2 documentation</title>
|
||||
<title>Gymnasium — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
120
docs/build/html/examples/dmc.html
vendored
120
docs/build/html/examples/dmc.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DeepMind Control Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>DeepMind Control Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -126,7 +126,7 @@
|
||||
<span class="linenos"> 17</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 18</span>
|
||||
<span class="linenos"> 19</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
|
||||
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 21</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 22</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 23</span> <span class="nb">print</span><span class="p">(</span><span class="s2">"observation shape:"</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
|
||||
@ -135,7 +135,7 @@
|
||||
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 27</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 30</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 31</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 32</span>
|
||||
@ -193,58 +193,68 @@
|
||||
<span class="linenos"> 84</span> <span class="c1"># basis_generator_kwargs = {'basis_generator_type': 'rbf',</span>
|
||||
<span class="linenos"> 85</span> <span class="c1"># 'num_basis': 5</span>
|
||||
<span class="linenos"> 86</span> <span class="c1"># }</span>
|
||||
<span class="linenos"> 87</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 88</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 89</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 90</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 91</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 93</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 94</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
|
||||
<span class="linenos"> 96</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 97</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 98</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos"> 99</span>
|
||||
<span class="linenos">100</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">101</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">102</span>
|
||||
<span class="linenos">103</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">104</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">105</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">106</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">108</span>
|
||||
<span class="linenos">109</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">110</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">111</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">112</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">113</span>
|
||||
<span class="linenos">114</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">115</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">116</span>
|
||||
<span class="linenos"> 87</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 88</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 89</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 90</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 91</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 92</span>
|
||||
<span class="linenos"> 93</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 94</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 96</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
|
||||
<span class="linenos"> 97</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 98</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 99</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">100</span>
|
||||
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">103</span>
|
||||
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">109</span>
|
||||
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">114</span>
|
||||
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">117</span>
|
||||
<span class="linenos">118</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">119</span> <span class="c1"># Disclaimer: DMC environments require the seed to be specified in the beginning.</span>
|
||||
<span class="linenos">120</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.</span>
|
||||
<span class="linenos">121</span>
|
||||
<span class="linenos">122</span> <span class="c1"># For rendering DMC</span>
|
||||
<span class="linenos">123</span> <span class="c1"># export MUJOCO_GL="osmesa"</span>
|
||||
<span class="linenos">124</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="linenos">125</span>
|
||||
<span class="linenos">126</span> <span class="c1"># # Standard DMC Suite tasks</span>
|
||||
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">128</span> <span class="c1">#</span>
|
||||
<span class="linenos">129</span> <span class="c1"># # Manipulation tasks</span>
|
||||
<span class="linenos">130</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
|
||||
<span class="linenos">131</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/manipulation-reach_site_features"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">132</span> <span class="c1">#</span>
|
||||
<span class="linenos">133</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
|
||||
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control_ProMP/ball_in_cup-catch-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">135</span>
|
||||
<span class="linenos">136</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
|
||||
<span class="linenos">137</span> <span class="c1"># already registered above</span>
|
||||
<span class="linenos">138</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">118</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">119</span> <span class="c1"># # Standard DMC Suite tasks</span>
|
||||
<span class="linenos">120</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">121</span> <span class="c1">#</span>
|
||||
<span class="linenos">122</span> <span class="c1"># # Manipulation tasks</span>
|
||||
<span class="linenos">123</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
|
||||
<span class="linenos">124</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/reach_site_features"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">125</span> <span class="c1">#</span>
|
||||
<span class="linenos">126</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
|
||||
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control_ProMP/ball_in_cup-catch-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">128</span>
|
||||
<span class="linenos">129</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
|
||||
<span class="linenos">130</span> <span class="c1"># already registered above</span>
|
||||
<span class="linenos">131</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">132</span>
|
||||
<span class="linenos">133</span> <span class="c1"># # Standard DMC Suite tasks</span>
|
||||
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">135</span> <span class="c1">#</span>
|
||||
<span class="linenos">136</span> <span class="c1"># # Manipulation tasks</span>
|
||||
<span class="linenos">137</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
|
||||
<span class="linenos">138</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/reach_site_features"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">139</span> <span class="c1">#</span>
|
||||
<span class="linenos">140</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
|
||||
<span class="linenos">141</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control_ProMP/ball_in_cup-catch-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">142</span>
|
||||
<span class="linenos">143</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
|
||||
<span class="linenos">144</span> <span class="c1"># already registered above</span>
|
||||
<span class="linenos">145</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">146</span>
|
||||
<span class="linenos">147</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">148</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
28
docs/build/html/examples/general.html
vendored
28
docs/build/html/examples/general.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>General Usage Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>General Usage Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -130,7 +130,7 @@
|
||||
<span class="linenos"> 21</span>
|
||||
<span class="linenos"> 22</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 23</span>
|
||||
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
|
||||
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 25</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 26</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 27</span> <span class="nb">print</span><span class="p">(</span><span class="s2">"Observation shape: "</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
|
||||
@ -194,21 +194,21 @@
|
||||
<span class="linenos"> 85</span> <span class="c1"># do not return values above threshold</span>
|
||||
<span class="linenos"> 86</span> <span class="k">return</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">v</span><span class="p">)[:</span><span class="n">n_samples</span><span class="p">],</span> <span class="n">buffer</span><span class="o">.</span><span class="n">values</span><span class="p">()),</span>
|
||||
<span class="linenos"> 87</span>
|
||||
<span class="linenos"> 88</span>
|
||||
<span class="linenos"> 89</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos"> 90</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="linenos"> 88</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos"> 89</span> <span class="c1"># Basic gym task</span>
|
||||
<span class="linenos"> 90</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"Pendulum-v1"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 91</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># Basic gym task</span>
|
||||
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"Pendulum-v1"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># Mujoco task from framework</span>
|
||||
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"fancy/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 94</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># Mujoco task from framework</span>
|
||||
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"fancy/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># # OpenAI Mujoco task</span>
|
||||
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"HalfCheetah-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 97</span>
|
||||
<span class="linenos"> 98</span> <span class="c1"># # OpenAI Mujoco task</span>
|
||||
<span class="linenos"> 99</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"HalfCheetah-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 98</span> <span class="c1"># Vectorized multiprocessing environments</span>
|
||||
<span class="linenos"> 99</span> <span class="c1"># example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)</span>
|
||||
<span class="linenos">100</span>
|
||||
<span class="linenos">101</span> <span class="c1"># Vectorized multiprocessing environments</span>
|
||||
<span class="linenos">102</span> <span class="c1"># example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)</span>
|
||||
<span class="linenos">101</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">102</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
205
docs/build/html/examples/metaworld.html
vendored
205
docs/build/html/examples/metaworld.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Metaworld Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>Metaworld Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -111,7 +111,7 @@
|
||||
<span class="linenos"> 2</span><span class="kn">import</span> <span class="nn">fancy_gym</span>
|
||||
<span class="linenos"> 3</span>
|
||||
<span class="linenos"> 4</span>
|
||||
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">"fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">"metaworld/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 6</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 7</span><span class="sd"> Example for running a MetaWorld based env in the step based setting.</span>
|
||||
<span class="linenos"> 8</span><span class="sd"> The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always</span>
|
||||
@ -127,7 +127,7 @@
|
||||
<span class="linenos"> 18</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 19</span>
|
||||
<span class="linenos"> 20</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
|
||||
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 22</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 23</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 24</span> <span class="nb">print</span><span class="p">(</span><span class="s2">"observation shape:"</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
|
||||
@ -136,111 +136,104 @@
|
||||
<span class="linenos"> 27</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 28</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 29</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 30</span> <span class="c1"># THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM</span>
|
||||
<span class="linenos"> 31</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
|
||||
<span class="linenos"> 32</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="linenos"> 33</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 34</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 35</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 36</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos"> 37</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 38</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 39</span>
|
||||
<span class="linenos"> 40</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 41</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos"> 42</span>
|
||||
<span class="linenos"> 43</span>
|
||||
<span class="linenos"> 44</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 45</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 46</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
|
||||
<span class="linenos"> 47</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 48</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 49</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 50</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 54</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 30</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 31</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 32</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 33</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 34</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos"> 35</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 36</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="linenos"> 37</span>
|
||||
<span class="linenos"> 38</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 39</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos"> 40</span>
|
||||
<span class="linenos"> 41</span>
|
||||
<span class="linenos"> 42</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 43</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 44</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
|
||||
<span class="linenos"> 45</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 46</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 47</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 48</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
|
||||
<span class="linenos"> 49</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 50</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 54</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 56</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 58</span>
|
||||
<span class="linenos"> 59</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 60</span>
|
||||
<span class="linenos"> 61</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
|
||||
<span class="linenos"> 62</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"metaworld/button-press-v2"</span>
|
||||
<span class="linenos"> 63</span>
|
||||
<span class="linenos"> 64</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
|
||||
<span class="linenos"> 65</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
|
||||
<span class="linenos"> 66</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
|
||||
<span class="linenos"> 67</span> <span class="c1"># # For a ProMP</span>
|
||||
<span class="linenos"> 68</span> <span class="c1"># trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}</span>
|
||||
<span class="linenos"> 69</span> <span class="c1"># phase_generator_kwargs = {'phase_generator_type': 'linear'}</span>
|
||||
<span class="linenos"> 70</span> <span class="c1"># controller_kwargs = {'controller_type': 'metaworld'}</span>
|
||||
<span class="linenos"> 71</span> <span class="c1"># basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',</span>
|
||||
<span class="linenos"> 72</span> <span class="c1"># 'num_basis': 5,</span>
|
||||
<span class="linenos"> 73</span> <span class="c1"># 'num_basis_zero_start': 1</span>
|
||||
<span class="linenos"> 74</span> <span class="c1"># }</span>
|
||||
<span class="linenos"> 75</span>
|
||||
<span class="linenos"> 76</span> <span class="c1"># For a DMP</span>
|
||||
<span class="linenos"> 77</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">}</span>
|
||||
<span class="linenos"> 78</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 79</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
|
||||
<span class="linenos"> 80</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'metaworld'</span><span class="p">}</span>
|
||||
<span class="linenos"> 81</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos"> 82</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos"> 83</span> <span class="p">}</span>
|
||||
<span class="linenos"> 84</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 85</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 86</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 87</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 88</span>
|
||||
<span class="linenos"> 89</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 90</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 91</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
|
||||
<span class="linenos"> 93</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 94</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 95</span> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Metaworld render interface bug does not allow to render() fixes its interface. "</span>
|
||||
<span class="linenos"> 96</span> <span class="s2">"A temporary workaround is to alter their code in MujocoEnv render() from "</span>
|
||||
<span class="linenos"> 97</span> <span class="s2">"`if not offscreen` to `if not offscreen or offscreen == 'human'`."</span><span class="p">)</span>
|
||||
<span class="linenos"> 98</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
|
||||
<span class="linenos"> 99</span> <span class="c1"># env.render(mode="human")</span>
|
||||
<span class="linenos">100</span>
|
||||
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">103</span>
|
||||
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 59</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
|
||||
<span class="linenos"> 60</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"metaworld/button-press-v2"</span>
|
||||
<span class="linenos"> 61</span>
|
||||
<span class="linenos"> 62</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
|
||||
<span class="linenos"> 63</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
|
||||
<span class="linenos"> 64</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
|
||||
<span class="linenos"> 65</span> <span class="c1"># # For a ProMP</span>
|
||||
<span class="linenos"> 66</span> <span class="c1"># trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}</span>
|
||||
<span class="linenos"> 67</span> <span class="c1"># phase_generator_kwargs = {'phase_generator_type': 'linear'}</span>
|
||||
<span class="linenos"> 68</span> <span class="c1"># controller_kwargs = {'controller_type': 'metaworld'}</span>
|
||||
<span class="linenos"> 69</span> <span class="c1"># basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',</span>
|
||||
<span class="linenos"> 70</span> <span class="c1"># 'num_basis': 5,</span>
|
||||
<span class="linenos"> 71</span> <span class="c1"># 'num_basis_zero_start': 1</span>
|
||||
<span class="linenos"> 72</span> <span class="c1"># }</span>
|
||||
<span class="linenos"> 73</span>
|
||||
<span class="linenos"> 74</span> <span class="c1"># For a DMP</span>
|
||||
<span class="linenos"> 75</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">}</span>
|
||||
<span class="linenos"> 76</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 77</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
|
||||
<span class="linenos"> 78</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'metaworld'</span><span class="p">}</span>
|
||||
<span class="linenos"> 79</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos"> 80</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos"> 81</span> <span class="p">}</span>
|
||||
<span class="linenos"> 82</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 83</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 84</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 85</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 86</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 87</span>
|
||||
<span class="linenos"> 88</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 89</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 90</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 91</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 93</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 94</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 95</span>
|
||||
<span class="linenos"> 96</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 97</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 98</span>
|
||||
<span class="linenos"> 99</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">100</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">101</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">102</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">103</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">104</span>
|
||||
<span class="linenos">105</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">106</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">108</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="linenos">109</span>
|
||||
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">114</span>
|
||||
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">117</span>
|
||||
<span class="linenos">118</span>
|
||||
<span class="linenos">119</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">120</span> <span class="c1"># Disclaimer: MetaWorld environments require the seed to be specified in the beginning.</span>
|
||||
<span class="linenos">121</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.</span>
|
||||
<span class="linenos">122</span>
|
||||
<span class="linenos">123</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
|
||||
<span class="linenos">124</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
|
||||
<span class="linenos">125</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="linenos">126</span>
|
||||
<span class="linenos">127</span> <span class="c1"># # Standard Meta world tasks</span>
|
||||
<span class="linenos">128</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">129</span>
|
||||
<span class="linenos">130</span> <span class="c1"># # MP + MetaWorld hybrid task provided in the our framework</span>
|
||||
<span class="linenos">131</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld_ProMP/ButtonPress-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">132</span> <span class="c1">#</span>
|
||||
<span class="linenos">133</span> <span class="c1"># # Custom MetaWorld task</span>
|
||||
<span class="linenos">134</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">110</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">111</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">112</span>
|
||||
<span class="linenos">113</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">114</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
|
||||
<span class="linenos">115</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
|
||||
<span class="linenos">116</span>
|
||||
<span class="linenos">117</span> <span class="c1"># # Standard Meta world tasks</span>
|
||||
<span class="linenos">118</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">119</span>
|
||||
<span class="linenos">120</span> <span class="c1"># # MP + MetaWorld hybrid task provided in the our framework</span>
|
||||
<span class="linenos">121</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld_ProMP/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">122</span> <span class="c1">#</span>
|
||||
<span class="linenos">123</span> <span class="c1"># # Custom MetaWorld task</span>
|
||||
<span class="linenos">124</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">125</span>
|
||||
<span class="linenos">126</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">127</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
481
docs/build/html/examples/movement_primitives.html
vendored
481
docs/build/html/examples/movement_primitives.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Movement Primitives Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>Movement Primitives Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -135,252 +135,253 @@
|
||||
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 27</span>
|
||||
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">1</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 30</span>
|
||||
<span class="linenos"> 31</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
|
||||
<span class="linenos"> 32</span> <span class="c1"># such as a ProMP</span>
|
||||
<span class="linenos"> 33</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 34</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
|
||||
<span class="linenos"> 35</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal</span>
|
||||
<span class="linenos"> 36</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
|
||||
<span class="linenos"> 37</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 38</span> <span class="c1"># Aggregated returns</span>
|
||||
<span class="linenos"> 39</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 40</span>
|
||||
<span class="linenos"> 41</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 42</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 43</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 44</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 45</span>
|
||||
<span class="linenos"> 46</span>
|
||||
<span class="linenos"> 47</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 48</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 49</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
|
||||
<span class="linenos"> 50</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos"> 54</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 56</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 58</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos"> 59</span>
|
||||
<span class="linenos"> 60</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 29</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 30</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 31</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 32</span>
|
||||
<span class="linenos"> 33</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
|
||||
<span class="linenos"> 34</span> <span class="c1"># such as a ProMP</span>
|
||||
<span class="linenos"> 35</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 36</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
|
||||
<span class="linenos"> 37</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal</span>
|
||||
<span class="linenos"> 38</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
|
||||
<span class="linenos"> 39</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 40</span> <span class="c1"># Aggregated returns</span>
|
||||
<span class="linenos"> 41</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 42</span>
|
||||
<span class="linenos"> 43</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 44</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 45</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 46</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 47</span>
|
||||
<span class="linenos"> 48</span>
|
||||
<span class="linenos"> 49</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 50</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 54</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos"> 56</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 58</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos"> 59</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 60</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos"> 61</span>
|
||||
<span class="linenos"> 62</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 63</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
|
||||
<span class="linenos"> 64</span> <span class="c1"># E.g. here for way to many basis functions</span>
|
||||
<span class="linenos"> 65</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 66</span>
|
||||
<span class="linenos"> 67</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 68</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 69</span>
|
||||
<span class="linenos"> 70</span> <span class="c1"># This time rendering every trajectory</span>
|
||||
<span class="linenos"> 71</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 72</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 73</span>
|
||||
<span class="linenos"> 74</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos"> 75</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 76</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 77</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 78</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 79</span>
|
||||
<span class="linenos"> 80</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 81</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 82</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 83</span>
|
||||
<span class="linenos"> 84</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 85</span> <span class="k">return</span> <span class="n">obs</span>
|
||||
<span class="linenos"> 86</span>
|
||||
<span class="linenos"> 87</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
|
||||
<span class="linenos"> 88</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="linenos"> 89</span> <span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 90</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 91</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 92</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos"> 93</span> <span class="p">},</span>
|
||||
<span class="linenos"> 94</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 95</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos"> 96</span> <span class="p">},</span>
|
||||
<span class="linenos"> 97</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 98</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos"> 99</span> <span class="p">},</span>
|
||||
<span class="linenos">100</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">101</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">102</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">103</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">104</span> <span class="p">}</span>
|
||||
<span class="linenos">105</span> <span class="p">},</span>
|
||||
<span class="linenos">106</span> <span class="s1">'DMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">107</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">108</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">,</span>
|
||||
<span class="linenos">109</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">500</span>
|
||||
<span class="linenos">110</span> <span class="p">},</span>
|
||||
<span class="linenos">111</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">112</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos">113</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mf">2.5</span>
|
||||
<span class="linenos">114</span> <span class="p">},</span>
|
||||
<span class="linenos">115</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">116</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">117</span> <span class="p">},</span>
|
||||
<span class="linenos">118</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">119</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">120</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos">121</span> <span class="p">}</span>
|
||||
<span class="linenos">122</span> <span class="p">}</span>
|
||||
<span class="linenos">123</span> <span class="p">}</span>
|
||||
<span class="linenos">124</span>
|
||||
<span class="linenos">125</span>
|
||||
<span class="linenos">126</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">127</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">128</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
|
||||
<span class="linenos">129</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos">130</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos">131</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos">132</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos">133</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos">134</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">135</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">136</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">137</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">138</span>
|
||||
<span class="linenos">139</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 62</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 63</span>
|
||||
<span class="linenos"> 64</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 65</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
|
||||
<span class="linenos"> 66</span> <span class="c1"># E.g. here for way too many basis functions</span>
|
||||
<span class="linenos"> 67</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 68</span>
|
||||
<span class="linenos"> 69</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 70</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 71</span>
|
||||
<span class="linenos"> 72</span> <span class="c1"># This time rendering every trajectory</span>
|
||||
<span class="linenos"> 73</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 74</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 75</span>
|
||||
<span class="linenos"> 76</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos"> 77</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 78</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 79</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 80</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 81</span>
|
||||
<span class="linenos"> 82</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 83</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 84</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 85</span>
|
||||
<span class="linenos"> 86</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 87</span> <span class="k">return</span> <span class="n">obs</span>
|
||||
<span class="linenos"> 88</span>
|
||||
<span class="linenos"> 89</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
|
||||
<span class="linenos"> 90</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="linenos"> 91</span> <span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 92</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 93</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 94</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos"> 95</span> <span class="p">},</span>
|
||||
<span class="linenos"> 96</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 97</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos"> 98</span> <span class="p">},</span>
|
||||
<span class="linenos"> 99</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">100</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">101</span> <span class="p">},</span>
|
||||
<span class="linenos">102</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">103</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">104</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">105</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">106</span> <span class="p">}</span>
|
||||
<span class="linenos">107</span> <span class="p">},</span>
|
||||
<span class="linenos">108</span> <span class="s1">'DMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">109</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">110</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">,</span>
|
||||
<span class="linenos">111</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">500</span>
|
||||
<span class="linenos">112</span> <span class="p">},</span>
|
||||
<span class="linenos">113</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">114</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos">115</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mf">2.5</span>
|
||||
<span class="linenos">116</span> <span class="p">},</span>
|
||||
<span class="linenos">117</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">118</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">119</span> <span class="p">},</span>
|
||||
<span class="linenos">120</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">121</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">122</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos">123</span> <span class="p">}</span>
|
||||
<span class="linenos">124</span> <span class="p">}</span>
|
||||
<span class="linenos">125</span> <span class="p">}</span>
|
||||
<span class="linenos">126</span>
|
||||
<span class="linenos">127</span>
|
||||
<span class="linenos">128</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">129</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">130</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
|
||||
<span class="linenos">131</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos">132</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos">133</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos">134</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos">135</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos">136</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">137</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">138</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">139</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">140</span>
|
||||
<span class="linenos">141</span><span class="sd"> """</span>
|
||||
<span class="linenos">141</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">142</span>
|
||||
<span class="linenos">143</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">144</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">145</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">"fancy_DMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">146</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">147</span>
|
||||
<span class="linenos">148</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">,</span> <span class="s1">'DMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
|
||||
<span class="linenos">143</span><span class="sd"> """</span>
|
||||
<span class="linenos">144</span>
|
||||
<span class="linenos">145</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">146</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">147</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">"fancy_DMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">148</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">149</span>
|
||||
<span class="linenos">150</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">150</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">,</span> <span class="s1">'DMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
|
||||
<span class="linenos">151</span>
|
||||
<span class="linenos">152</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">153</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">154</span>
|
||||
<span class="linenos">155</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">156</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">157</span>
|
||||
<span class="linenos">158</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">159</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">160</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">161</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">162</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">163</span>
|
||||
<span class="linenos">164</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">165</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">166</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">167</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">168</span>
|
||||
<span class="linenos">169</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correctly implement .close</span>
|
||||
<span class="linenos">170</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">171</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">172</span> <span class="k">pass</span>
|
||||
<span class="linenos">173</span>
|
||||
<span class="linenos">174</span>
|
||||
<span class="linenos">175</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">176</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">177</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
|
||||
<span class="linenos">178</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">179</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">180</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">181</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">182</span>
|
||||
<span class="linenos">183</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">152</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">153</span>
|
||||
<span class="linenos">154</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">155</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">156</span>
|
||||
<span class="linenos">157</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">158</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">159</span>
|
||||
<span class="linenos">160</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">161</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">162</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">163</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">164</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">165</span>
|
||||
<span class="linenos">166</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">167</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">168</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">169</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">170</span>
|
||||
<span class="linenos">171</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correctly implement .close</span>
|
||||
<span class="linenos">172</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">173</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">174</span> <span class="k">pass</span>
|
||||
<span class="linenos">175</span>
|
||||
<span class="linenos">176</span>
|
||||
<span class="linenos">177</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">178</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">179</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
|
||||
<span class="linenos">180</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">181</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">182</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">183</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">184</span>
|
||||
<span class="linenos">185</span><span class="sd"> """</span>
|
||||
<span class="linenos">185</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">186</span>
|
||||
<span class="linenos">187</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">188</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">189</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">190</span>
|
||||
<span class="linenos">191</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">192</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">193</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos">194</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos">195</span> <span class="p">},</span>
|
||||
<span class="linenos">196</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">197</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos">198</span> <span class="p">},</span>
|
||||
<span class="linenos">199</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">200</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">201</span> <span class="p">},</span>
|
||||
<span class="linenos">202</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">203</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">204</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">205</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">206</span> <span class="p">}</span>
|
||||
<span class="linenos">207</span> <span class="p">}})</span>
|
||||
<span class="linenos">208</span>
|
||||
<span class="linenos">209</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">187</span><span class="sd"> """</span>
|
||||
<span class="linenos">188</span>
|
||||
<span class="linenos">189</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">190</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">191</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">192</span>
|
||||
<span class="linenos">193</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">194</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">195</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos">196</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos">197</span> <span class="p">},</span>
|
||||
<span class="linenos">198</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">199</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos">200</span> <span class="p">},</span>
|
||||
<span class="linenos">201</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">202</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">203</span> <span class="p">},</span>
|
||||
<span class="linenos">204</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">205</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">206</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">207</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">208</span> <span class="p">}</span>
|
||||
<span class="linenos">209</span> <span class="p">}})</span>
|
||||
<span class="linenos">210</span>
|
||||
<span class="linenos">211</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">212</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">213</span>
|
||||
<span class="linenos">214</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">215</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">216</span>
|
||||
<span class="linenos">217</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">218</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">219</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">220</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">221</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">222</span>
|
||||
<span class="linenos">223</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">224</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">225</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">226</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">227</span>
|
||||
<span class="linenos">228</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">229</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">230</span>
|
||||
<span class="linenos">231</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">232</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">233</span>
|
||||
<span class="linenos">234</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">235</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">236</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">237</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">238</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">239</span>
|
||||
<span class="linenos">240</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">241</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">242</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">243</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">244</span>
|
||||
<span class="linenos">245</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correlcty implement .close</span>
|
||||
<span class="linenos">246</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">247</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">248</span> <span class="k">pass</span>
|
||||
<span class="linenos">249</span>
|
||||
<span class="linenos">250</span>
|
||||
<span class="linenos">251</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
|
||||
<span class="linenos">252</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="linenos">253</span> <span class="c1"># DMP</span>
|
||||
<span class="linenos">254</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_DMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">255</span>
|
||||
<span class="linenos">256</span> <span class="c1"># ProMP</span>
|
||||
<span class="linenos">257</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/BoxPushingTemporalSparse-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/TableTennis4D-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">260</span>
|
||||
<span class="linenos">261</span> <span class="c1"># ProDMP with Replanning</span>
|
||||
<span class="linenos">262</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennis4DReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennisWindReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">265</span>
|
||||
<span class="linenos">266</span> <span class="c1"># Altered basis functions</span>
|
||||
<span class="linenos">267</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">268</span>
|
||||
<span class="linenos">269</span> <span class="c1"># Custom MP</span>
|
||||
<span class="linenos">270</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">271</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">272</span>
|
||||
<span class="linenos">273</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">274</span> <span class="n">main</span><span class="p">()</span>
|
||||
<span class="linenos">211</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">212</span>
|
||||
<span class="linenos">213</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">214</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">215</span>
|
||||
<span class="linenos">216</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">217</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">218</span>
|
||||
<span class="linenos">219</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">220</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">221</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">222</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">223</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">224</span>
|
||||
<span class="linenos">225</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">226</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">227</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">228</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">229</span>
|
||||
<span class="linenos">230</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">231</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">232</span>
|
||||
<span class="linenos">233</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">234</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">235</span>
|
||||
<span class="linenos">236</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">237</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">238</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">239</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">240</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">241</span>
|
||||
<span class="linenos">242</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">243</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">244</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">245</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">246</span>
|
||||
<span class="linenos">247</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correlcty implement .close</span>
|
||||
<span class="linenos">248</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">249</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">250</span> <span class="k">pass</span>
|
||||
<span class="linenos">251</span>
|
||||
<span class="linenos">252</span>
|
||||
<span class="linenos">253</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">254</span> <span class="c1"># DMP</span>
|
||||
<span class="linenos">255</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_DMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">256</span>
|
||||
<span class="linenos">257</span> <span class="c1"># ProMP</span>
|
||||
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/BoxPushingTemporalSparse-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">260</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/TableTennis4D-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">261</span>
|
||||
<span class="linenos">262</span> <span class="c1"># ProDMP with Replanning</span>
|
||||
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennis4DReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">265</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennisWindReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">266</span>
|
||||
<span class="linenos">267</span> <span class="c1"># Altered basis functions</span>
|
||||
<span class="linenos">268</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">269</span>
|
||||
<span class="linenos">270</span> <span class="c1"># Custom MP</span>
|
||||
<span class="linenos">271</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">272</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">273</span>
|
||||
<span class="linenos">274</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">275</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>MP Params Tuning Example — Fancy Gym 0.2 documentation</title>
|
||||
<title>MP Params Tuning Example — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
30
docs/build/html/examples/open_ai.html
vendored
30
docs/build/html/examples/open_ai.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>OpenAI Envs Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>OpenAI Envs Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -122,27 +122,27 @@
|
||||
<span class="linenos">13</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">14</span>
|
||||
<span class="linenos">15</span><span class="sd"> """</span>
|
||||
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
|
||||
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">17</span>
|
||||
<span class="linenos">18</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">19</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos">20</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">21</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
|
||||
<span class="linenos">22</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos">24</span> <span class="k">else</span><span class="p">:</span>
|
||||
<span class="linenos">25</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">26</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">27</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">28</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">29</span>
|
||||
<span class="linenos">30</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">31</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
|
||||
<span class="linenos">32</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">33</span>
|
||||
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">24</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">25</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">26</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">27</span>
|
||||
<span class="linenos">28</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">29</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
|
||||
<span class="linenos">30</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">31</span>
|
||||
<span class="linenos">32</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">33</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"gym_ProMP/Reacher-v2"</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">34</span>
|
||||
<span class="linenos">35</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">36</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"gym_ProMP/Reacher-v2"</span><span class="p">)</span>
|
||||
<span class="linenos">36</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>PD Control Gain Tuning Example — Fancy Gym 0.2 documentation</title>
|
||||
<title>PD Control Gain Tuning Example — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
69
docs/build/html/examples/replanning_envs.html
vendored
69
docs/build/html/examples/replanning_envs.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Replanning Example — Fancy Gym 0.2 documentation</title>
|
||||
<title>Replanning Example — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -112,24 +112,24 @@
|
||||
<span class="linenos"> 3</span>
|
||||
<span class="linenos"> 4</span>
|
||||
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
|
||||
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 7</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 8</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 9</span> <span class="n">done</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="linenos">10</span> <span class="k">while</span> <span class="n">done</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
|
||||
<span class="linenos">11</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">12</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">13</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">14</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos">15</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">16</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 9</span> <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||||
<span class="linenos">10</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">11</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">12</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">13</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">14</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">15</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">16</span> <span class="k">break</span>
|
||||
<span class="linenos">17</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">18</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">19</span>
|
||||
<span class="linenos">20</span>
|
||||
<span class="linenos">21</span><span class="k">def</span> <span class="nf">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">22</span> <span class="c1"># id for a step-based environment</span>
|
||||
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"BoxPushingDense-v0"</span>
|
||||
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/BoxPushingDense-v0"</span>
|
||||
<span class="linenos">24</span>
|
||||
<span class="linenos">25</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">box_pushing</span><span class="o">.</span><span class="n">mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
|
||||
<span class="linenos">26</span>
|
||||
@ -147,31 +147,34 @@
|
||||
<span class="linenos">38</span> <span class="s1">'replanning_schedule'</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">pos</span><span class="p">,</span> <span class="n">vel</span><span class="p">,</span> <span class="n">obs</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="n">t</span> <span class="o">%</span> <span class="mi">25</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span>
|
||||
<span class="linenos">39</span> <span class="s1">'condition_on_desired'</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span>
|
||||
<span class="linenos">40</span>
|
||||
<span class="linenos">41</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">42</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">43</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">44</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos">45</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">46</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos">47</span>
|
||||
<span class="linenos">48</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">49</span>
|
||||
<span class="linenos">50</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
|
||||
<span class="linenos">51</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">52</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">53</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">54</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">55</span>
|
||||
<span class="linenos">56</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">57</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">58</span>
|
||||
<span class="linenos">41</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">42</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">43</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">44</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">45</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos">46</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">47</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">48</span>
|
||||
<span class="linenos">49</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">50</span>
|
||||
<span class="linenos">51</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
|
||||
<span class="linenos">52</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">53</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">54</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">55</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">56</span>
|
||||
<span class="linenos">57</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">58</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">59</span>
|
||||
<span class="linenos">60</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
||||
<span class="linenos">60</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">61</span> <span class="c1"># run a registered replanning environment</span>
|
||||
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">63</span>
|
||||
<span class="linenos">64</span> <span class="c1"># run a custom replanning environment</span>
|
||||
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">66</span>
|
||||
<span class="linenos">67</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
||||
<span class="linenos">68</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.envs — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.envs — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -39,7 +39,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.register — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.register — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.upgrade — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.upgrade — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -40,7 +40,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/genindex.html
vendored
4
docs/build/html/genindex.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Index — Fancy Gym 0.2 documentation</title>
|
||||
<title>Index — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -38,7 +38,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
4
docs/build/html/guide/basic_usage.html
vendored
4
docs/build/html/guide/basic_usage.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Basic Usage — Fancy Gym 0.2 documentation</title>
|
||||
<title>Basic Usage — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/guide/episodic_rl.html
vendored
4
docs/build/html/guide/episodic_rl.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>What is Episodic RL? — Fancy Gym 0.2 documentation</title>
|
||||
<title>What is Episodic RL? — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
8
docs/build/html/guide/installation.html
vendored
8
docs/build/html/guide/installation.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Installation — Fancy Gym 0.2 documentation</title>
|
||||
<title>Installation — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -135,7 +135,7 @@ pip<span class="w"> </span>install<span class="w"> </span><span class="s1">'
|
||||
</div>
|
||||
<p>Pip can not automatically install up-to-date versions of metaworld,
|
||||
since they are not available on PyPI yet. Install metaworld via</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
@ -169,7 +169,7 @@ pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </s
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Metaworld has to be installed manually with</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
4
docs/build/html/guide/upgrading_envs.html
vendored
4
docs/build/html/guide/upgrading_envs.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Creating new MP Environments — Fancy Gym 0.2 documentation</title>
|
||||
<title>Creating new MP Environments — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/index.html
vendored
4
docs/build/html/index.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Fancy Gym — Fancy Gym 0.2 documentation</title>
|
||||
<title>Fancy Gym — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -40,7 +40,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
BIN
docs/build/html/objects.inv
vendored
BIN
docs/build/html/objects.inv
vendored
Binary file not shown.
4
docs/build/html/py-modindex.html
vendored
4
docs/build/html/py-modindex.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Python Module Index — Fancy Gym 0.2 documentation</title>
|
||||
<title>Python Module Index — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
4
docs/build/html/search.html
vendored
4
docs/build/html/search.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Search — Fancy Gym 0.2 documentation</title>
|
||||
<title>Search — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="#" method="get">
|
||||
|
2
docs/build/html/searchindex.js
vendored
2
docs/build/html/searchindex.js
vendored
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user