Merge branch 'master' into bruce_port_envs

This commit is contained in:
Dominik Moritz Roth 2024-03-20 11:06:30 +01:00
commit a33640abf6
66 changed files with 721 additions and 589 deletions

View File

@ -1,26 +0,0 @@
# Guards the release branch: a pull request targeting `release` only passes
# when `git tag --contains` reports at least one tag for the PR's base commit.
name: Ensure Tagged Commits on Release

on:
  pull_request:
    branches: [release]

jobs:
  check_tag:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          # Full history, so the tag lookup below is not cut short by a
          # shallow clone.
          fetch-depth: 0

      - name: Check if base commit of PR is tagged
        run: |
          # The event payload carries the SHA of the commit the PR is based on.
          BASE_COMMIT=$(jq -r .pull_request.base.sha < "$GITHUB_EVENT_PATH")
          TAG=$(git tag --contains $BASE_COMMIT)
          # Guard clause: report success and stop as soon as a tag was found.
          if [ -n "$TAG" ]; then
            echo "Base commit of PR is tagged. Check passed."
            exit 0
          fi
          echo "Base commit of PR is not tagged. PRs onto release must be tagged with the version number."
          exit 1

View File

@ -0,0 +1,52 @@
# Gate for pull requests onto `release`: the PR's head commit must carry a git
# tag equal to the version declared under [project] in pyproject.toml.
name: Ensure Version Consistency on PR to Release

on:
  pull_request:
    branches:
      - release

jobs:
  check_version_and_tag:
    runs-on: ubuntu-latest
    strategy:
      # fail-fast only governs cancellation of sibling matrix jobs. A failing
      # step already stops the remaining steps of this job by default.
      fail-fast: true
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Necessary to fetch all tags for comparison

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install toml

      - name: Extract version from pyproject.toml
        run: |
          echo "Extracting version from pyproject.toml"
          VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])')
          echo "Version in pyproject.toml is $VERSION"
          # Exported so the comparison step can read it.
          echo "VERSION=$VERSION" >> "$GITHUB_ENV"

      - name: Get tag for the PR's head commit
        run: |
          PR_HEAD_SHA=$(jq -r .pull_request.head.sha < "$GITHUB_EVENT_PATH")
          # --points-at lists only tags placed directly on the head commit;
          # --contains would also list tags that sit on later commits.
          # Several tags are joined onto one line, because a multi-line value
          # does not fit the KEY=value format written to $GITHUB_ENV.
          TAG=$(git tag --points-at "$PR_HEAD_SHA" | paste -sd ' ' -)
          echo "Tag on PR's head commit is $TAG"
          echo "TAG=$TAG" >> "$GITHUB_ENV"

      - name: Compare version and tag
        run: |
          if [ -z "$TAG" ]; then
            echo "Head commit of PR is not tagged. Ensure the head commit of PRs onto release is tagged with the version number."
            exit 1
          fi
          # TAG may hold several space-separated tag names (git forbids spaces
          # and glob characters in tag names); one match is sufficient.
          for candidate in $TAG; do
            if [ "$candidate" = "$VERSION" ]; then
              echo "Version and git tag match. Check passed."
              exit 0
            fi
          done
          echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG)."
          exit 1

View File

@ -8,6 +8,8 @@ on:
jobs:
publish:
name: Publish to PyPI
strategy:
fail-fast: true # Terminate the job immediately if any step fails
runs-on: ubuntu-latest
steps:
- name: Check out code
@ -15,19 +17,24 @@ jobs:
with:
fetch-depth: 0 # This fetches all history for all branches and tags
- name: Check if commit is tagged
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.x"
- name: Validate version against tag
run: |
VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])')
TAG=$(git tag --contains HEAD)
if [ -z "$TAG" ]; then
echo "Commit is not tagged. Failing the workflow."
exit 1
fi
echo "Commit is tagged. Proceeding with the workflow."
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.x"
if [ "$VERSION" != "$TAG" ]; then
echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG). Failing the workflow."
exit 1
fi
echo "Version and commit tag match. Proceeding with the workflow."
- name: Install pypa/build/setuptools/twine
run: >-
@ -36,9 +43,6 @@ jobs:
build setuptools twine
--user
- name: Prevent fallback onto setup.py
run: rm setup.py
- name: Build a binary wheel and a source tarball
run: python3 -m build

View File

@ -10,25 +10,25 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org) (a mainta
**Key Features**:
- **New Challenging Environments**: `fancy_gym` includes several new environments ([Panda Box Pushing](https://dominik-roth.eu/fancy/envs/fancy/mujoco.html#box-pushing), [Table Tennis](https://dominik-roth.eu/fancy/envs/fancy/mujoco.html#table-tennis), [etc.](https://dominik-roth.eu/fancy/envs/fancy/index.html)) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research.
- **New Challenging Environments**: `fancy_gym` includes several new environments ([Panda Box Pushing](https://alrhub.github.io/fancy_gym/envs/fancy/mujoco.html#box-pushing), [Table Tennis](https://alrhub.github.io/fancy_gym/envs/fancy/mujoco.html#table-tennis), [etc.](https://alrhub.github.io/fancy_gym/envs/fancy/index.html)) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research.
- **Support for Movement Primitives**: `fancy_gym` supports a range of movement primitives (MPs), including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP).
- **Upgrade to Movement Primitives**: With our framework, it's straightforward to transform standard Gymnasium environments into environments that support movement primitives.
- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](dominik-roth.eu/fancy/envs/dmc.html)
and [Metaworld](https://dominik-roth.eu/fancy/envs/meta.html), whether you want to use them in the regular step-based setting or using MPs.
- **Contribute Your Own Environments**: If youre inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://dominik-roth.eu/fancy/guide/upgrading_envs.html) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`.
- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://alrhub.github.io/fancy_gym/envs/dmc.html)
and [Metaworld](https://alrhub.github.io/fancy_gym/envs/meta.html), whether you want to use them in the regular step-based setting or using MPs.
- **Contribute Your Own Environments**: If you're inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://alrhub.github.io/fancy_gym/guide/upgrading_envs.html) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`.
## Quickstart Guide
| &#x26A0; We recommend installing `fancy_gym` into a virtual environment as provided by [venv](https://docs.python.org/3/library/venv.html), [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/). |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
Install via pip [or use an alternative installation method](https://dominik-roth.eu/fancy/guide/installation.html)
Install via pip [or use an alternative installation method](https://alrhub.github.io/fancy_gym/guide/installation.html)
```bash
pip install 'fancy_gym[all]'
```
Try out one of our step-based environments [or explore our other envs](https://dominik-roth.eu/fancy/envs/fancy/index.html)
Try out one of our step-based environments [or explore our other envs](https://alrhub.github.io/fancy_gym/envs/fancy/index.html)
```python
import gymnasium as gym
@ -48,7 +48,7 @@ Try out one of our step-based environments [or explore our other envs](https://d
observation, info = env.reset()
```
Explore the MP-based variant [or learn more about Movement Primitives (MPs)](https://dominik-roth.eu/fancy/guide/episodic_rl.html)
Explore the MP-based variant [or learn more about Movement Primitives (MPs)](https://alrhub.github.io/fancy_gym/guide/episodic_rl.html)
```python
import gymnasium as gym
@ -66,7 +66,7 @@ Explore the MP-based variant [or learn more about Movement Primitives (MPs)](htt
## Documentation
Documentation for `fancy_gym` can be found [here](https://dominik-roth.eu/fancy); Usage Examples can be found [here](https://dominik-roth.eu/fancy/examples/general.html).
Documentation for `fancy_gym` can be found [here](https://alrhub.github.io/fancy_gym/); Usage Examples can be found [here](https://alrhub.github.io/fancy_gym/examples/general.html).
## Citing the Project

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 28ec069496fc0ad05c8b9641549626a6
config: 36919d67c12a677d3f16f60d980b0313
tags: 645f666f9bcd5a90fca523b33c5a78b7

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.envs.registry &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.envs.registry &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="../../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Overview: module code &mdash; Fancy Gym 0.2 documentation</title>
<title>Overview: module code &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -32,7 +32,7 @@ since they are not available on PyPI yet. Install metaworld via
.. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
Installation from master
~~~~~~~~~~~~~~~~~~~~~~~~
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
.. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld

View File

@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
VERSION: '0.2',
VERSION: '0.3.0',
LANGUAGE: 'en',
COLLAPSE_INDEX: false,
BUILDER: 'html',

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>API &mdash; Fancy Gym 0.2 documentation</title>
<title>API &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DeepMind Control (DMC) &mdash; Fancy Gym 0.2 documentation</title>
<title>DeepMind Control (DMC) &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AirHockey &mdash; Fancy Gym 0.2 documentation</title>
<title>AirHockey &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Classic Control &mdash; Fancy Gym 0.2 documentation</title>
<title>Classic Control &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Fancy &mdash; Fancy Gym 0.2 documentation</title>
<title>Fancy &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Mujoco &mdash; Fancy Gym 0.2 documentation</title>
<title>Mujoco &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Metaworld &mdash; Fancy Gym 0.2 documentation</title>
<title>Metaworld &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Gymnasium &mdash; Fancy Gym 0.2 documentation</title>
<title>Gymnasium &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DeepMind Control Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>DeepMind Control Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -126,7 +126,7 @@
<span class="linenos"> 17</span><span class="sd"> Returns:</span>
<span class="linenos"> 18</span>
<span class="linenos"> 19</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 21</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 22</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 23</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -135,7 +135,7 @@
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 27</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 30</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 31</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 32</span>
@ -193,58 +193,68 @@
<span class="linenos"> 84</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;rbf&#39;,</span>
<span class="linenos"> 85</span> <span class="c1"># &#39;num_basis&#39;: 5</span>
<span class="linenos"> 86</span> <span class="c1"># }</span>
<span class="linenos"> 87</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 88</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 89</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 90</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 91</span>
<span class="linenos"> 92</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 93</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 94</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 95</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
<span class="linenos"> 96</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 97</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 98</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos"> 99</span>
<span class="linenos">100</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">101</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">102</span>
<span class="linenos">103</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">104</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">105</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">106</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">108</span>
<span class="linenos">109</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">110</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">111</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">112</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">113</span>
<span class="linenos">114</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">115</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">116</span>
<span class="linenos"> 87</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 88</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 89</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 90</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 91</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 92</span>
<span class="linenos"> 93</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 94</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 95</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 96</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
<span class="linenos"> 97</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 98</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 99</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">100</span>
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">103</span>
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">109</span>
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">114</span>
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">117</span>
<span class="linenos">118</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">119</span> <span class="c1"># Disclaimer: DMC environments require the seed to be specified in the beginning.</span>
<span class="linenos">120</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.</span>
<span class="linenos">121</span>
<span class="linenos">122</span> <span class="c1"># For rendering DMC</span>
<span class="linenos">123</span> <span class="c1"># export MUJOCO_GL=&quot;osmesa&quot;</span>
<span class="linenos">124</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
<span class="linenos">125</span>
<span class="linenos">126</span> <span class="c1"># # Standard DMC Suite tasks</span>
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">128</span> <span class="c1">#</span>
<span class="linenos">129</span> <span class="c1"># # Manipulation tasks</span>
<span class="linenos">130</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
<span class="linenos">131</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/manipulation-reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span> <span class="c1">#</span>
<span class="linenos">133</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">135</span>
<span class="linenos">136</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">137</span> <span class="c1"># already registered above</span>
<span class="linenos">138</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">118</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="linenos">119</span> <span class="c1"># # Standard DMC Suite tasks</span>
<span class="linenos">120</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">121</span> <span class="c1">#</span>
<span class="linenos">122</span> <span class="c1"># # Manipulation tasks</span>
<span class="linenos">123</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
<span class="linenos">124</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">125</span> <span class="c1">#</span>
<span class="linenos">126</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">128</span>
<span class="linenos">129</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">130</span> <span class="c1"># already registered above</span>
<span class="linenos">131</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span>
<span class="linenos">133</span> <span class="c1"># # Standard DMC Suite tasks</span>
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">135</span> <span class="c1">#</span>
<span class="linenos">136</span> <span class="c1"># # Manipulation tasks</span>
<span class="linenos">137</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
<span class="linenos">138</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">139</span> <span class="c1">#</span>
<span class="linenos">140</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">141</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">142</span>
<span class="linenos">143</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">144</span> <span class="c1"># already registered above</span>
<span class="linenos">145</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">146</span>
<span class="linenos">147</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">148</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>General Usage Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>General Usage Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -130,7 +130,7 @@
<span class="linenos"> 21</span>
<span class="linenos"> 22</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 23</span>
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 25</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 26</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 27</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Observation shape: &quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -194,21 +194,21 @@
<span class="linenos"> 85</span> <span class="c1"># do not return values above threshold</span>
<span class="linenos"> 86</span> <span class="k">return</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">v</span><span class="p">)[:</span><span class="n">n_samples</span><span class="p">],</span> <span class="n">buffer</span><span class="o">.</span><span class="n">values</span><span class="p">()),</span>
<span class="linenos"> 87</span>
<span class="linenos"> 88</span>
<span class="linenos"> 89</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos"> 90</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
<span class="linenos"> 88</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="linenos"> 89</span> <span class="c1"># Basic gym task</span>
<span class="linenos"> 90</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;Pendulum-v1&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 91</span>
<span class="linenos"> 92</span> <span class="c1"># Basic gym task</span>
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;Pendulum-v1&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 92</span> <span class="c1"># Mujoco task from framework</span>
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;fancy/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 94</span>
<span class="linenos"> 95</span> <span class="c1"># Mujoco task from framework</span>
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;fancy/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 95</span> <span class="c1"># # OpenAI Mujoco task</span>
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;HalfCheetah-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 97</span>
<span class="linenos"> 98</span> <span class="c1"># # OpenAI Mujoco task</span>
<span class="linenos"> 99</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;HalfCheetah-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 98</span> <span class="c1"># Vectorized multiprocessing environments</span>
<span class="linenos"> 99</span> <span class="c1"># example_async(env_id=&quot;HoleReacher-v0&quot;, n_cpu=2, seed=int(&#39;533D&#39;, 16), n_samples=2 * 200)</span>
<span class="linenos">100</span>
<span class="linenos">101</span> <span class="c1"># Vectorized multiprocessing environments</span>
<span class="linenos">102</span> <span class="c1"># example_async(env_id=&quot;HoleReacher-v0&quot;, n_cpu=2, seed=int(&#39;533D&#39;, 16), n_samples=2 * 200)</span>
<span class="linenos">101</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">102</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Metaworld Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>Metaworld Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -111,7 +111,7 @@
<span class="linenos"> 2</span><span class="kn">import</span> <span class="nn">fancy_gym</span>
<span class="linenos"> 3</span>
<span class="linenos"> 4</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">&quot;fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 6</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 7</span><span class="sd"> Example for running a MetaWorld based env in the step based setting.</span>
<span class="linenos"> 8</span><span class="sd"> The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always</span>
@ -127,7 +127,7 @@
<span class="linenos"> 18</span><span class="sd"> Returns:</span>
<span class="linenos"> 19</span>
<span class="linenos"> 20</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 22</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 23</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 24</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -136,111 +136,104 @@
<span class="linenos"> 27</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 28</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 29</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 30</span> <span class="c1"># THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM</span>
<span class="linenos"> 31</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
<span class="linenos"> 32</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="linenos"> 33</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 34</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 35</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 36</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos"> 37</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 38</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 39</span>
<span class="linenos"> 40</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 41</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos"> 42</span>
<span class="linenos"> 43</span>
<span class="linenos"> 44</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 45</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 46</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos"> 47</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 48</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 49</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span>
<span class="linenos"> 50</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
<span class="linenos"> 51</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 52</span><span class="sd"> Args:</span>
<span class="linenos"> 53</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 54</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 55</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 30</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 31</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 32</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 33</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 34</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos"> 35</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 36</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
<span class="linenos"> 37</span>
<span class="linenos"> 38</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 39</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos"> 40</span>
<span class="linenos"> 41</span>
<span class="linenos"> 42</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 43</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 44</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos"> 45</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 46</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 47</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span>
<span class="linenos"> 48</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
<span class="linenos"> 49</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 50</span><span class="sd"> Args:</span>
<span class="linenos"> 51</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 52</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 53</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 54</span>
<span class="linenos"> 55</span><span class="sd"> Returns:</span>
<span class="linenos"> 56</span>
<span class="linenos"> 57</span><span class="sd"> Returns:</span>
<span class="linenos"> 57</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 58</span>
<span class="linenos"> 59</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 60</span>
<span class="linenos"> 61</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
<span class="linenos"> 62</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;metaworld/button-press-v2&quot;</span>
<span class="linenos"> 63</span>
<span class="linenos"> 64</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
<span class="linenos"> 65</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
<span class="linenos"> 66</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos"> 67</span> <span class="c1"># # For a ProMP</span>
<span class="linenos"> 68</span> <span class="c1"># trajectory_generator_kwargs = {&#39;trajectory_generator_type&#39;: &#39;promp&#39;}</span>
<span class="linenos"> 69</span> <span class="c1"># phase_generator_kwargs = {&#39;phase_generator_type&#39;: &#39;linear&#39;}</span>
<span class="linenos"> 70</span> <span class="c1"># controller_kwargs = {&#39;controller_type&#39;: &#39;metaworld&#39;}</span>
<span class="linenos"> 71</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;zero_rbf&#39;,</span>
<span class="linenos"> 72</span> <span class="c1"># &#39;num_basis&#39;: 5,</span>
<span class="linenos"> 73</span> <span class="c1"># &#39;num_basis_zero_start&#39;: 1</span>
<span class="linenos"> 74</span> <span class="c1"># }</span>
<span class="linenos"> 75</span>
<span class="linenos"> 76</span> <span class="c1"># For a DMP</span>
<span class="linenos"> 77</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">}</span>
<span class="linenos"> 78</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos"> 79</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
<span class="linenos"> 80</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;metaworld&#39;</span><span class="p">}</span>
<span class="linenos"> 81</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos"> 82</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos"> 83</span> <span class="p">}</span>
<span class="linenos"> 84</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 85</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 86</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 87</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 88</span>
<span class="linenos"> 89</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 90</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 91</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 92</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
<span class="linenos"> 93</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 94</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 95</span> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Metaworld render interface bug does not allow to render() fixes its interface. &quot;</span>
<span class="linenos"> 96</span> <span class="s2">&quot;A temporary workaround is to alter their code in MujocoEnv render() from &quot;</span>
<span class="linenos"> 97</span> <span class="s2">&quot;`if not offscreen` to `if not offscreen or offscreen == &#39;human&#39;`.&quot;</span><span class="p">)</span>
<span class="linenos"> 98</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
<span class="linenos"> 99</span> <span class="c1"># env.render(mode=&quot;human&quot;)</span>
<span class="linenos">100</span>
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">103</span>
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 59</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
<span class="linenos"> 60</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;metaworld/button-press-v2&quot;</span>
<span class="linenos"> 61</span>
<span class="linenos"> 62</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
<span class="linenos"> 63</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
<span class="linenos"> 64</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos"> 65</span> <span class="c1"># # For a ProMP</span>
<span class="linenos"> 66</span> <span class="c1"># trajectory_generator_kwargs = {&#39;trajectory_generator_type&#39;: &#39;promp&#39;}</span>
<span class="linenos"> 67</span> <span class="c1"># phase_generator_kwargs = {&#39;phase_generator_type&#39;: &#39;linear&#39;}</span>
<span class="linenos"> 68</span> <span class="c1"># controller_kwargs = {&#39;controller_type&#39;: &#39;metaworld&#39;}</span>
<span class="linenos"> 69</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;zero_rbf&#39;,</span>
<span class="linenos"> 70</span> <span class="c1"># &#39;num_basis&#39;: 5,</span>
<span class="linenos"> 71</span> <span class="c1"># &#39;num_basis_zero_start&#39;: 1</span>
<span class="linenos"> 72</span> <span class="c1"># }</span>
<span class="linenos"> 73</span>
<span class="linenos"> 74</span> <span class="c1"># For a DMP</span>
<span class="linenos"> 75</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">}</span>
<span class="linenos"> 76</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos"> 77</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
<span class="linenos"> 78</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;metaworld&#39;</span><span class="p">}</span>
<span class="linenos"> 79</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos"> 80</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos"> 81</span> <span class="p">}</span>
<span class="linenos"> 82</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 83</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 84</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 85</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 86</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 87</span>
<span class="linenos"> 88</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 89</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 90</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 91</span> <span class="c1"># It is also possible to change the mode multiple times when</span>
<span class="linenos"> 92</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 93</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 94</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 95</span>
<span class="linenos"> 96</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 97</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 98</span>
<span class="linenos"> 99</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">100</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">101</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">102</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">103</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">104</span>
<span class="linenos">105</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">106</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">108</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
<span class="linenos">109</span>
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">114</span>
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">117</span>
<span class="linenos">118</span>
<span class="linenos">119</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">120</span> <span class="c1"># Disclaimer: MetaWorld environments require the seed to be specified in the beginning.</span>
<span class="linenos">121</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.</span>
<span class="linenos">122</span>
<span class="linenos">123</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
<span class="linenos">124</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
<span class="linenos">125</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">126</span>
<span class="linenos">127</span> <span class="c1"># # Standard Meta world tasks</span>
<span class="linenos">128</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">129</span>
<span class="linenos">130</span> <span class="c1"># # MP + MetaWorld hybrid task provided in the our framework</span>
<span class="linenos">131</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld_ProMP/ButtonPress-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span> <span class="c1">#</span>
<span class="linenos">133</span> <span class="c1"># # Custom MetaWorld task</span>
<span class="linenos">134</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">110</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">111</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">112</span>
<span class="linenos">113</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="linenos">114</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
<span class="linenos">115</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
<span class="linenos">116</span>
<span class="linenos">117</span> <span class="c1"># # Standard Meta world tasks</span>
<span class="linenos">118</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">119</span>
<span class="linenos">120</span> <span class="c1"># # MP + MetaWorld hybrid task provided in our framework</span>
<span class="linenos">121</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld_ProMP/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">122</span> <span class="c1">#</span>
<span class="linenos">123</span> <span class="c1"># # Custom MetaWorld task</span>
<span class="linenos">124</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">125</span>
<span class="linenos">126</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">127</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Movement Primitives Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>Movement Primitives Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -135,252 +135,253 @@
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 27</span>
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">1</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 30</span>
<span class="linenos"> 31</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
<span class="linenos"> 32</span> <span class="c1"># such as a ProMP</span>
<span class="linenos"> 33</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 34</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
<span class="linenos"> 35</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The &#39;reward&#39; is equal</span>
<span class="linenos"> 36</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
<span class="linenos"> 37</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 38</span> <span class="c1"># Aggregated returns</span>
<span class="linenos"> 39</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 40</span>
<span class="linenos"> 41</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 42</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 43</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 44</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 45</span>
<span class="linenos"> 46</span>
<span class="linenos"> 47</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 48</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 49</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos"> 50</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 51</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 52</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos"> 53</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos"> 54</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 55</span><span class="sd"> Args:</span>
<span class="linenos"> 56</span><span class="sd"> seed: seed</span>
<span class="linenos"> 57</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 58</span><span class="sd"> render: Render the episode</span>
<span class="linenos"> 59</span>
<span class="linenos"> 60</span><span class="sd"> Returns:</span>
<span class="linenos"> 29</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 30</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 31</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 32</span>
<span class="linenos"> 33</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
<span class="linenos"> 34</span> <span class="c1"># such as a ProMP</span>
<span class="linenos"> 35</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 36</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
<span class="linenos"> 37</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The &#39;reward&#39; is equal</span>
<span class="linenos"> 38</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
<span class="linenos"> 39</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 40</span> <span class="c1"># Aggregated returns</span>
<span class="linenos"> 41</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 42</span>
<span class="linenos"> 43</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 44</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 45</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 46</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 47</span>
<span class="linenos"> 48</span>
<span class="linenos"> 49</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 50</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 51</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
<span class="linenos"> 52</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 53</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 54</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos"> 55</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos"> 56</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 57</span><span class="sd"> Args:</span>
<span class="linenos"> 58</span><span class="sd"> seed: seed</span>
<span class="linenos"> 59</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 60</span><span class="sd"> render: Render the episode</span>
<span class="linenos"> 61</span>
<span class="linenos"> 62</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 63</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
<span class="linenos"> 64</span> <span class="c1"># E.g. here for way too many basis functions</span>
<span class="linenos"> 65</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 66</span>
<span class="linenos"> 67</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 68</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 69</span>
<span class="linenos"> 70</span> <span class="c1"># This time rendering every trajectory</span>
<span class="linenos"> 71</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 72</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 73</span>
<span class="linenos"> 74</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos"> 75</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 76</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 77</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 78</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 79</span>
<span class="linenos"> 80</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 81</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 82</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 83</span>
<span class="linenos"> 84</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 85</span> <span class="k">return</span> <span class="n">obs</span>
<span class="linenos"> 86</span>
<span class="linenos"> 87</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
<span class="linenos"> 88</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
<span class="linenos"> 89</span> <span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 90</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 91</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos"> 92</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos"> 93</span> <span class="p">},</span>
<span class="linenos"> 94</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 95</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos"> 96</span> <span class="p">},</span>
<span class="linenos"> 97</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 98</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos"> 99</span> <span class="p">},</span>
<span class="linenos">100</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">101</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">102</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">103</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">104</span> <span class="p">}</span>
<span class="linenos">105</span> <span class="p">},</span>
<span class="linenos">106</span> <span class="s1">&#39;DMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">107</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">108</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">,</span>
<span class="linenos">109</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">500</span>
<span class="linenos">110</span> <span class="p">},</span>
<span class="linenos">111</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">112</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos">113</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mf">2.5</span>
<span class="linenos">114</span> <span class="p">},</span>
<span class="linenos">115</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">116</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">117</span> <span class="p">},</span>
<span class="linenos">118</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">119</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos">120</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos">121</span> <span class="p">}</span>
<span class="linenos">122</span> <span class="p">}</span>
<span class="linenos">123</span> <span class="p">}</span>
<span class="linenos">124</span>
<span class="linenos">125</span>
<span class="linenos">126</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">127</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">128</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
<span class="linenos">129</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos">130</span><span class="sd"> Hence, this also allows adjusting hyperparameters of the movement primitives.</span>
<span class="linenos">131</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos">132</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos">133</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos">134</span><span class="sd"> Args:</span>
<span class="linenos">135</span><span class="sd"> seed: seed</span>
<span class="linenos">136</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">137</span><span class="sd"> render: Render the episode</span>
<span class="linenos">138</span>
<span class="linenos">139</span><span class="sd"> Returns:</span>
<span class="linenos"> 62</span><span class="sd"> Returns:</span>
<span class="linenos"> 63</span>
<span class="linenos"> 64</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 65</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
<span class="linenos"> 66</span> <span class="c1"># E.g. here for way too many basis functions</span>
<span class="linenos"> 67</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 68</span>
<span class="linenos"> 69</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 70</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 71</span>
<span class="linenos"> 72</span> <span class="c1"># This time rendering every trajectory</span>
<span class="linenos"> 73</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 74</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 75</span>
<span class="linenos"> 76</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos"> 77</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 78</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 79</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 80</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 81</span>
<span class="linenos"> 82</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 83</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 84</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 85</span>
<span class="linenos"> 86</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 87</span> <span class="k">return</span> <span class="n">obs</span>
<span class="linenos"> 88</span>
<span class="linenos"> 89</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
<span class="linenos"> 90</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
<span class="linenos"> 91</span> <span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 92</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 93</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos"> 94</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos"> 95</span> <span class="p">},</span>
<span class="linenos"> 96</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 97</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos"> 98</span> <span class="p">},</span>
<span class="linenos"> 99</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">100</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">101</span> <span class="p">},</span>
<span class="linenos">102</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">103</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">104</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">105</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">106</span> <span class="p">}</span>
<span class="linenos">107</span> <span class="p">},</span>
<span class="linenos">108</span> <span class="s1">&#39;DMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">109</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">110</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">,</span>
<span class="linenos">111</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">500</span>
<span class="linenos">112</span> <span class="p">},</span>
<span class="linenos">113</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">114</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos">115</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mf">2.5</span>
<span class="linenos">116</span> <span class="p">},</span>
<span class="linenos">117</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">118</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">119</span> <span class="p">},</span>
<span class="linenos">120</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">121</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos">122</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos">123</span> <span class="p">}</span>
<span class="linenos">124</span> <span class="p">}</span>
<span class="linenos">125</span> <span class="p">}</span>
<span class="linenos">126</span>
<span class="linenos">127</span>
<span class="linenos">128</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">129</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">130</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
<span class="linenos">131</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos">132</span><span class="sd"> Hence, this also allows adjusting hyperparameters of the movement primitives.</span>
<span class="linenos">133</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos">134</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos">135</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos">136</span><span class="sd"> Args:</span>
<span class="linenos">137</span><span class="sd"> seed: seed</span>
<span class="linenos">138</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">139</span><span class="sd"> render: Render the episode</span>
<span class="linenos">140</span>
<span class="linenos">141</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">141</span><span class="sd"> Returns:</span>
<span class="linenos">142</span>
<span class="linenos">143</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">144</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">145</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_DMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">146</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">147</span>
<span class="linenos">148</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">,</span> <span class="s1">&#39;DMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
<span class="linenos">143</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">144</span>
<span class="linenos">145</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">146</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">147</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_DMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">148</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">149</span>
<span class="linenos">150</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">150</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">,</span> <span class="s1">&#39;DMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
<span class="linenos">151</span>
<span class="linenos">152</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">153</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">154</span>
<span class="linenos">155</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">156</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">157</span>
<span class="linenos">158</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">159</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">160</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">161</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">162</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">163</span>
<span class="linenos">164</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">165</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">166</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">167</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">168</span>
<span class="linenos">169</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correctly implement .close</span>
<span class="linenos">170</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">171</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">172</span> <span class="k">pass</span>
<span class="linenos">173</span>
<span class="linenos">174</span>
<span class="linenos">175</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">176</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">177</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
<span class="linenos">178</span><span class="sd"> Args:</span>
<span class="linenos">179</span><span class="sd"> seed: seed</span>
<span class="linenos">180</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">181</span><span class="sd"> render: Render the episode</span>
<span class="linenos">182</span>
<span class="linenos">183</span><span class="sd"> Returns:</span>
<span class="linenos">152</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">153</span>
<span class="linenos">154</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">155</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">156</span>
<span class="linenos">157</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">158</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">159</span>
<span class="linenos">160</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">161</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">162</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">163</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">164</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">165</span>
<span class="linenos">166</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">167</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">168</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">169</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">170</span>
<span class="linenos">171</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correctly implement .close</span>
<span class="linenos">172</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">173</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">174</span> <span class="k">pass</span>
<span class="linenos">175</span>
<span class="linenos">176</span>
<span class="linenos">177</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">178</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">179</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
<span class="linenos">180</span><span class="sd"> Args:</span>
<span class="linenos">181</span><span class="sd"> seed: seed</span>
<span class="linenos">182</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">183</span><span class="sd"> render: Render the episode</span>
<span class="linenos">184</span>
<span class="linenos">185</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">185</span><span class="sd"> Returns:</span>
<span class="linenos">186</span>
<span class="linenos">187</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">188</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">189</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">190</span>
<span class="linenos">191</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">192</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">193</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos">194</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos">195</span> <span class="p">},</span>
<span class="linenos">196</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">197</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos">198</span> <span class="p">},</span>
<span class="linenos">199</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">200</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">201</span> <span class="p">},</span>
<span class="linenos">202</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">203</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">204</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">205</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">206</span> <span class="p">}</span>
<span class="linenos">207</span> <span class="p">}})</span>
<span class="linenos">208</span>
<span class="linenos">209</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">187</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">188</span>
<span class="linenos">189</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">190</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">191</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">192</span>
<span class="linenos">193</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">194</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">195</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos">196</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos">197</span> <span class="p">},</span>
<span class="linenos">198</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">199</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos">200</span> <span class="p">},</span>
<span class="linenos">201</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">202</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">203</span> <span class="p">},</span>
<span class="linenos">204</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">205</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">206</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">207</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">208</span> <span class="p">}</span>
<span class="linenos">209</span> <span class="p">}})</span>
<span class="linenos">210</span>
<span class="linenos">211</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">212</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">213</span>
<span class="linenos">214</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">215</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">216</span>
<span class="linenos">217</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">218</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">219</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">220</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">221</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">222</span>
<span class="linenos">223</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">224</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">225</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">226</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">227</span>
<span class="linenos">228</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">229</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">230</span>
<span class="linenos">231</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">232</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">233</span>
<span class="linenos">234</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">235</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">236</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">237</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">238</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">239</span>
<span class="linenos">240</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">241</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">242</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">243</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">244</span>
<span class="linenos">245</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correctly implement .close</span>
<span class="linenos">246</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">247</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">248</span> <span class="k">pass</span>
<span class="linenos">249</span>
<span class="linenos">250</span>
<span class="linenos">251</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
<span class="linenos">252</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">253</span> <span class="c1"># DMP</span>
<span class="linenos">254</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_DMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">255</span>
<span class="linenos">256</span> <span class="c1"># ProMP</span>
<span class="linenos">257</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/BoxPushingTemporalSparse-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/TableTennis4D-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">260</span>
<span class="linenos">261</span> <span class="c1"># ProDMP with Replanning</span>
<span class="linenos">262</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennis4DReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennisWindReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">265</span>
<span class="linenos">266</span> <span class="c1"># Altered basis functions</span>
<span class="linenos">267</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">268</span>
<span class="linenos">269</span> <span class="c1"># Custom MP</span>
<span class="linenos">270</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">271</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">272</span>
<span class="linenos">273</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">274</span> <span class="n">main</span><span class="p">()</span>
<span class="linenos">211</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">212</span>
<span class="linenos">213</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">214</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">215</span>
<span class="linenos">216</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">217</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">218</span>
<span class="linenos">219</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">220</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">221</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">222</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">223</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">224</span>
<span class="linenos">225</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">226</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">227</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">228</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">229</span>
<span class="linenos">230</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">231</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">232</span>
<span class="linenos">233</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">234</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">235</span>
<span class="linenos">236</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">237</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">238</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">239</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">240</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">241</span>
<span class="linenos">242</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">243</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">244</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">245</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">246</span>
<span class="linenos">247</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correctly implement .close</span>
<span class="linenos">248</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">249</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">250</span> <span class="k">pass</span>
<span class="linenos">251</span>
<span class="linenos">252</span>
<span class="linenos">253</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="linenos">254</span> <span class="c1"># DMP</span>
<span class="linenos">255</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_DMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">256</span>
<span class="linenos">257</span> <span class="c1"># ProMP</span>
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/BoxPushingTemporalSparse-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">260</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/TableTennis4D-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">261</span>
<span class="linenos">262</span> <span class="c1"># ProDMP with Replanning</span>
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennis4DReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">265</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennisWindReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">266</span>
<span class="linenos">267</span> <span class="c1"># Altered basis functions</span>
<span class="linenos">268</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">269</span>
<span class="linenos">270</span> <span class="c1"># Custom MP</span>
<span class="linenos">271</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">272</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">273</span>
<span class="linenos">274</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">275</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MP Params Tuning Example &mdash; Fancy Gym 0.2 documentation</title>
<title>MP Params Tuning Example &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>OpenAI Envs Examples &mdash; Fancy Gym 0.2 documentation</title>
<title>OpenAI Envs Examples &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -122,27 +122,27 @@
<span class="linenos">13</span><span class="sd"> Returns:</span>
<span class="linenos">14</span>
<span class="linenos">15</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">17</span>
<span class="linenos">18</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">19</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">20</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">21</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
<span class="linenos">22</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">24</span> <span class="k">else</span><span class="p">:</span>
<span class="linenos">25</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">26</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">27</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">28</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">29</span>
<span class="linenos">30</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">31</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
<span class="linenos">32</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">33</span>
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">24</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">25</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">26</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">27</span>
<span class="linenos">28</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">29</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
<span class="linenos">30</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">31</span>
<span class="linenos">32</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">33</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;gym_ProMP/Reacher-v2&quot;</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">34</span>
<span class="linenos">35</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">36</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;gym_ProMP/Reacher-v2&quot;</span><span class="p">)</span>
<span class="linenos">36</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>PD Control Gain Tuning Example &mdash; Fancy Gym 0.2 documentation</title>
<title>PD Control Gain Tuning Example &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Replanning Example &mdash; Fancy Gym 0.2 documentation</title>
<title>Replanning Example &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -112,24 +112,24 @@
<span class="linenos"> 3</span>
<span class="linenos"> 4</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 7</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 8</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 9</span> <span class="n">done</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">10</span> <span class="k">while</span> <span class="n">done</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
<span class="linenos">11</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">12</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">13</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">14</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">15</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">16</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 9</span> <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="linenos">10</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">11</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">12</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">13</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">14</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">15</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">16</span> <span class="k">break</span>
<span class="linenos">17</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">18</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">19</span>
<span class="linenos">20</span>
<span class="linenos">21</span><span class="k">def</span> <span class="nf">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">22</span> <span class="c1"># id for a step-based environment</span>
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;BoxPushingDense-v0&quot;</span>
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/BoxPushingDense-v0&quot;</span>
<span class="linenos">24</span>
<span class="linenos">25</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">box_pushing</span><span class="o">.</span><span class="n">mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos">26</span>
@ -147,31 +147,34 @@
<span class="linenos">38</span> <span class="s1">&#39;replanning_schedule&#39;</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">pos</span><span class="p">,</span> <span class="n">vel</span><span class="p">,</span> <span class="n">obs</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="n">t</span> <span class="o">%</span> <span class="mi">25</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span>
<span class="linenos">39</span> <span class="s1">&#39;condition_on_desired&#39;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span>
<span class="linenos">40</span>
<span class="linenos">41</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
<span class="linenos">42</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos">43</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos">44</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">45</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">46</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">47</span>
<span class="linenos">48</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">49</span>
<span class="linenos">50</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
<span class="linenos">51</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">52</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">53</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">54</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">55</span>
<span class="linenos">56</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">57</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">58</span>
<span class="linenos">41</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">42</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
<span class="linenos">43</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos">44</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos">45</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">46</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">47</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">48</span>
<span class="linenos">49</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">50</span>
<span class="linenos">51</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
<span class="linenos">52</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">53</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">54</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">55</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">56</span>
<span class="linenos">57</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">58</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">59</span>
<span class="linenos">60</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="linenos">60</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="linenos">61</span> <span class="c1"># run a registered replanning environment</span>
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">63</span>
<span class="linenos">64</span> <span class="c1"># run a custom replanning environment</span>
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">66</span>
<span class="linenos">67</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="linenos">68</span> <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.envs &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.envs &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -39,7 +39,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.register &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.register &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.upgrade &mdash; Fancy Gym 0.2 documentation</title>
<title>fancy_gym.upgrade &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -40,7 +40,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Index &mdash; Fancy Gym 0.2 documentation</title>
<title>Index &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Basic Usage &mdash; Fancy Gym 0.2 documentation</title>
<title>Basic Usage &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>What is Episodic RL? &mdash; Fancy Gym 0.2 documentation</title>
<title>What is Episodic RL? &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Installation &mdash; Fancy Gym 0.2 documentation</title>
<title>Installation &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -135,7 +135,7 @@ pip<span class="w"> </span>install<span class="w"> </span><span class="s1">&#39;
</div>
<p>Pip cannot automatically install up-to-date versions of metaworld,
since they are not available on PyPI yet. Install metaworld via</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
</pre></div>
</div>
</section>
@ -169,7 +169,7 @@ pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </s
</pre></div>
</div>
<p>Metaworld has to be installed manually with</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
</pre></div>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Creating new MP Environments &mdash; Fancy Gym 0.2 documentation</title>
<title>Creating new MP Environments &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Fancy Gym &mdash; Fancy Gym 0.2 documentation</title>
<title>Fancy Gym &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -40,7 +40,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

Binary file not shown.

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Python Module Index &mdash; Fancy Gym 0.2 documentation</title>
<title>Python Module Index &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -3,7 +3,7 @@
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Search &mdash; Fancy Gym 0.2 documentation</title>
<title>Search &mdash; Fancy Gym 0.3.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/>
</a>
<div class="version">
0.2
0.3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="#" method="get">

File diff suppressed because one or more lines are too long

View File

@ -1,13 +1,17 @@
# This conf.py is in large parts inspired by the one used by stable-baselines 3
import toml
import datetime
project = 'Fancy Gym'
author = 'Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou'
copyright = f'2020-{datetime.date.today().year}, {author}'
release = '0.2' # The full version, including alpha/beta/rc tags
version = '0.2' # The short X.Y version
pyproject_content = toml.load("../../pyproject.toml")
proj_version = pyproject_content["project"]["version"]
release = proj_version # The full version, including alpha/beta/rc tags
version = proj_version # The short X.Y version
extensions = [
'myst_parser',
@ -50,4 +54,4 @@ html_context = {
}
def setup(app):
app.add_css_file("style.css")
app.add_css_file("style.css")

View File

@ -32,7 +32,7 @@ since they are not avaible on PyPI yet. Install metaworld via
.. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
Installation from master
~~~~~~~~~~~~~~~~~~~~~~~~
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
.. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld

View File

@ -115,6 +115,7 @@ class AntJumpEnv(AntEnvCustomXML):
contact_force_range=contact_force_range,
reset_noise_scale=reset_noise_scale,
exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs)
self.render_active = False
def step(self, action):
self.current_step += 1
@ -153,8 +154,15 @@ class AntJumpEnv(AntEnvCustomXML):
}
truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return obs, reward, terminated, truncated, info
def render(self):
self.render_active = True
return super().render()
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)

View File

@ -44,6 +44,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self)
self._steps = 0
# Small Context -> Easier. Todo: Should we do different versions?
# self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "beerpong_wo_cup.xml")
@ -89,7 +90,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
observation_space=self.observation_space,
**kwargs
)
utils.EzPickle.__init__(self)
self.render_active = False
@property
def start_pos(self):
@ -169,8 +170,15 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return ob, reward, terminated, truncated, infos
def render(self):
self.render_active = True
return super().render()
def _get_obs(self):
theta = self.data.qpos.flat[:7].copy()
theta_dot = self.data.qvel.flat[:7].copy()

View File

@ -4,6 +4,7 @@ import numpy as np
from gymnasium import utils, spaces
from gymnasium.envs.mujoco import MujocoEnv
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import calculate_jerk_profile, calculate_mean_squared_jerk, calculate_dimensionless_jerk, calculate_maximum_jerk
@ -62,6 +63,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
frame_skip=self.frame_skip,
observation_space=self.observation_space, **kwargs)
self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
self.render_active = False
def step(self, action):
action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
@ -116,8 +118,15 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
terminated = episode_end and infos['is_success']
truncated = episode_end and not infos['is_success']
if self.render_active and self.render_mode=='human':
self.render()
return obs, reward, terminated, truncated, infos
def render(self):
self.render_active = True
return super().render()
def calculate_smoothness_metrics(self, velocity_profile, dt):
"""
Calculates the smoothness metrics for the given velocity profile.

View File

@ -60,7 +60,11 @@ class HalfCheetahEnvCustomXML(HalfCheetahEnv):
default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs,
)
self.render_active = False
def render(self):
self.render_active = True
return super().render()
class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
"""
@ -120,6 +124,9 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
'max_height': self.max_height
}
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info
def _get_obs(self):

View File

@ -88,6 +88,12 @@ class HopperEnvCustomXML(HopperEnv):
**kwargs,
)
self.render_active = False
def render(self):
self.render_active = True
return super().render()
class HopperJumpEnv(HopperEnvCustomXML):
"""
@ -201,6 +207,10 @@ class HopperJumpEnv(HopperEnvCustomXML):
healthy=self.is_healthy,
contact_dist=self.contact_dist or 0
)
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info
def _get_obs(self):

View File

@ -140,6 +140,9 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
truncated = self.current_step >= self.max_episode_steps and not terminated
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info
def _get_obs(self):

View File

@ -61,6 +61,8 @@ class HopperThrowEnv(HopperEnvCustomXML):
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
**kwargs)
self.render_active = False
def step(self, action):
self.current_step += 1
self.do_simulation(action, self.frame_skip)
@ -94,8 +96,15 @@ class HopperThrowEnv(HopperEnvCustomXML):
}
truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info
def render(self):
self.render_active = True
return super().render()
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)

View File

@ -68,6 +68,7 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
reset_noise_scale=reset_noise_scale,
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
**kwargs)
self.render_active = False
def step(self, action):
@ -118,8 +119,15 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
}
truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info
def render(self):
self.render_active = True
return super().render()
def _get_obs(self):
return np.append(super()._get_obs(), self.basket_x)

View File

@ -47,6 +47,8 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
**kwargs
)
self.render_active = False
def step(self, action):
self._steps += 1
@ -77,8 +79,15 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
goal=self.goal if hasattr(self, "goal") else None
)
if self.render_active and self.render_mode=='human':
self.render()
return ob, reward, terminated, truncated, info
def render(self):
self.render_active = True
return super().render()
def distance_reward(self):
vec = self.get_body_com("fingertip") - self.get_body_com("target")
return -self._reward_weight * np.linalg.norm(vec)

View File

@ -83,6 +83,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
observation_space=self.observation_space,
**kwargs)
self.render_active = False
if ctxt_dim == 2:
self.context_bounds = CONTEXT_BOUNDS_2DIMS
elif ctxt_dim == 4:
@ -170,8 +172,15 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS
if self.render_active and self.render_mode=='human':
self.render()
return self._get_obs(), reward, terminated, truncated, info
def render(self):
self.render_active = True
return super().render()
def _contact_checker(self, id_1, id_2):
for coni in range(0, self.data.ncon):
con = self.data.contact[coni]

View File

@ -79,6 +79,8 @@ class Walker2dEnvCustomXML(Walker2dEnv):
**kwargs,
)
self.render_active = False
class Walker2dJumpEnv(Walker2dEnvCustomXML):
"""
@ -145,8 +147,15 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML):
}
truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info
def render(self):
self.render_active = True
return super().render()
def _get_obs(self):
return np.append(super()._get_obs(), self.goal)

View File

@ -3,14 +3,14 @@ import fancy_gym
def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False):
env = gym.make(env_name)
env = gym.make(env_name, render_mode='human' if render else None)
env.reset(seed=seed)
for i in range(iterations):
while True:
ac = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(ac)
if render:
env.render(mode="human")
env.render()
if terminated or truncated:
env.reset()
break
@ -38,13 +38,13 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
'condition_on_desired': True}
base_env = gym.make(base_env_id)
base_env = gym.make(base_env_id, render_mode='human' if render else None)
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
seed=seed)
if render:
env.render(mode="human")
env.render()
obs = env.reset()

View File

@ -17,7 +17,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T
Returns:
"""
env = gym.make(env_id)
env = gym.make(env_id, render_mode='human' if render else None)
rewards = 0
obs = env.reset(seed=seed)
print("observation shape:", env.observation_space.shape)
@ -26,7 +26,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T
for i in range(iterations):
ac = env.action_space.sample()
if render:
env.render(mode="human")
env.render()
obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward
@ -84,7 +84,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
# 'num_basis': 5
# }
base_env = gym.make(base_env_id)
base_env = gym.make(base_env_id, render_mode='human' if render else None)
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
@ -96,7 +96,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# It is also possible to change the mode multiple times when
# e.g. only every nth trajectory should be displayed.
if render:
env.render(mode="human")
env.render()
rewards = 0
obs = env.reset()
@ -115,7 +115,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
env.close()
del env
def main(render = True):
def main(render = False):
# # Standard DMC Suite tasks
example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
#

View File

@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
"""
env = gym.make(env_id)
env = gym.make(env_id, render_mode='human' if render else None)
rewards = 0
obs = env.reset(seed=seed)
print("Observation shape: ", env.observation_space.shape)
@ -85,7 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16),
# do not return values above threshold
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
def main(render = True):
def main(render = False):
# Basic gym task
example_general("Pendulum-v1", seed=10, iterations=200, render=render)

View File

@ -2,7 +2,7 @@ import gymnasium as gym
import fancy_gym
def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, render=True):
"""
Example for running a MetaWorld based env in the step based setting.
The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
@ -18,7 +18,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
Returns:
"""
env = gym.make(env_id)
env = gym.make(env_id, render_mode='human' if render else None)
rewards = 0
obs = env.reset(seed=seed)
print("observation shape:", env.observation_space.shape)
@ -27,9 +27,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
for i in range(iterations):
ac = env.action_space.sample()
if render:
# THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
# TODO: Remove this, when Metaworld fixes its interface.
env.render(False)
env.render()
obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward
if terminated or truncated:
@ -81,7 +79,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
basis_generator_kwargs = {'basis_generator_type': 'rbf',
'num_basis': 5
}
base_env = gym.make(base_env_id)
base_env = gym.make(base_env_id, render_mode='human' if render else None)
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
@ -93,7 +91,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
# It is also possible to change the mode multiple times when
# e.g. only every nth trajectory should be displayed.
if render:
env.render(mode="human")
env.render()
rewards = 0
obs = env.reset(seed=seed)

View File

@ -13,15 +13,13 @@ def example_mp(env_name, seed=1, render=True):
Returns:
"""
env = gym.make(env_name)
env = gym.make(env_name, render_mode='human' if render else None)
returns = 0
obs = env.reset(seed=seed)
# number of samples/full trajectories (multiple environment steps)
for i in range(10):
if render and i % 2 == 0:
env.render(mode="human")
else:
env.render()
ac = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(ac)

View File

@ -1,6 +1,6 @@
[project]
name = "fancy_gym"
version = "0.1.4"
version = "0.3.0"
description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches."
readme = "README.md"
authors = [
@ -26,6 +26,7 @@ classifiers = [
]
dependencies = [
"toml",
"mp_pytorch<=0.1.3",
"mujoco==2.3.3",
"gymnasium[mujoco]>=0.26.0"

View File

@ -1,5 +1,6 @@
# We still provide a setup.py for backwards compatibility.
# But the pyproject.toml should be preferred.
import toml
import itertools
from pathlib import Path
from typing import List
@ -8,6 +9,9 @@ from setuptools import setup, find_packages
print('[!] You are currently installing/building fancy_gym via setup.py. This is only provided for backwards-compatability. Please use the pyproject.toml instead.')
pyproject_content = toml.load("pyproject.toml")
project_version = pyproject_content["project"]["version"]
# Environment-specific dependencies for dmc and metaworld
extras = {
'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'],
@ -38,7 +42,7 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]:
setup(
author='Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou',
name='fancy_gym',
version='0.1.0',
version=project_version,
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Science/Research',
@ -55,6 +59,7 @@ setup(
],
extras_require=extras,
install_requires=[
'toml',
'mp_pytorch<=0.1.3',
'mujoco==2.3.3',
'gymnasium[mujoco]>=0.26.0'