Merge branch 'master' into bruce_port_envs
This commit is contained in:
commit
a33640abf6
26
.github/workflows/ensure-release-tagged.yaml
vendored
26
.github/workflows/ensure-release-tagged.yaml
vendored
@ -1,26 +0,0 @@
|
||||
name: Ensure Tagged Commits on Release
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- release
|
||||
|
||||
jobs:
|
||||
check_tag:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check if base commit of PR is tagged
|
||||
run: |
|
||||
BASE_COMMIT=$(jq -r .pull_request.base.sha < "$GITHUB_EVENT_PATH")
|
||||
TAG=$(git tag --contains $BASE_COMMIT)
|
||||
if [ -z "$TAG" ]; then
|
||||
echo "Base commit of PR is not tagged. PRs onto release must be tagged with the version number."
|
||||
exit 1
|
||||
fi
|
||||
echo "Base commit of PR is tagged. Check passed."
|
||||
|
52
.github/workflows/ensure-version-consistency.yaml
vendored
Normal file
52
.github/workflows/ensure-version-consistency.yaml
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
name: Ensure Version Consistency on PR to Release
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- release
|
||||
|
||||
jobs:
|
||||
check_version_and_tag:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: true # Only affects matrix strategies: cancels the remaining matrix jobs when one fails (steps already stop on failure by default)
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Necessary to fetch all tags for comparison
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install toml
|
||||
|
||||
- name: Extract version from pyproject.toml
|
||||
run: |
|
||||
echo "Extracting version from pyproject.toml"
|
||||
VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])')
|
||||
echo "Version in pyproject.toml is $VERSION"
|
||||
echo "VERSION=$VERSION" >> $GITHUB_ENV
|
||||
|
||||
- name: Get tag for the PR's head commit
|
||||
run: |
|
||||
PR_HEAD_SHA=$(jq -r .pull_request.head.sha < "$GITHUB_EVENT_PATH")
|
||||
TAG=$(git tag --contains $PR_HEAD_SHA)
|
||||
echo "Tag on PR's head commit is $TAG"
|
||||
echo "TAG=$TAG" >> $GITHUB_ENV
|
||||
|
||||
- name: Compare version and tag
|
||||
run: |
|
||||
if [ -z "$TAG" ]; then
|
||||
echo "Head commit of PR is not tagged. Ensure the head commit of PRs onto release is tagged with the version number."
|
||||
exit 1
|
||||
elif [ "$VERSION" != "$TAG" ]; then
|
||||
echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG)."
|
||||
exit 1
|
||||
else
|
||||
echo "Version and git tag match. Check passed."
|
||||
fi
|
24
.github/workflows/publish-to-pypi.yml
vendored
24
.github/workflows/publish-to-pypi.yml
vendored
@ -8,6 +8,8 @@ on:
|
||||
jobs:
|
||||
publish:
|
||||
name: Publish to PyPI
|
||||
strategy:
|
||||
fail-fast: true # Only affects matrix strategies: cancels the remaining matrix jobs when one fails (steps already stop on failure by default)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out code
|
||||
@ -15,19 +17,24 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0 # This fetches all history for all branches and tags
|
||||
|
||||
- name: Check if commit is tagged
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.x"
|
||||
|
||||
- name: Validate version against tag
|
||||
run: |
|
||||
VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])')
|
||||
TAG=$(git tag --contains HEAD)
|
||||
if [ -z "$TAG" ]; then
|
||||
echo "Commit is not tagged. Failing the workflow."
|
||||
exit 1
|
||||
fi
|
||||
echo "Commit is tagged. Proceeding with the workflow."
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.x"
|
||||
if [ "$VERSION" != "$TAG" ]; then
|
||||
echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG). Failing the workflow."
|
||||
exit 1
|
||||
fi
|
||||
echo "Version and commit tag match. Proceeding with the workflow."
|
||||
|
||||
- name: Install pypa/build/setuptools/twine
|
||||
run: >-
|
||||
@ -36,9 +43,6 @@ jobs:
|
||||
build setuptools twine
|
||||
--user
|
||||
|
||||
- name: Prevent fallback onto setup.py
|
||||
run: rm setup.py
|
||||
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
|
||||
|
16
README.md
16
README.md
@ -10,25 +10,25 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org) (a mainta
|
||||
|
||||
**Key Features**:
|
||||
|
||||
- **New Challenging Environments**: `fancy_gym` includes several new environments ([Panda Box Pushing](https://dominik-roth.eu/fancy/envs/fancy/mujoco.html#box-pushing), [Table Tennis](https://dominik-roth.eu/fancy/envs/fancy/mujoco.html#table-tennis), [etc.](https://dominik-roth.eu/fancy/envs/fancy/index.html)) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research.
|
||||
- **New Challenging Environments**: `fancy_gym` includes several new environments ([Panda Box Pushing](https://alrhub.github.io/fancy_gym/envs/fancy/mujoco.html#box-pushing), [Table Tennis](https://alrhub.github.io/fancy_gym/envs/fancy/mujoco.html#table-tennis), [etc.](https://alrhub.github.io/fancy_gym/envs/fancy/index.html)) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research.
|
||||
- **Support for Movement Primitives**: `fancy_gym` supports a range of movement primitives (MPs), including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP).
|
||||
- **Upgrade to Movement Primitives**: With our framework, it’s straightforward to transform standard Gymnasium environments into environments that support movement primitives.
|
||||
- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](dominik-roth.eu/fancy/envs/dmc.html)
|
||||
and [Metaworld](https://dominik-roth.eu/fancy/envs/meta.html), whether you want to use them in the regular step-based setting or using MPs.
|
||||
- **Contribute Your Own Environments**: If you’re inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://dominik-roth.eu/fancy/guide/upgrading_envs.html) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`.
|
||||
- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://alrhub.github.io/fancy_gym/envs/dmc.html)
|
||||
and [Metaworld](https://alrhub.github.io/fancy_gym/envs/meta.html), whether you want to use them in the regular step-based setting or using MPs.
|
||||
- **Contribute Your Own Environments**: If you’re inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://alrhub.github.io/fancy_gym/guide/upgrading_envs.html) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`.
|
||||
|
||||
## Quickstart Guide
|
||||
|
||||
| ⚠ We recommend installing `fancy_gym` into a virtual environment as provided by [venv](https://docs.python.org/3/library/venv.html), [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/). |
|
||||
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
|
||||
Install via pip [or use an alternative installation method](https://dominik-roth.eu/fancy/guide/installation.html)
|
||||
Install via pip [or use an alternative installation method](https://alrhub.github.io/fancy_gym/guide/installation.html)
|
||||
|
||||
```bash
|
||||
pip install 'fancy_gym[all]'
|
||||
```
|
||||
|
||||
Try out one of our step-based environments [or explore our other envs](https://dominik-roth.eu/fancy/envs/fancy/index.html)
|
||||
Try out one of our step-based environments [or explore our other envs](https://alrhub.github.io/fancy_gym/envs/fancy/index.html)
|
||||
|
||||
```python
|
||||
import gymnasium as gym
|
||||
@ -48,7 +48,7 @@ Try out one of our step-based environments [or explore our other envs](https://d
|
||||
observation, info = env.reset()
|
||||
```
|
||||
|
||||
Explore the MP-based variant [or learn more about Movement Primitives (MPs)](https://dominik-roth.eu/fancy/guide/episodic_rl.html)
|
||||
Explore the MP-based variant [or learn more about Movement Primitives (MPs)](https://alrhub.github.io/fancy_gym/guide/episodic_rl.html)
|
||||
|
||||
```python
|
||||
import gymnasium as gym
|
||||
@ -66,7 +66,7 @@ Explore the MP-based variant [or learn more about Movement Primitives (MPs)](htt
|
||||
|
||||
## Documentation
|
||||
|
||||
Documentation for `fancy_gym` can be found [here](https://dominik-roth.eu/fancy); Usage Examples can be found [here](https://dominik-roth.eu/fancy/examples/general.html).
|
||||
Documentation for `fancy_gym` can be found [here](https://alrhub.github.io/fancy_gym/); Usage Examples can be found [here](https://alrhub.github.io/fancy_gym/examples/general.html).
|
||||
|
||||
## Citing the Project
|
||||
|
||||
|
BIN
docs/build/doctrees/environment.pickle
vendored
BIN
docs/build/doctrees/environment.pickle
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/dmc.doctree
vendored
BIN
docs/build/doctrees/examples/dmc.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/general.doctree
vendored
BIN
docs/build/doctrees/examples/general.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/metaworld.doctree
vendored
BIN
docs/build/doctrees/examples/metaworld.doctree
vendored
Binary file not shown.
Binary file not shown.
BIN
docs/build/doctrees/examples/open_ai.doctree
vendored
BIN
docs/build/doctrees/examples/open_ai.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/examples/replanning_envs.doctree
vendored
BIN
docs/build/doctrees/examples/replanning_envs.doctree
vendored
Binary file not shown.
BIN
docs/build/doctrees/guide/installation.doctree
vendored
BIN
docs/build/doctrees/guide/installation.doctree
vendored
Binary file not shown.
2
docs/build/html/.buildinfo
vendored
2
docs/build/html/.buildinfo
vendored
@ -1,4 +1,4 @@
|
||||
# Sphinx build info version 1
|
||||
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
|
||||
config: 28ec069496fc0ad05c8b9641549626a6
|
||||
config: 36919d67c12a677d3f16f60d980b0313
|
||||
tags: 645f666f9bcd5a90fca523b33c5a78b7
|
||||
|
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.envs.registry — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.envs.registry — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../../_static/style.css" type="text/css" />
|
||||
@ -38,7 +38,7 @@
|
||||
<img src="../../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
|
4
docs/build/html/_modules/index.html
vendored
4
docs/build/html/_modules/index.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Overview: module code — Fancy Gym 0.2 documentation</title>
|
||||
<title>Overview: module code — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -38,7 +38,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
@ -32,7 +32,7 @@ since they are not available on PyPI yet. Install metaworld via
|
||||
|
||||
.. code:: bash
|
||||
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
|
||||
|
||||
Installation from master
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
|
||||
|
||||
.. code:: bash
|
||||
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
|
||||
|
@ -1,6 +1,6 @@
|
||||
var DOCUMENTATION_OPTIONS = {
|
||||
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
|
||||
VERSION: '0.2',
|
||||
VERSION: '0.3.0',
|
||||
LANGUAGE: 'en',
|
||||
COLLAPSE_INDEX: false,
|
||||
BUILDER: 'html',
|
||||
|
4
docs/build/html/api.html
vendored
4
docs/build/html/api.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>API — Fancy Gym 0.2 documentation</title>
|
||||
<title>API — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
4
docs/build/html/envs/dmc.html
vendored
4
docs/build/html/envs/dmc.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DeepMind Control (DMC) — Fancy Gym 0.2 documentation</title>
|
||||
<title>DeepMind Control (DMC) — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/envs/fancy/airhockey.html
vendored
4
docs/build/html/envs/fancy/airhockey.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>AirHockey — Fancy Gym 0.2 documentation</title>
|
||||
<title>AirHockey — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Classic Control — Fancy Gym 0.2 documentation</title>
|
||||
<title>Classic Control — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
4
docs/build/html/envs/fancy/index.html
vendored
4
docs/build/html/envs/fancy/index.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Fancy — Fancy Gym 0.2 documentation</title>
|
||||
<title>Fancy — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
4
docs/build/html/envs/fancy/mujoco.html
vendored
4
docs/build/html/envs/fancy/mujoco.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Mujoco — Fancy Gym 0.2 documentation</title>
|
||||
<title>Mujoco — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
|
4
docs/build/html/envs/meta.html
vendored
4
docs/build/html/envs/meta.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Metaworld — Fancy Gym 0.2 documentation</title>
|
||||
<title>Metaworld — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/envs/open_ai.html
vendored
4
docs/build/html/envs/open_ai.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Gymnasium — Fancy Gym 0.2 documentation</title>
|
||||
<title>Gymnasium — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
120
docs/build/html/examples/dmc.html
vendored
120
docs/build/html/examples/dmc.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DeepMind Control Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>DeepMind Control Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -126,7 +126,7 @@
|
||||
<span class="linenos"> 17</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 18</span>
|
||||
<span class="linenos"> 19</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
|
||||
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 21</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 22</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 23</span> <span class="nb">print</span><span class="p">(</span><span class="s2">"observation shape:"</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
|
||||
@ -135,7 +135,7 @@
|
||||
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 27</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 30</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 31</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 32</span>
|
||||
@ -193,58 +193,68 @@
|
||||
<span class="linenos"> 84</span> <span class="c1"># basis_generator_kwargs = {'basis_generator_type': 'rbf',</span>
|
||||
<span class="linenos"> 85</span> <span class="c1"># 'num_basis': 5</span>
|
||||
<span class="linenos"> 86</span> <span class="c1"># }</span>
|
||||
<span class="linenos"> 87</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 88</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 89</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 90</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 91</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 93</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 94</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
|
||||
<span class="linenos"> 96</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 97</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 98</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos"> 99</span>
|
||||
<span class="linenos">100</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">101</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">102</span>
|
||||
<span class="linenos">103</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">104</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">105</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">106</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">108</span>
|
||||
<span class="linenos">109</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">110</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">111</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">112</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">113</span>
|
||||
<span class="linenos">114</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">115</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">116</span>
|
||||
<span class="linenos"> 87</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 88</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 89</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 90</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 91</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 92</span>
|
||||
<span class="linenos"> 93</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 94</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 96</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
|
||||
<span class="linenos"> 97</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 98</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 99</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">100</span>
|
||||
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">103</span>
|
||||
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">109</span>
|
||||
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">114</span>
|
||||
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">117</span>
|
||||
<span class="linenos">118</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">119</span> <span class="c1"># Disclaimer: DMC environments require the seed to be specified in the beginning.</span>
|
||||
<span class="linenos">120</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.</span>
|
||||
<span class="linenos">121</span>
|
||||
<span class="linenos">122</span> <span class="c1"># For rendering DMC</span>
|
||||
<span class="linenos">123</span> <span class="c1"># export MUJOCO_GL="osmesa"</span>
|
||||
<span class="linenos">124</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="linenos">125</span>
|
||||
<span class="linenos">126</span> <span class="c1"># # Standard DMC Suite tasks</span>
|
||||
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">128</span> <span class="c1">#</span>
|
||||
<span class="linenos">129</span> <span class="c1"># # Manipulation tasks</span>
|
||||
<span class="linenos">130</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
|
||||
<span class="linenos">131</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/manipulation-reach_site_features"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">132</span> <span class="c1">#</span>
|
||||
<span class="linenos">133</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
|
||||
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control_ProMP/ball_in_cup-catch-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">135</span>
|
||||
<span class="linenos">136</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
|
||||
<span class="linenos">137</span> <span class="c1"># already registered above</span>
|
||||
<span class="linenos">138</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">118</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">119</span> <span class="c1"># # Standard DMC Suite tasks</span>
|
||||
<span class="linenos">120</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">121</span> <span class="c1">#</span>
|
||||
<span class="linenos">122</span> <span class="c1"># # Manipulation tasks</span>
|
||||
<span class="linenos">123</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
|
||||
<span class="linenos">124</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/reach_site_features"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">125</span> <span class="c1">#</span>
|
||||
<span class="linenos">126</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
|
||||
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control_ProMP/ball_in_cup-catch-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">128</span>
|
||||
<span class="linenos">129</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
|
||||
<span class="linenos">130</span> <span class="c1"># already registered above</span>
|
||||
<span class="linenos">131</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">132</span>
|
||||
<span class="linenos">133</span> <span class="c1"># # Standard DMC Suite tasks</span>
|
||||
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">135</span> <span class="c1">#</span>
|
||||
<span class="linenos">136</span> <span class="c1"># # Manipulation tasks</span>
|
||||
<span class="linenos">137</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
|
||||
<span class="linenos">138</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control/reach_site_features"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">139</span> <span class="c1">#</span>
|
||||
<span class="linenos">140</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
|
||||
<span class="linenos">141</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">"dm_control_ProMP/ball_in_cup-catch-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">142</span>
|
||||
<span class="linenos">143</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
|
||||
<span class="linenos">144</span> <span class="c1"># already registered above</span>
|
||||
<span class="linenos">145</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">146</span>
|
||||
<span class="linenos">147</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">148</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
28
docs/build/html/examples/general.html
vendored
28
docs/build/html/examples/general.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>General Usage Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>General Usage Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -130,7 +130,7 @@
|
||||
<span class="linenos"> 21</span>
|
||||
<span class="linenos"> 22</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 23</span>
|
||||
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
|
||||
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 25</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 26</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 27</span> <span class="nb">print</span><span class="p">(</span><span class="s2">"Observation shape: "</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
|
||||
@ -194,21 +194,21 @@
|
||||
<span class="linenos"> 85</span> <span class="c1"># do not return values above threshold</span>
|
||||
<span class="linenos"> 86</span> <span class="k">return</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">v</span><span class="p">)[:</span><span class="n">n_samples</span><span class="p">],</span> <span class="n">buffer</span><span class="o">.</span><span class="n">values</span><span class="p">()),</span>
|
||||
<span class="linenos"> 87</span>
|
||||
<span class="linenos"> 88</span>
|
||||
<span class="linenos"> 89</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos"> 90</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="linenos"> 88</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos"> 89</span> <span class="c1"># Basic gym task</span>
|
||||
<span class="linenos"> 90</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"Pendulum-v1"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 91</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># Basic gym task</span>
|
||||
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"Pendulum-v1"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># Mujoco task from framework</span>
|
||||
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"fancy/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 94</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># Mujoco task from framework</span>
|
||||
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"fancy/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 95</span> <span class="c1"># # OpenAI Mujoco task</span>
|
||||
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"HalfCheetah-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 97</span>
|
||||
<span class="linenos"> 98</span> <span class="c1"># # OpenAI Mujoco task</span>
|
||||
<span class="linenos"> 99</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">"HalfCheetah-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos"> 98</span> <span class="c1"># Vectorized multiprocessing environments</span>
|
||||
<span class="linenos"> 99</span> <span class="c1"># example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)</span>
|
||||
<span class="linenos">100</span>
|
||||
<span class="linenos">101</span> <span class="c1"># Vectorized multiprocessing environments</span>
|
||||
<span class="linenos">102</span> <span class="c1"># example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)</span>
|
||||
<span class="linenos">101</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">102</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
205
docs/build/html/examples/metaworld.html
vendored
205
docs/build/html/examples/metaworld.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Metaworld Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>Metaworld Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -111,7 +111,7 @@
|
||||
<span class="linenos"> 2</span><span class="kn">import</span> <span class="nn">fancy_gym</span>
|
||||
<span class="linenos"> 3</span>
|
||||
<span class="linenos"> 4</span>
|
||||
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">"fish-swim"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">"metaworld/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 6</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 7</span><span class="sd"> Example for running a MetaWorld based env in the step based setting.</span>
|
||||
<span class="linenos"> 8</span><span class="sd"> The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always</span>
|
||||
@ -127,7 +127,7 @@
|
||||
<span class="linenos"> 18</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 19</span>
|
||||
<span class="linenos"> 20</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
|
||||
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 22</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 23</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 24</span> <span class="nb">print</span><span class="p">(</span><span class="s2">"observation shape:"</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
|
||||
@ -136,111 +136,104 @@
|
||||
<span class="linenos"> 27</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 28</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 29</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 30</span> <span class="c1"># THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM</span>
|
||||
<span class="linenos"> 31</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
|
||||
<span class="linenos"> 32</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="linenos"> 33</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 34</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 35</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 36</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos"> 37</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 38</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 39</span>
|
||||
<span class="linenos"> 40</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 41</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos"> 42</span>
|
||||
<span class="linenos"> 43</span>
|
||||
<span class="linenos"> 44</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 45</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 46</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
|
||||
<span class="linenos"> 47</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 48</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 49</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 50</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 54</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 30</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 31</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 32</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 33</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 34</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos"> 35</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 36</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="linenos"> 37</span>
|
||||
<span class="linenos"> 38</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 39</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos"> 40</span>
|
||||
<span class="linenos"> 41</span>
|
||||
<span class="linenos"> 42</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 43</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 44</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
|
||||
<span class="linenos"> 45</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 46</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 47</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 48</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
|
||||
<span class="linenos"> 49</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 50</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
|
||||
<span class="linenos"> 54</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 56</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 58</span>
|
||||
<span class="linenos"> 59</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 60</span>
|
||||
<span class="linenos"> 61</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
|
||||
<span class="linenos"> 62</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"metaworld/button-press-v2"</span>
|
||||
<span class="linenos"> 63</span>
|
||||
<span class="linenos"> 64</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
|
||||
<span class="linenos"> 65</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
|
||||
<span class="linenos"> 66</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
|
||||
<span class="linenos"> 67</span> <span class="c1"># # For a ProMP</span>
|
||||
<span class="linenos"> 68</span> <span class="c1"># trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}</span>
|
||||
<span class="linenos"> 69</span> <span class="c1"># phase_generator_kwargs = {'phase_generator_type': 'linear'}</span>
|
||||
<span class="linenos"> 70</span> <span class="c1"># controller_kwargs = {'controller_type': 'metaworld'}</span>
|
||||
<span class="linenos"> 71</span> <span class="c1"># basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',</span>
|
||||
<span class="linenos"> 72</span> <span class="c1"># 'num_basis': 5,</span>
|
||||
<span class="linenos"> 73</span> <span class="c1"># 'num_basis_zero_start': 1</span>
|
||||
<span class="linenos"> 74</span> <span class="c1"># }</span>
|
||||
<span class="linenos"> 75</span>
|
||||
<span class="linenos"> 76</span> <span class="c1"># For a DMP</span>
|
||||
<span class="linenos"> 77</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">}</span>
|
||||
<span class="linenos"> 78</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 79</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
|
||||
<span class="linenos"> 80</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'metaworld'</span><span class="p">}</span>
|
||||
<span class="linenos"> 81</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos"> 82</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos"> 83</span> <span class="p">}</span>
|
||||
<span class="linenos"> 84</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 85</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 86</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 87</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 88</span>
|
||||
<span class="linenos"> 89</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 90</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 91</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># It is also possible to change the mode multiple times when</span>
|
||||
<span class="linenos"> 93</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 94</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 95</span> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"Metaworld render interface bug does not allow to render() fixes its interface. "</span>
|
||||
<span class="linenos"> 96</span> <span class="s2">"A temporary workaround is to alter their code in MujocoEnv render() from "</span>
|
||||
<span class="linenos"> 97</span> <span class="s2">"`if not offscreen` to `if not offscreen or offscreen == 'human'`."</span><span class="p">)</span>
|
||||
<span class="linenos"> 98</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
|
||||
<span class="linenos"> 99</span> <span class="c1"># env.render(mode="human")</span>
|
||||
<span class="linenos">100</span>
|
||||
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">103</span>
|
||||
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 59</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
|
||||
<span class="linenos"> 60</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"metaworld/button-press-v2"</span>
|
||||
<span class="linenos"> 61</span>
|
||||
<span class="linenos"> 62</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
|
||||
<span class="linenos"> 63</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
|
||||
<span class="linenos"> 64</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
|
||||
<span class="linenos"> 65</span> <span class="c1"># # For a ProMP</span>
|
||||
<span class="linenos"> 66</span> <span class="c1"># trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}</span>
|
||||
<span class="linenos"> 67</span> <span class="c1"># phase_generator_kwargs = {'phase_generator_type': 'linear'}</span>
|
||||
<span class="linenos"> 68</span> <span class="c1"># controller_kwargs = {'controller_type': 'metaworld'}</span>
|
||||
<span class="linenos"> 69</span> <span class="c1"># basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',</span>
|
||||
<span class="linenos"> 70</span> <span class="c1"># 'num_basis': 5,</span>
|
||||
<span class="linenos"> 71</span> <span class="c1"># 'num_basis_zero_start': 1</span>
|
||||
<span class="linenos"> 72</span> <span class="c1"># }</span>
|
||||
<span class="linenos"> 73</span>
|
||||
<span class="linenos"> 74</span> <span class="c1"># For a DMP</span>
|
||||
<span class="linenos"> 75</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">}</span>
|
||||
<span class="linenos"> 76</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 77</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
|
||||
<span class="linenos"> 78</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'metaworld'</span><span class="p">}</span>
|
||||
<span class="linenos"> 79</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos"> 80</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos"> 81</span> <span class="p">}</span>
|
||||
<span class="linenos"> 82</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 83</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
|
||||
<span class="linenos"> 84</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 85</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos"> 86</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 87</span>
|
||||
<span class="linenos"> 88</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 89</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 90</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
|
||||
<span class="linenos"> 91</span> <span class="c1"># It is also possible to change the mode multiple times when</span>
|
||||
<span class="linenos"> 92</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
|
||||
<span class="linenos"> 93</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 94</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 95</span>
|
||||
<span class="linenos"> 96</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 97</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 98</span>
|
||||
<span class="linenos"> 99</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">100</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">101</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">102</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">103</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">104</span>
|
||||
<span class="linenos">105</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">106</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">108</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="linenos">109</span>
|
||||
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">114</span>
|
||||
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">117</span>
|
||||
<span class="linenos">118</span>
|
||||
<span class="linenos">119</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">120</span> <span class="c1"># Disclaimer: MetaWorld environments require the seed to be specified in the beginning.</span>
|
||||
<span class="linenos">121</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.</span>
|
||||
<span class="linenos">122</span>
|
||||
<span class="linenos">123</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
|
||||
<span class="linenos">124</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
|
||||
<span class="linenos">125</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="linenos">126</span>
|
||||
<span class="linenos">127</span> <span class="c1"># # Standard Meta world tasks</span>
|
||||
<span class="linenos">128</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">129</span>
|
||||
<span class="linenos">130</span> <span class="c1"># # MP + MetaWorld hybrid task provided in our framework</span>
|
||||
<span class="linenos">131</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld_ProMP/ButtonPress-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">132</span> <span class="c1">#</span>
|
||||
<span class="linenos">133</span> <span class="c1"># # Custom MetaWorld task</span>
|
||||
<span class="linenos">134</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">110</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">111</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">112</span>
|
||||
<span class="linenos">113</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">114</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
|
||||
<span class="linenos">115</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
|
||||
<span class="linenos">116</span>
|
||||
<span class="linenos">117</span> <span class="c1"># # Standard Meta world tasks</span>
|
||||
<span class="linenos">118</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">119</span>
|
||||
<span class="linenos">120</span> <span class="c1"># # MP + MetaWorld hybrid task provided in our framework</span>
|
||||
<span class="linenos">121</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">"metaworld_ProMP/button-press-v2"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">122</span> <span class="c1">#</span>
|
||||
<span class="linenos">123</span> <span class="c1"># # Custom MetaWorld task</span>
|
||||
<span class="linenos">124</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">125</span>
|
||||
<span class="linenos">126</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">127</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
479
docs/build/html/examples/movement_primitives.html
vendored
479
docs/build/html/examples/movement_primitives.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Movement Primitives Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>Movement Primitives Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -135,252 +135,253 @@
|
||||
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 27</span>
|
||||
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">1</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 30</span>
|
||||
<span class="linenos"> 31</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
|
||||
<span class="linenos"> 32</span> <span class="c1"># such as a ProMP</span>
|
||||
<span class="linenos"> 33</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 34</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
|
||||
<span class="linenos"> 35</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal</span>
|
||||
<span class="linenos"> 36</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
|
||||
<span class="linenos"> 37</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 38</span> <span class="c1"># Aggregated returns</span>
|
||||
<span class="linenos"> 39</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 40</span>
|
||||
<span class="linenos"> 41</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 42</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 43</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 44</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 45</span>
|
||||
<span class="linenos"> 46</span>
|
||||
<span class="linenos"> 47</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 48</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 49</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
|
||||
<span class="linenos"> 50</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos"> 54</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 56</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 58</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos"> 59</span>
|
||||
<span class="linenos"> 60</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 29</span> <span class="c1"># This renders the full MP trajectory</span>
|
||||
<span class="linenos"> 30</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
|
||||
<span class="linenos"> 31</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 32</span>
|
||||
<span class="linenos"> 33</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
|
||||
<span class="linenos"> 34</span> <span class="c1"># such as a ProMP</span>
|
||||
<span class="linenos"> 35</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 36</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
|
||||
<span class="linenos"> 37</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal</span>
|
||||
<span class="linenos"> 38</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
|
||||
<span class="linenos"> 39</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 40</span> <span class="c1"># Aggregated returns</span>
|
||||
<span class="linenos"> 41</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 42</span>
|
||||
<span class="linenos"> 43</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 44</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 45</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 46</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 47</span>
|
||||
<span class="linenos"> 48</span>
|
||||
<span class="linenos"> 49</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos"> 50</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos"> 51</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
|
||||
<span class="linenos"> 52</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos"> 53</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos"> 54</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos"> 55</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos"> 56</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos"> 57</span><span class="sd"> Args:</span>
|
||||
<span class="linenos"> 58</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos"> 59</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos"> 60</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos"> 61</span>
|
||||
<span class="linenos"> 62</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 63</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
|
||||
<span class="linenos"> 64</span> <span class="c1"># E.g. here for way too many basis functions</span>
|
||||
<span class="linenos"> 65</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 66</span>
|
||||
<span class="linenos"> 67</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 68</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 69</span>
|
||||
<span class="linenos"> 70</span> <span class="c1"># This time rendering every trajectory</span>
|
||||
<span class="linenos"> 71</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 72</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 73</span>
|
||||
<span class="linenos"> 74</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos"> 75</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 76</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 77</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 78</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 79</span>
|
||||
<span class="linenos"> 80</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 81</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 82</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 83</span>
|
||||
<span class="linenos"> 84</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 85</span> <span class="k">return</span> <span class="n">obs</span>
|
||||
<span class="linenos"> 86</span>
|
||||
<span class="linenos"> 87</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
|
||||
<span class="linenos"> 88</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="linenos"> 89</span> <span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 90</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 91</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 92</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos"> 93</span> <span class="p">},</span>
|
||||
<span class="linenos"> 94</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 95</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos"> 96</span> <span class="p">},</span>
|
||||
<span class="linenos"> 97</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 98</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos"> 99</span> <span class="p">},</span>
|
||||
<span class="linenos">100</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">101</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">102</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">103</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">104</span> <span class="p">}</span>
|
||||
<span class="linenos">105</span> <span class="p">},</span>
|
||||
<span class="linenos">106</span> <span class="s1">'DMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">107</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">108</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">,</span>
|
||||
<span class="linenos">109</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">500</span>
|
||||
<span class="linenos">110</span> <span class="p">},</span>
|
||||
<span class="linenos">111</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">112</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos">113</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mf">2.5</span>
|
||||
<span class="linenos">114</span> <span class="p">},</span>
|
||||
<span class="linenos">115</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">116</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">117</span> <span class="p">},</span>
|
||||
<span class="linenos">118</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">119</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">120</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos">121</span> <span class="p">}</span>
|
||||
<span class="linenos">122</span> <span class="p">}</span>
|
||||
<span class="linenos"> 62</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos"> 63</span>
|
||||
<span class="linenos"> 64</span><span class="sd"> """</span>
|
||||
<span class="linenos"> 65</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
|
||||
<span class="linenos"> 66</span> <span class="c1"># E.g. here for way too many basis functions</span>
|
||||
<span class="linenos"> 67</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 68</span>
|
||||
<span class="linenos"> 69</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos"> 70</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 71</span>
|
||||
<span class="linenos"> 72</span> <span class="c1"># This time rendering every trajectory</span>
|
||||
<span class="linenos"> 73</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos"> 74</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos"> 75</span>
|
||||
<span class="linenos"> 76</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos"> 77</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 78</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos"> 79</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos"> 80</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos"> 81</span>
|
||||
<span class="linenos"> 82</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos"> 83</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
|
||||
<span class="linenos"> 84</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 85</span>
|
||||
<span class="linenos"> 86</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos"> 87</span> <span class="k">return</span> <span class="n">obs</span>
|
||||
<span class="linenos"> 88</span>
|
||||
<span class="linenos"> 89</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
|
||||
<span class="linenos"> 90</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="linenos"> 91</span> <span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 92</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 93</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos"> 94</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos"> 95</span> <span class="p">},</span>
|
||||
<span class="linenos"> 96</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos"> 97</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos"> 98</span> <span class="p">},</span>
|
||||
<span class="linenos"> 99</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">100</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">101</span> <span class="p">},</span>
|
||||
<span class="linenos">102</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">103</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">104</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">105</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">106</span> <span class="p">}</span>
|
||||
<span class="linenos">107</span> <span class="p">},</span>
|
||||
<span class="linenos">108</span> <span class="s1">'DMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">109</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">110</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'dmp'</span><span class="p">,</span>
|
||||
<span class="linenos">111</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">500</span>
|
||||
<span class="linenos">112</span> <span class="p">},</span>
|
||||
<span class="linenos">113</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">114</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'exp'</span><span class="p">,</span>
|
||||
<span class="linenos">115</span> <span class="s1">'alpha_phase'</span><span class="p">:</span> <span class="mf">2.5</span>
|
||||
<span class="linenos">116</span> <span class="p">},</span>
|
||||
<span class="linenos">117</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">118</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">119</span> <span class="p">},</span>
|
||||
<span class="linenos">120</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">121</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">122</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span>
|
||||
<span class="linenos">123</span> <span class="p">}</span>
|
||||
<span class="linenos">124</span>
|
||||
<span class="linenos">125</span>
|
||||
<span class="linenos">126</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">127</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">128</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
|
||||
<span class="linenos">129</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos">130</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos">131</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos">132</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos">133</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos">134</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">135</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">136</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">137</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">138</span>
|
||||
<span class="linenos">139</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">124</span> <span class="p">}</span>
|
||||
<span class="linenos">125</span> <span class="p">}</span>
|
||||
<span class="linenos">126</span>
|
||||
<span class="linenos">127</span>
|
||||
<span class="linenos">128</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">129</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">130</span><span class="sd"> Example for running a custom movement primitive based environment.</span>
|
||||
<span class="linenos">131</span><span class="sd"> Our already registered environments follow the same structure.</span>
|
||||
<span class="linenos">132</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
|
||||
<span class="linenos">133</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
|
||||
<span class="linenos">134</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
|
||||
<span class="linenos">135</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
|
||||
<span class="linenos">136</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">137</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">138</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">139</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">140</span>
|
||||
<span class="linenos">141</span><span class="sd"> """</span>
|
||||
<span class="linenos">141</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">142</span>
|
||||
<span class="linenos">143</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">144</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">145</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">"fancy_DMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">146</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">147</span>
|
||||
<span class="linenos">148</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">,</span> <span class="s1">'DMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
|
||||
<span class="linenos">143</span><span class="sd"> """</span>
|
||||
<span class="linenos">144</span>
|
||||
<span class="linenos">145</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">146</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">147</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">"fancy_DMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">148</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">149</span>
|
||||
<span class="linenos">150</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">150</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">,</span> <span class="s1">'DMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
|
||||
<span class="linenos">151</span>
|
||||
<span class="linenos">152</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">153</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">154</span>
|
||||
<span class="linenos">155</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">156</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">157</span>
|
||||
<span class="linenos">158</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">159</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">160</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">161</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">162</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">163</span>
|
||||
<span class="linenos">164</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">165</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">166</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">167</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">168</span>
|
||||
<span class="linenos">169</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correlcty implement .close</span>
|
||||
<span class="linenos">170</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">171</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">172</span> <span class="k">pass</span>
|
||||
<span class="linenos">173</span>
|
||||
<span class="linenos">174</span>
|
||||
<span class="linenos">175</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">176</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">177</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
|
||||
<span class="linenos">178</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">179</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">180</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">181</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">182</span>
|
||||
<span class="linenos">183</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">152</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">153</span>
|
||||
<span class="linenos">154</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">155</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">156</span>
|
||||
<span class="linenos">157</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">158</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">159</span>
|
||||
<span class="linenos">160</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">161</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">162</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">163</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">164</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">165</span>
|
||||
<span class="linenos">166</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">167</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">168</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">169</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">170</span>
|
||||
<span class="linenos">171</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correlcty implement .close</span>
|
||||
<span class="linenos">172</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">173</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">174</span> <span class="k">pass</span>
|
||||
<span class="linenos">175</span>
|
||||
<span class="linenos">176</span>
|
||||
<span class="linenos">177</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">178</span><span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="linenos">179</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
|
||||
<span class="linenos">180</span><span class="sd"> Args:</span>
|
||||
<span class="linenos">181</span><span class="sd"> seed: seed</span>
|
||||
<span class="linenos">182</span><span class="sd"> iterations: Number of rollout steps to run</span>
|
||||
<span class="linenos">183</span><span class="sd"> render: Render the episode</span>
|
||||
<span class="linenos">184</span>
|
||||
<span class="linenos">185</span><span class="sd"> """</span>
|
||||
<span class="linenos">185</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">186</span>
|
||||
<span class="linenos">187</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">188</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">189</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">190</span>
|
||||
<span class="linenos">191</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">192</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">193</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos">194</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos">195</span> <span class="p">},</span>
|
||||
<span class="linenos">196</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">197</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos">198</span> <span class="p">},</span>
|
||||
<span class="linenos">199</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">200</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">201</span> <span class="p">},</span>
|
||||
<span class="linenos">202</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">203</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">204</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">205</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">206</span> <span class="p">}</span>
|
||||
<span class="linenos">207</span> <span class="p">}})</span>
|
||||
<span class="linenos">208</span>
|
||||
<span class="linenos">209</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">187</span><span class="sd"> """</span>
|
||||
<span class="linenos">188</span>
|
||||
<span class="linenos">189</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-v0"</span>
|
||||
<span class="linenos">190</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">"fancy/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">191</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">"fancy_ProMP/Reacher5d-Custom-v0"</span>
|
||||
<span class="linenos">192</span>
|
||||
<span class="linenos">193</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">'ProMP'</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">'ProMP'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">194</span> <span class="s1">'trajectory_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">195</span> <span class="s1">'trajectory_generator_type'</span><span class="p">:</span> <span class="s1">'promp'</span><span class="p">,</span>
|
||||
<span class="linenos">196</span> <span class="s1">'weights_scale'</span><span class="p">:</span> <span class="mi">2</span>
|
||||
<span class="linenos">197</span> <span class="p">},</span>
|
||||
<span class="linenos">198</span> <span class="s1">'phase_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">199</span> <span class="s1">'phase_generator_type'</span><span class="p">:</span> <span class="s1">'linear'</span>
|
||||
<span class="linenos">200</span> <span class="p">},</span>
|
||||
<span class="linenos">201</span> <span class="s1">'controller_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">202</span> <span class="s1">'controller_type'</span><span class="p">:</span> <span class="s1">'velocity'</span>
|
||||
<span class="linenos">203</span> <span class="p">},</span>
|
||||
<span class="linenos">204</span> <span class="s1">'basis_generator_kwargs'</span><span class="p">:</span> <span class="p">{</span>
|
||||
<span class="linenos">205</span> <span class="s1">'basis_generator_type'</span><span class="p">:</span> <span class="s1">'zero_rbf'</span><span class="p">,</span>
|
||||
<span class="linenos">206</span> <span class="s1">'num_basis'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
|
||||
<span class="linenos">207</span> <span class="s1">'num_basis_zero_start'</span><span class="p">:</span> <span class="mi">1</span>
|
||||
<span class="linenos">208</span> <span class="p">}</span>
|
||||
<span class="linenos">209</span> <span class="p">}})</span>
|
||||
<span class="linenos">210</span>
|
||||
<span class="linenos">211</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">212</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">213</span>
|
||||
<span class="linenos">214</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">215</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">216</span>
|
||||
<span class="linenos">217</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">218</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">219</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">220</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">221</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">222</span>
|
||||
<span class="linenos">223</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">224</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">225</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">226</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">227</span>
|
||||
<span class="linenos">228</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">229</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">230</span>
|
||||
<span class="linenos">231</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">232</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">233</span>
|
||||
<span class="linenos">234</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">235</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">236</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">237</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">238</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">239</span>
|
||||
<span class="linenos">240</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">241</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">242</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">243</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">244</span>
|
||||
<span class="linenos">245</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correlcty implement .close</span>
|
||||
<span class="linenos">246</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">247</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">248</span> <span class="k">pass</span>
|
||||
<span class="linenos">249</span>
|
||||
<span class="linenos">250</span>
|
||||
<span class="linenos">251</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
|
||||
<span class="linenos">252</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="linenos">253</span> <span class="c1"># DMP</span>
|
||||
<span class="linenos">254</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_DMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">255</span>
|
||||
<span class="linenos">256</span> <span class="c1"># ProMP</span>
|
||||
<span class="linenos">257</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/BoxPushingTemporalSparse-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/TableTennis4D-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">260</span>
|
||||
<span class="linenos">261</span> <span class="c1"># ProDMP with Replanning</span>
|
||||
<span class="linenos">262</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennis4DReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennisWindReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">265</span>
|
||||
<span class="linenos">266</span> <span class="c1"># Altered basis functions</span>
|
||||
<span class="linenos">267</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">268</span>
|
||||
<span class="linenos">269</span> <span class="c1"># Custom MP</span>
|
||||
<span class="linenos">270</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">271</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">272</span>
|
||||
<span class="linenos">273</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">274</span> <span class="n">main</span><span class="p">()</span>
|
||||
<span class="linenos">211</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">212</span>
|
||||
<span class="linenos">213</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">214</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">215</span>
|
||||
<span class="linenos">216</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">217</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">218</span>
|
||||
<span class="linenos">219</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">220</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">221</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">222</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">223</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">224</span>
|
||||
<span class="linenos">225</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">226</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">227</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">228</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">229</span>
|
||||
<span class="linenos">230</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">231</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">232</span>
|
||||
<span class="linenos">233</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">234</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">235</span>
|
||||
<span class="linenos">236</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">237</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos">238</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">239</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">240</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">241</span>
|
||||
<span class="linenos">242</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">243</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
|
||||
<span class="linenos">244</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">245</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">246</span>
|
||||
<span class="linenos">247</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don't correlcty implement .close</span>
|
||||
<span class="linenos">248</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">249</span> <span class="k">except</span><span class="p">:</span>
|
||||
<span class="linenos">250</span> <span class="k">pass</span>
|
||||
<span class="linenos">251</span>
|
||||
<span class="linenos">252</span>
|
||||
<span class="linenos">253</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">254</span> <span class="c1"># DMP</span>
|
||||
<span class="linenos">255</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_DMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">256</span>
|
||||
<span class="linenos">257</span> <span class="c1"># ProMP</span>
|
||||
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/HoleReacher-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/BoxPushingTemporalSparse-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">260</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/TableTennis4D-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">261</span>
|
||||
<span class="linenos">262</span> <span class="c1"># ProDMP with Replanning</span>
|
||||
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennis4DReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">265</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"fancy_ProDMP/TableTennisWindReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">266</span>
|
||||
<span class="linenos">267</span> <span class="c1"># Altered basis functions</span>
|
||||
<span class="linenos">268</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">"fancy_ProMP/Reacher5d-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">269</span>
|
||||
<span class="linenos">270</span> <span class="c1"># Custom MP</span>
|
||||
<span class="linenos">271</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">272</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">273</span>
|
||||
<span class="linenos">274</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">275</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>MP Params Tuning Example — Fancy Gym 0.2 documentation</title>
|
||||
<title>MP Params Tuning Example — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
30
docs/build/html/examples/open_ai.html
vendored
30
docs/build/html/examples/open_ai.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>OpenAI Envs Examples — Fancy Gym 0.2 documentation</title>
|
||||
<title>OpenAI Envs Examples — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -122,27 +122,27 @@
|
||||
<span class="linenos">13</span><span class="sd"> Returns:</span>
|
||||
<span class="linenos">14</span>
|
||||
<span class="linenos">15</span><span class="sd"> """</span>
|
||||
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
|
||||
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">17</span>
|
||||
<span class="linenos">18</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="linenos">19</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos">20</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
|
||||
<span class="linenos">21</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
|
||||
<span class="linenos">22</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos">24</span> <span class="k">else</span><span class="p">:</span>
|
||||
<span class="linenos">25</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">26</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">27</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">28</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">29</span>
|
||||
<span class="linenos">30</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">31</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
|
||||
<span class="linenos">32</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">33</span>
|
||||
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">24</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">25</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">26</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
|
||||
<span class="linenos">27</span>
|
||||
<span class="linenos">28</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">29</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
|
||||
<span class="linenos">30</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">31</span>
|
||||
<span class="linenos">32</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">33</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"gym_ProMP/Reacher-v2"</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">34</span>
|
||||
<span class="linenos">35</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="linenos">36</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">"gym_ProMP/Reacher-v2"</span><span class="p">)</span>
|
||||
<span class="linenos">36</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>PD Control Gain Tuning Example — Fancy Gym 0.2 documentation</title>
|
||||
<title>PD Control Gain Tuning Example — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
69
docs/build/html/examples/replanning_envs.html
vendored
69
docs/build/html/examples/replanning_envs.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Replanning Example — Fancy Gym 0.2 documentation</title>
|
||||
<title>Replanning Example — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -112,24 +112,24 @@
|
||||
<span class="linenos"> 3</span>
|
||||
<span class="linenos"> 4</span>
|
||||
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
|
||||
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos"> 7</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos"> 8</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
|
||||
<span class="linenos"> 9</span> <span class="n">done</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="linenos">10</span> <span class="k">while</span> <span class="n">done</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
|
||||
<span class="linenos">11</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">12</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">13</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">14</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos">15</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">16</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos"> 9</span> <span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||||
<span class="linenos">10</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">11</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">12</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">13</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">14</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">15</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">16</span> <span class="k">break</span>
|
||||
<span class="linenos">17</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">18</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">19</span>
|
||||
<span class="linenos">20</span>
|
||||
<span class="linenos">21</span><span class="k">def</span> <span class="nf">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="linenos">22</span> <span class="c1"># id for a step-based environment</span>
|
||||
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"BoxPushingDense-v0"</span>
|
||||
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">"fancy/BoxPushingDense-v0"</span>
|
||||
<span class="linenos">24</span>
|
||||
<span class="linenos">25</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">box_pushing</span><span class="o">.</span><span class="n">mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
|
||||
<span class="linenos">26</span>
|
||||
@ -147,31 +147,34 @@
|
||||
<span class="linenos">38</span> <span class="s1">'replanning_schedule'</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">pos</span><span class="p">,</span> <span class="n">vel</span><span class="p">,</span> <span class="n">obs</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="n">t</span> <span class="o">%</span> <span class="mi">25</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span>
|
||||
<span class="linenos">39</span> <span class="s1">'condition_on_desired'</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span>
|
||||
<span class="linenos">40</span>
|
||||
<span class="linenos">41</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">42</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">43</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">44</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos">45</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">46</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||||
<span class="linenos">47</span>
|
||||
<span class="linenos">48</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">49</span>
|
||||
<span class="linenos">50</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
|
||||
<span class="linenos">51</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">52</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">53</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">54</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">55</span>
|
||||
<span class="linenos">56</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">57</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">58</span>
|
||||
<span class="linenos">41</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">'human'</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="linenos">42</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">43</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">44</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
|
||||
<span class="linenos">45</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||||
<span class="linenos">46</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
|
||||
<span class="linenos">47</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
|
||||
<span class="linenos">48</span>
|
||||
<span class="linenos">49</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">50</span>
|
||||
<span class="linenos">51</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
|
||||
<span class="linenos">52</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||||
<span class="linenos">53</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
|
||||
<span class="linenos">54</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||||
<span class="linenos">55</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||||
<span class="linenos">56</span>
|
||||
<span class="linenos">57</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||||
<span class="linenos">58</span> <span class="k">del</span> <span class="n">env</span>
|
||||
<span class="linenos">59</span>
|
||||
<span class="linenos">60</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
||||
<span class="linenos">60</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="linenos">61</span> <span class="c1"># run a registered replanning environment</span>
|
||||
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">"fancy_ProDMP/BoxPushingDenseReplan-v0"</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">63</span>
|
||||
<span class="linenos">64</span> <span class="c1"># run a custom replanning environment</span>
|
||||
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
|
||||
<span class="linenos">66</span>
|
||||
<span class="linenos">67</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
||||
<span class="linenos">68</span> <span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.envs — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.envs — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -39,7 +39,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.register — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.register — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>fancy_gym.upgrade — Fancy Gym 0.2 documentation</title>
|
||||
<title>fancy_gym.upgrade — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -40,7 +40,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/genindex.html
vendored
4
docs/build/html/genindex.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Index — Fancy Gym 0.2 documentation</title>
|
||||
<title>Index — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -38,7 +38,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
4
docs/build/html/guide/basic_usage.html
vendored
4
docs/build/html/guide/basic_usage.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Basic Usage — Fancy Gym 0.2 documentation</title>
|
||||
<title>Basic Usage — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/guide/episodic_rl.html
vendored
4
docs/build/html/guide/episodic_rl.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>What is Episodic RL? — Fancy Gym 0.2 documentation</title>
|
||||
<title>What is Episodic RL? — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
8
docs/build/html/guide/installation.html
vendored
8
docs/build/html/guide/installation.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Installation — Fancy Gym 0.2 documentation</title>
|
||||
<title>Installation — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
@ -135,7 +135,7 @@ pip<span class="w"> </span>install<span class="w"> </span><span class="s1">'
|
||||
</div>
|
||||
<p>Pip can not automatically install up-to-date versions of metaworld,
|
||||
since they are not available on PyPI yet. Install metaworld via</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
@ -169,7 +169,7 @@ pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </s
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Metaworld has to be installed manually with</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
|
4
docs/build/html/guide/upgrading_envs.html
vendored
4
docs/build/html/guide/upgrading_envs.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Creating new MP Environments — Fancy Gym 0.2 documentation</title>
|
||||
<title>Creating new MP Environments — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="../_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
|
4
docs/build/html/index.html
vendored
4
docs/build/html/index.html
vendored
@ -4,7 +4,7 @@
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Fancy Gym — Fancy Gym 0.2 documentation</title>
|
||||
<title>Fancy Gym — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -40,7 +40,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
BIN
docs/build/html/objects.inv
vendored
BIN
docs/build/html/objects.inv
vendored
Binary file not shown.
4
docs/build/html/py-modindex.html
vendored
4
docs/build/html/py-modindex.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Python Module Index — Fancy Gym 0.2 documentation</title>
|
||||
<title>Python Module Index — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
|
4
docs/build/html/search.html
vendored
4
docs/build/html/search.html
vendored
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Search — Fancy Gym 0.2 documentation</title>
|
||||
<title>Search — Fancy Gym 0.3.0 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/style.css" type="text/css" />
|
||||
@ -41,7 +41,7 @@
|
||||
<img src="_static/icon.svg" class="logo" alt="Logo"/>
|
||||
</a>
|
||||
<div class="version">
|
||||
0.2
|
||||
0.3.0
|
||||
</div>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="#" method="get">
|
||||
|
2
docs/build/html/searchindex.js
vendored
2
docs/build/html/searchindex.js
vendored
File diff suppressed because one or more lines are too long
@ -1,13 +1,17 @@
|
||||
# This conf.py is in large parts inspired by the one used by stable-baselines 3
|
||||
|
||||
import toml
|
||||
import datetime
|
||||
|
||||
project = 'Fancy Gym'
|
||||
author = 'Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou'
|
||||
copyright = f'2020-{datetime.date.today().year}, {author}'
|
||||
|
||||
release = '0.2' # The full version, including alpha/beta/rc tags
|
||||
version = '0.2' # The short X.Y version
|
||||
pyproject_content = toml.load("../../pyproject.toml")
|
||||
proj_version = pyproject_content["project"]["version"]
|
||||
|
||||
release = proj_version # The full version, including alpha/beta/rc tags
|
||||
version = proj_version # The short X.Y version
|
||||
|
||||
extensions = [
|
||||
'myst_parser',
|
||||
|
@ -32,7 +32,7 @@ since they are not avaible on PyPI yet. Install metaworld via
|
||||
|
||||
.. code:: bash
|
||||
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
|
||||
|
||||
Installation from master
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
|
||||
|
||||
.. code:: bash
|
||||
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
|
||||
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
|
||||
|
@ -115,6 +115,7 @@ class AntJumpEnv(AntEnvCustomXML):
|
||||
contact_force_range=contact_force_range,
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs)
|
||||
self.render_active = False
|
||||
|
||||
def step(self, action):
|
||||
self.current_step += 1
|
||||
@ -153,8 +154,15 @@ class AntJumpEnv(AntEnvCustomXML):
|
||||
}
|
||||
truncated = False
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
def render(self):
|
||||
self.render_active = True
|
||||
return super().render()
|
||||
|
||||
def _get_obs(self):
|
||||
return np.append(super()._get_obs(), self.goal)
|
||||
|
||||
|
@ -44,6 +44,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
||||
}
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
utils.EzPickle.__init__(self)
|
||||
self._steps = 0
|
||||
# Small Context -> Easier. Todo: Should we do different versions?
|
||||
# self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "beerpong_wo_cup.xml")
|
||||
@ -89,7 +90,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
||||
observation_space=self.observation_space,
|
||||
**kwargs
|
||||
)
|
||||
utils.EzPickle.__init__(self)
|
||||
self.render_active = False
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
@ -169,8 +170,15 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
truncated = False
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return ob, reward, terminated, truncated, infos
|
||||
|
||||
def render(self):
|
||||
self.render_active = True
|
||||
return super().render()
|
||||
|
||||
def _get_obs(self):
|
||||
theta = self.data.qpos.flat[:7].copy()
|
||||
theta_dot = self.data.qvel.flat[:7].copy()
|
||||
|
@ -4,6 +4,7 @@ import numpy as np
|
||||
from gymnasium import utils, spaces
|
||||
from gymnasium.envs.mujoco import MujocoEnv
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import calculate_jerk_profile, calculate_mean_squared_jerk, calculate_dimensionless_jerk, calculate_maximum_jerk
|
||||
@ -62,6 +63,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
|
||||
frame_skip=self.frame_skip,
|
||||
observation_space=self.observation_space, **kwargs)
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
|
||||
self.render_active = False
|
||||
|
||||
def step(self, action):
|
||||
action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
|
||||
@ -116,8 +118,15 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
|
||||
terminated = episode_end and infos['is_success']
|
||||
truncated = episode_end and not infos['is_success']
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return obs, reward, terminated, truncated, infos
|
||||
|
||||
def render(self):
|
||||
self.render_active = True
|
||||
return super().render()
|
||||
|
||||
def calculate_smoothness_metrics(self, velocity_profile, dt):
|
||||
"""
|
||||
Calculates the smoothness metrics for the given velocity profile.
|
||||
|
@ -60,7 +60,11 @@ class HalfCheetahEnvCustomXML(HalfCheetahEnv):
|
||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||
**kwargs,
|
||||
)
|
||||
self.render_active = False
|
||||
|
||||
def render(self):
|
||||
self.render_active = True
|
||||
return super().render()
|
||||
|
||||
class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
|
||||
"""
|
||||
@ -120,6 +124,9 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
|
||||
'max_height': self.max_height
|
||||
}
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
||||
def _get_obs(self):
|
||||
|
@ -88,6 +88,12 @@ class HopperEnvCustomXML(HopperEnv):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
self.render_active = False
|
||||
|
||||
def render(self):
|
||||
self.render_active = True
|
||||
return super().render()
|
||||
|
||||
|
||||
class HopperJumpEnv(HopperEnvCustomXML):
|
||||
"""
|
||||
@ -201,6 +207,10 @@ class HopperJumpEnv(HopperEnvCustomXML):
|
||||
healthy=self.is_healthy,
|
||||
contact_dist=self.contact_dist or 0
|
||||
)
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
||||
def _get_obs(self):
|
||||
|
@ -140,6 +140,9 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
|
||||
|
||||
truncated = self.current_step >= self.max_episode_steps and not terminated
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
||||
def _get_obs(self):
|
||||
|
@ -61,6 +61,8 @@ class HopperThrowEnv(HopperEnvCustomXML):
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
|
||||
self.render_active = False
|
||||
|
||||
def step(self, action):
|
||||
self.current_step += 1
|
||||
self.do_simulation(action, self.frame_skip)
|
||||
@ -94,8 +96,15 @@ class HopperThrowEnv(HopperEnvCustomXML):
|
||||
}
|
||||
truncated = False
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
||||
def render(self):
|
||||
self.render_active = True
|
||||
return super().render()
|
||||
|
||||
def _get_obs(self):
|
||||
return np.append(super()._get_obs(), self.goal)
|
||||
|
||||
|
@ -68,6 +68,7 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
self.render_active = False
|
||||
|
||||
def step(self, action):
|
||||
|
||||
@ -118,8 +119,15 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
|
||||
}
|
||||
truncated = False
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
||||
def render(self):
|
||||
self.render_active = True
|
||||
return super().render()
|
||||
|
||||
def _get_obs(self):
|
||||
return np.append(super()._get_obs(), self.basket_x)
|
||||
|
||||
|
@ -47,6 +47,8 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
|
||||
**kwargs
|
||||
)
|
||||
|
||||
self.render_active = False
|
||||
|
||||
def step(self, action):
|
||||
self._steps += 1
|
||||
|
||||
@ -77,8 +79,15 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
|
||||
goal=self.goal if hasattr(self, "goal") else None
|
||||
)
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return ob, reward, terminated, truncated, info
|
||||
|
||||
def render(self):
    """Render the environment and switch on per-step rendering.

    Raises the ``render_active`` flag, which ``step`` checks (together with
    ``render_mode == 'human'``) before rendering on its own, and then hands
    over to the parent class's ``render``.

    Returns:
        Whatever the parent class's ``render`` returns.
    """
    # Remember that the caller explicitly asked for rendering.
    self.render_active = True
    result = super().render()
    return result
|
||||
|
||||
def distance_reward(self):
    """Return the negative, weighted fingertip-to-target distance.

    The Euclidean norm of the offset between the ``"fingertip"`` and
    ``"target"`` body positions (as reported by ``get_body_com``) is
    multiplied by ``-self._reward_weight``.
    """
    fingertip_pos = self.get_body_com("fingertip")
    target_pos = self.get_body_com("target")
    offset = fingertip_pos - target_pos
    return -self._reward_weight * np.linalg.norm(offset)
|
||||
|
@ -83,6 +83,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
observation_space=self.observation_space,
|
||||
**kwargs)
|
||||
|
||||
self.render_active = False
|
||||
|
||||
if ctxt_dim == 2:
|
||||
self.context_bounds = CONTEXT_BOUNDS_2DIMS
|
||||
elif ctxt_dim == 4:
|
||||
@ -170,8 +172,15 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return self._get_obs(), reward, terminated, truncated, info
|
||||
|
||||
def render(self):
    """Render the environment and switch on per-step rendering.

    Raises the ``render_active`` flag, which ``step`` checks (together with
    ``render_mode == 'human'``) before rendering on its own, and then hands
    over to the parent class's ``render``.

    Returns:
        Whatever the parent class's ``render`` returns.
    """
    # Remember that the caller explicitly asked for rendering.
    self.render_active = True
    result = super().render()
    return result
|
||||
|
||||
def _contact_checker(self, id_1, id_2):
|
||||
for coni in range(0, self.data.ncon):
|
||||
con = self.data.contact[coni]
|
||||
|
@ -79,6 +79,8 @@ class Walker2dEnvCustomXML(Walker2dEnv):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
self.render_active = False
|
||||
|
||||
|
||||
class Walker2dJumpEnv(Walker2dEnvCustomXML):
|
||||
"""
|
||||
@ -145,8 +147,15 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML):
|
||||
}
|
||||
truncated = False
|
||||
|
||||
if self.render_active and self.render_mode=='human':
|
||||
self.render()
|
||||
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
||||
def render(self):
    """Render the environment and switch on per-step rendering.

    Raises the ``render_active`` flag, which ``step`` checks (together with
    ``render_mode == 'human'``) before rendering on its own, and then hands
    over to the parent class's ``render``.

    Returns:
        Whatever the parent class's ``render`` returns.
    """
    # Remember that the caller explicitly asked for rendering.
    self.render_active = True
    result = super().render()
    return result
|
||||
|
||||
def _get_obs(self):
    """Return the parent observation with ``self.goal`` appended.

    ``np.append`` is called without an axis, so the result is a flat array.
    """
    base_obs = super()._get_obs()
    return np.append(base_obs, self.goal)
|
||||
|
||||
|
@ -3,14 +3,14 @@ import fancy_gym
|
||||
|
||||
|
||||
def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False):
|
||||
env = gym.make(env_name)
|
||||
env = gym.make(env_name, render_mode='human' if render else None)
|
||||
env.reset(seed=seed)
|
||||
for i in range(iterations):
|
||||
while True:
|
||||
ac = env.action_space.sample()
|
||||
obs, reward, terminated, truncated, info = env.step(ac)
|
||||
if render:
|
||||
env.render(mode="human")
|
||||
env.render()
|
||||
if terminated or truncated:
|
||||
env.reset()
|
||||
break
|
||||
@ -38,13 +38,13 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
|
||||
'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
|
||||
'condition_on_desired': True}
|
||||
|
||||
base_env = gym.make(base_env_id)
|
||||
base_env = gym.make(base_env_id, render_mode='human' if render else None)
|
||||
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
|
||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
||||
seed=seed)
|
||||
if render:
|
||||
env.render(mode="human")
|
||||
env.render()
|
||||
|
||||
obs = env.reset()
|
||||
|
||||
|
@ -17,7 +17,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T
|
||||
Returns:
|
||||
|
||||
"""
|
||||
env = gym.make(env_id)
|
||||
env = gym.make(env_id, render_mode='human' if render else None)
|
||||
rewards = 0
|
||||
obs = env.reset(seed=seed)
|
||||
print("observation shape:", env.observation_space.shape)
|
||||
@ -26,7 +26,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T
|
||||
for i in range(iterations):
|
||||
ac = env.action_space.sample()
|
||||
if render:
|
||||
env.render(mode="human")
|
||||
env.render()
|
||||
obs, reward, terminated, truncated, info = env.step(ac)
|
||||
rewards += reward
|
||||
|
||||
@ -84,7 +84,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
|
||||
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
||||
# 'num_basis': 5
|
||||
# }
|
||||
base_env = gym.make(base_env_id)
|
||||
base_env = gym.make(base_env_id, render_mode='human' if render else None)
|
||||
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
|
||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
||||
@ -96,7 +96,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
|
||||
# It is also possible to change the mode multiple times when
|
||||
# e.g. only every nth trajectory should be displayed.
|
||||
if render:
|
||||
env.render(mode="human")
|
||||
env.render()
|
||||
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
@ -115,7 +115,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
|
||||
env.close()
|
||||
del env
|
||||
|
||||
def main(render = True):
|
||||
def main(render = False):
|
||||
# # Standard DMC Suite tasks
|
||||
example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
|
||||
#
|
||||
|
@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
|
||||
|
||||
"""
|
||||
|
||||
env = gym.make(env_id)
|
||||
env = gym.make(env_id, render_mode='human' if render else None)
|
||||
rewards = 0
|
||||
obs = env.reset(seed=seed)
|
||||
print("Observation shape: ", env.observation_space.shape)
|
||||
@ -85,7 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16),
|
||||
# do not return values above threshold
|
||||
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
|
||||
|
||||
def main(render = True):
|
||||
def main(render = False):
|
||||
# Basic gym task
|
||||
example_general("Pendulum-v1", seed=10, iterations=200, render=render)
|
||||
|
||||
|
@ -2,7 +2,7 @@ import gymnasium as gym
|
||||
import fancy_gym
|
||||
|
||||
|
||||
def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
|
||||
def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, render=True):
|
||||
"""
|
||||
Example for running a MetaWorld based env in the step based setting.
|
||||
The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
|
||||
@ -18,7 +18,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
env = gym.make(env_id)
|
||||
env = gym.make(env_id, render_mode='human' if render else None)
|
||||
rewards = 0
|
||||
obs = env.reset(seed=seed)
|
||||
print("observation shape:", env.observation_space.shape)
|
||||
@ -27,9 +27,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
|
||||
for i in range(iterations):
|
||||
ac = env.action_space.sample()
|
||||
if render:
|
||||
# THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
|
||||
# TODO: Remove this, when Metaworld fixes its interface.
|
||||
env.render(False)
|
||||
env.render()
|
||||
obs, reward, terminated, truncated, info = env.step(ac)
|
||||
rewards += reward
|
||||
if terminated or truncated:
|
||||
@ -81,7 +79,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
|
||||
basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
||||
'num_basis': 5
|
||||
}
|
||||
base_env = gym.make(base_env_id)
|
||||
base_env = gym.make(base_env_id, render_mode='human' if render else None)
|
||||
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
|
||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
||||
@ -93,7 +91,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
|
||||
# It is also possible to change the mode multiple times when
|
||||
# e.g. only every nth trajectory should be displayed.
|
||||
if render:
|
||||
env.render(mode="human")
|
||||
env.render()
|
||||
|
||||
rewards = 0
|
||||
obs = env.reset(seed=seed)
|
||||
|
@ -13,15 +13,13 @@ def example_mp(env_name, seed=1, render=True):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
env = gym.make(env_name)
|
||||
env = gym.make(env_name, render_mode='human' if render else None)
|
||||
|
||||
returns = 0
|
||||
obs = env.reset(seed=seed)
|
||||
# number of samples/full trajectories (multiple environment steps)
|
||||
for i in range(10):
|
||||
if render and i % 2 == 0:
|
||||
env.render(mode="human")
|
||||
else:
|
||||
env.render()
|
||||
ac = env.action_space.sample()
|
||||
obs, reward, terminated, truncated, info = env.step(ac)
|
||||
|
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "fancy_gym"
|
||||
version = "0.1.4"
|
||||
version = "0.3.0"
|
||||
description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches."
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
@ -26,6 +26,7 @@ classifiers = [
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"toml",
|
||||
"mp_pytorch<=0.1.3",
|
||||
"mujoco==2.3.3",
|
||||
"gymnasium[mujoco]>=0.26.0"
|
||||
|
7
setup.py
7
setup.py
@ -1,5 +1,6 @@
|
||||
# We still provide a setup.py for backwards compatibility.
|
||||
# But the pyproject.toml should be preferred.
|
||||
import toml
|
||||
import itertools
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
@ -8,6 +9,9 @@ from setuptools import setup, find_packages
|
||||
|
||||
print('[!] You are currently installing/building fancy_gym via setup.py. This is only provided for backwards-compatability. Please use the pyproject.toml instead.')
|
||||
|
||||
pyproject_content = toml.load("pyproject.toml")
|
||||
project_version = pyproject_content["project"]["version"]
|
||||
|
||||
# Environment-specific dependencies for dmc and metaworld
|
||||
extras = {
|
||||
'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'],
|
||||
@ -38,7 +42,7 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]:
|
||||
setup(
|
||||
author='Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou',
|
||||
name='fancy_gym',
|
||||
version='0.1.0',
|
||||
version=project_version,
|
||||
classifiers=[
|
||||
'Development Status :: 4 - Beta',
|
||||
'Intended Audience :: Science/Research',
|
||||
@ -55,6 +59,7 @@ setup(
|
||||
],
|
||||
extras_require=extras,
|
||||
install_requires=[
|
||||
'toml',
|
||||
'mp_pytorch<=0.1.3',
|
||||
'mujoco==2.3.3',
|
||||
'gymnasium[mujoco]>=0.26.0'
|
||||
|
Loading…
Reference in New Issue
Block a user