Compare commits

..

No commits in common. "master" and "fix_readme_links" have entirely different histories.

75 changed files with 682 additions and 1230 deletions

View File

@ -0,0 +1,26 @@
# Fails pull requests that target the release branch unless a git tag points
# at the PR's base commit (the SHA reported as pull_request.base.sha).
name: Ensure Tagged Commits on Release
on:
  pull_request:
    branches:
      - release
jobs:
  check_tag:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          # Fetch the full history so that all tags are available locally.
          fetch-depth: 0
      - name: Check if base commit of PR is tagged
        run: |
          # Read the base commit SHA from the webhook event payload.
          BASE_COMMIT=$(jq -r .pull_request.base.sha < "$GITHUB_EVENT_PATH")
          # --points-at lists only tags placed on exactly this commit;
          # --contains would also list tags sitting on any descendant commit.
          TAG=$(git tag --points-at "$BASE_COMMIT")
          if [ -z "$TAG" ]; then
            echo "Base commit of PR is not tagged. PRs onto release must be tagged with the version number."
            exit 1
          fi
          echo "Base commit of PR is tagged. Check passed."

View File

@ -1,52 +0,0 @@
# On pull requests that target the release branch, checks that the PR's head
# commit carries a git tag and that the tag equals the project version
# declared in pyproject.toml.
name: Ensure Version Consistency on PR to Release
on:
  pull_request:
    branches:
      - release
jobs:
  check_version_and_tag:
    runs-on: ubuntu-latest
    # No `strategy.fail-fast` here: that setting only governs matrix jobs, and
    # a failing step already stops the remaining steps of this job by default.
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          # Necessary to fetch all tags for comparison.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install toml
      - name: Extract version from pyproject.toml
        run: |
          echo "Extracting version from pyproject.toml"
          VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])')
          echo "Version in pyproject.toml is $VERSION"
          # Export VERSION to the following steps.
          echo "VERSION=$VERSION" >> "$GITHUB_ENV"
      - name: Get tag for the PR's head commit
        run: |
          # Read the head commit SHA from the webhook event payload.
          PR_HEAD_SHA=$(jq -r .pull_request.head.sha < "$GITHUB_EVENT_PATH")
          # --points-at lists only tags placed on exactly this commit;
          # --contains would also list tags sitting on any descendant commit.
          # NOTE(review): assumes at most one tag points at the head commit;
          # several tags would yield a multi-line TAG value - confirm this
          # cannot happen in the release process.
          TAG=$(git tag --points-at "$PR_HEAD_SHA")
          echo "Tag on PR's head commit is $TAG"
          # Export TAG to the following steps.
          echo "TAG=$TAG" >> "$GITHUB_ENV"
      - name: Compare version and tag
        run: |
          if [ -z "$TAG" ]; then
            echo "Head commit of PR is not tagged. Ensure the head commit of PRs onto release is tagged with the version number."
            exit 1
          elif [ "$VERSION" != "$TAG" ]; then
            echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG)."
            exit 1
          else
            echo "Version and git tag match. Check passed."
          fi

View File

@ -1,40 +0,0 @@
# Uploads the docs/build/html directory from the checkout as a Pages artifact
# and deploys it to GitHub Pages.
name: Deploy static docs to Pages
on:
  push:
    branches:
      - release
  # Also allow manual runs from the Actions tab.
  workflow_dispatch:
# GITHUB_TOKEN permissions needed to deploy to GitHub Pages.
permissions:
  contents: read
  pages: write
  id-token: write
# Allow a single concurrent deployment: runs queued between the in-progress
# run and the latest queued run are skipped, but an in-progress run is never
# cancelled so that production deployments can complete.
concurrency:
  group: pages
  cancel-in-progress: false
jobs:
  # One job is enough, since this workflow only deploys.
  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Pages
        uses: actions/configure-pages@v4
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs/build/html
      - name: Deploy to GitHub Pages
        # The id is referenced by the environment url expression above.
        id: deployment
        uses: actions/deploy-pages@v4

View File

@ -8,8 +8,6 @@ on:
jobs: jobs:
publish: publish:
name: Publish to PyPI name: Publish to PyPI
strategy:
fail-fast: true # Terminate the job immediately if any step fails
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Check out code - name: Check out code
@ -17,24 +15,19 @@ jobs:
with: with:
fetch-depth: 0 # This fetches all history for all branches and tags fetch-depth: 0 # This fetches all history for all branches and tags
- name: Set up Python - name: Check if commit is tagged
uses: actions/setup-python@v4
with:
python-version: "3.x"
- name: Validate version against tag
run: | run: |
VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])')
TAG=$(git tag --contains HEAD) TAG=$(git tag --contains HEAD)
if [ -z "$TAG" ]; then if [ -z "$TAG" ]; then
echo "Commit is not tagged. Failing the workflow." echo "Commit is not tagged. Failing the workflow."
exit 1 exit 1
fi fi
if [ "$VERSION" != "$TAG" ]; then echo "Commit is tagged. Proceeding with the workflow."
echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG). Failing the workflow."
exit 1 - name: Set up Python
fi uses: actions/setup-python@v4
echo "Version and commit tag match. Proceeding with the workflow." with:
python-version: "3.x"
- name: Install pypa/build/setuptools/twine - name: Install pypa/build/setuptools/twine
run: >- run: >-
@ -43,6 +36,9 @@ jobs:
build setuptools twine build setuptools twine
--user --user
- name: Prevent fallback onto setup.py
run: rm setup.py
- name: Build a binary wheel and a source tarball - name: Build a binary wheel and a source tarball
run: python3 -m build run: python3 -m build

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,4 +1,4 @@
# Sphinx build info version 1 # Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 36919d67c12a677d3f16f60d980b0313 config: 28ec069496fc0ad05c8b9641549626a6
tags: 645f666f9bcd5a90fca523b33c5a78b7 tags: 645f666f9bcd5a90fca523b33c5a78b7

View File

@ -3,7 +3,7 @@
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.envs.registry &mdash; Fancy Gym 0.3.0 documentation</title> <title>fancy_gym.envs.registry &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../../../_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="../../../_static/icon.svg" class="logo" alt="Logo"/> <img src="../../../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">

View File

@ -3,7 +3,7 @@
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Overview: module code &mdash; Fancy Gym 0.3.0 documentation</title> <title>Overview: module code &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -18,12 +18,6 @@ A composite reward function serves as the performance metric for the RL system.
Variations of this environment are available, differing in reward structures and the optionality of randomizing the box's initial position. These variations are purposefully designed to challenge RL algorithms, enhancing their generalization and adaptation capabilities. Temporally sparse environments only provide a reward at the last timestep. Spatially sparse environments only provide a reward, if the goal is almost reached, the box is close enough to the goal and somewhat correctly aligned. Variations of this environment are available, differing in reward structures and the optionality of randomizing the box's initial position. These variations are purposefully designed to challenge RL algorithms, enhancing their generalization and adaptation capabilities. Temporally sparse environments only provide a reward at the last timestep. Spatially sparse environments only provide a reward, if the goal is almost reached, the box is close enough to the goal and somewhat correctly aligned.
These environments all provide smoothness metrics as part of the return infos:
- mean_squared_jerk: Averages the square of jerk (rate of acceleration change) across the motion. Lower values indicate smoother movement.
- maximum_jerk: Identifies the highest jerk value encountered.
- dimensionless_jerk: Normalizes the summed squared jerk over the motion's duration and peak velocity, offering a scale-independent metric of smoothness
| Name | Description | Horizon | Action Dimension | Observation Dimension | | Name | Description | Horizon | Action Dimension | Observation Dimension |
| ------------------------------------------ | -------------------------------------------------------------------- | ------- | ---------------- | --------------------- | | ------------------------------------------ | -------------------------------------------------------------------- | ------- | ---------------- | --------------------- |
| `fancy/BoxPushingDense-v0` | Custom Box-pushing task with dense rewards | 100 | 3 | 13 | | `fancy/BoxPushingDense-v0` | Custom Box-pushing task with dense rewards | 100 | 3 | 13 |
@ -55,9 +49,6 @@ Variations of the table tennis environment are available to cater to different r
| `fancy/TableTennisWind-v0` | Table Tennis task with wind effects, based on a custom environment for table tennis | 350 | 7 | 19 | | `fancy/TableTennisWind-v0` | Table Tennis task with wind effects, based on a custom environment for table tennis | 350 | 7 | 19 |
| `fancy/TableTennisGoalSwitching-v0` | Table Tennis task with goal switching, based on a custom environment for table tennis | 350 | 7 | 19 | | `fancy/TableTennisGoalSwitching-v0` | Table Tennis task with goal switching, based on a custom environment for table tennis | 350 | 7 | 19 |
| `fancy/TableTennisWindReplan-v0` | Table Tennis task with wind effects and replanning, based on a custom environment for table tennis | 350 | 7 | 19 | | `fancy/TableTennisWindReplan-v0` | Table Tennis task with wind effects and replanning, based on a custom environment for table tennis | 350 | 7 | 19 |
| `fancy/TableTennisRndRobot-v0` | Table Tennis task with random initial robot joint positions \* | 350 | 7 | 19 |
\* Random initialization of robot joint position and speed can be enabled by providing `random_pos_scale` / `random_vel_scale` to make. `TableTennisRndRobot` is equivalent to `TableTennis4D` except, that `random_pos_scale` is set to 0.1 instead of 0 per default.
--- ---
@ -98,9 +89,8 @@ A successful throw in this task is determined by the ball landing in the cup at
| `fancy/Reacher5dSparse-v0` | Sparse Reacher task with 5 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 5 | 20 | | `fancy/Reacher5dSparse-v0` | Sparse Reacher task with 5 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 5 | 20 |
| `fancy/Reacher7d-v0` | Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 | | `fancy/Reacher7d-v0` | Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 |
| `fancy/Reacher7dSparse-v0` | Sparse Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 | | `fancy/Reacher7dSparse-v0` | Sparse Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 |
| `fancy/HopperJump-v0` | Hopper Jump task with continuous rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* |
| `fancy/HopperJumpMarkov-v0` | `fancy/HopperJump-v0`, but with an alternative reward that is markovian. | 250 | 3 | 15 / 16\* |
| `fancy/HopperJumpSparse-v0` | Hopper Jump task with sparse rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* | | `fancy/HopperJumpSparse-v0` | Hopper Jump task with sparse rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* |
| `fancy/HopperJump-v0` | Hopper Jump task with continuous rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* |
| `fancy/AntJump-v0` | Ant Jump task, based on Gymnasium's `gym.envs.mujoco.Ant` | 200 | 8 | 119 | | `fancy/AntJump-v0` | Ant Jump task, based on Gymnasium's `gym.envs.mujoco.Ant` | 200 | 8 | 119 |
| `fancy/HalfCheetahJump-v0` | HalfCheetah Jump task, based on Gymnasium's `gym.envs.mujoco.HalfCheetah` | 100 | 6 | 112 | | `fancy/HalfCheetahJump-v0` | HalfCheetah Jump task, based on Gymnasium's `gym.envs.mujoco.HalfCheetah` | 100 | 6 | 112 |
| `fancy/HopperJumpOnBox-v0` | Hopper Jump on Box task, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 4 | 16 / 100\* | | `fancy/HopperJumpOnBox-v0` | Hopper Jump on Box task, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 4 | 16 / 100\* |

View File

@ -32,7 +32,7 @@ since they are not avaible on PyPI yet. Install metaworld via
.. code:: bash .. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
Installation from master Installation from master
~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
.. code:: bash .. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld

View File

@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = { var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
VERSION: '0.3.0', VERSION: '0.2',
LANGUAGE: 'en', LANGUAGE: 'en',
COLLAPSE_INDEX: false, COLLAPSE_INDEX: false,
BUILDER: 'html', BUILDER: 'html',

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>API &mdash; Fancy Gym 0.3.0 documentation</title> <title>API &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" /> <link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/> <img src="_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DeepMind Control (DMC) &mdash; Fancy Gym 0.3.0 documentation</title> <title>DeepMind Control (DMC) &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AirHockey &mdash; Fancy Gym 0.3.0 documentation</title> <title>AirHockey &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/> <img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Classic Control &mdash; Fancy Gym 0.3.0 documentation</title> <title>Classic Control &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/> <img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Fancy &mdash; Fancy Gym 0.3.0 documentation</title> <title>Fancy &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/> <img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Mujoco &mdash; Fancy Gym 0.3.0 documentation</title> <title>Mujoco &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../../_static/icon.svg" class="logo" alt="Logo"/> <img src="../../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
@ -135,12 +135,6 @@
<p>The observation space includes the sine and cosine values of the robotic joint angles, their velocities, and quaternion orientations for the end-effector and the box. The action space describes the applied torques for each joint.</p> <p>The observation space includes the sine and cosine values of the robotic joint angles, their velocities, and quaternion orientations for the end-effector and the box. The action space describes the applied torques for each joint.</p>
<p>A composite reward function serves as the performance metric for the RL system. It accounts for the distance to the goal, the box's orientation, maintaining a rod within the box, achieving the rod's desired orientation, and includes penalties for joint position and velocity limit violations, as well as an action cost for energy expenditure.</p> <p>A composite reward function serves as the performance metric for the RL system. It accounts for the distance to the goal, the box's orientation, maintaining a rod within the box, achieving the rod's desired orientation, and includes penalties for joint position and velocity limit violations, as well as an action cost for energy expenditure.</p>
<p>Variations of this environment are available, differing in reward structures and the optionality of randomizing the box's initial position. These variations are purposefully designed to challenge RL algorithms, enhancing their generalization and adaptation capabilities. Temporally sparse environments only provide a reward at the last timestep. Spatially sparse environments only provide a reward, if the goal is almost reached, the box is close enough to the goal and somewhat correctly aligned.</p> <p>Variations of this environment are available, differing in reward structures and the optionality of randomizing the box's initial position. These variations are purposefully designed to challenge RL algorithms, enhancing their generalization and adaptation capabilities. Temporally sparse environments only provide a reward at the last timestep. Spatially sparse environments only provide a reward, if the goal is almost reached, the box is close enough to the goal and somewhat correctly aligned.</p>
<p>These environments all provide smoothness metrics as part of the return infos:</p>
<ul class="simple">
<li><p>mean_squared_jerk: Averages the square of jerk (rate of acceleration change) across the motion. Lower values indicate smoother movement.</p></li>
<li><p>maximum_jerk: Identifies the highest jerk value encountered.</p></li>
<li><p>dimensionless_jerk: Normalizes the summed squared jerk over the motions duration and peak velocity, offering a scale-independent metric of smoothness</p></li>
</ul>
<table class="docutils align-default"> <table class="docutils align-default">
<thead> <thead>
<tr class="row-odd"><th class="head"><p>Name</p></th> <tr class="row-odd"><th class="head"><p>Name</p></th>
@ -234,15 +228,8 @@
<td><p>7</p></td> <td><p>7</p></td>
<td><p>19</p></td> <td><p>19</p></td>
</tr> </tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/TableTennisRndRobot-v0</span></code></p></td>
<td><p>Table Tennis task with random initial robot joint positions *</p></td>
<td><p>350</p></td>
<td><p>7</p></td>
<td><p>19</p></td>
</tr>
</tbody> </tbody>
</table> </table>
<p>* Random initialization of robot joint position and speed can be enabled by providing <code class="docutils literal notranslate"><span class="pre">random_pos_scale</span></code> / <code class="docutils literal notranslate"><span class="pre">random_vel_scale</span></code> to make. <code class="docutils literal notranslate"><span class="pre">TableTennisRndRobot</span></code> is equivalent to <code class="docutils literal notranslate"><span class="pre">TableTennis4D</span></code> except, that <code class="docutils literal notranslate"><span class="pre">random_pos_scale</span></code> is set to 0.1 instead of 0 per default.</p>
</section> </section>
<hr class="docutils" /> <hr class="docutils" />
<section id="beer-pong"> <section id="beer-pong">
@ -348,55 +335,49 @@
<td><p>7</p></td> <td><p>7</p></td>
<td><p>22</p></td> <td><p>22</p></td>
</tr> </tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJump-v0</span></code></p></td>
<td><p>Hopper Jump task with continuous rewards, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td>
<td><p>250</p></td>
<td><p>3</p></td>
<td><p>15 / 16*</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJumpMarkov-v0</span></code></p></td>
<td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJump-v0</span></code>, but with an alternative reward that is markovian.</p></td>
<td><p>250</p></td>
<td><p>3</p></td>
<td><p>15 / 16*</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJumpSparse-v0</span></code></p></td> <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJumpSparse-v0</span></code></p></td>
<td><p>Hopper Jump task with sparse rewards, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td> <td><p>Hopper Jump task with sparse rewards, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td>
<td><p>250</p></td> <td><p>250</p></td>
<td><p>3</p></td> <td><p>3</p></td>
<td><p>15 / 16*</p></td> <td><p>15 / 16*</p></td>
</tr> </tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/AntJump-v0</span></code></p></td> <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJump-v0</span></code></p></td>
<td><p>Hopper Jump task with continuous rewards, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td>
<td><p>250</p></td>
<td><p>3</p></td>
<td><p>15 / 16*</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/AntJump-v0</span></code></p></td>
<td><p>Ant Jump task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Ant</span></code></p></td> <td><p>Ant Jump task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Ant</span></code></p></td>
<td><p>200</p></td> <td><p>200</p></td>
<td><p>8</p></td> <td><p>8</p></td>
<td><p>119</p></td> <td><p>119</p></td>
</tr> </tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HalfCheetahJump-v0</span></code></p></td> <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HalfCheetahJump-v0</span></code></p></td>
<td><p>HalfCheetah Jump task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.HalfCheetah</span></code></p></td> <td><p>HalfCheetah Jump task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.HalfCheetah</span></code></p></td>
<td><p>100</p></td> <td><p>100</p></td>
<td><p>6</p></td> <td><p>6</p></td>
<td><p>112</p></td> <td><p>112</p></td>
</tr> </tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJumpOnBox-v0</span></code></p></td> <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperJumpOnBox-v0</span></code></p></td>
<td><p>Hopper Jump on Box task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td> <td><p>Hopper Jump on Box task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td>
<td><p>250</p></td> <td><p>250</p></td>
<td><p>4</p></td> <td><p>4</p></td>
<td><p>16 / 100*</p></td> <td><p>16 / 100*</p></td>
</tr> </tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperThrow-v0</span></code></p></td> <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperThrow-v0</span></code></p></td>
<td><p>Hopper Throw task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td> <td><p>Hopper Throw task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td>
<td><p>250</p></td> <td><p>250</p></td>
<td><p>3</p></td> <td><p>3</p></td>
<td><p>18 / 100*</p></td> <td><p>18 / 100*</p></td>
</tr> </tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperThrowInBasket-v0</span></code></p></td> <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/HopperThrowInBasket-v0</span></code></p></td>
<td><p>Hopper Throw in Basket task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td> <td><p>Hopper Throw in Basket task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Hopper</span></code></p></td>
<td><p>250</p></td> <td><p>250</p></td>
<td><p>3</p></td> <td><p>3</p></td>
<td><p>18 / 100*</p></td> <td><p>18 / 100*</p></td>
</tr> </tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/Walker2DJump-v0</span></code></p></td> <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">fancy/Walker2DJump-v0</span></code></p></td>
<td><p>Walker 2D Jump task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Walker2d</span></code></p></td> <td><p>Walker 2D Jump task, based on Gymnasiums <code class="docutils literal notranslate"><span class="pre">gym.envs.mujoco.Walker2d</span></code></p></td>
<td><p>300</p></td> <td><p>300</p></td>
<td><p>6</p></td> <td><p>6</p></td>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Metaworld &mdash; Fancy Gym 0.3.0 documentation</title> <title>Metaworld &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Gymnasium &mdash; Fancy Gym 0.3.0 documentation</title> <title>Gymnasium &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DeepMind Control Examples &mdash; Fancy Gym 0.3.0 documentation</title> <title>DeepMind Control Examples &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -126,7 +126,7 @@
<span class="linenos"> 17</span><span class="sd"> Returns:</span> <span class="linenos"> 17</span><span class="sd"> Returns:</span>
<span class="linenos"> 18</span> <span class="linenos"> 18</span>
<span class="linenos"> 19</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos"> 19</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos"> 20</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 21</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos"> 21</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 22</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos"> 22</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 23</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="linenos"> 23</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -135,7 +135,7 @@
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 27</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos"> 27</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos"> 30</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos"> 30</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 31</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos"> 31</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 32</span> <span class="linenos"> 32</span>
@ -193,68 +193,58 @@
<span class="linenos"> 84</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;rbf&#39;,</span> <span class="linenos"> 84</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;rbf&#39;,</span>
<span class="linenos"> 85</span> <span class="c1"># &#39;num_basis&#39;: 5</span> <span class="linenos"> 85</span> <span class="c1"># &#39;num_basis&#39;: 5</span>
<span class="linenos"> 86</span> <span class="c1"># }</span> <span class="linenos"> 86</span> <span class="c1"># }</span>
<span class="linenos"> 87</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos"> 87</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 88</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span> <span class="linenos"> 88</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 89</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span> <span class="linenos"> 89</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 90</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span> <span class="linenos"> 90</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 91</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos"> 91</span>
<span class="linenos"> 92</span> <span class="linenos"> 92</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 93</span> <span class="c1"># This renders the full MP trajectory</span> <span class="linenos"> 93</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 94</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span> <span class="linenos"> 94</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 95</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span> <span class="linenos"> 95</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
<span class="linenos"> 96</span> <span class="c1"># It is also possible to change them mode multiple times when</span> <span class="linenos"> 96</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 97</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span> <span class="linenos"> 97</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 98</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos"> 98</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos"> 99</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos"> 99</span>
<span class="linenos">100</span> <span class="linenos">100</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">101</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">102</span>
<span class="linenos">103</span> <span class="linenos">103</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span> <span class="linenos">104</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos">105</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">106</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">107</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos">108</span>
<span class="linenos">109</span> <span class="linenos">109</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">110</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span> <span class="linenos">111</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">112</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">113</span>
<span class="linenos">114</span> <span class="linenos">114</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos">115</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span> <span class="linenos">116</span>
<span class="linenos">117</span> <span class="linenos">117</span>
<span class="linenos">118</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span> <span class="linenos">118</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">119</span> <span class="c1"># # Standard DMC Suite tasks</span> <span class="linenos">119</span> <span class="c1"># Disclaimer: DMC environments require the seed to be specified in the beginning.</span>
<span class="linenos">120</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">120</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.</span>
<span class="linenos">121</span> <span class="c1">#</span> <span class="linenos">121</span>
<span class="linenos">122</span> <span class="c1"># # Manipulation tasks</span> <span class="linenos">122</span> <span class="c1"># For rendering DMC</span>
<span class="linenos">123</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span> <span class="linenos">123</span> <span class="c1"># export MUJOCO_GL=&quot;osmesa&quot;</span>
<span class="linenos">124</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">124</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
<span class="linenos">125</span> <span class="c1">#</span> <span class="linenos">125</span>
<span class="linenos">126</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span> <span class="linenos">126</span> <span class="c1"># # Standard DMC Suite tasks</span>
<span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">127</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">128</span> <span class="linenos">128</span> <span class="c1">#</span>
<span class="linenos">129</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span> <span class="linenos">129</span> <span class="c1"># # Manipulation tasks</span>
<span class="linenos">130</span> <span class="c1"># already registered above</span> <span class="linenos">130</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span>
<span class="linenos">131</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">131</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/manipulation-reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span> <span class="linenos">132</span> <span class="c1">#</span>
<span class="linenos">133</span> <span class="c1"># # Standard DMC Suite tasks</span> <span class="linenos">133</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">134</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">135</span> <span class="c1">#</span> <span class="linenos">135</span>
<span class="linenos">136</span> <span class="c1"># # Manipulation tasks</span> <span class="linenos">136</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">137</span> <span class="c1"># # Disclaimer: The vision versions are currently not integrated and yield an error</span> <span class="linenos">137</span> <span class="c1"># already registered above</span>
<span class="linenos">138</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control/reach_site_features&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">250</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">138</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">139</span> <span class="c1">#</span>
<span class="linenos">140</span> <span class="c1"># # Gym + DMC hybrid task provided in the MP framework</span>
<span class="linenos">141</span> <span class="n">example_dmc</span><span class="p">(</span><span class="s2">&quot;dm_control_ProMP/ball_in_cup-catch-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">142</span>
<span class="linenos">143</span> <span class="c1"># Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is</span>
<span class="linenos">144</span> <span class="c1"># already registered above</span>
<span class="linenos">145</span> <span class="n">example_custom_dmc_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">146</span>
<span class="linenos">147</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">148</span> <span class="n">main</span><span class="p">()</span>
</pre></div> </pre></div>
</div> </div>
</section> </section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>General Usage Examples &mdash; Fancy Gym 0.3.0 documentation</title> <title>General Usage Examples &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -130,7 +130,7 @@
<span class="linenos"> 21</span> <span class="linenos"> 21</span>
<span class="linenos"> 22</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos"> 22</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 23</span> <span class="linenos"> 23</span>
<span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos"> 24</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 25</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos"> 25</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 26</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos"> 26</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 27</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Observation shape: &quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="linenos"> 27</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Observation shape: &quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -194,21 +194,21 @@
<span class="linenos"> 85</span> <span class="c1"># do not return values above threshold</span> <span class="linenos"> 85</span> <span class="c1"># do not return values above threshold</span>
<span class="linenos"> 86</span> <span class="k">return</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">v</span><span class="p">)[:</span><span class="n">n_samples</span><span class="p">],</span> <span class="n">buffer</span><span class="o">.</span><span class="n">values</span><span class="p">()),</span> <span class="linenos"> 86</span> <span class="k">return</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">v</span><span class="p">)[:</span><span class="n">n_samples</span><span class="p">],</span> <span class="n">buffer</span><span class="o">.</span><span class="n">values</span><span class="p">()),</span>
<span class="linenos"> 87</span> <span class="linenos"> 87</span>
<span class="linenos"> 88</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span> <span class="linenos"> 88</span>
<span class="linenos"> 89</span> <span class="c1"># Basic gym task</span> <span class="linenos"> 89</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos"> 90</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;Pendulum-v1&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos"> 90</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">True</span>
<span class="linenos"> 91</span> <span class="linenos"> 91</span>
<span class="linenos"> 92</span> <span class="c1"># Mujoco task from framework</span> <span class="linenos"> 92</span> <span class="c1"># Basic gym task</span>
<span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;fancy/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos"> 93</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;Pendulum-v1&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 94</span> <span class="linenos"> 94</span>
<span class="linenos"> 95</span> <span class="c1"># # OpenAI Mujoco task</span> <span class="linenos"> 95</span> <span class="c1"># Mujoco task from framework</span>
<span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;HalfCheetah-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos"> 96</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;fancy/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos"> 97</span> <span class="linenos"> 97</span>
<span class="linenos"> 98</span> <span class="c1"># Vectorized multiprocessing environments</span> <span class="linenos"> 98</span> <span class="c1"># # OpenAI Mujoco task</span>
<span class="linenos"> 99</span> <span class="c1"># example_async(env_id=&quot;HoleReacher-v0&quot;, n_cpu=2, seed=int(&#39;533D&#39;, 16), n_samples=2 * 200)</span> <span class="linenos"> 99</span> <span class="n">example_general</span><span class="p">(</span><span class="s2">&quot;HalfCheetah-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">100</span> <span class="linenos">100</span>
<span class="linenos">101</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span> <span class="linenos">101</span> <span class="c1"># Vectorized multiprocessing environments</span>
<span class="linenos">102</span> <span class="n">main</span><span class="p">()</span> <span class="linenos">102</span> <span class="c1"># example_async(env_id=&quot;HoleReacher-v0&quot;, n_cpu=2, seed=int(&#39;533D&#39;, 16), n_samples=2 * 200)</span>
</pre></div> </pre></div>
</div> </div>
</section> </section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Metaworld Examples &mdash; Fancy Gym 0.3.0 documentation</title> <title>Metaworld Examples &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -111,7 +111,7 @@
<span class="linenos"> 2</span><span class="kn">import</span> <span class="nn">fancy_gym</span> <span class="linenos"> 2</span><span class="kn">import</span> <span class="nn">fancy_gym</span>
<span class="linenos"> 3</span> <span class="linenos"> 3</span>
<span class="linenos"> 4</span> <span class="linenos"> 4</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_meta</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="s2">&quot;fish-swim&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 6</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span> <span class="linenos"> 6</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 7</span><span class="sd"> Example for running a MetaWorld based env in the step based setting.</span> <span class="linenos"> 7</span><span class="sd"> Example for running a MetaWorld based env in the step based setting.</span>
<span class="linenos"> 8</span><span class="sd"> The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always</span> <span class="linenos"> 8</span><span class="sd"> The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always</span>
@ -127,7 +127,7 @@
<span class="linenos"> 18</span><span class="sd"> Returns:</span> <span class="linenos"> 18</span><span class="sd"> Returns:</span>
<span class="linenos"> 19</span> <span class="linenos"> 19</span>
<span class="linenos"> 20</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos"> 20</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos"> 21</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_id</span><span class="p">)</span>
<span class="linenos"> 22</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos"> 22</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 23</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos"> 23</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 24</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="linenos"> 24</span> <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;observation shape:&quot;</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">observation_space</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
@ -136,104 +136,111 @@
<span class="linenos"> 27</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos"> 27</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 28</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos"> 28</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 29</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos"> 29</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 30</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos"> 30</span> <span class="c1"># THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM</span>
<span class="linenos"> 31</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos"> 31</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
<span class="linenos"> 32</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos"> 32</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="linenos"> 33</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos"> 33</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 34</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span> <span class="linenos"> 34</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 35</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos"> 35</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 36</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span> <span class="linenos"> 36</span> <span class="nb">print</span><span class="p">(</span><span class="n">env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos"> 37</span> <span class="linenos"> 37</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 38</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos"> 38</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 39</span> <span class="k">del</span> <span class="n">env</span> <span class="linenos"> 39</span>
<span class="linenos"> 40</span> <span class="linenos"> 40</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 41</span> <span class="linenos"> 41</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos"> 42</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="linenos"> 42</span>
<span class="linenos"> 43</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span> <span class="linenos"> 43</span>
<span class="linenos"> 44</span><span class="sd"> Example for running a custom movement primitive based environments.</span> <span class="linenos"> 44</span><span class="k">def</span> <span class="nf">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 45</span><span class="sd"> Our already registered environments follow the same structure.</span> <span class="linenos"> 45</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 46</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span> <span class="linenos"> 46</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos"> 47</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span> <span class="linenos"> 47</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 48</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span> <span class="linenos"> 48</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 49</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span> <span class="linenos"> 49</span><span class="sd"> Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.</span>
<span class="linenos"> 50</span><span class="sd"> Args:</span> <span class="linenos"> 50</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks)</span>
<span class="linenos"> 51</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span> <span class="linenos"> 51</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 52</span><span class="sd"> iterations: Number of rollout steps to run</span> <span class="linenos"> 52</span><span class="sd"> Args:</span>
<span class="linenos"> 53</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span> <span class="linenos"> 53</span><span class="sd"> seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 54</span> <span class="linenos"> 54</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 55</span><span class="sd"> Returns:</span> <span class="linenos"> 55</span><span class="sd"> render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)</span>
<span class="linenos"> 56</span> <span class="linenos"> 56</span>
<span class="linenos"> 57</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos"> 57</span><span class="sd"> Returns:</span>
<span class="linenos"> 58</span> <span class="linenos"> 58</span>
<span class="linenos"> 59</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span> <span class="linenos"> 59</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 60</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;metaworld/button-press-v2&quot;</span> <span class="linenos"> 60</span>
<span class="linenos"> 61</span> <span class="linenos"> 61</span> <span class="c1"># Base MetaWorld name, according to structure of above example</span>
<span class="linenos"> 62</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span> <span class="linenos"> 62</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;metaworld/button-press-v2&quot;</span>
<span class="linenos"> 63</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span> <span class="linenos"> 63</span>
<span class="linenos"> 64</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span> <span class="linenos"> 64</span> <span class="c1"># Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.</span>
<span class="linenos"> 65</span> <span class="c1"># # For a ProMP</span> <span class="linenos"> 65</span> <span class="c1"># You can also add other gym.Wrappers in case they are needed.</span>
<span class="linenos"> 66</span> <span class="c1"># trajectory_generator_kwargs = {&#39;trajectory_generator_type&#39;: &#39;promp&#39;}</span> <span class="linenos"> 66</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">meta</span><span class="o">.</span><span class="n">goal_object_change_mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos"> 67</span> <span class="c1"># phase_generator_kwargs = {&#39;phase_generator_type&#39;: &#39;linear&#39;}</span> <span class="linenos"> 67</span> <span class="c1"># # For a ProMP</span>
<span class="linenos"> 68</span> <span class="c1"># controller_kwargs = {&#39;controller_type&#39;: &#39;metaworld&#39;}</span> <span class="linenos"> 68</span> <span class="c1"># trajectory_generator_kwargs = {&#39;trajectory_generator_type&#39;: &#39;promp&#39;}</span>
<span class="linenos"> 69</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;zero_rbf&#39;,</span> <span class="linenos"> 69</span> <span class="c1"># phase_generator_kwargs = {&#39;phase_generator_type&#39;: &#39;linear&#39;}</span>
<span class="linenos"> 70</span> <span class="c1"># &#39;num_basis&#39;: 5,</span> <span class="linenos"> 70</span> <span class="c1"># controller_kwargs = {&#39;controller_type&#39;: &#39;metaworld&#39;}</span>
<span class="linenos"> 71</span> <span class="c1"># &#39;num_basis_zero_start&#39;: 1</span> <span class="linenos"> 71</span> <span class="c1"># basis_generator_kwargs = {&#39;basis_generator_type&#39;: &#39;zero_rbf&#39;,</span>
<span class="linenos"> 72</span> <span class="c1"># }</span> <span class="linenos"> 72</span> <span class="c1"># &#39;num_basis&#39;: 5,</span>
<span class="linenos"> 73</span> <span class="linenos"> 73</span> <span class="c1"># &#39;num_basis_zero_start&#39;: 1</span>
<span class="linenos"> 74</span> <span class="c1"># For a DMP</span> <span class="linenos"> 74</span> <span class="c1"># }</span>
<span class="linenos"> 75</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">}</span> <span class="linenos"> 75</span>
<span class="linenos"> 76</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span> <span class="linenos"> 76</span> <span class="c1"># For a DMP</span>
<span class="linenos"> 77</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span> <span class="linenos"> 77</span> <span class="n">trajectory_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">}</span>
<span class="linenos"> 78</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;metaworld&#39;</span><span class="p">}</span> <span class="linenos"> 78</span> <span class="n">phase_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos"> 79</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span> <span class="linenos"> 79</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">}</span>
<span class="linenos"> 80</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span> <span class="linenos"> 80</span> <span class="n">controller_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;metaworld&#39;</span><span class="p">}</span>
<span class="linenos"> 81</span> <span class="p">}</span> <span class="linenos"> 81</span> <span class="n">basis_generator_kwargs</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos"> 82</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos"> 82</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos"> 83</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span> <span class="linenos"> 83</span> <span class="p">}</span>
<span class="linenos"> 84</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span> <span class="linenos"> 84</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="p">{},</span>
<span class="linenos"> 85</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span> <span class="linenos"> 85</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos"> 86</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos"> 86</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos"> 87</span> <span class="linenos"> 87</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 88</span> <span class="c1"># This renders the full MP trajectory</span> <span class="linenos"> 88</span>
<span class="linenos"> 89</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span> <span class="linenos"> 89</span> <span class="c1"># This renders the full MP trajectory</span>
<span class="linenos"> 90</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span> <span class="linenos"> 90</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span>
<span class="linenos"> 91</span> <span class="c1"># It is also possible to change them mode multiple times when</span> <span class="linenos"> 91</span> <span class="c1"># Resetting to no rendering, can be achieved by render(mode=None).</span>
<span class="linenos"> 92</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span> <span class="linenos"> 92</span> <span class="c1"># It is also possible to change them mode multiple times when</span>
<span class="linenos"> 93</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos"> 93</span> <span class="c1"># e.g. only every nth trajectory should be displayed.</span>
<span class="linenos"> 94</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos"> 94</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 95</span> <span class="linenos"> 95</span> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Metaworld render interface bug does not allow to render() fixes its interface. &quot;</span>
<span class="linenos"> 96</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos"> 96</span> <span class="s2">&quot;A temporary workaround is to alter their code in MujocoEnv render() from &quot;</span>
<span class="linenos"> 97</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos"> 97</span> <span class="s2">&quot;`if not offscreen` to `if not offscreen or offscreen == &#39;human&#39;`.&quot;</span><span class="p">)</span>
<span class="linenos"> 98</span> <span class="linenos"> 98</span> <span class="c1"># TODO: Remove this, when Metaworld fixes its interface.</span>
<span class="linenos"> 99</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span> <span class="linenos"> 99</span> <span class="c1"># env.render(mode=&quot;human&quot;)</span>
<span class="linenos">100</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos">100</span>
<span class="linenos">101</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">101</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">102</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">102</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">103</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos">103</span>
<span class="linenos">104</span> <span class="linenos">104</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">105</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">105</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">106</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span> <span class="linenos">106</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">107</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">107</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">108</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span> <span class="linenos">108</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">109</span> <span class="linenos">109</span>
<span class="linenos">110</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos">110</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">111</span> <span class="k">del</span> <span class="n">env</span> <span class="linenos">111</span> <span class="nb">print</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">rewards</span><span class="p">)</span>
<span class="linenos">112</span> <span class="linenos">112</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">113</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span> <span class="linenos">113</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">114</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span> <span class="linenos">114</span>
<span class="linenos">115</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span> <span class="linenos">115</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">116</span> <span class="linenos">116</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">117</span> <span class="c1"># # Standard Meta world tasks</span> <span class="linenos">117</span>
<span class="linenos">118</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">118</span>
<span class="linenos">119</span> <span class="linenos">119</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">120</span> <span class="c1"># # MP + MetaWorld hybrid task provided in the our framework</span> <span class="linenos">120</span> <span class="c1"># Disclaimer: MetaWorld environments require the seed to be specified in the beginning.</span>
<span class="linenos">121</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld_ProMP/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">121</span> <span class="c1"># Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.</span>
<span class="linenos">122</span> <span class="c1">#</span> <span class="linenos">122</span>
<span class="linenos">123</span> <span class="c1"># # Custom MetaWorld task</span> <span class="linenos">123</span> <span class="c1"># For rendering it might be necessary to specify your OpenGL installation</span>
<span class="linenos">124</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">124</span> <span class="c1"># export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so</span>
<span class="linenos">125</span> <span class="linenos">125</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">126</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span> <span class="linenos">126</span>
<span class="linenos">127</span> <span class="n">main</span><span class="p">()</span> <span class="linenos">127</span> <span class="c1"># # Standard Meta world tasks</span>
<span class="linenos">128</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld/button-press-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">129</span>
<span class="linenos">130</span> <span class="c1"># # MP + MetaWorld hybrid task provided in the our framework</span>
<span class="linenos">131</span> <span class="n">example_meta</span><span class="p">(</span><span class="s2">&quot;metaworld_ProMP/ButtonPress-v2&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">132</span> <span class="c1">#</span>
<span class="linenos">133</span> <span class="c1"># # Custom MetaWorld task</span>
<span class="linenos">134</span> <span class="n">example_custom_meta_and_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
</pre></div> </pre></div>
</div> </div>
</section> </section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Movement Primitives Examples &mdash; Fancy Gym 0.3.0 documentation</title> <title>Movement Primitives Examples &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -135,253 +135,252 @@
<span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos"> 26</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 27</span> <span class="linenos"> 27</span>
<span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">1</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="linenos"> 28</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">1</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="linenos"> 29</span> <span class="c1"># This renders the full MP trajectory</span> <span class="linenos"> 29</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 30</span> <span class="c1"># It is only required to call render() once in the beginning, which renders every consecutive trajectory.</span> <span class="linenos"> 30</span>
<span class="linenos"> 31</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos"> 31</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span>
<span class="linenos"> 32</span> <span class="linenos"> 32</span> <span class="c1"># such as a ProMP</span>
<span class="linenos"> 33</span> <span class="c1"># Now the action space is not the raw action but the parametrization of the trajectory generator,</span> <span class="linenos"> 33</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 34</span> <span class="c1"># such as a ProMP</span> <span class="linenos"> 34</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span>
<span class="linenos"> 35</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos"> 35</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The &#39;reward&#39; is equal</span>
<span class="linenos"> 36</span> <span class="c1"># This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the</span> <span class="linenos"> 36</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span>
<span class="linenos"> 37</span> <span class="c1"># full observation space of the last step, if replanning/sub-trajectory learning is used. The &#39;reward&#39; is equal</span> <span class="linenos"> 37</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 38</span> <span class="c1"># to the return of a trajectory. Default is the sum over the step-wise rewards.</span> <span class="linenos"> 38</span> <span class="c1"># Aggregated returns</span>
<span class="linenos"> 39</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos"> 39</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 40</span> <span class="c1"># Aggregated returns</span> <span class="linenos"> 40</span>
<span class="linenos"> 41</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos"> 41</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 42</span> <span class="linenos"> 42</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 43</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos"> 43</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 44</span> <span class="nb">print</span><span class="p">(</span><span class="n">reward</span><span class="p">)</span> <span class="linenos"> 44</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 45</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos"> 45</span>
<span class="linenos"> 46</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos"> 46</span>
<span class="linenos"> 47</span> <span class="linenos"> 47</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos"> 48</span> <span class="linenos"> 48</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos"> 49</span><span class="k">def</span> <span class="nf">example_custom_mp</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="linenos"> 49</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos"> 50</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span> <span class="linenos"> 50</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos"> 51</span><span class="sd"> Example for running a custom movement primitive based environments.</span> <span class="linenos"> 51</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos"> 52</span><span class="sd"> Our already registered environments follow the same structure.</span> <span class="linenos"> 52</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos"> 53</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span> <span class="linenos"> 53</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos"> 54</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span> <span class="linenos"> 54</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos"> 55</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span> <span class="linenos"> 55</span><span class="sd"> Args:</span>
<span class="linenos"> 56</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span> <span class="linenos"> 56</span><span class="sd"> seed: seed</span>
<span class="linenos"> 57</span><span class="sd"> Args:</span> <span class="linenos"> 57</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos"> 58</span><span class="sd"> seed: seed</span> <span class="linenos"> 58</span><span class="sd"> render: Render the episode</span>
<span class="linenos"> 59</span><span class="sd"> iterations: Number of rollout steps to run</span> <span class="linenos"> 59</span>
<span class="linenos"> 60</span><span class="sd"> render: Render the episode</span> <span class="linenos"> 60</span><span class="sd"> Returns:</span>
<span class="linenos"> 61</span> <span class="linenos"> 61</span>
<span class="linenos"> 62</span><span class="sd"> Returns:</span> <span class="linenos"> 62</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos"> 63</span> <span class="linenos"> 63</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span>
<span class="linenos"> 64</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos"> 64</span> <span class="c1"># E.g. here for way to many basis functions</span>
<span class="linenos"> 65</span> <span class="c1"># Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.</span> <span class="linenos"> 65</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos"> 66</span> <span class="c1"># E.g. here for way to many basis functions</span> <span class="linenos"> 66</span>
<span class="linenos"> 67</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">}},</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos"> 67</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos"> 68</span> <span class="linenos"> 68</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 69</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos"> 69</span>
<span class="linenos"> 70</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos"> 70</span> <span class="c1"># This time rendering every trajectory</span>
<span class="linenos"> 71</span> <span class="linenos"> 71</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos"> 72</span> <span class="c1"># This time rendering every trajectory</span> <span class="linenos"> 72</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos"> 73</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos"> 73</span>
<span class="linenos"> 74</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos"> 74</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos"> 75</span> <span class="linenos"> 75</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 76</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span> <span class="linenos"> 76</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos"> 77</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos"> 77</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos"> 78</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos"> 78</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos"> 79</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos"> 79</span>
<span class="linenos"> 80</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos"> 80</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos"> 81</span> <span class="linenos"> 81</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span>
<span class="linenos"> 82</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos"> 82</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos"> 83</span> <span class="nb">print</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">reward</span><span class="p">)</span> <span class="linenos"> 83</span>
<span class="linenos"> 84</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos"> 84</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos"> 85</span> <span class="linenos"> 85</span> <span class="k">return</span> <span class="n">obs</span>
<span class="linenos"> 86</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos"> 86</span>
<span class="linenos"> 87</span> <span class="k">return</span> <span class="n">obs</span> <span class="linenos"> 87</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span>
<span class="linenos"> 88</span> <span class="linenos"> 88</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span>
<span class="linenos"> 89</span><span class="k">class</span> <span class="nc">Custom_MPWrapper</span><span class="p">(</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">):</span> <span class="linenos"> 89</span> <span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 90</span> <span class="n">mp_config</span> <span class="o">=</span> <span class="p">{</span> <span class="linenos"> 90</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 91</span> <span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos"> 91</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos"> 92</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos"> 92</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos"> 93</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span> <span class="linenos"> 93</span> <span class="p">},</span>
<span class="linenos"> 94</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span> <span class="linenos"> 94</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 95</span> <span class="p">},</span> <span class="linenos"> 95</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos"> 96</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos"> 96</span> <span class="p">},</span>
<span class="linenos"> 97</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span> <span class="linenos"> 97</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos"> 98</span> <span class="p">},</span> <span class="linenos"> 98</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos"> 99</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos"> 99</span> <span class="p">},</span>
<span class="linenos">100</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span> <span class="linenos">100</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">101</span> <span class="p">},</span> <span class="linenos">101</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">102</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">102</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">103</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span> <span class="linenos">103</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">104</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span> <span class="linenos">104</span> <span class="p">}</span>
<span class="linenos">105</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span> <span class="linenos">105</span> <span class="p">},</span>
<span class="linenos">106</span> <span class="p">}</span> <span class="linenos">106</span> <span class="s1">&#39;DMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">107</span> <span class="p">},</span> <span class="linenos">107</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">108</span> <span class="s1">&#39;DMP&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">108</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">,</span>
<span class="linenos">109</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">109</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">500</span>
<span class="linenos">110</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;dmp&#39;</span><span class="p">,</span> <span class="linenos">110</span> <span class="p">},</span>
<span class="linenos">111</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">500</span> <span class="linenos">111</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">112</span> <span class="p">},</span> <span class="linenos">112</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span>
<span class="linenos">113</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">113</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mf">2.5</span>
<span class="linenos">114</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;exp&#39;</span><span class="p">,</span> <span class="linenos">114</span> <span class="p">},</span>
<span class="linenos">115</span> <span class="s1">&#39;alpha_phase&#39;</span><span class="p">:</span> <span class="mf">2.5</span> <span class="linenos">115</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">116</span> <span class="p">},</span> <span class="linenos">116</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">117</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">117</span> <span class="p">},</span>
<span class="linenos">118</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span> <span class="linenos">118</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">119</span> <span class="p">},</span> <span class="linenos">119</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span>
<span class="linenos">120</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">120</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span>
<span class="linenos">121</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;rbf&#39;</span><span class="p">,</span> <span class="linenos">121</span> <span class="p">}</span>
<span class="linenos">122</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span> <span class="linenos">122</span> <span class="p">}</span>
<span class="linenos">123</span> <span class="p">}</span> <span class="linenos">123</span> <span class="p">}</span>
<span class="linenos">124</span> <span class="p">}</span> <span class="linenos">124</span>
<span class="linenos">125</span> <span class="p">}</span> <span class="linenos">125</span>
<span class="linenos">126</span> <span class="linenos">126</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">127</span> <span class="linenos">127</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">128</span><span class="k">def</span> <span class="nf">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="linenos">128</span><span class="sd"> Example for running a custom movement primitive based environments.</span>
<span class="linenos">129</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span> <span class="linenos">129</span><span class="sd"> Our already registered environments follow the same structure.</span>
<span class="linenos">130</span><span class="sd"> Example for running a custom movement primitive based environments.</span> <span class="linenos">130</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span>
<span class="linenos">131</span><span class="sd"> Our already registered environments follow the same structure.</span> <span class="linenos">131</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span>
<span class="linenos">132</span><span class="sd"> Hence, this also allows to adjust hyperparameters of the movement primitives.</span> <span class="linenos">132</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span>
<span class="linenos">133</span><span class="sd"> Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.</span> <span class="linenos">133</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span>
<span class="linenos">134</span><span class="sd"> We appreciate PRs for custom environments (especially MP wrappers of existing tasks) </span> <span class="linenos">134</span><span class="sd"> Args:</span>
<span class="linenos">135</span><span class="sd"> for our repo: https://github.com/ALRhub/fancy_gym/</span> <span class="linenos">135</span><span class="sd"> seed: seed</span>
<span class="linenos">136</span><span class="sd"> Args:</span> <span class="linenos">136</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">137</span><span class="sd"> seed: seed</span> <span class="linenos">137</span><span class="sd"> render: Render the episode</span>
<span class="linenos">138</span><span class="sd"> iterations: Number of rollout steps to run</span> <span class="linenos">138</span>
<span class="linenos">139</span><span class="sd"> render: Render the episode</span> <span class="linenos">139</span><span class="sd"> Returns:</span>
<span class="linenos">140</span> <span class="linenos">140</span>
<span class="linenos">141</span><span class="sd"> Returns:</span> <span class="linenos">141</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">142</span> <span class="linenos">142</span>
<span class="linenos">143</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos">143</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">144</span> <span class="linenos">144</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">145</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span> <span class="linenos">145</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_DMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">146</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span> <span class="linenos">146</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">147</span> <span class="n">custom_env_id_DMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_DMP/Reacher5d-Custom-v0&quot;</span> <span class="linenos">147</span>
<span class="linenos">148</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span> <span class="linenos">148</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">,</span> <span class="s1">&#39;DMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span>
<span class="linenos">149</span> <span class="linenos">149</span>
<span class="linenos">150</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">Custom_MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">,</span> <span class="s1">&#39;DMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">)</span> <span class="linenos">150</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">151</span> <span class="linenos">151</span>
<span class="linenos">152</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos">152</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">153</span> <span class="linenos">153</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">154</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">154</span>
<span class="linenos">155</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">155</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">156</span> <span class="linenos">156</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">157</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos">157</span>
<span class="linenos">158</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos">158</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">159</span> <span class="linenos">159</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">160</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span> <span class="linenos">160</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">161</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos">161</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">162</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">162</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">163</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">163</span>
<span class="linenos">164</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos">164</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">165</span> <span class="linenos">165</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">166</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">166</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">167</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span> <span class="linenos">167</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">168</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">168</span>
<span class="linenos">169</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">169</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correlcty implement .close</span>
<span class="linenos">170</span> <span class="linenos">170</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">171</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correlcty implement .close</span> <span class="linenos">171</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">172</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos">172</span> <span class="k">pass</span>
<span class="linenos">173</span> <span class="k">except</span><span class="p">:</span> <span class="linenos">173</span>
<span class="linenos">174</span> <span class="k">pass</span> <span class="linenos">174</span>
<span class="linenos">175</span> <span class="linenos">175</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">176</span> <span class="linenos">176</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="linenos">177</span><span class="k">def</span> <span class="nf">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="linenos">177</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span>
<span class="linenos">178</span><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span> <span class="linenos">178</span><span class="sd"> Args:</span>
<span class="linenos">179</span><span class="sd"> Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.</span> <span class="linenos">179</span><span class="sd"> seed: seed</span>
<span class="linenos">180</span><span class="sd"> Args:</span> <span class="linenos">180</span><span class="sd"> iterations: Number of rollout steps to run</span>
<span class="linenos">181</span><span class="sd"> seed: seed</span> <span class="linenos">181</span><span class="sd"> render: Render the episode</span>
<span class="linenos">182</span><span class="sd"> iterations: Number of rollout steps to run</span> <span class="linenos">182</span>
<span class="linenos">183</span><span class="sd"> render: Render the episode</span> <span class="linenos">183</span><span class="sd"> Returns:</span>
<span class="linenos">184</span> <span class="linenos">184</span>
<span class="linenos">185</span><span class="sd"> Returns:</span> <span class="linenos">185</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">186</span> <span class="linenos">186</span>
<span class="linenos">187</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos">187</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span>
<span class="linenos">188</span> <span class="linenos">188</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">189</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-v0&quot;</span> <span class="linenos">189</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span>
<span class="linenos">190</span> <span class="n">custom_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/Reacher5d-Custom-v0&quot;</span> <span class="linenos">190</span>
<span class="linenos">191</span> <span class="n">custom_env_id_ProMP</span> <span class="o">=</span> <span class="s2">&quot;fancy_ProMP/Reacher5d-Custom-v0&quot;</span> <span class="linenos">191</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">192</span> <span class="linenos">192</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">193</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">upgrade</span><span class="p">(</span><span class="n">custom_env_id</span><span class="p">,</span> <span class="n">mp_wrapper</span><span class="o">=</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">reacher</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">,</span> <span class="n">add_mp_types</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;ProMP&#39;</span><span class="p">],</span> <span class="n">base_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">mp_config_override</span><span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ProMP&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">193</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span>
<span class="linenos">194</span> <span class="s1">&#39;trajectory_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">194</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="linenos">195</span> <span class="s1">&#39;trajectory_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;promp&#39;</span><span class="p">,</span> <span class="linenos">195</span> <span class="p">},</span>
<span class="linenos">196</span> <span class="s1">&#39;weights_scale&#39;</span><span class="p">:</span> <span class="mi">2</span> <span class="linenos">196</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">197</span> <span class="p">},</span> <span class="linenos">197</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span>
<span class="linenos">198</span> <span class="s1">&#39;phase_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">198</span> <span class="p">},</span>
<span class="linenos">199</span> <span class="s1">&#39;phase_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;linear&#39;</span> <span class="linenos">199</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">200</span> <span class="p">},</span> <span class="linenos">200</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span>
<span class="linenos">201</span> <span class="s1">&#39;controller_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">201</span> <span class="p">},</span>
<span class="linenos">202</span> <span class="s1">&#39;controller_type&#39;</span><span class="p">:</span> <span class="s1">&#39;velocity&#39;</span> <span class="linenos">202</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="linenos">203</span> <span class="p">},</span> <span class="linenos">203</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span>
<span class="linenos">204</span> <span class="s1">&#39;basis_generator_kwargs&#39;</span><span class="p">:</span> <span class="p">{</span> <span class="linenos">204</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
<span class="linenos">205</span> <span class="s1">&#39;basis_generator_type&#39;</span><span class="p">:</span> <span class="s1">&#39;zero_rbf&#39;</span><span class="p">,</span> <span class="linenos">205</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span>
<span class="linenos">206</span> <span class="s1">&#39;num_basis&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span> <span class="linenos">206</span> <span class="p">}</span>
<span class="linenos">207</span> <span class="s1">&#39;num_basis_zero_start&#39;</span><span class="p">:</span> <span class="mi">1</span> <span class="linenos">207</span> <span class="p">}})</span>
<span class="linenos">208</span> <span class="p">}</span> <span class="linenos">208</span>
<span class="linenos">209</span> <span class="p">}})</span> <span class="linenos">209</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
<span class="linenos">210</span> <span class="linenos">210</span>
<span class="linenos">211</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">custom_env_id_ProMP</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos">211</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">212</span> <span class="linenos">212</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">213</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">213</span>
<span class="linenos">214</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">214</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">215</span> <span class="linenos">215</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">216</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos">216</span>
<span class="linenos">217</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos">217</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">218</span> <span class="linenos">218</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">219</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span> <span class="linenos">219</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">220</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos">220</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">221</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">221</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">222</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">222</span>
<span class="linenos">223</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos">223</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">224</span> <span class="linenos">224</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">225</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">225</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">226</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span> <span class="linenos">226</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">227</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">227</span>
<span class="linenos">228</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">228</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">229</span> <span class="linenos">229</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">230</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos">230</span>
<span class="linenos">231</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos">231</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">232</span> <span class="linenos">232</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">233</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">233</span>
<span class="linenos">234</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">234</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">235</span> <span class="linenos">235</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos">236</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span> <span class="linenos">236</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">237</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos">237</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">238</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">238</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">239</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">239</span>
<span class="linenos">240</span> <span class="n">rewards</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos">240</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">241</span> <span class="linenos">241</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span>
<span class="linenos">242</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">242</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">243</span> <span class="nb">print</span><span class="p">(</span><span class="n">rewards</span><span class="p">)</span> <span class="linenos">243</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">244</span> <span class="n">rewards</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">244</span>
<span class="linenos">245</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">245</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correlcty implement .close</span>
<span class="linenos">246</span> <span class="linenos">246</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">247</span> <span class="k">try</span><span class="p">:</span> <span class="c1"># Some mujoco-based envs don&#39;t correlcty implement .close</span> <span class="linenos">247</span> <span class="k">except</span><span class="p">:</span>
<span class="linenos">248</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos">248</span> <span class="k">pass</span>
<span class="linenos">249</span> <span class="k">except</span><span class="p">:</span> <span class="linenos">249</span>
<span class="linenos">250</span> <span class="k">pass</span> <span class="linenos">250</span>
<span class="linenos">251</span> <span class="linenos">251</span><span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
<span class="linenos">252</span> <span class="linenos">252</span> <span class="n">render</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">253</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> <span class="linenos">253</span> <span class="c1"># DMP</span>
<span class="linenos">254</span> <span class="c1"># DMP</span> <span class="linenos">254</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_DMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">255</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_DMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">255</span>
<span class="linenos">256</span> <span class="linenos">256</span> <span class="c1"># ProMP</span>
<span class="linenos">257</span> <span class="c1"># ProMP</span> <span class="linenos">257</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/HoleReacher-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">258</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/BoxPushingTemporalSparse-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/BoxPushingTemporalSparse-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">259</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/TableTennis4D-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">260</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/TableTennis4D-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">260</span>
<span class="linenos">261</span> <span class="linenos">261</span> <span class="c1"># ProDMP with Replanning</span>
<span class="linenos">262</span> <span class="c1"># ProDMP with Replanning</span> <span class="linenos">262</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">263</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennis4DReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennis4DReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">264</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennisWindReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">265</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProDMP/TableTennisWindReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">265</span>
<span class="linenos">266</span> <span class="linenos">266</span> <span class="c1"># Altered basis functions</span>
<span class="linenos">267</span> <span class="c1"># Altered basis functions</span> <span class="linenos">267</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">268</span> <span class="n">obs1</span> <span class="o">=</span> <span class="n">example_custom_mp</span><span class="p">(</span><span class="s2">&quot;fancy_ProMP/Reacher5d-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">268</span>
<span class="linenos">269</span> <span class="linenos">269</span> <span class="c1"># Custom MP</span>
<span class="linenos">270</span> <span class="c1"># Custom MP</span> <span class="linenos">270</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">271</span> <span class="n">example_fully_custom_mp</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">271</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span>
<span class="linenos">272</span> <span class="n">example_fully_custom_mp_alternative</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">272</span>
<span class="linenos">273</span> <span class="linenos">273</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">274</span><span class="k">if</span> <span class="vm">__name__</span><span class="o">==</span><span class="s1">&#39;__main__&#39;</span><span class="p">:</span> <span class="linenos">274</span> <span class="n">main</span><span class="p">()</span>
<span class="linenos">275</span> <span class="n">main</span><span class="p">()</span>
</pre></div> </pre></div>
</div> </div>
</section> </section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MP Params Tuning Example &mdash; Fancy Gym 0.3.0 documentation</title> <title>MP Params Tuning Example &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>OpenAI Envs Examples &mdash; Fancy Gym 0.3.0 documentation</title> <title>OpenAI Envs Examples &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -122,27 +122,27 @@
<span class="linenos">13</span><span class="sd"> Returns:</span> <span class="linenos">13</span><span class="sd"> Returns:</span>
<span class="linenos">14</span> <span class="linenos">14</span>
<span class="linenos">15</span><span class="sd"> &quot;&quot;&quot;</span> <span class="linenos">15</span><span class="sd"> &quot;&quot;&quot;</span>
<span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos">16</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
<span class="linenos">17</span> <span class="linenos">17</span>
<span class="linenos">18</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span> <span class="linenos">18</span> <span class="n">returns</span> <span class="o">=</span> <span class="mi">0</span>
<span class="linenos">19</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos">19</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">20</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span> <span class="linenos">20</span> <span class="c1"># number of samples/full trajectories (multiple environment steps)</span>
<span class="linenos">21</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span> <span class="linenos">21</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
<span class="linenos">22</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="linenos">22</span> <span class="k">if</span> <span class="n">render</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos">23</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">24</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">24</span> <span class="k">else</span><span class="p">:</span>
<span class="linenos">25</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">25</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
<span class="linenos">26</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span> <span class="linenos">26</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">27</span> <span class="linenos">27</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">28</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">28</span> <span class="n">returns</span> <span class="o">+=</span> <span class="n">reward</span>
<span class="linenos">29</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span> <span class="linenos">29</span>
<span class="linenos">30</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">30</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">31</span> <span class="linenos">31</span> <span class="nb">print</span><span class="p">(</span><span class="n">returns</span><span class="p">)</span>
<span class="linenos">32</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="linenos">32</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">33</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;gym_ProMP/Reacher-v2&quot;</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">33</span>
<span class="linenos">34</span> <span class="linenos">34</span>
<span class="linenos">35</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span> <span class="linenos">35</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="linenos">36</span> <span class="n">main</span><span class="p">()</span> <span class="linenos">36</span> <span class="n">example_mp</span><span class="p">(</span><span class="s2">&quot;gym_ProMP/Reacher-v2&quot;</span><span class="p">)</span>
</pre></div> </pre></div>
</div> </div>
</section> </section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>PD Control Gain Tuning Example &mdash; Fancy Gym 0.3.0 documentation</title> <title>PD Control Gain Tuning Example &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Replanning Example &mdash; Fancy Gym 0.3.0 documentation</title> <title>Replanning Example &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -112,24 +112,24 @@
<span class="linenos"> 3</span> <span class="linenos"> 3</span>
<span class="linenos"> 4</span> <span class="linenos"> 4</span>
<span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> <span class="linenos"> 5</span><span class="k">def</span> <span class="nf">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos"> 6</span> <span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">env_name</span><span class="p">)</span>
<span class="linenos"> 7</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos"> 7</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos"> 8</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span> <span class="linenos"> 8</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
<span class="linenos"> 9</span> <span class="k">while</span> <span class="kc">True</span><span class="p">:</span> <span class="linenos"> 9</span> <span class="n">done</span> <span class="o">=</span> <span class="kc">False</span>
<span class="linenos">10</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">10</span> <span class="k">while</span> <span class="n">done</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
<span class="linenos">11</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">11</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">12</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos">12</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">13</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos">13</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">14</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">14</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">15</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">15</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">16</span> <span class="k">break</span> <span class="linenos">16</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">17</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos">17</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">18</span> <span class="k">del</span> <span class="n">env</span> <span class="linenos">18</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">19</span> <span class="linenos">19</span>
<span class="linenos">20</span> <span class="linenos">20</span>
<span class="linenos">21</span><span class="k">def</span> <span class="nf">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="linenos">21</span><span class="k">def</span> <span class="nf">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="linenos">22</span> <span class="c1"># id for a step-based environment</span> <span class="linenos">22</span> <span class="c1"># id for a step-based environment</span>
<span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;fancy/BoxPushingDense-v0&quot;</span> <span class="linenos">23</span> <span class="n">base_env_id</span> <span class="o">=</span> <span class="s2">&quot;BoxPushingDense-v0&quot;</span>
<span class="linenos">24</span> <span class="linenos">24</span>
<span class="linenos">25</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">box_pushing</span><span class="o">.</span><span class="n">mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span> <span class="linenos">25</span> <span class="n">wrappers</span> <span class="o">=</span> <span class="p">[</span><span class="n">fancy_gym</span><span class="o">.</span><span class="n">envs</span><span class="o">.</span><span class="n">mujoco</span><span class="o">.</span><span class="n">box_pushing</span><span class="o">.</span><span class="n">mp_wrapper</span><span class="o">.</span><span class="n">MPWrapper</span><span class="p">]</span>
<span class="linenos">26</span> <span class="linenos">26</span>
@ -147,34 +147,31 @@
<span class="linenos">38</span> <span class="s1">&#39;replanning_schedule&#39;</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">pos</span><span class="p">,</span> <span class="n">vel</span><span class="p">,</span> <span class="n">obs</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="n">t</span> <span class="o">%</span> <span class="mi">25</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="linenos">38</span> <span class="s1">&#39;replanning_schedule&#39;</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">pos</span><span class="p">,</span> <span class="n">vel</span><span class="p">,</span> <span class="n">obs</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="n">t</span> <span class="o">%</span> <span class="mi">25</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span>
<span class="linenos">39</span> <span class="s1">&#39;condition_on_desired&#39;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span> <span class="linenos">39</span> <span class="s1">&#39;condition_on_desired&#39;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span>
<span class="linenos">40</span> <span class="linenos">40</span>
<span class="linenos">41</span> <span class="n">base_env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s1">&#39;human&#39;</span> <span class="k">if</span> <span class="n">render</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span> <span class="linenos">41</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env_id</span><span class="o">=</span><span class="n">base_env_id</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span>
<span class="linenos">42</span> <span class="n">env</span> <span class="o">=</span> <span class="n">fancy_gym</span><span class="o">.</span><span class="n">make_bb</span><span class="p">(</span><span class="n">env</span><span class="o">=</span><span class="n">base_env</span><span class="p">,</span> <span class="n">wrappers</span><span class="o">=</span><span class="n">wrappers</span><span class="p">,</span> <span class="n">black_box_kwargs</span><span class="o">=</span><span class="n">black_box_kwargs</span><span class="p">,</span> <span class="linenos">42</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span>
<span class="linenos">43</span> <span class="n">traj_gen_kwargs</span><span class="o">=</span><span class="n">trajectory_generator_kwargs</span><span class="p">,</span> <span class="n">controller_kwargs</span><span class="o">=</span><span class="n">controller_kwargs</span><span class="p">,</span> <span class="linenos">43</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span>
<span class="linenos">44</span> <span class="n">phase_kwargs</span><span class="o">=</span><span class="n">phase_generator_kwargs</span><span class="p">,</span> <span class="n">basis_kwargs</span><span class="o">=</span><span class="n">basis_generator_kwargs</span><span class="p">,</span> <span class="linenos">44</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="linenos">45</span> <span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span> <span class="linenos">45</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span>
<span class="linenos">46</span> <span class="k">if</span> <span class="n">render</span><span class="p">:</span> <span class="linenos">46</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;human&quot;</span><span class="p">)</span>
<span class="linenos">47</span> <span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span> <span class="linenos">47</span>
<span class="linenos">48</span> <span class="linenos">48</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">49</span> <span class="n">obs</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">49</span>
<span class="linenos">50</span> <span class="linenos">50</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span>
<span class="linenos">51</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iteration</span><span class="p">):</span> <span class="linenos">51</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="linenos">52</span> <span class="n">ac</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="linenos">52</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span>
<span class="linenos">53</span> <span class="n">obs</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">ac</span><span class="p">)</span> <span class="linenos">53</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
<span class="linenos">54</span> <span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span> <span class="linenos">54</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
<span class="linenos">55</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span> <span class="linenos">55</span>
<span class="linenos">56</span> <span class="linenos">56</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="linenos">57</span> <span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="linenos">57</span> <span class="k">del</span> <span class="n">env</span>
<span class="linenos">58</span> <span class="k">del</span> <span class="n">env</span> <span class="linenos">58</span>
<span class="linenos">59</span> <span class="linenos">59</span>
<span class="linenos">60</span><span class="k">def</span> <span class="nf">main</span><span class="p">(</span><span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> <span class="linenos">60</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="linenos">61</span> <span class="c1"># run a registered replanning environment</span> <span class="linenos">61</span> <span class="c1"># run a registered replanning environment</span>
<span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">62</span> <span class="n">example_run_replanning_env</span><span class="p">(</span><span class="n">env_name</span><span class="o">=</span><span class="s2">&quot;fancy_ProDMP/BoxPushingDenseReplan-v0&quot;</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">iterations</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="linenos">63</span> <span class="linenos">63</span>
<span class="linenos">64</span> <span class="c1"># run a custom replanning environment</span> <span class="linenos">64</span> <span class="c1"># run a custom replanning environment</span>
<span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="n">render</span><span class="p">)</span> <span class="linenos">65</span> <span class="n">example_custom_replanning_envs</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">iteration</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">render</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="linenos">66</span>
<span class="linenos">67</span><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="linenos">68</span> <span class="n">main</span><span class="p">()</span>
</pre></div> </pre></div>
</div> </div>
</section> </section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.envs &mdash; Fancy Gym 0.3.0 documentation</title> <title>fancy_gym.envs &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -39,7 +39,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.register &mdash; Fancy Gym 0.3.0 documentation</title> <title>fancy_gym.register &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>fancy_gym.upgrade &mdash; Fancy Gym 0.3.0 documentation</title> <title>fancy_gym.upgrade &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -40,7 +40,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -3,7 +3,7 @@
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Index &mdash; Fancy Gym 0.3.0 documentation</title> <title>Index &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" /> <link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -38,7 +38,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/> <img src="_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Basic Usage &mdash; Fancy Gym 0.3.0 documentation</title> <title>Basic Usage &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>What is Episodic RL? &mdash; Fancy Gym 0.3.0 documentation</title> <title>What is Episodic RL? &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Installation &mdash; Fancy Gym 0.3.0 documentation</title> <title>Installation &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
@ -135,7 +135,7 @@ pip<span class="w"> </span>install<span class="w"> </span><span class="s1">&#39;
</div> </div>
<p>Pip can not automatically install up-to-date versions of metaworld, <p>Pip can not automatically install up-to-date versions of metaworld,
since they are not available on PyPI yet. Install metaworld via</p> since they are not available on PyPI yet. Install metaworld via</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
</pre></div> </pre></div>
</div> </div>
</section> </section>
@ -169,7 +169,7 @@ pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </s
</pre></div> </pre></div>
</div> </div>
<p>Metaworld has to be installed manually with</p> <p>Metaworld has to be installed manually with</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg<span class="o">=</span>metaworld <div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg<span class="o">=</span>metaworld
</pre></div> </pre></div>
</div> </div>
</section> </section>

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Creating new MP Environments &mdash; Fancy Gym 0.3.0 documentation</title> <title>Creating new MP Environments &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/style.css" type="text/css" /> <link rel="stylesheet" href="../_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="../_static/icon.svg" class="logo" alt="Logo"/> <img src="../_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">

View File

@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" /> <meta charset="utf-8" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Fancy Gym &mdash; Fancy Gym 0.3.0 documentation</title> <title>Fancy Gym &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" /> <link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -40,7 +40,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/> <img src="_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="search.html" method="get">

Binary file not shown.

View File

@ -3,7 +3,7 @@
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Python Module Index &mdash; Fancy Gym 0.3.0 documentation</title> <title>Python Module Index &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" /> <link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/> <img src="_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get"> <form id="rtd-search-form" class="wy-form" action="search.html" method="get">

View File

@ -3,7 +3,7 @@
<head> <head>
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Search &mdash; Fancy Gym 0.3.0 documentation</title> <title>Search &mdash; Fancy Gym 0.2 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/style.css" type="text/css" /> <link rel="stylesheet" href="_static/style.css" type="text/css" />
@ -41,7 +41,7 @@
<img src="_static/icon.svg" class="logo" alt="Logo"/> <img src="_static/icon.svg" class="logo" alt="Logo"/>
</a> </a>
<div class="version"> <div class="version">
0.3.0 0.2
</div> </div>
<div role="search"> <div role="search">
<form id="rtd-search-form" class="wy-form" action="#" method="get"> <form id="rtd-search-form" class="wy-form" action="#" method="get">

File diff suppressed because one or more lines are too long

View File

@ -1,17 +1,13 @@
# This conf.py is in large parts inspired by the one used by stable-baselines 3 # This conf.py is in large parts inspired by the one used by stable-baselines 3
import toml
import datetime import datetime
project = 'Fancy Gym' project = 'Fancy Gym'
author = 'Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou' author = 'Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou'
copyright = f'2020-{datetime.date.today().year}, {author}' copyright = f'2020-{datetime.date.today().year}, {author}'
pyproject_content = toml.load("../../pyproject.toml") release = '0.2' # The full version, including alpha/beta/rc tags
proj_version = pyproject_content["project"]["version"] version = '0.2' # The short X.Y version
release = proj_version # The full version, including alpha/beta/rc tags
version = proj_version # The short X.Y version
extensions = [ extensions = [
'myst_parser', 'myst_parser',

View File

@ -18,12 +18,6 @@ A composite reward function serves as the performance metric for the RL system.
Variations of this environment are available, differing in reward structures and the optionality of randomizing the box's initial position. These variations are purposefully designed to challenge RL algorithms, enhancing their generalization and adaptation capabilities. Temporally sparse environments only provide a reward at the last timestep. Spatially sparse environments only provide a reward, if the goal is almost reached, the box is close enough to the goal and somewhat correctly aligned. Variations of this environment are available, differing in reward structures and the optionality of randomizing the box's initial position. These variations are purposefully designed to challenge RL algorithms, enhancing their generalization and adaptation capabilities. Temporally sparse environments only provide a reward at the last timestep. Spatially sparse environments only provide a reward, if the goal is almost reached, the box is close enough to the goal and somewhat correctly aligned.
These environments all provide smoothness metrics as part of the return infos:
- mean_squared_jerk: Averages the square of jerk (rate of acceleration change) across the motion. Lower values indicate smoother movement.
- maximum_jerk: Identifies the highest jerk value encountered.
- dimensionless_jerk: Normalizes the summed squared jerk over the motion's duration and peak velocity, offering a scale-independent metric of smoothness
| Name | Description | Horizon | Action Dimension | Observation Dimension | | Name | Description | Horizon | Action Dimension | Observation Dimension |
| ------------------------------------------ | -------------------------------------------------------------------- | ------- | ---------------- | --------------------- | | ------------------------------------------ | -------------------------------------------------------------------- | ------- | ---------------- | --------------------- |
| `fancy/BoxPushingDense-v0` | Custom Box-pushing task with dense rewards | 100 | 3 | 13 | | `fancy/BoxPushingDense-v0` | Custom Box-pushing task with dense rewards | 100 | 3 | 13 |
@ -55,9 +49,6 @@ Variations of the table tennis environment are available to cater to different r
| `fancy/TableTennisWind-v0` | Table Tennis task with wind effects, based on a custom environment for table tennis | 350 | 7 | 19 | | `fancy/TableTennisWind-v0` | Table Tennis task with wind effects, based on a custom environment for table tennis | 350 | 7 | 19 |
| `fancy/TableTennisGoalSwitching-v0` | Table Tennis task with goal switching, based on a custom environment for table tennis | 350 | 7 | 19 | | `fancy/TableTennisGoalSwitching-v0` | Table Tennis task with goal switching, based on a custom environment for table tennis | 350 | 7 | 19 |
| `fancy/TableTennisWindReplan-v0` | Table Tennis task with wind effects and replanning, based on a custom environment for table tennis | 350 | 7 | 19 | | `fancy/TableTennisWindReplan-v0` | Table Tennis task with wind effects and replanning, based on a custom environment for table tennis | 350 | 7 | 19 |
| `fancy/TableTennisRndRobot-v0` | Table Tennis task with random initial robot joint positions \* | 350 | 7 | 19 |
\* Random initialization of robot joint position and speed can be enabled by providing `random_pos_scale` / `random_vel_scale` to make. `TableTennisRndRobot` is equivalent to `TableTennis4D` except that `random_pos_scale` is set to 0.1 instead of 0 by default.
--- ---
@ -98,9 +89,8 @@ A successful throw in this task is determined by the ball landing in the cup at
| `fancy/Reacher5dSparse-v0` | Sparse Reacher task with 5 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 5 | 20 | | `fancy/Reacher5dSparse-v0` | Sparse Reacher task with 5 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 5 | 20 |
| `fancy/Reacher7d-v0` | Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 | | `fancy/Reacher7d-v0` | Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 |
| `fancy/Reacher7dSparse-v0` | Sparse Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 | | `fancy/Reacher7dSparse-v0` | Sparse Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 |
| `fancy/HopperJump-v0` | Hopper Jump task with continuous rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* |
| `fancy/HopperJumpMarkov-v0` | `fancy/HopperJump-v0`, but with an alternative reward that is markovian. | 250 | 3 | 15 / 16\* |
| `fancy/HopperJumpSparse-v0` | Hopper Jump task with sparse rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* | | `fancy/HopperJumpSparse-v0` | Hopper Jump task with sparse rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* |
| `fancy/HopperJump-v0` | Hopper Jump task with continuous rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* |
| `fancy/AntJump-v0` | Ant Jump task, based on Gymnasium's `gym.envs.mujoco.Ant` | 200 | 8 | 119 | | `fancy/AntJump-v0` | Ant Jump task, based on Gymnasium's `gym.envs.mujoco.Ant` | 200 | 8 | 119 |
| `fancy/HalfCheetahJump-v0` | HalfCheetah Jump task, based on Gymnasium's `gym.envs.mujoco.HalfCheetah` | 100 | 6 | 112 | | `fancy/HalfCheetahJump-v0` | HalfCheetah Jump task, based on Gymnasium's `gym.envs.mujoco.HalfCheetah` | 100 | 6 | 112 |
| `fancy/HopperJumpOnBox-v0` | Hopper Jump on Box task, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 4 | 16 / 100\* | | `fancy/HopperJumpOnBox-v0` | Hopper Jump on Box task, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 4 | 16 / 100\* |

View File

@ -32,7 +32,7 @@ since they are not avaible on PyPI yet. Install metaworld via
.. code:: bash .. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
Installation from master Installation from master
~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~
@ -70,4 +70,4 @@ Metaworld has to be installed manually with
.. code:: bash .. code:: bash
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld

View File

@ -25,11 +25,10 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \ from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \
BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING
from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, TableTennisMarkov, \ from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \
MAX_EPISODE_STEPS_TABLE_TENNIS, MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER MAX_EPISODE_STEPS_TABLE_TENNIS
from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper as MPWrapper_TableTennis from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper as MPWrapper_TableTennis
from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper_Replan as MPWrapper_TableTennis_Replan from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper_Replan as MPWrapper_TableTennis_Replan
from .mujoco.table_tennis.mp_wrapper import TTRndRobot_MPWrapper as MPWrapper_TableTennis_Rnd
from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper as MPWrapper_TableTennis_VelObs from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper as MPWrapper_TableTennis_VelObs
from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper_Replan as MPWrapper_TableTennis_VelObs_Replan from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper_Replan as MPWrapper_TableTennis_VelObs_Replan
@ -136,19 +135,6 @@ register(
} }
) )
register(
id='fancy/HopperJumpMarkov-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpMarkovRew',
mp_wrapper=mujoco.hopper_jump.MPWrapper,
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"sparse": False,
"healthy_reward": 1.0,
"contact_weight": 0.0,
"height_weight": 3.0,
}
)
# TODO: Add [MPs] later when finished (old TODO I moved here during refactor) # TODO: Add [MPs] later when finished (old TODO I moved here during refactor)
register( register(
id='fancy/AntJump-v0', id='fancy/AntJump-v0',
@ -304,37 +290,6 @@ register(
} }
) )
register(
id='fancy/TableTennisRndRobot-v0',
entry_point='fancy_gym.envs.mujoco:TableTennisRandomInit',
mp_wrapper=MPWrapper_TableTennis_Rnd,
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS,
kwargs={
'random_pos_scale': 0.1,
'random_vel_scale': 0.0,
}
)
register(
id='fancy/TableTennisMarkov-v0',
mp_wrapper=MPWrapper_TableTennis,
entry_point='fancy_gym.envs.mujoco:TableTennisMarkov',
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER,
kwargs={
}
)
register(
id='fancy/TableTennisRndRobotMarkov-v0',
mp_wrapper=MPWrapper_TableTennis_Rnd,
entry_point='fancy_gym.envs.mujoco:TableTennisMarkov',
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER,
kwargs={
'random_pos_scale': 0.1,
'random_vel_scale': 0.0,
}
)
# Air Hockey environments # Air Hockey environments
for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]: for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]:
register( register(

View File

@ -1,14 +1,14 @@
from .ant_jump.ant_jump import AntJumpEnv from .ant_jump.ant_jump import AntJumpEnv
from .beerpong.beerpong import BeerPongEnv, BeerPongEnvStepBasedEpisodicReward from .beerpong.beerpong import BeerPongEnv, BeerPongEnvStepBasedEpisodicReward
from .half_cheetah_jump.half_cheetah_jump import HalfCheetahJumpEnv from .half_cheetah_jump.half_cheetah_jump import HalfCheetahJumpEnv
from .hopper_jump.hopper_jump import HopperJumpEnv, HopperJumpMarkovRew from .hopper_jump.hopper_jump import HopperJumpEnv
from .hopper_jump.hopper_jump_on_box import HopperJumpOnBoxEnv from .hopper_jump.hopper_jump_on_box import HopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import HopperThrowEnv from .hopper_throw.hopper_throw import HopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
from .reacher.reacher import ReacherEnv from .reacher.reacher import ReacherEnv
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, TableTennisMarkov, TableTennisRandomInit from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching
try: try:
from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv

View File

@ -115,7 +115,6 @@ class AntJumpEnv(AntEnvCustomXML):
contact_force_range=contact_force_range, contact_force_range=contact_force_range,
reset_noise_scale=reset_noise_scale, reset_noise_scale=reset_noise_scale,
exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs) exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs)
self.render_active = False
def step(self, action): def step(self, action):
self.current_step += 1 self.current_step += 1
@ -154,15 +153,8 @@ class AntJumpEnv(AntEnvCustomXML):
} }
truncated = False truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return obs, reward, terminated, truncated, info return obs, reward, terminated, truncated, info
def render(self):
self.render_active = True
return super().render()
def _get_obs(self): def _get_obs(self):
return np.append(super()._get_obs(), self.goal) return np.append(super()._get_obs(), self.goal)

View File

@ -44,7 +44,6 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
} }
def __init__(self, **kwargs): def __init__(self, **kwargs):
utils.EzPickle.__init__(self)
self._steps = 0 self._steps = 0
# Small Context -> Easier. Todo: Should we do different versions? # Small Context -> Easier. Todo: Should we do different versions?
# self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "beerpong_wo_cup.xml") # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "beerpong_wo_cup.xml")
@ -90,7 +89,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
observation_space=self.observation_space, observation_space=self.observation_space,
**kwargs **kwargs
) )
self.render_active = False utils.EzPickle.__init__(self)
@property @property
def start_pos(self): def start_pos(self):
@ -170,15 +169,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
truncated = False truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return ob, reward, terminated, truncated, infos return ob, reward, terminated, truncated, infos
def render(self):
self.render_active = True
return super().render()
def _get_obs(self): def _get_obs(self):
theta = self.data.qpos.flat[:7].copy() theta = self.data.qpos.flat[:7].copy()
theta_dot = self.data.qvel.flat[:7].copy() theta_dot = self.data.qvel.flat[:7].copy()

View File

@ -4,10 +4,8 @@ import numpy as np
from gymnasium import utils, spaces from gymnasium import utils, spaces
from gymnasium.envs.mujoco import MujocoEnv from gymnasium.envs.mujoco import MujocoEnv
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import calculate_jerk_profile, calculate_mean_squared_jerk, calculate_dimensionless_jerk, calculate_maximum_jerk
import mujoco import mujoco
@ -51,7 +49,6 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
self._desired_rod_quat = desired_rod_quat self._desired_rod_quat = desired_rod_quat
self._episode_energy = 0. self._episode_energy = 0.
self.velocity_profile = []
self.observation_space = spaces.Box( self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=(28,), dtype=np.float64 low=-np.inf, high=np.inf, shape=(28,), dtype=np.float64
@ -63,7 +60,6 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
frame_skip=self.frame_skip, frame_skip=self.frame_skip,
observation_space=self.observation_space, **kwargs) observation_space=self.observation_space, **kwargs)
self.action_space = spaces.Box(low=-1, high=1, shape=(7,)) self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
self.render_active = False
def step(self, action): def step(self, action):
action = 10 * np.clip(action, self.action_space.low, self.action_space.high) action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
@ -71,8 +67,6 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
unstable_simulation = False unstable_simulation = False
self.velocity_profile.append(self.data.qvel[:7].copy())
try: try:
self.do_simulation(resultant_action, self.frame_skip) self.do_simulation(resultant_action, self.frame_skip)
except Exception as e: except Exception as e:
@ -102,15 +96,11 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
obs = self._get_obs() obs = self._get_obs()
box_goal_pos_dist = 0. if not episode_end else np.linalg.norm(box_pos - target_pos) box_goal_pos_dist = 0. if not episode_end else np.linalg.norm(box_pos - target_pos)
box_goal_quat_dist = 0. if not episode_end else rotation_distance(box_quat, target_quat) box_goal_quat_dist = 0. if not episode_end else rotation_distance(box_quat, target_quat)
mean_squared_jerk, maximum_jerk, dimensionless_jerk = (0.0,0.0,0.0) if not episode_end else self.calculate_smoothness_metrics(np.array(self.velocity_profile), self.dt)
infos = { infos = {
'episode_end': episode_end, 'episode_end': episode_end,
'box_goal_pos_dist': box_goal_pos_dist, 'box_goal_pos_dist': box_goal_pos_dist,
'box_goal_rot_dist': box_goal_quat_dist, 'box_goal_rot_dist': box_goal_quat_dist,
'episode_energy': 0. if not episode_end else self._episode_energy, 'episode_energy': 0. if not episode_end else self._episode_energy,
'mean_squared_jerk': mean_squared_jerk,
'maximum_jerk': maximum_jerk,
'dimensionless_jerk': dimensionless_jerk,
'is_success': True if episode_end and box_goal_pos_dist < 0.05 and box_goal_quat_dist < 0.5 else False, 'is_success': True if episode_end and box_goal_pos_dist < 0.05 and box_goal_quat_dist < 0.5 else False,
'num_steps': self._steps 'num_steps': self._steps
} }
@ -118,35 +108,8 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
terminated = episode_end and infos['is_success'] terminated = episode_end and infos['is_success']
truncated = episode_end and not infos['is_success'] truncated = episode_end and not infos['is_success']
if self.render_active and self.render_mode=='human':
self.render()
return obs, reward, terminated, truncated, infos return obs, reward, terminated, truncated, infos
def render(self):
self.render_active = True
return super().render()
def calculate_smoothness_metrics(self, velocity_profile, dt):
"""
Calculates the smoothness metrics for the given velocity profile.
param velocity_profile: np.array
The array containing the movement velocity profile.
param dt: float
The sampling time interval of the data.
return mean_squared_jerk: float
The mean squared jerk estimate of the given movement's smoothness.
return maximum_jerk: float
The maximum jerk estimate of the given movement's smoothness.
return dimensionless_jerk: float
The dimensionless jerk estimate of the given movement's smoothness.
"""
jerk_profile = calculate_jerk_profile(velocity_profile, dt)
mean_squared_jerk = calculate_mean_squared_jerk(jerk_profile)
maximum_jerk = calculate_maximum_jerk(jerk_profile)
dimensionless_jerk = calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt)
return mean_squared_jerk, maximum_jerk, dimensionless_jerk
def reset_model(self): def reset_model(self):
# rest box to initial position # rest box to initial position
self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing) self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing)

View File

@ -51,19 +51,3 @@ def rot_to_quat(theta, axis):
quant[0] = np.sin(theta / 2.) quant[0] = np.sin(theta / 2.)
quant[1:] = np.cos(theta / 2.) * axis quant[1:] = np.cos(theta / 2.) * axis
return quant return quant
def calculate_jerk_profile(velocity_profile, dt):
jerk = np.diff(velocity_profile, 2, 0) / pow(dt, 2)
return jerk
def calculate_mean_squared_jerk(jerk_profile):
return np.mean(pow(jerk_profile, 2))
def calculate_maximum_jerk(jerk_profile):
return np.max(abs(jerk_profile))
def calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt):
sum_squared_jerk = np.sum(pow(jerk_profile, 2), 0)
duration = len(velocity_profile) * dt
peak_velocity = np.max(abs(velocity_profile), 0)
return np.mean(sum_squared_jerk * pow(duration, 3) / pow(peak_velocity, 2))

View File

@ -60,11 +60,7 @@ class HalfCheetahEnvCustomXML(HalfCheetahEnv):
default_camera_config=DEFAULT_CAMERA_CONFIG, default_camera_config=DEFAULT_CAMERA_CONFIG,
**kwargs, **kwargs,
) )
self.render_active = False
def render(self):
self.render_active = True
return super().render()
class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML): class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
""" """
@ -124,9 +120,6 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
'max_height': self.max_height 'max_height': self.max_height
} }
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def _get_obs(self): def _get_obs(self):

View File

@ -88,12 +88,6 @@ class HopperEnvCustomXML(HopperEnv):
**kwargs, **kwargs,
) )
self.render_active = False
def render(self):
self.render_active = True
return super().render()
class HopperJumpEnv(HopperEnvCustomXML): class HopperJumpEnv(HopperEnvCustomXML):
""" """
@ -207,10 +201,6 @@ class HopperJumpEnv(HopperEnvCustomXML):
healthy=self.is_healthy, healthy=self.is_healthy,
contact_dist=self.contact_dist or 0 contact_dist=self.contact_dist or 0
) )
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def _get_obs(self): def _get_obs(self):
@ -272,100 +262,76 @@ class HopperJumpEnv(HopperEnvCustomXML):
return True return True
return False return False
class HopperJumpMarkovRew(HopperJumpEnv): # # TODO is that needed? if so test it
def step(self, action): # class HopperJumpStepEnv(HopperJumpEnv):
self._steps += 1 #
# def __init__(self,
self.do_simulation(action, self.frame_skip) # xml_file='hopper_jump.xml',
# forward_reward_weight=1.0,
height_after = self.get_body_com("torso")[2] # ctrl_cost_weight=1e-3,
# site_pos_after = self.data.get_site_xpos('foot_site') # healthy_reward=1.0,
site_pos_after = self.data.site('foot_site').xpos # height_weight=3,
self.max_height = max(height_after, self.max_height) # dist_weight=3,
# terminate_when_unhealthy=False,
has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False # healthy_state_range=(-100.0, 100.0),
# healthy_z_range=(0.5, float('inf')),
if not self.init_floor_contact: # healthy_angle_range=(-float('inf'), float('inf')),
self.init_floor_contact = has_floor_contact # reset_noise_scale=5e-3,
if self.init_floor_contact and not self.has_left_floor: # exclude_current_positions_from_observation=False
self.has_left_floor = not has_floor_contact # ):
if not self.contact_with_floor and self.has_left_floor: #
self.contact_with_floor = has_floor_contact # self._height_weight = height_weight
# self._dist_weight = dist_weight
ctrl_cost = self.control_cost(action) # super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
costs = ctrl_cost # healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
terminated = False # exclude_current_positions_from_observation)
truncated = False #
# def step(self, action):
goal_dist = np.linalg.norm(site_pos_after - self.goal) # self._steps += 1
if self.contact_dist is None and self.contact_with_floor: #
self.contact_dist = goal_dist # self.do_simulation(action, self.frame_skip)
#
rewards = 0 # height_after = self.get_body_com("torso")[2]
if not self.sparse or (self.sparse and self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP): # site_pos_after = self.data.site('foot_site').xpos.copy()
healthy_reward = self.healthy_reward # self.max_height = max(height_after, self.max_height)
distance_reward = -goal_dist * self._dist_weight #
height_reward = (self.max_height if self.sparse else height_after) * self._height_weight # ctrl_cost = self.control_cost(action)
contact_reward = -(self.contact_dist or 5) * self._contact_weight # healthy_reward = self.healthy_reward
rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward) # height_reward = self._height_weight * height_after
# goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
observation = self._get_obs() # goal_dist_reward = -self._dist_weight * goal_dist
# dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
# While loop to simulate the process after jump to make the task Markovian #
if self.sparse and self.has_left_floor: # rewards = dist_reward + healthy_reward
while self._steps < MAX_EPISODE_STEPS_HOPPERJUMP: # costs = ctrl_cost
# Simulate to the end of the episode # done = False
self._steps += 1 #
# # This is only for logging the distance to goal when first having the contact
try: # has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
self.do_simulation(np.zeros_like(action), self.frame_skip) #
except Exception as e: # if not self.init_floor_contact:
print(e) # self.init_floor_contact = has_floor_contact
# if self.init_floor_contact and not self.has_left_floor:
height_after = self.get_body_com("torso")[2] # self.has_left_floor = not has_floor_contact
#site_pos_after = self.data.get_site_xpos('foot_site') # if not self.contact_with_floor and self.has_left_floor:
site_pos_after = self.data.site('foot_site').xpos # self.contact_with_floor = has_floor_contact
self.max_height = max(height_after, self.max_height) #
# if self.contact_dist is None and self.contact_with_floor:
has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False # self.contact_dist = goal_dist
#
if not self.init_floor_contact: # ##############################################################
self.init_floor_contact = has_floor_contact #
if self.init_floor_contact and not self.has_left_floor: # observation = self._get_obs()
self.has_left_floor = not has_floor_contact # reward = rewards - costs
if not self.contact_with_floor and self.has_left_floor: # info = {
self.contact_with_floor = has_floor_contact # 'height': height_after,
# 'x_pos': site_pos_after,
ctrl_cost = self.control_cost(action) # 'max_height': copy.copy(self.max_height),
costs = ctrl_cost # 'goal': copy.copy(self.goal),
done = False # 'goal_dist': goal_dist,
# 'height_rew': height_reward,
goal_dist = np.linalg.norm(site_pos_after - self.goal) # 'healthy_reward': healthy_reward,
if self.contact_dist is None and self.contact_with_floor: # 'healthy': copy.copy(self.is_healthy),
self.contact_dist = goal_dist # 'contact_dist': copy.copy(self.contact_dist) or 0
# }
rewards = 0 # return observation, reward, done, info
# Task has reached the end, compute the sparse reward
done = True
healthy_reward = self.healthy_reward
distance_reward = -goal_dist * self._dist_weight
height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
contact_reward = -(self.contact_dist or 5) * self._contact_weight
rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
reward = rewards - costs
info = dict(
height=height_after,
x_pos=site_pos_after,
max_height=self.max_height,
goal=self.goal[:1],
goal_dist=goal_dist,
height_rew=self.max_height,
healthy_reward=self.healthy_reward,
healthy=self.is_healthy,
contact_dist=self.contact_dist or 0,
num_steps=self._steps,
has_left_floor=self.has_left_floor
)
return observation, reward, terminated, truncated, info

View File

@ -140,9 +140,6 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
truncated = self.current_step >= self.max_episode_steps and not terminated truncated = self.current_step >= self.max_episode_steps and not terminated
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def _get_obs(self): def _get_obs(self):

View File

@ -61,8 +61,6 @@ class HopperThrowEnv(HopperEnvCustomXML):
exclude_current_positions_from_observation=exclude_current_positions_from_observation, exclude_current_positions_from_observation=exclude_current_positions_from_observation,
**kwargs) **kwargs)
self.render_active = False
def step(self, action): def step(self, action):
self.current_step += 1 self.current_step += 1
self.do_simulation(action, self.frame_skip) self.do_simulation(action, self.frame_skip)
@ -96,15 +94,8 @@ class HopperThrowEnv(HopperEnvCustomXML):
} }
truncated = False truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def render(self):
    """Record that rendering was requested, then defer to the parent ``render``."""
    self.render_active = True
    rendered = super().render()
    return rendered
def _get_obs(self): def _get_obs(self):
return np.append(super()._get_obs(), self.goal) return np.append(super()._get_obs(), self.goal)

View File

@ -68,7 +68,6 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
reset_noise_scale=reset_noise_scale, reset_noise_scale=reset_noise_scale,
exclude_current_positions_from_observation=exclude_current_positions_from_observation, exclude_current_positions_from_observation=exclude_current_positions_from_observation,
**kwargs) **kwargs)
self.render_active = False
def step(self, action): def step(self, action):
@ -119,15 +118,8 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
} }
truncated = False truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def render(self):
    """Record that rendering was requested, then defer to the parent ``render``."""
    self.render_active = True
    rendered = super().render()
    return rendered
def _get_obs(self): def _get_obs(self):
return np.append(super()._get_obs(), self.basket_x) return np.append(super()._get_obs(), self.basket_x)

View File

@ -47,8 +47,6 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
**kwargs **kwargs
) )
self.render_active = False
def step(self, action): def step(self, action):
self._steps += 1 self._steps += 1
@ -79,15 +77,8 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
goal=self.goal if hasattr(self, "goal") else None goal=self.goal if hasattr(self, "goal") else None
) )
if self.render_active and self.render_mode=='human':
self.render()
return ob, reward, terminated, truncated, info return ob, reward, terminated, truncated, info
def render(self):
    """Record that rendering was requested, then defer to the parent ``render``."""
    self.render_active = True
    rendered = super().render()
    return rendered
def distance_reward(self): def distance_reward(self):
vec = self.get_body_com("fingertip") - self.get_body_com("target") vec = self.get_body_com("fingertip") - self.get_body_com("target")
return -self._reward_weight * np.linalg.norm(vec) return -self._reward_weight * np.linalg.norm(vec)

View File

@ -151,15 +151,3 @@ class TTVelObs_MPWrapper_Replan(TT_MPWrapper_Replan):
[True] * 2, # target landing position [True] * 2, # target landing position
# [True] * 1, # time # [True] * 1, # time
]) ])
class TTRndRobot_MPWrapper(TT_MPWrapper):
    """MP wrapper whose context mask keeps joint positions, ball x/y and the target."""

    @property
    def context_mask(self):
        """Boolean mask over the observation entries that form the context."""
        joint_positions = [True] * 7
        joint_velocities = [False] * 7
        ball_xy = [True] * 2
        ball_z = [False] * 1
        target_landing_position = [True] * 2
        # A trailing time entry ([True] * 1) is currently disabled.
        return np.hstack([
            joint_positions,
            joint_velocities,
            ball_xy,
            ball_z,
            target_landing_position,
        ])

View File

@ -5,12 +5,11 @@ from gymnasium import utils, spaces
from gymnasium.envs.mujoco import MujocoEnv from gymnasium.envs.mujoco import MujocoEnv
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, jnt_vel_low, jnt_vel_high from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high
import mujoco import mujoco
MAX_EPISODE_STEPS_TABLE_TENNIS = 350 MAX_EPISODE_STEPS_TABLE_TENNIS = 350
MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER = 300
CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]]) CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]])
CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65], CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65],
@ -19,9 +18,6 @@ CONTEXT_BOUNDS_SWICHING = np.array([[-1.0, -0.65, -1.0, 0.],
[-0.2, 0.65, -0.2, 0.65]]) [-0.2, 0.65, -0.2, 0.65]])
DEFAULT_ROBOT_INIT_POS = np.array([0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 1.5])
DEFAULT_ROBOT_INIT_VEL = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
class TableTennisEnv(MujocoEnv, utils.EzPickle): class TableTennisEnv(MujocoEnv, utils.EzPickle):
""" """
7 DoF table tennis environment 7 DoF table tennis environment
@ -38,11 +34,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
goal_switching_step: int = None, goal_switching_step: int = None,
enable_artificial_wind: bool = False, enable_artificial_wind: bool = False, **kwargs):
random_pos_scale: float = 0.0,
random_vel_scale: float = 0.0,
**kwargs,
):
utils.EzPickle.__init__(**locals()) utils.EzPickle.__init__(**locals())
self._steps = 0 self._steps = 0
@ -56,10 +48,6 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
self._id_set = False self._id_set = False
# initial robot state
self._random_pos_scale = random_pos_scale
self._random_vel_scale = random_vel_scale
# reward calculation # reward calculation
self.ball_landing_pos = None self.ball_landing_pos = None
self._goal_pos = np.zeros(2) self._goal_pos = np.zeros(2)
@ -83,8 +71,6 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
observation_space=self.observation_space, observation_space=self.observation_space,
**kwargs) **kwargs)
self.render_active = False
if ctxt_dim == 2: if ctxt_dim == 2:
self.context_bounds = CONTEXT_BOUNDS_2DIMS self.context_bounds = CONTEXT_BOUNDS_2DIMS
elif ctxt_dim == 4: elif ctxt_dim == 4:
@ -170,17 +156,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
"num_steps": self._steps, "num_steps": self._steps,
} }
terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS terminated, truncated = self._terminated, False
if self.render_active and self.render_mode=='human':
self.render()
return self._get_obs(), reward, terminated, truncated, info return self._get_obs(), reward, terminated, truncated, info
def render(self):
    """Record that rendering was requested, then defer to the parent ``render``."""
    self.render_active = True
    rendered = super().render()
    return rendered
def _contact_checker(self, id_1, id_2): def _contact_checker(self, id_1, id_2):
for coni in range(0, self.data.ncon): for coni in range(0, self.data.ncon):
con = self.data.contact[coni] con = self.data.contact[coni]
@ -188,17 +167,6 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
return True return True
return False return False
def get_initial_robot_state(self):
    """Sample the robot's initial joint positions and velocities.

    Uniform noise in [-1, 1] is drawn for the seven joints (positions first,
    then velocities), scaled, added to the default initial state and finally
    clipped to the joint position / velocity limits.

    Returns:
        Tuple ``(robot_init_pos, robot_init_vel)`` of clipped arrays.
    """
    # Per-joint scale for the position noise; the values match the joint
    # range widths (jnt_pos_high - jnt_pos_low) listed in table_tennis_utils.
    joint_span = np.array([5.2, 4.0, 5.6, 4.0, 6.1, 3.2, 4.4])

    pos_noise = self.np_random.uniform(-1.0, 1.0, size=7) * joint_span * self._random_pos_scale
    init_pos = DEFAULT_ROBOT_INIT_POS + pos_noise

    vel_noise = self.np_random.uniform(-1.0, 1.0, size=7) * self._random_vel_scale
    init_vel = DEFAULT_ROBOT_INIT_VEL + vel_noise

    clipped_pos = np.clip(init_pos, jnt_pos_low, jnt_pos_high)
    clipped_vel = np.clip(init_vel, jnt_vel_low, jnt_vel_high)
    return clipped_pos, clipped_vel
def reset_model(self): def reset_model(self):
self._steps = 0 self._steps = 0
self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False) self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False)
@ -215,10 +183,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
robot_init_pos, robot_init_vel = self.get_initial_robot_state() self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5])
self.data.qvel[:7] = np.zeros(7)
self.data.qpos[:7] = robot_init_pos
self.data.qvel[:7] = robot_init_vel
mujoco.mj_forward(self.model, self.data) mujoco.mj_forward(self.model, self.data)
@ -291,7 +257,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs, tau_bound, delay_bound): def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs, tau_bound, delay_bound):
obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj
penalty = self._get_traj_invalid_penalty(action, pos_traj, tau_bound, delay_bound) penalty = self._get_traj_invalid_penalty(action, pos_traj, tau_bound, delay_bound)
return obs, penalty, False, True, { return obs, penalty, True, False, {
"hit_ball": [False], "hit_ball": [False],
"ball_returned_success": [False], "ball_returned_success": [False],
"land_dist_error": [10.], "land_dist_error": [10.],
@ -308,179 +274,6 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
return False, pos_traj, vel_traj return False, pos_traj, vel_traj
return True, pos_traj, vel_traj return True, pos_traj, vel_traj
class TableTennisMarkov(TableTennisEnv):
    """Table-tennis variant that hands out a reward on every ``step`` call.

    As soon as the ball is no longer in contact with the racket after a hit,
    ``step`` fast-forwards the simulation with zero actions until
    ``MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER`` steps are reached and returns
    the reward accumulated during that roll-out.
    """

    def _get_reward2(self, hit_now, land_now):
        """Per-step reward, selected by the current phase of the rally.

        Args:
            hit_now: True when the current call should be scored as the hit.
            land_now: True when the ball landed during the current step.

        Returns:
            The scalar reward of the active phase.
        """
        # Phase 1 not hit ball
        if not self._hit_ball:
            # Closest racket-ball distance along the recorded trajectories.
            min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
            return 0.005 * (1 - np.tanh(min_r_b_dist**2))

        # Phase 2 hit ball now
        elif self._hit_ball and hit_now:
            return 2

        # Phase 3 hit ball already and not land yet
        elif self._hit_ball and self._ball_landing_pos is None:
            min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1))
            return 0.02 * (1 - np.tanh(min_b_des_b_dist**2))

        # Phase 4 hit ball already and land now
        elif self._hit_ball and land_now:
            over_net_bonus = int(self._ball_landing_pos[0] < 0)
            min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
            return 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus

        # Phase 5 hit ball already and land already
        elif self._hit_ball and not land_now and self._ball_landing_pos is not None:
            return 0

        else:
            raise NotImplementedError

    def _get_reward(self, terminated):
        """Trajectory-level reward; ``terminated`` is accepted but not used."""
        # if not terminated:
        #     return 0
        min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
        if not self._hit_ball:
            # Not hit ball
            return 0.2 * (1 - np.tanh(min_r_b_dist**2))
        elif self._ball_landing_pos is None:
            # Hit ball but not landing pos
            min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1))
            return 2 + (1 - np.tanh(min_b_des_b_dist**2))
        else:
            # Hit ball and land
            min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
            over_net_bonus = int(self._ball_landing_pos[0] < 0)
            return 2 + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus

    def _get_traj_invalid_penalty(self, action, pos_traj, tau_bound, delay_bound):
        """Negative penalty for an invalid trajectory.

        Sums the violation of ``tau_bound`` by ``action[0]``, the violation of
        ``delay_bound`` by ``action[1]`` (each weighted by 3) and the mean
        joint-limit violation of ``pos_traj``; the negated sum is returned.
        """
        tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
        delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
        violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0))
        violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0))
        invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \
            violate_high_bound_error + violate_low_bound_error
        return -invalid_penalty

    def get_invalid_traj_step_penalty(self, pos_traj):
        """Negative penalty for the mean joint-limit violation of ``pos_traj``.

        The value used to be computed and then dropped, so the method always
        returned ``None``.
        """
        violate_high_bound_error = (
            np.maximum(pos_traj - jnt_pos_high, 0).mean())
        violate_low_bound_error = (
            np.maximum(jnt_pos_low - pos_traj, 0).mean())
        invalid_penalty = violate_high_bound_error + violate_low_bound_error
        # NOTE(review): the sign follows _get_traj_invalid_penalty (penalties
        # are returned as negative rewards) -- confirm against the callers.
        return -invalid_penalty

    def _update_game_state(self, action):
        """Advance the simulation by ``frame_skip`` sub-steps and track the rally.

        Updates the hit / landing / termination flags and appends the ball and
        racket positions to their trajectories after every sub-step.

        Returns:
            True when ``do_simulation`` raised and the roll-out was aborted,
            False otherwise.
        """
        unstable_simulation = False
        for _ in range(self.frame_skip):
            if self._enable_artificial_wind:
                self.data.qfrc_applied[-2] = self._artificial_force
            try:
                self.do_simulation(action, 1)
            except Exception as e:
                print("Simulation get unstable return with MujocoException: ", e)
                unstable_simulation = True
                self._terminated = True
                break

            # Update game state
            if not self._terminated:
                if not self._hit_ball:
                    self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
                        self._contact_checker(self._ball_contact_id, self._bat_back_id)
                    if not self._hit_ball:
                        ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id)
                        if ball_land_on_floor_no_hit:
                            self._ball_landing_pos = self.data.body("target_ball").xpos.copy()
                            self._terminated = True
                if self._hit_ball and not self._ball_contact_after_hit:
                    if self._contact_checker(self._ball_contact_id, self._floor_contact_id):  # first check contact with floor
                        self._ball_contact_after_hit = True
                        self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
                        self._terminated = True
                    elif self._contact_checker(self._ball_contact_id, self._table_contact_id):  # second check contact with table
                        self._ball_contact_after_hit = True
                        self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
                        if self._ball_landing_pos[0] < 0.:  # ball lands on the opponent side
                            self._ball_return_success = True
                        self._terminated = True

            # update ball trajectory & racket trajectory
            self._ball_traj.append(self.data.body("target_ball").xpos.copy())
            self._racket_traj.append(self.data.geom("bat").xpos.copy())

        return unstable_simulation

    def ball_racket_contact(self):
        """Return True while the ball touches the front or the back of the bat."""
        return self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
            self._contact_checker(self._ball_contact_id, self._bat_back_id)

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, terminated, truncated, info)``.

        Once the ball has been hit and has left the racket, the remainder of
        the episode is simulated with zero actions inside this call and the
        rewards of that roll-out are added to the returned reward.  If the
        simulation becomes unstable the reward is fixed to -25.
        """
        if not self._id_set:
            self._set_ids()

        if self._steps == self._goal_switching_step and self.np_random.uniform() < 0.5:
            new_goal_pos = self._generate_goal_pos(random=True)
            new_goal_pos[1] = -new_goal_pos[1]
            self._goal_pos = new_goal_pos
            self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
            mujoco.mj_forward(self.model, self.data)

        # The instability flag must come from the helper; a local set inside
        # the helper is invisible here.
        unstable_simulation = self._update_game_state(action)
        self._steps += 1

        obs = self._get_obs()

        # Compute reward
        if not unstable_simulation:
            hit_finish = self._hit_ball and not self.ball_racket_contact()

            if hit_finish:
                # Clean the ball and racket traj before hit
                self._ball_traj = []
                self._racket_traj = []

                # Simulate the rest of the traj
                reward = self._get_reward2(True, False)
                while self._steps < MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER:
                    land_already = self._ball_landing_pos is not None
                    unstable_simulation = self._update_game_state(np.zeros_like(action))
                    self._steps += 1
                    if unstable_simulation:
                        # Stop the roll-out: the trajectories may be empty and
                        # further simulation calls would only fail again.
                        break

                    land_now = (not land_already
                                and self._ball_landing_pos is not None)
                    reward += self._get_reward2(False, land_now)

                    # Uncomment the line below to visualize the sim after hit
                    # self.render(mode="human")
            else:
                reward = self._get_reward2(False, False)

        if unstable_simulation:
            reward = -25

        # Update ball landing error
        land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \
            if self._ball_landing_pos is not None else 10.
        info = {
            "hit_ball": self._hit_ball,
            "ball_returned_success": self._ball_return_success,
            "land_dist_error": land_dist_err,
            "is_success": self._ball_return_success and land_dist_err < 0.2,
            "num_steps": self._steps,
        }

        terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER

        return obs, reward, terminated, truncated, info
class TableTennisWind(TableTennisEnv): class TableTennisWind(TableTennisEnv):
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, **kwargs): def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, **kwargs):
@ -503,17 +296,7 @@ class TableTennisWind(TableTennisEnv):
]) ])
return obs return obs
class TableTennisGoalSwitching(TableTennisEnv): class TableTennisGoalSwitching(TableTennisEnv):
def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99, **kwargs): def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99, **kwargs):
super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step, **kwargs) super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step, **kwargs)
class TableTennisRandomInit(TableTennisEnv):
    """Table-tennis environment with ``random_pos_scale`` defaulting to 1.0."""

    def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
                 random_pos_scale: float = 1.0,
                 random_vel_scale: float = 0.0,
                 **kwargs):
        """Forward all arguments unchanged to ``TableTennisEnv.__init__``."""
        explicit_args = dict(
            ctxt_dim=ctxt_dim,
            frame_skip=frame_skip,
            random_pos_scale=random_pos_scale,
            random_vel_scale=random_vel_scale,
        )
        super().__init__(**explicit_args, **kwargs)

View File

@ -2,10 +2,6 @@ import numpy as np
jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]) jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]) jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
jnt_vel_low = np.ones(7) * -7
jnt_vel_high = np.ones(7) * 7
delay_bound = [0.05, 0.15] delay_bound = [0.05, 0.15]
tau_bound = [0.5, 1.5] tau_bound = [0.5, 1.5]

View File

@ -79,8 +79,6 @@ class Walker2dEnvCustomXML(Walker2dEnv):
**kwargs, **kwargs,
) )
self.render_active = False
class Walker2dJumpEnv(Walker2dEnvCustomXML): class Walker2dJumpEnv(Walker2dEnvCustomXML):
""" """
@ -147,15 +145,8 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML):
} }
truncated = False truncated = False
if self.render_active and self.render_mode=='human':
self.render()
return observation, reward, terminated, truncated, info return observation, reward, terminated, truncated, info
def render(self):
    """Record that rendering was requested, then defer to the parent ``render``."""
    self.render_active = True
    rendered = super().render()
    return rendered
def _get_obs(self): def _get_obs(self):
return np.append(super()._get_obs(), self.goal) return np.append(super()._get_obs(), self.goal)

View File

@ -3,14 +3,14 @@ import fancy_gym
def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False): def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False):
env = gym.make(env_name, render_mode='human' if render else None) env = gym.make(env_name)
env.reset(seed=seed) env.reset(seed=seed)
for i in range(iterations): for i in range(iterations):
while True: while True:
ac = env.action_space.sample() ac = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(ac) obs, reward, terminated, truncated, info = env.step(ac)
if render: if render:
env.render() env.render(mode="human")
if terminated or truncated: if terminated or truncated:
env.reset() env.reset()
break break
@ -38,13 +38,13 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, 'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
'condition_on_desired': True} 'condition_on_desired': True}
base_env = gym.make(base_env_id, render_mode='human' if render else None) base_env = gym.make(base_env_id)
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs, env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
seed=seed) seed=seed)
if render: if render:
env.render() env.render(mode="human")
obs = env.reset() obs = env.reset()

View File

@ -17,7 +17,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T
Returns: Returns:
""" """
env = gym.make(env_id, render_mode='human' if render else None) env = gym.make(env_id)
rewards = 0 rewards = 0
obs = env.reset(seed=seed) obs = env.reset(seed=seed)
print("observation shape:", env.observation_space.shape) print("observation shape:", env.observation_space.shape)
@ -26,7 +26,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T
for i in range(iterations): for i in range(iterations):
ac = env.action_space.sample() ac = env.action_space.sample()
if render: if render:
env.render() env.render(mode="human")
obs, reward, terminated, truncated, info = env.step(ac) obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward rewards += reward
@ -84,7 +84,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# basis_generator_kwargs = {'basis_generator_type': 'rbf', # basis_generator_kwargs = {'basis_generator_type': 'rbf',
# 'num_basis': 5 # 'num_basis': 5
# } # }
base_env = gym.make(base_env_id, render_mode='human' if render else None) base_env = gym.make(base_env_id)
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
@ -96,7 +96,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# It is also possible to change them mode multiple times when # It is also possible to change them mode multiple times when
# e.g. only every nth trajectory should be displayed. # e.g. only every nth trajectory should be displayed.
if render: if render:
env.render() env.render(mode="human")
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
@ -115,7 +115,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
env.close() env.close()
del env del env
def main(render = False): def main(render = True):
# # Standard DMC Suite tasks # # Standard DMC Suite tasks
example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
# #

View File

@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
""" """
env = gym.make(env_id, render_mode='human' if render else None) env = gym.make(env_id)
rewards = 0 rewards = 0
obs = env.reset(seed=seed) obs = env.reset(seed=seed)
print("Observation shape: ", env.observation_space.shape) print("Observation shape: ", env.observation_space.shape)
@ -85,7 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16),
# do not return values above threshold # do not return values above threshold
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()), return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
def main(render = False): def main(render = True):
# Basic gym task # Basic gym task
example_general("Pendulum-v1", seed=10, iterations=200, render=render) example_general("Pendulum-v1", seed=10, iterations=200, render=render)

View File

@ -2,7 +2,7 @@ import gymnasium as gym
import fancy_gym import fancy_gym
def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, render=True): def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
""" """
Example for running a MetaWorld based env in the step based setting. Example for running a MetaWorld based env in the step based setting.
The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
@ -18,7 +18,7 @@ def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, re
Returns: Returns:
""" """
env = gym.make(env_id, render_mode='human' if render else None) env = gym.make(env_id)
rewards = 0 rewards = 0
obs = env.reset(seed=seed) obs = env.reset(seed=seed)
print("observation shape:", env.observation_space.shape) print("observation shape:", env.observation_space.shape)
@ -27,7 +27,9 @@ def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, re
for i in range(iterations): for i in range(iterations):
ac = env.action_space.sample() ac = env.action_space.sample()
if render: if render:
env.render() # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
# TODO: Remove this, when Metaworld fixes its interface.
env.render(False)
obs, reward, terminated, truncated, info = env.step(ac) obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward rewards += reward
if terminated or truncated: if terminated or truncated:
@ -79,7 +81,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
basis_generator_kwargs = {'basis_generator_type': 'rbf', basis_generator_kwargs = {'basis_generator_type': 'rbf',
'num_basis': 5 'num_basis': 5
} }
base_env = gym.make(base_env_id, render_mode='human' if render else None) base_env = gym.make(base_env_id)
env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
@ -91,7 +93,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
# It is also possible to change them mode multiple times when # It is also possible to change them mode multiple times when
# e.g. only every nth trajectory should be displayed. # e.g. only every nth trajectory should be displayed.
if render: if render:
env.render() env.render(mode="human")
rewards = 0 rewards = 0
obs = env.reset(seed=seed) obs = env.reset(seed=seed)

View File

@ -13,13 +13,15 @@ def example_mp(env_name, seed=1, render=True):
Returns: Returns:
""" """
env = gym.make(env_name, render_mode='human' if render else None) env = gym.make(env_name)
returns = 0 returns = 0
obs = env.reset(seed=seed) obs = env.reset(seed=seed)
# number of samples/full trajectories (multiple environment steps) # number of samples/full trajectories (multiple environment steps)
for i in range(10): for i in range(10):
if render and i % 2 == 0: if render and i % 2 == 0:
env.render(mode="human")
else:
env.render() env.render()
ac = env.action_space.sample() ac = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(ac) obs, reward, terminated, truncated, info = env.step(ac)

View File

@ -52,7 +52,7 @@ class FixMetaworldIgnoresSeedOnResetWrapper(gym.Wrapper, gym.utils.RecordConstru
def reset(self, **kwargs): def reset(self, **kwargs):
if 'seed' in kwargs: if 'seed' in kwargs:
print('[Fancy Gym] You just called .reset on a Metaworld env and supplied a seed. Metaworld curretly does not correctly implement seeding. Do not rely on deterministic behavior.') print('[!] You just called .reset on a Metaworld env and supplied a seed. Metaworld curretly does not correctly implement seeding. Do not rely on deterministic behavior.')
self.env.seed(kwargs['seed']) self.env.seed(kwargs['seed'])
return self.env.reset(**kwargs) return self.env.reset(**kwargs)

View File

@ -1,6 +1,6 @@
[project] [project]
name = "fancy_gym" name = "fancy_gym"
version = "0.3.0" version = "0.1.4"
description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches." description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches."
readme = "README.md" readme = "README.md"
authors = [ authors = [
@ -26,7 +26,6 @@ classifiers = [
] ]
dependencies = [ dependencies = [
"toml",
"mp_pytorch<=0.1.3", "mp_pytorch<=0.1.3",
"mujoco==2.3.3", "mujoco==2.3.3",
"gymnasium[mujoco]>=0.26.0" "gymnasium[mujoco]>=0.26.0"
@ -41,7 +40,7 @@ requires-python = ">=3.7"
#"Repository" = "https://github.com/ALRhub/fancy_gym/" #"Repository" = "https://github.com/ALRhub/fancy_gym/"
[build-system] [build-system]
requires = ["setuptools>=61.0.0", "wheel", "toml"] requires = ["setuptools>=61.0.0", "wheel"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project.optional-dependencies] [project.optional-dependencies]

View File

@ -1,6 +1,5 @@
# We still provide a setup.py for backwards compatibility. # We still provide a setup.py for backwards compatibility.
# But the pyproject.toml should be preferred. # But the pyproject.toml should be preferred.
import toml
import itertools import itertools
from pathlib import Path from pathlib import Path
from typing import List from typing import List
@ -9,9 +8,6 @@ from setuptools import setup, find_packages
print('[!] You are currently installing/building fancy_gym via setup.py. This is only provided for backwards-compatability. Please use the pyproject.toml instead.') print('[!] You are currently installing/building fancy_gym via setup.py. This is only provided for backwards-compatability. Please use the pyproject.toml instead.')
pyproject_content = toml.load("pyproject.toml")
project_version = pyproject_content["project"]["version"]
# Environment-specific dependencies for dmc and metaworld # Environment-specific dependencies for dmc and metaworld
extras = { extras = {
'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'], 'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'],
@ -42,7 +38,7 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]:
setup( setup(
author='Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou', author='Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou',
name='fancy_gym', name='fancy_gym',
version=project_version, version='0.1.0',
classifiers=[ classifiers=[
'Development Status :: 4 - Beta', 'Development Status :: 4 - Beta',
'Intended Audience :: Science/Research', 'Intended Audience :: Science/Research',
@ -59,7 +55,6 @@ setup(
], ],
extras_require=extras, extras_require=extras,
install_requires=[ install_requires=[
'toml',
'mp_pytorch<=0.1.3', 'mp_pytorch<=0.1.3',
'mujoco==2.3.3', 'mujoco==2.3.3',
'gymnasium[mujoco]>=0.26.0' 'gymnasium[mujoco]>=0.26.0'