diff --git a/.github/workflows/ensure-release-tagged.yaml b/.github/workflows/ensure-release-tagged.yaml deleted file mode 100644 index e58e31e..0000000 --- a/.github/workflows/ensure-release-tagged.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Ensure Tagged Commits on Release - -on: - pull_request: - branches: - - release - -jobs: - check_tag: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Check if base commit of PR is tagged - run: | - BASE_COMMIT=$(jq -r .pull_request.base.sha < "$GITHUB_EVENT_PATH") - TAG=$(git tag --contains $BASE_COMMIT) - if [ -z "$TAG" ]; then - echo "Base commit of PR is not tagged. PRs onto release must be tagged with the version number." - exit 1 - fi - echo "Base commit of PR is tagged. Check passed." - diff --git a/.github/workflows/ensure-version-consistency.yaml b/.github/workflows/ensure-version-consistency.yaml new file mode 100644 index 0000000..c3c85df --- /dev/null +++ b/.github/workflows/ensure-version-consistency.yaml @@ -0,0 +1,52 @@ +name: Ensure Version Consistency on PR to Release + +on: + pull_request: + branches: + - release + +jobs: + check_version_and_tag: + runs-on: ubuntu-latest + strategy: + fail-fast: true # Terminate the job immediately if any step fails + steps: + - name: Check out code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Necessary to fetch all tags for comparison + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install toml + + - name: Extract version from pyproject.toml + run: | + echo "Extracting version from pyproject.toml" + VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])') + echo "Version in pyproject.toml is $VERSION" + echo "VERSION=$VERSION" >> $GITHUB_ENV + + - name: Get tag for the PR's head commit + run: | + PR_HEAD_SHA=$(jq -r .pull_request.head.sha < "$GITHUB_EVENT_PATH") + TAG=$(git tag --contains $PR_HEAD_SHA) + echo "Tag on PR's head commit is $TAG" + echo "TAG=$TAG" >> $GITHUB_ENV + + - name: Compare version and tag + run: | + if [ -z "$TAG" ]; then + echo "Head commit of PR is not tagged. Ensure the head commit of PRs onto release is tagged with the version number." + exit 1 + elif [ "$VERSION" != "$TAG" ]; then + echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG)." + exit 1 + else + echo "Version and git tag match. Check passed." + fi diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 1b5fa08..902cec5 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -8,6 +8,8 @@ on: jobs: publish: name: Publish to PyPI + strategy: + fail-fast: true # Terminate the job immediately if any step fails runs-on: ubuntu-latest steps: - name: Check out code @@ -15,19 +17,24 @@ jobs: with: fetch-depth: 0 # This fetches all history for all branches and tags - - name: Check if commit is tagged + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - name: Validate version against tag run: | + VERSION=$(python -c 'import toml; print(toml.load("pyproject.toml")["project"]["version"])') TAG=$(git tag --contains HEAD) if [ -z "$TAG" ]; then echo "Commit is not tagged. Failing the workflow." exit 1 fi - echo "Commit is tagged. Proceeding with the workflow." - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.x" + if [ "$VERSION" != "$TAG" ]; then + echo "Version in pyproject.toml ($VERSION) does not match the git tag ($TAG). Failing the workflow." + exit 1 + fi + echo "Version and commit tag match. Proceeding with the workflow." - name: Install pypa/build/setuptools/twine run: >- @@ -36,9 +43,6 @@ jobs: build setuptools twine --user - - name: Prevent fallback onto setup.py - run: rm setup.py - - name: Build a binary wheel and a source tarball run: python3 -m build diff --git a/README.md b/README.md index af6d2d3..73dce66 100644 --- a/README.md +++ b/README.md @@ -10,25 +10,25 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org) (a mainta **Key Features**: -- **New Challenging Environments**: `fancy_gym` includes several new environments ([Panda Box Pushing](https://dominik-roth.eu/fancy/envs/fancy/mujoco.html#box-pushing), [Table Tennis](https://dominik-roth.eu/fancy/envs/fancy/mujoco.html#table-tennis), [etc.](https://dominik-roth.eu/fancy/envs/fancy/index.html)) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. +- **New Challenging Environments**: `fancy_gym` includes several new environments ([Panda Box Pushing](https://alrhub.github.io/fancy_gym/envs/fancy/mujoco.html#box-pushing), [Table Tennis](https://alrhub.github.io/fancy_gym/envs/fancy/mujoco.html#table-tennis), [etc.](https://alrhub.github.io/fancy_gym/envs/fancy/index.html)) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. - **Support for Movement Primitives**: `fancy_gym` supports a range of movement primitives (MPs), including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP). - **Upgrade to Movement Primitives**: With our framework, it’s straightforward to transform standard Gymnasium environments into environments that support movement primitives. -- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](dominik-roth.eu/fancy/envs/dmc.html) - and [Metaworld](https://dominik-roth.eu/fancy/envs/meta.html), whether you want to use them in the regular step-based setting or using MPs. -- **Contribute Your Own Environments**: If you’re inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://dominik-roth.eu/fancy/guide/upgrading_envs.html) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`. +- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://alrhub.github.io/fancy_gym/envs/dmc.html) + and [Metaworld](https://alrhub.github.io/fancy_gym/envs/meta.html), whether you want to use them in the regular step-based setting or using MPs. +- **Contribute Your Own Environments**: If you’re inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://alrhub.github.io/fancy_gym/guide/upgrading_envs.html) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`. ## Quickstart Guide | ⚠ We recommend installing `fancy_gym` into a virtual environment as provided by [venv](https://docs.python.org/3/library/venv.html), [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/). | | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -Install via pip [or use an alternative installation method](https://dominik-roth.eu/fancy/guide/installation.html) +Install via pip [or use an alternative installation method](https://alrhub.github.io/fancy_gym/guide/installation.html) ```bash pip install 'fancy_gym[all]' ``` -Try out one of our step-based environments [or explore our other envs](https://dominik-roth.eu/fancy/envs/fancy/index.html) +Try out one of our step-based environments [or explore our other envs](https://alrhub.github.io/fancy_gym/envs/fancy/index.html) ```python import gymnasium as gym @@ -48,7 +48,7 @@ Try out one of our step-based environments [or explore our other envs](https://d observation, info = env.reset() ``` -Explore the MP-based variant [or learn more about Movement Primitives (MPs)](https://dominik-roth.eu/fancy/guide/episodic_rl.html) +Explore the MP-based variant [or learn more about Movement Primitives (MPs)](https://alrhub.github.io/fancy_gym/guide/episodic_rl.html) ```python import gymnasium as gym @@ -66,7 +66,7 @@ Explore the MP-based variant [or learn more about Movement Primitives (MPs)](htt ## Documentation -Documentation for `fancy_gym` can be found [here](https://dominik-roth.eu/fancy); Usage Examples can be found [here](https://dominik-roth.eu/fancy/examples/general.html). +Documentation for `fancy_gym` can be found [here](https://alrhub.github.io/fancy_gym/); Usage Examples can be found [here](https://alrhub.github.io/fancy_gym/examples/general.html). ## Citing the Project diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index 24c61da..ae5c3a7 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/examples/dmc.doctree b/docs/build/doctrees/examples/dmc.doctree index 958cf19..21fc3d1 100644 Binary files a/docs/build/doctrees/examples/dmc.doctree and b/docs/build/doctrees/examples/dmc.doctree differ diff --git a/docs/build/doctrees/examples/general.doctree b/docs/build/doctrees/examples/general.doctree index ffba9f8..8d0f367 100644 Binary files a/docs/build/doctrees/examples/general.doctree and b/docs/build/doctrees/examples/general.doctree differ diff --git a/docs/build/doctrees/examples/metaworld.doctree b/docs/build/doctrees/examples/metaworld.doctree index 481db99..7b82b77 100644 Binary files a/docs/build/doctrees/examples/metaworld.doctree and b/docs/build/doctrees/examples/metaworld.doctree differ diff --git a/docs/build/doctrees/examples/movement_primitives.doctree b/docs/build/doctrees/examples/movement_primitives.doctree index 5f08c93..c3dae92 100644 Binary files a/docs/build/doctrees/examples/movement_primitives.doctree and b/docs/build/doctrees/examples/movement_primitives.doctree differ diff --git a/docs/build/doctrees/examples/open_ai.doctree b/docs/build/doctrees/examples/open_ai.doctree index 303687b..c1321c3 100644 Binary files a/docs/build/doctrees/examples/open_ai.doctree and b/docs/build/doctrees/examples/open_ai.doctree differ diff --git a/docs/build/doctrees/examples/replanning_envs.doctree b/docs/build/doctrees/examples/replanning_envs.doctree index 75a3f46..5a6c386 100644 Binary files a/docs/build/doctrees/examples/replanning_envs.doctree and b/docs/build/doctrees/examples/replanning_envs.doctree differ diff --git a/docs/build/doctrees/guide/installation.doctree b/docs/build/doctrees/guide/installation.doctree index d1366cd..fa389f2 100644 Binary files a/docs/build/doctrees/guide/installation.doctree and b/docs/build/doctrees/guide/installation.doctree differ diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo index 9b8f8aa..b5dcb4b 100644 --- a/docs/build/html/.buildinfo +++ b/docs/build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 28ec069496fc0ad05c8b9641549626a6 +config: 36919d67c12a677d3f16f60d980b0313 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/html/_modules/fancy_gym/envs/registry.html b/docs/build/html/_modules/fancy_gym/envs/registry.html index 2955545..bdf26be 100644 --- a/docs/build/html/_modules/fancy_gym/envs/registry.html +++ b/docs/build/html/_modules/fancy_gym/envs/registry.html @@ -3,7 +3,7 @@ - fancy_gym.envs.registry — Fancy Gym 0.2 documentation + fancy_gym.envs.registry — Fancy Gym 0.3.0 documentation @@ -38,7 +38,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/_modules/index.html b/docs/build/html/_modules/index.html index 0bd248e..e7cfe06 100644 --- a/docs/build/html/_modules/index.html +++ b/docs/build/html/_modules/index.html @@ -3,7 +3,7 @@ - Overview: module code — Fancy Gym 0.2 documentation + Overview: module code — Fancy Gym 0.3.0 documentation @@ -38,7 +38,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/_sources/guide/installation.rst.txt b/docs/build/html/_sources/guide/installation.rst.txt index 0077ba4..1c10973 100644 --- a/docs/build/html/_sources/guide/installation.rst.txt +++ b/docs/build/html/_sources/guide/installation.rst.txt @@ -32,7 +32,7 @@ since they are not avaible on PyPI yet. Install metaworld via .. code:: bash - pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld + pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld Installation from master ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -70,4 +70,4 @@ Metaworld has to be installed manually with .. code:: bash - pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld \ No newline at end of file + pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld diff --git a/docs/build/html/_static/documentation_options.js b/docs/build/html/_static/documentation_options.js index 4c7b1ee..44e1488 100644 --- a/docs/build/html/_static/documentation_options.js +++ b/docs/build/html/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '0.2', + VERSION: '0.3.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/build/html/api.html b/docs/build/html/api.html index 3545a26..262f31a 100644 --- a/docs/build/html/api.html +++ b/docs/build/html/api.html @@ -4,7 +4,7 @@ - API — Fancy Gym 0.2 documentation + API — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/envs/dmc.html b/docs/build/html/envs/dmc.html index 851815c..522efd8 100644 --- a/docs/build/html/envs/dmc.html +++ b/docs/build/html/envs/dmc.html @@ -4,7 +4,7 @@ - DeepMind Control (DMC) — Fancy Gym 0.2 documentation + DeepMind Control (DMC) — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/envs/fancy/airhockey.html b/docs/build/html/envs/fancy/airhockey.html index 27629cf..3fcdc19 100644 --- a/docs/build/html/envs/fancy/airhockey.html +++ b/docs/build/html/envs/fancy/airhockey.html @@ -4,7 +4,7 @@ - AirHockey — Fancy Gym 0.2 documentation + AirHockey — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/envs/fancy/classic_control.html b/docs/build/html/envs/fancy/classic_control.html index 09b2c23..d9d5a40 100644 --- a/docs/build/html/envs/fancy/classic_control.html +++ b/docs/build/html/envs/fancy/classic_control.html @@ -4,7 +4,7 @@ - Classic Control — Fancy Gym 0.2 documentation + Classic Control — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/envs/fancy/index.html b/docs/build/html/envs/fancy/index.html index 0091290..8b1418a 100644 --- a/docs/build/html/envs/fancy/index.html +++ b/docs/build/html/envs/fancy/index.html @@ -4,7 +4,7 @@ - Fancy — Fancy Gym 0.2 documentation + Fancy — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/envs/fancy/mujoco.html b/docs/build/html/envs/fancy/mujoco.html index e132bec..11d2273 100644 --- a/docs/build/html/envs/fancy/mujoco.html +++ b/docs/build/html/envs/fancy/mujoco.html @@ -4,7 +4,7 @@ - Mujoco — Fancy Gym 0.2 documentation + Mujoco — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/envs/meta.html b/docs/build/html/envs/meta.html index 1dc1d49..ffde63f 100644 --- a/docs/build/html/envs/meta.html +++ b/docs/build/html/envs/meta.html @@ -4,7 +4,7 @@ - Metaworld — Fancy Gym 0.2 documentation + Metaworld — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/envs/open_ai.html b/docs/build/html/envs/open_ai.html index 296a946..4182d6d 100644 --- a/docs/build/html/envs/open_ai.html +++ b/docs/build/html/envs/open_ai.html @@ -4,7 +4,7 @@ - Gymnasium — Fancy Gym 0.2 documentation + Gymnasium — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/examples/dmc.html b/docs/build/html/examples/dmc.html index 81c9feb..57ef9a1 100644 --- a/docs/build/html/examples/dmc.html +++ b/docs/build/html/examples/dmc.html @@ -4,7 +4,7 @@ - DeepMind Control Examples — Fancy Gym 0.2 documentation + DeepMind Control Examples — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
@@ -126,7 +126,7 @@ 17 Returns: 18 19 """ - 20 env = gym.make(env_id) + 20 env = gym.make(env_id, render_mode='human' if render else None) 21 rewards = 0 22 obs = env.reset(seed=seed) 23 print("observation shape:", env.observation_space.shape) @@ -135,7 +135,7 @@ 26 for i in range(iterations): 27 ac = env.action_space.sample() 28 if render: - 29 env.render(mode="human") + 29 env.render() 30 obs, reward, terminated, truncated, info = env.step(ac) 31 rewards += reward 32 @@ -193,58 +193,68 @@ 84 # basis_generator_kwargs = {'basis_generator_type': 'rbf', 85 # 'num_basis': 5 86 # } - 87 env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={}, - 88 traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, - 89 phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, - 90 seed=seed) - 91 - 92 # This renders the full MP trajectory - 93 # It is only required to call render() once in the beginning, which renders every consecutive trajectory. - 94 # Resetting to no rendering, can be achieved by render(mode=None). - 95 # It is also possible to change them mode multiple times when - 96 # e.g. only every nth trajectory should be displayed. - 97 if render: - 98 env.render(mode="human") - 99 -100 rewards = 0 -101 obs = env.reset() -102 -103 # number of samples/full trajectories (multiple environment steps) -104 for i in range(iterations): -105 ac = env.action_space.sample() -106 obs, reward, terminated, truncated, info = env.step(ac) -107 rewards += reward -108 -109 if terminated or truncated: -110 print(base_env_id, rewards) -111 rewards = 0 -112 obs = env.reset() -113 -114 env.close() -115 del env -116 + 87 base_env = gym.make(base_env_id, render_mode='human' if render else None) + 88 env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, + 89 traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, + 90 phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, + 91 seed=seed) + 92 + 93 # This renders the full MP trajectory + 94 # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + 95 # Resetting to no rendering, can be achieved by render(mode=None). + 96 # It is also possible to change them mode multiple times when + 97 # e.g. only every nth trajectory should be displayed. + 98 if render: + 99 env.render() +100 +101 rewards = 0 +102 obs = env.reset() +103 +104 # number of samples/full trajectories (multiple environment steps) +105 for i in range(iterations): +106 ac = env.action_space.sample() +107 obs, reward, terminated, truncated, info = env.step(ac) +108 rewards += reward +109 +110 if terminated or truncated: +111 print(base_env_id, rewards) +112 rewards = 0 +113 obs = env.reset() +114 +115 env.close() +116 del env 117 -118if __name__ == '__main__': -119 # Disclaimer: DMC environments require the seed to be specified in the beginning. -120 # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics. -121 -122 # For rendering DMC -123 # export MUJOCO_GL="osmesa" -124 render = True -125 -126 # # Standard DMC Suite tasks -127 example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) -128 # -129 # # Manipulation tasks -130 # # Disclaimer: The vision versions are currently not integrated and yield an error -131 example_dmc("dm_control/manipulation-reach_site_features", seed=10, iterations=250, render=render) -132 # -133 # # Gym + DMC hybrid task provided in the MP framework -134 example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render) -135 -136 # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is -137 # already registered above -138 example_custom_dmc_and_mp(seed=11, iterations=1, render=render) +118def main(render = False): +119 # # Standard DMC Suite tasks +120 example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) +121 # +122 # # Manipulation tasks +123 # # Disclaimer: The vision versions are currently not integrated and yield an error +124 example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render) +125 # +126 # # Gym + DMC hybrid task provided in the MP framework +127 example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render) +128 +129 # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is +130 # already registered above +131 example_custom_dmc_and_mp(seed=11, iterations=1, render=render) +132 +133 # # Standard DMC Suite tasks +134 example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) +135 # +136 # # Manipulation tasks +137 # # Disclaimer: The vision versions are currently not integrated and yield an error +138 example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render) +139 # +140 # # Gym + DMC hybrid task provided in the MP framework +141 example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render) +142 +143 # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is +144 # already registered above +145 example_custom_dmc_and_mp(seed=11, iterations=1, render=render) +146 +147if __name__ == '__main__': +148 main()
diff --git a/docs/build/html/examples/general.html b/docs/build/html/examples/general.html index 11bd623..31a6318 100644 --- a/docs/build/html/examples/general.html +++ b/docs/build/html/examples/general.html @@ -4,7 +4,7 @@ - General Usage Examples — Fancy Gym 0.2 documentation + General Usage Examples — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
@@ -130,7 +130,7 @@ 21 22 """ 23 - 24 env = gym.make(env_id) + 24 env = gym.make(env_id, render_mode='human' if render else None) 25 rewards = 0 26 obs = env.reset(seed=seed) 27 print("Observation shape: ", env.observation_space.shape) @@ -194,21 +194,21 @@ 85 # do not return values above threshold 86 return *map(lambda v: np.stack(v)[:n_samples], buffer.values()), 87 - 88 - 89if __name__ == '__main__': - 90 render = True + 88def main(render = False): + 89 # Basic gym task + 90 example_general("Pendulum-v1", seed=10, iterations=200, render=render) 91 - 92 # Basic gym task - 93 example_general("Pendulum-v1", seed=10, iterations=200, render=render) + 92 # Mujoco task from framework + 93 example_general("fancy/Reacher5d-v0", seed=10, iterations=200, render=render) 94 - 95 # Mujoco task from framework - 96 example_general("fancy/Reacher5d-v0", seed=10, iterations=200, render=render) + 95 # # OpenAI Mujoco task + 96 example_general("HalfCheetah-v2", seed=10, render=render) 97 - 98 # # OpenAI Mujoco task - 99 example_general("HalfCheetah-v2", seed=10, render=render) + 98 # Vectorized multiprocessing environments + 99 # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) 100 -101 # Vectorized multiprocessing environments -102 # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) +101if __name__ == '__main__': +102 main()
diff --git a/docs/build/html/examples/metaworld.html b/docs/build/html/examples/metaworld.html index 7045b53..ff026d7 100644 --- a/docs/build/html/examples/metaworld.html +++ b/docs/build/html/examples/metaworld.html @@ -4,7 +4,7 @@ - Metaworld Examples — Fancy Gym 0.2 documentation + Metaworld Examples — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
@@ -111,7 +111,7 @@ 2import fancy_gym 3 4 - 5def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True): + 5def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, render=True): 6 """ 7 Example for running a MetaWorld based env in the step based setting. 8 The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always @@ -127,7 +127,7 @@ 18 Returns: 19 20 """ - 21 env = gym.make(env_id) + 21 env = gym.make(env_id, render_mode='human' if render else None) 22 rewards = 0 23 obs = env.reset(seed=seed) 24 print("observation shape:", env.observation_space.shape) @@ -136,111 +136,104 @@ 27 for i in range(iterations): 28 ac = env.action_space.sample() 29 if render: - 30 # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM - 31 # TODO: Remove this, when Metaworld fixes its interface. - 32 env.render(False) - 33 obs, reward, terminated, truncated, info = env.step(ac) - 34 rewards += reward - 35 if terminated or truncated: - 36 print(env_id, rewards) - 37 rewards = 0 - 38 obs = env.reset() - 39 - 40 env.close() - 41 del env - 42 - 43 - 44def example_custom_meta_and_mp(seed=1, iterations=1, render=True): - 45 """ - 46 Example for running a custom movement primitive based environments. - 47 Our already registered environments follow the same structure. - 48 Hence, this also allows to adjust hyperparameters of the movement primitives. - 49 Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks. - 50 We appreciate PRs for custom environments (especially MP wrappers of existing tasks) - 51 for our repo: https://github.com/ALRhub/fancy_gym/ - 52 Args: - 53 seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code) - 54 iterations: Number of rollout steps to run - 55 render: Render the episode (TODO: currently not working due to an issue in MetaWorld code) + 30 env.render() + 31 obs, reward, terminated, truncated, info = env.step(ac) + 32 rewards += reward + 33 if terminated or truncated: + 34 print(env_id, rewards) + 35 rewards = 0 + 36 obs = env.reset(seed=seed+i+1) + 37 + 38 env.close() + 39 del env + 40 + 41 + 42def example_custom_meta_and_mp(seed=1, iterations=1, render=True): + 43 """ + 44 Example for running a custom movement primitive based environments. + 45 Our already registered environments follow the same structure. + 46 Hence, this also allows to adjust hyperparameters of the movement primitives. + 47 Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks. + 48 We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + 49 for our repo: https://github.com/ALRhub/fancy_gym/ + 50 Args: + 51 seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code) + 52 iterations: Number of rollout steps to run + 53 render: Render the episode (TODO: currently not working due to an issue in MetaWorld code) + 54 + 55 Returns: 56 - 57 Returns: + 57 """ 58 - 59 """ - 60 - 61 # Base MetaWorld name, according to structure of above example - 62 base_env_id = "metaworld/button-press-v2" - 63 - 64 # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. - 65 # You can also add other gym.Wrappers in case they are needed. - 66 wrappers = [fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper] - 67 # # For a ProMP - 68 # trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'} - 69 # phase_generator_kwargs = {'phase_generator_type': 'linear'} - 70 # controller_kwargs = {'controller_type': 'metaworld'} - 71 # basis_generator_kwargs = {'basis_generator_type': 'zero_rbf', - 72 # 'num_basis': 5, - 73 # 'num_basis_zero_start': 1 - 74 # } - 75 - 76 # For a DMP - 77 trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'} - 78 phase_generator_kwargs = {'phase_generator_type': 'exp', - 79 'alpha_phase': 2} - 80 controller_kwargs = {'controller_type': 'metaworld'} - 81 basis_generator_kwargs = {'basis_generator_type': 'rbf', - 82 'num_basis': 5 - 83 } - 84 env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={}, - 85 traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, - 86 phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, - 87 seed=seed) - 88 - 89 # This renders the full MP trajectory - 90 # It is only required to call render() once in the beginning, which renders every consecutive trajectory. - 91 # Resetting to no rendering, can be achieved by render(mode=None). - 92 # It is also possible to change them mode multiple times when - 93 # e.g. only every nth trajectory should be displayed. - 94 if render: - 95 raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. " - 96 "A temporary workaround is to alter their code in MujocoEnv render() from " - 97 "`if not offscreen` to `if not offscreen or offscreen == 'human'`.") - 98 # TODO: Remove this, when Metaworld fixes its interface. - 99 # env.render(mode="human") -100 -101 rewards = 0 -102 obs = env.reset() -103 -104 # number of samples/full trajectories (multiple environment steps) -105 for i in range(iterations): -106 ac = env.action_space.sample() -107 obs, reward, terminated, truncated, info = env.step(ac) -108 rewards += reward + 59 # Base MetaWorld name, according to structure of above example + 60 base_env_id = "metaworld/button-press-v2" + 61 + 62 # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. + 63 # You can also add other gym.Wrappers in case they are needed. + 64 wrappers = [fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper] + 65 # # For a ProMP + 66 # trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'} + 67 # phase_generator_kwargs = {'phase_generator_type': 'linear'} + 68 # controller_kwargs = {'controller_type': 'metaworld'} + 69 # basis_generator_kwargs = {'basis_generator_type': 'zero_rbf', + 70 # 'num_basis': 5, + 71 # 'num_basis_zero_start': 1 + 72 # } + 73 + 74 # For a DMP + 75 trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'} + 76 phase_generator_kwargs = {'phase_generator_type': 'exp', + 77 'alpha_phase': 2} + 78 controller_kwargs = {'controller_type': 'metaworld'} + 79 basis_generator_kwargs = {'basis_generator_type': 'rbf', + 80 'num_basis': 5 + 81 } + 82 base_env = gym.make(base_env_id, render_mode='human' if render else None) + 83 env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, + 84 traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, + 85 phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, + 86 seed=seed) + 87 + 88 # This renders the full MP trajectory + 89 # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + 90 # Resetting to no rendering, can be achieved by render(mode=None). + 91 # It is also possible to change them mode multiple times when + 92 # e.g. only every nth trajectory should be displayed. + 93 if render: + 94 env.render() + 95 + 96 rewards = 0 + 97 obs = env.reset(seed=seed) + 98 + 99 # number of samples/full trajectories (multiple environment steps) +100 for i in range(iterations): +101 ac = env.action_space.sample() +102 obs, reward, terminated, truncated, info = env.step(ac) +103 rewards += reward +104 +105 if terminated or truncated: +106 print(base_env_id, rewards) +107 rewards = 0 +108 obs = env.reset(seed=seed+i+1) 109 -110 if terminated or truncated: -111 print(base_env_id, rewards) -112 rewards = 0 -113 obs = env.reset() -114 -115 env.close() -116 del env -117 -118 -119if __name__ == '__main__': -120 # Disclaimer: MetaWorld environments require the seed to be specified in the beginning. -121 # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior. -122 -123 # For rendering it might be necessary to specify your OpenGL installation -124 # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so -125 render = False -126 -127 # # Standard Meta world tasks -128 example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render) -129 -130 # # MP + MetaWorld hybrid task provided in the our framework -131 example_meta("metaworld_ProMP/ButtonPress-v2", seed=10, iterations=1, render=render) -132 # -133 # # Custom MetaWorld task -134 example_custom_meta_and_mp(seed=10, iterations=1, render=render) +110 env.close() +111 del env +112 +113def main(render = False): +114 # For rendering it might be necessary to specify your OpenGL installation +115 # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so +116 +117 # # Standard Meta world tasks +118 example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render) +119 +120 # # MP + MetaWorld hybrid task provided in the our framework +121 example_meta("metaworld_ProMP/button-press-v2", seed=10, iterations=1, render=render) +122 # +123 # # Custom MetaWorld task +124 example_custom_meta_and_mp(seed=10, iterations=1, render=render) +125 +126if __name__ == '__main__': +127 main()
diff --git a/docs/build/html/examples/movement_primitives.html b/docs/build/html/examples/movement_primitives.html index 1943b1b..be0ce86 100644 --- a/docs/build/html/examples/movement_primitives.html +++ b/docs/build/html/examples/movement_primitives.html @@ -4,7 +4,7 @@ - Movement Primitives Examples — Fancy Gym 0.2 documentation + Movement Primitives Examples — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
@@ -135,252 +135,253 @@ 26 for i in range(iterations): 27 28 if render and i % 1 == 0: - 29 env.render() - 30 - 31 # Now the action space is not the raw action but the parametrization of the trajectory generator, - 32 # such as a ProMP - 33 ac = env.action_space.sample() - 34 # This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the - 35 # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal - 36 # to the return of a trajectory. Default is the sum over the step-wise rewards. - 37 obs, reward, terminated, truncated, info = env.step(ac) - 38 # Aggregated returns - 39 returns += reward - 40 - 41 if terminated or truncated: - 42 print(reward) - 43 obs = env.reset() - 44 env.close() - 45 - 46 - 47def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): - 48 """ - 49 Example for running a custom movement primitive based environments. - 50 Our already registered environments follow the same structure. - 51 Hence, this also allows to adjust hyperparameters of the movement primitives. - 52 Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks. - 53 We appreciate PRs for custom environments (especially MP wrappers of existing tasks) - 54 for our repo: https://github.com/ALRhub/fancy_gym/ - 55 Args: - 56 seed: seed - 57 iterations: Number of rollout steps to run - 58 render: Render the episode - 59 - 60 Returns: + 29 # This renders the full MP trajectory + 30 # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + 31 env.render() + 32 + 33 # Now the action space is not the raw action but the parametrization of the trajectory generator, + 34 # such as a ProMP + 35 ac = env.action_space.sample() + 36 # This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the + 37 # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal + 38 # to the return of a trajectory. Default is the sum over the step-wise rewards. + 39 obs, reward, terminated, truncated, info = env.step(ac) + 40 # Aggregated returns + 41 returns += reward + 42 + 43 if terminated or truncated: + 44 print(reward) + 45 obs = env.reset() + 46 env.close() + 47 + 48 + 49def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): + 50 """ + 51 Example for running a custom movement primitive based environments. + 52 Our already registered environments follow the same structure. + 53 Hence, this also allows to adjust hyperparameters of the movement primitives. + 54 Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks. + 55 We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + 56 for our repo: https://github.com/ALRhub/fancy_gym/ + 57 Args: + 58 seed: seed + 59 iterations: Number of rollout steps to run + 60 render: Render the episode 61 - 62 """ - 63 # Changing the arguments of the black box env is possible by providing them to gym through mp_config_override. - 64 # E.g. here for way to many basis functions - 65 env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}, render_mode='human' if render else None) - 66 - 67 returns = 0 - 68 obs = env.reset() - 69 - 70 # This time rendering every trajectory - 71 if render: - 72 env.render() - 73 - 74 # number of samples/full trajectories (multiple environment steps) - 75 for i in range(iterations): - 76 ac = env.action_space.sample() - 77 obs, reward, terminated, truncated, info = env.step(ac) - 78 returns += reward - 79 - 80 if terminated or truncated: - 81 print(i, reward) - 82 obs = env.reset() - 83 - 84 env.close() - 85 return obs - 86 - 87class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper): - 88 mp_config = { - 89 'ProMP': { - 90 'trajectory_generator_kwargs': { - 91 'trajectory_generator_type': 'promp', - 92 'weights_scale': 2 - 93 }, - 94 'phase_generator_kwargs': { - 95 'phase_generator_type': 'linear' - 96 }, - 97 'controller_kwargs': { - 98 'controller_type': 'velocity' - 99 }, -100 'basis_generator_kwargs': { -101 'basis_generator_type': 'zero_rbf', -102 'num_basis': 5, -103 'num_basis_zero_start': 1 -104 } -105 }, -106 'DMP': { -107 'trajectory_generator_kwargs': { -108 'trajectory_generator_type': 'dmp', -109 'weights_scale': 500 -110 }, -111 'phase_generator_kwargs': { -112 'phase_generator_type': 'exp', -113 'alpha_phase': 2.5 -114 }, -115 'controller_kwargs': { -116 'controller_type': 'velocity' -117 }, -118 'basis_generator_kwargs': { -119 'basis_generator_type': 'rbf', -120 'num_basis': 5 -121 } -122 } -123 } -124 -125 -126def example_fully_custom_mp(seed=1, iterations=1, render=True): -127 """ -128 Example for running a custom movement primitive based environments. -129 Our already registered environments follow the same structure. -130 Hence, this also allows to adjust hyperparameters of the movement primitives. -131 Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks. -132 We appreciate PRs for custom environments (especially MP wrappers of existing tasks) -133 for our repo: https://github.com/ALRhub/fancy_gym/ -134 Args: -135 seed: seed -136 iterations: Number of rollout steps to run -137 render: Render the episode -138 -139 Returns: + 62 Returns: + 63 + 64 """ + 65 # Changing the arguments of the black box env is possible by providing them to gym through mp_config_override. + 66 # E.g. here for way to many basis functions + 67 env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}, render_mode='human' if render else None) + 68 + 69 returns = 0 + 70 obs = env.reset() + 71 + 72 # This time rendering every trajectory + 73 if render: + 74 env.render() + 75 + 76 # number of samples/full trajectories (multiple environment steps) + 77 for i in range(iterations): + 78 ac = env.action_space.sample() + 79 obs, reward, terminated, truncated, info = env.step(ac) + 80 returns += reward + 81 + 82 if terminated or truncated: + 83 print(i, reward) + 84 obs = env.reset() + 85 + 86 env.close() + 87 return obs + 88 + 89class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper): + 90 mp_config = { + 91 'ProMP': { + 92 'trajectory_generator_kwargs': { + 93 'trajectory_generator_type': 'promp', + 94 'weights_scale': 2 + 95 }, + 96 'phase_generator_kwargs': { + 97 'phase_generator_type': 'linear' + 98 }, + 99 'controller_kwargs': { +100 'controller_type': 'velocity' +101 }, +102 'basis_generator_kwargs': { +103 'basis_generator_type': 'zero_rbf', +104 'num_basis': 5, +105 'num_basis_zero_start': 1 +106 } +107 }, +108 'DMP': { +109 'trajectory_generator_kwargs': { +110 'trajectory_generator_type': 'dmp', +111 'weights_scale': 500 +112 }, +113 'phase_generator_kwargs': { +114 'phase_generator_type': 'exp', +115 'alpha_phase': 2.5 +116 }, +117 'controller_kwargs': { +118 'controller_type': 'velocity' +119 }, +120 'basis_generator_kwargs': { +121 'basis_generator_type': 'rbf', +122 'num_basis': 5 +123 } +124 } +125 } +126 +127 +128def example_fully_custom_mp(seed=1, iterations=1, render=True): +129 """ +130 Example for running a custom movement primitive based environments. +131 Our already registered environments follow the same structure. +132 Hence, this also allows to adjust hyperparameters of the movement primitives. +133 Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks. +134 We appreciate PRs for custom environments (especially MP wrappers of existing tasks) +135 for our repo: https://github.com/ALRhub/fancy_gym/ +136 Args: +137 seed: seed +138 iterations: Number of rollout steps to run +139 render: Render the episode 140 -141 """ +141 Returns: 142 -143 base_env_id = "fancy/Reacher5d-v0" -144 custom_env_id = "fancy/Reacher5d-Custom-v0" -145 custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0" -146 custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" -147 -148 fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id) +143 """ +144 +145 base_env_id = "fancy/Reacher5d-v0" +146 custom_env_id = "fancy/Reacher5d-Custom-v0" +147 custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0" +148 custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" 149 -150 env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) +150 fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id) 151 -152 rewards = 0 -153 obs = env.reset() -154 -155 if render: -156 env.render() -157 -158 # number of samples/full trajectories (multiple environment steps) -159 for i in range(iterations): -160 ac = env.action_space.sample() -161 obs, reward, terminated, truncated, info = env.step(ac) -162 rewards += reward -163 -164 if terminated or truncated: -165 print(rewards) -166 rewards = 0 -167 obs = env.reset() -168 -169 try: # Some mujoco-based envs don't correlcty implement .close -170 env.close() -171 except: -172 pass -173 -174 -175def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): -176 """ -177 Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration. -178 Args: -179 seed: seed -180 iterations: Number of rollout steps to run -181 render: Render the episode -182 -183 Returns: +152 env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) +153 +154 rewards = 0 +155 obs = env.reset() +156 +157 if render: +158 env.render() +159 +160 # number of samples/full trajectories (multiple environment steps) +161 for i in range(iterations): +162 ac = env.action_space.sample() +163 obs, reward, terminated, truncated, info = env.step(ac) +164 rewards += reward +165 +166 if terminated or truncated: +167 print(rewards) +168 rewards = 0 +169 obs = env.reset() +170 +171 try: # Some mujoco-based envs don't correlcty implement .close +172 env.close() +173 except: +174 pass +175 +176 +177def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): +178 """ +179 Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration. +180 Args: +181 seed: seed +182 iterations: Number of rollout steps to run +183 render: Render the episode 184 -185 """ +185 Returns: 186 -187 base_env_id = "fancy/Reacher5d-v0" -188 custom_env_id = "fancy/Reacher5d-Custom-v0" -189 custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" -190 -191 fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override= {'ProMP': { -192 'trajectory_generator_kwargs': { -193 'trajectory_generator_type': 'promp', -194 'weights_scale': 2 -195 }, -196 'phase_generator_kwargs': { -197 'phase_generator_type': 'linear' -198 }, -199 'controller_kwargs': { -200 'controller_type': 'velocity' -201 }, -202 'basis_generator_kwargs': { -203 'basis_generator_type': 'zero_rbf', -204 'num_basis': 5, -205 'num_basis_zero_start': 1 -206 } -207 }}) -208 -209 env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) +187 """ +188 +189 base_env_id = "fancy/Reacher5d-v0" +190 custom_env_id = "fancy/Reacher5d-Custom-v0" +191 custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" +192 +193 fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override= {'ProMP': { +194 'trajectory_generator_kwargs': { +195 'trajectory_generator_type': 'promp', +196 'weights_scale': 2 +197 }, +198 'phase_generator_kwargs': { +199 'phase_generator_type': 'linear' +200 }, +201 'controller_kwargs': { +202 'controller_type': 'velocity' +203 }, +204 'basis_generator_kwargs': { +205 'basis_generator_type': 'zero_rbf', +206 'num_basis': 5, +207 'num_basis_zero_start': 1 +208 } +209 }}) 210 -211 rewards = 0 -212 obs = env.reset() -213 -214 if render: -215 env.render() -216 -217 # number of samples/full trajectories (multiple environment steps) -218 for i in range(iterations): -219 ac = env.action_space.sample() -220 obs, reward, terminated, truncated, info = env.step(ac) -221 rewards += reward -222 -223 if terminated or truncated: -224 print(rewards) -225 rewards = 0 -226 obs = env.reset() -227 -228 if render: -229 env.render() -230 -231 rewards = 0 -232 obs = env.reset() -233 -234 # number of samples/full trajectories (multiple environment steps) -235 for i in range(iterations): -236 ac = env.action_space.sample() -237 obs, reward, terminated, truncated, info = env.step(ac) -238 rewards += reward -239 -240 if terminated or truncated: -241 print(rewards) -242 rewards = 0 -243 obs = env.reset() -244 -245 try: # Some mujoco-based envs don't correlcty implement .close -246 env.close() -247 except: -248 pass -249 -250 -251def main(): -252 render = False -253 # DMP -254 example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render) -255 -256 # ProMP -257 example_mp("fancy_ProMP/HoleReacher-v0", seed=10, iterations=5, render=render) -258 example_mp("fancy_ProMP/BoxPushingTemporalSparse-v0", seed=10, iterations=1, render=render) -259 example_mp("fancy_ProMP/TableTennis4D-v0", seed=10, iterations=20, render=render) -260 -261 # ProDMP with Replanning -262 example_mp("fancy_ProDMP/BoxPushingDenseReplan-v0", seed=10, iterations=4, render=render) -263 example_mp("fancy_ProDMP/TableTennis4DReplan-v0", seed=10, iterations=20, render=render) -264 example_mp("fancy_ProDMP/TableTennisWindReplan-v0", seed=10, iterations=20, render=render) -265 -266 # Altered basis functions -267 obs1 = example_custom_mp("fancy_ProMP/Reacher5d-v0", seed=10, iterations=1, render=render) -268 -269 # Custom MP -270 example_fully_custom_mp(seed=10, iterations=1, render=render) -271 example_fully_custom_mp_alternative(seed=10, iterations=1, render=render) -272 -273if __name__=='__main__': -274 main() +211 env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) +212 +213 rewards = 0 +214 obs = env.reset() +215 +216 if render: +217 env.render() +218 +219 # number of samples/full trajectories (multiple environment steps) +220 for i in range(iterations): +221 ac = env.action_space.sample() +222 obs, reward, terminated, truncated, info = env.step(ac) +223 rewards += reward +224 +225 if terminated or truncated: +226 print(rewards) +227 rewards = 0 +228 obs = env.reset() +229 +230 if render: +231 env.render() +232 +233 rewards = 0 +234 obs = env.reset() +235 +236 # number of samples/full trajectories (multiple environment steps) +237 for i in range(iterations): +238 ac = env.action_space.sample() +239 obs, reward, terminated, truncated, info = env.step(ac) +240 rewards += reward +241 +242 if terminated or truncated: +243 print(rewards) +244 rewards = 0 +245 obs = env.reset() +246 +247 try: # Some mujoco-based envs don't correlcty implement .close +248 env.close() +249 except: +250 pass +251 +252 +253def main(render=False): +254 # DMP +255 example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render) +256 +257 # ProMP +258 example_mp("fancy_ProMP/HoleReacher-v0", seed=10, iterations=5, render=render) +259 example_mp("fancy_ProMP/BoxPushingTemporalSparse-v0", seed=10, iterations=1, render=render) +260 example_mp("fancy_ProMP/TableTennis4D-v0", seed=10, iterations=20, render=render) +261 +262 # ProDMP with Replanning +263 example_mp("fancy_ProDMP/BoxPushingDenseReplan-v0", seed=10, iterations=4, render=render) +264 example_mp("fancy_ProDMP/TableTennis4DReplan-v0", seed=10, iterations=20, render=render) +265 example_mp("fancy_ProDMP/TableTennisWindReplan-v0", seed=10, iterations=20, render=render) +266 +267 # Altered basis functions +268 obs1 = example_custom_mp("fancy_ProMP/Reacher5d-v0", seed=10, iterations=1, render=render) +269 +270 # Custom MP +271 example_fully_custom_mp(seed=10, iterations=1, render=render) +272 example_fully_custom_mp_alternative(seed=10, iterations=1, render=render) +273 +274if __name__=='__main__': +275 main()
diff --git a/docs/build/html/examples/mp_params_tuning.html b/docs/build/html/examples/mp_params_tuning.html index 14659d6..df5c2c7 100644 --- a/docs/build/html/examples/mp_params_tuning.html +++ b/docs/build/html/examples/mp_params_tuning.html @@ -4,7 +4,7 @@ - MP Params Tuning Example — Fancy Gym 0.2 documentation + MP Params Tuning Example — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/examples/open_ai.html b/docs/build/html/examples/open_ai.html index 74ceddc..fa220a1 100644 --- a/docs/build/html/examples/open_ai.html +++ b/docs/build/html/examples/open_ai.html @@ -4,7 +4,7 @@ - OpenAI Envs Examples — Fancy Gym 0.2 documentation + OpenAI Envs Examples — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
@@ -122,27 +122,27 @@ 13 Returns: 14 15 """ -16 env = gym.make(env_name) +16 env = gym.make(env_name, render_mode='human' if render else None) 17 18 returns = 0 19 obs = env.reset(seed=seed) 20 # number of samples/full trajectories (multiple environment steps) 21 for i in range(10): 22 if render and i % 2 == 0: -23 env.render(mode="human") -24 else: -25 env.render() -26 ac = env.action_space.sample() -27 obs, reward, terminated, truncated, info = env.step(ac) -28 returns += reward -29 -30 if terminated or truncated: -31 print(returns) -32 obs = env.reset() -33 +23 env.render() +24 ac = env.action_space.sample() +25 obs, reward, terminated, truncated, info = env.step(ac) +26 returns += reward +27 +28 if terminated or truncated: +29 print(returns) +30 obs = env.reset() +31 +32def main(render=True): +33 example_mp("gym_ProMP/Reacher-v2", render=render) 34 35if __name__ == '__main__': -36 example_mp("gym_ProMP/Reacher-v2") +36 main()
diff --git a/docs/build/html/examples/pd_control_gain_tuning.html b/docs/build/html/examples/pd_control_gain_tuning.html index 6467abc..b83ab3f 100644 --- a/docs/build/html/examples/pd_control_gain_tuning.html +++ b/docs/build/html/examples/pd_control_gain_tuning.html @@ -4,7 +4,7 @@ - PD Control Gain Tuning Example — Fancy Gym 0.2 documentation + PD Control Gain Tuning Example — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/examples/replanning_envs.html b/docs/build/html/examples/replanning_envs.html index 9c842fd..afdaf29 100644 --- a/docs/build/html/examples/replanning_envs.html +++ b/docs/build/html/examples/replanning_envs.html @@ -4,7 +4,7 @@ - Replanning Example — Fancy Gym 0.2 documentation + Replanning Example — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
@@ -112,24 +112,24 @@ 3 4 5def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False): - 6 env = gym.make(env_name) + 6 env = gym.make(env_name, render_mode='human' if render else None) 7 env.reset(seed=seed) 8 for i in range(iterations): - 9 done = False -10 while done is False: -11 ac = env.action_space.sample() -12 obs, reward, terminated, truncated, info = env.step(ac) -13 if render: -14 env.render(mode="human") -15 if terminated or truncated: -16 env.reset() + 9 while True: +10 ac = env.action_space.sample() +11 obs, reward, terminated, truncated, info = env.step(ac) +12 if render: +13 env.render() +14 if terminated or truncated: +15 env.reset() +16 break 17 env.close() 18 del env 19 20 21def example_custom_replanning_envs(seed=0, iteration=100, render=True): 22 # id for a step-based environment -23 base_env_id = "BoxPushingDense-v0" +23 base_env_id = "fancy/BoxPushingDense-v0" 24 25 wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper] 26 @@ -147,31 +147,34 @@ 38 'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, 39 'condition_on_desired': True} 40 -41 env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs, -42 traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, -43 phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, -44 seed=seed) -45 if render: -46 env.render(mode="human") -47 -48 obs = env.reset() -49 -50 for i in range(iteration): -51 ac = env.action_space.sample() -52 obs, reward, terminated, truncated, info = env.step(ac) -53 if terminated or truncated: -54 env.reset() -55 -56 env.close() -57 del env -58 +41 base_env = gym.make(base_env_id, render_mode='human' if render else None) +42 env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs, +43 traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, +44 phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, +45 seed=seed) +46 if render: +47 env.render() +48 +49 obs = env.reset() +50 +51 for i in range(iteration): +52 ac = env.action_space.sample() +53 obs, reward, terminated, truncated, info = env.step(ac) +54 if terminated or truncated: +55 env.reset() +56 +57 env.close() +58 del env 59 -60if __name__ == "__main__": +60def main(render=False): 61 # run a registered replanning environment -62 example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False) +62 example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=render) 63 64 # run a custom replanning environment -65 example_custom_replanning_envs(seed=0, iteration=8, render=True) +65 example_custom_replanning_envs(seed=0, iteration=8, render=render) +66 +67if __name__ == "__main__": +68 main()
diff --git a/docs/build/html/generated/fancy_gym.envs.html b/docs/build/html/generated/fancy_gym.envs.html index 0ec17e6..5d096e5 100644 --- a/docs/build/html/generated/fancy_gym.envs.html +++ b/docs/build/html/generated/fancy_gym.envs.html @@ -4,7 +4,7 @@ - fancy_gym.envs — Fancy Gym 0.2 documentation + fancy_gym.envs — Fancy Gym 0.3.0 documentation @@ -39,7 +39,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/generated/fancy_gym.register.html b/docs/build/html/generated/fancy_gym.register.html index 92be35d..643537a 100644 --- a/docs/build/html/generated/fancy_gym.register.html +++ b/docs/build/html/generated/fancy_gym.register.html @@ -4,7 +4,7 @@ - fancy_gym.register — Fancy Gym 0.2 documentation + fancy_gym.register — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/generated/fancy_gym.upgrade.html b/docs/build/html/generated/fancy_gym.upgrade.html index e72c7f5..6badcb5 100644 --- a/docs/build/html/generated/fancy_gym.upgrade.html +++ b/docs/build/html/generated/fancy_gym.upgrade.html @@ -4,7 +4,7 @@ - fancy_gym.upgrade — Fancy Gym 0.2 documentation + fancy_gym.upgrade — Fancy Gym 0.3.0 documentation @@ -40,7 +40,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html index b9bad09..c64d420 100644 --- a/docs/build/html/genindex.html +++ b/docs/build/html/genindex.html @@ -3,7 +3,7 @@ - Index — Fancy Gym 0.2 documentation + Index — Fancy Gym 0.3.0 documentation @@ -38,7 +38,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/guide/basic_usage.html b/docs/build/html/guide/basic_usage.html index 7467226..8a13e38 100644 --- a/docs/build/html/guide/basic_usage.html +++ b/docs/build/html/guide/basic_usage.html @@ -4,7 +4,7 @@ - Basic Usage — Fancy Gym 0.2 documentation + Basic Usage — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/guide/episodic_rl.html b/docs/build/html/guide/episodic_rl.html index 3515aa7..78f99a3 100644 --- a/docs/build/html/guide/episodic_rl.html +++ b/docs/build/html/guide/episodic_rl.html @@ -4,7 +4,7 @@ - What is Episodic RL? — Fancy Gym 0.2 documentation + What is Episodic RL? — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/guide/installation.html b/docs/build/html/guide/installation.html index b9baffe..b05be13 100644 --- a/docs/build/html/guide/installation.html +++ b/docs/build/html/guide/installation.html @@ -4,7 +4,7 @@ - Installation — Fancy Gym 0.2 documentation + Installation — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
@@ -135,7 +135,7 @@ pip install '

Pip can not automatically install up-to-date versions of metaworld, since they are not avaible on PyPI yet. Install metaworld via

-
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
+
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
 
@@ -169,7 +169,7 @@ pip install -e

Metaworld has to be installed manually with

-
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld
+
pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld
 
diff --git a/docs/build/html/guide/upgrading_envs.html b/docs/build/html/guide/upgrading_envs.html index 3444d4e..8e0c2b9 100644 --- a/docs/build/html/guide/upgrading_envs.html +++ b/docs/build/html/guide/upgrading_envs.html @@ -4,7 +4,7 @@ - Creating new MP Environments — Fancy Gym 0.2 documentation + Creating new MP Environments — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/index.html b/docs/build/html/index.html index 2430040..df9a562 100644 --- a/docs/build/html/index.html +++ b/docs/build/html/index.html @@ -4,7 +4,7 @@ - Fancy Gym — Fancy Gym 0.2 documentation + Fancy Gym — Fancy Gym 0.3.0 documentation @@ -40,7 +40,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv index 372130a..7c889a6 100644 Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ diff --git a/docs/build/html/py-modindex.html b/docs/build/html/py-modindex.html index 5df2185..e62f50e 100644 --- a/docs/build/html/py-modindex.html +++ b/docs/build/html/py-modindex.html @@ -3,7 +3,7 @@ - Python Module Index — Fancy Gym 0.2 documentation + Python Module Index — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/search.html b/docs/build/html/search.html index b9ff6cd..c632ba3 100644 --- a/docs/build/html/search.html +++ b/docs/build/html/search.html @@ -3,7 +3,7 @@ - Search — Fancy Gym 0.2 documentation + Search — Fancy Gym 0.3.0 documentation @@ -41,7 +41,7 @@
- 0.2 + 0.3.0
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index 2666042..209c151 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api", "envs/dmc", "envs/fancy/airhockey", "envs/fancy/classic_control", "envs/fancy/index", "envs/fancy/mujoco", "envs/meta", "envs/open_ai", "examples/dmc", "examples/general", "examples/metaworld", "examples/movement_primitives", "examples/mp_params_tuning", "examples/open_ai", "examples/pd_control_gain_tuning", "examples/replanning_envs", "generated/fancy_gym.envs", "generated/fancy_gym.register", "generated/fancy_gym.upgrade", "guide/basic_usage", "guide/episodic_rl", "guide/installation", "guide/upgrading_envs", "index"], "filenames": ["api.rst", "envs/dmc.md", "envs/fancy/airhockey.rst", "envs/fancy/classic_control.md", "envs/fancy/index.rst", "envs/fancy/mujoco.md", "envs/meta.md", "envs/open_ai.md", "examples/dmc.rst", "examples/general.rst", "examples/metaworld.rst", "examples/movement_primitives.rst", "examples/mp_params_tuning.rst", "examples/open_ai.rst", "examples/pd_control_gain_tuning.rst", "examples/replanning_envs.rst", "generated/fancy_gym.envs.rst", "generated/fancy_gym.register.rst", "generated/fancy_gym.upgrade.rst", "guide/basic_usage.rst", "guide/episodic_rl.rst", "guide/installation.rst", "guide/upgrading_envs.rst", "index.rst"], "titles": ["API", "DeepMind Control (DMC)", "AirHockey", "Classic Control", "Fancy", "Mujoco", "Metaworld", "Gymnasium", "DeepMind Control Examples", "General Usage Examples", "Metaworld Examples", "Movement Primitives Examples", "MP Params Tuning Example", "OpenAI Envs Examples", "PD Control Gain Tuning Example", "Replanning Example", "fancy_gym.envs", "fancy_gym.register", "fancy_gym.upgrade", "Basic Usage", "What is Episodic RL?", "Installation", "Creating new MP Environments", "Fancy Gym"], "terms": {"These": [1, 2, 3, 5, 7, 20], "ar": [1, 2, 3, 4, 5, 7, 8, 10, 11, 14, 17, 19, 20, 21, 22], "wrapper": [1, 8, 10, 11, 15, 17, 18, 22], "select": [1, 7, 22], "order": 1, "us": [1, 2, 5, 6, 9, 11, 15, 17, 18, 19, 20, 21, 22, 23], "our": [1, 8, 9, 10, 11, 20, 23], "motion": [1, 5, 20], "primit": [1, 8, 10, 13, 17, 18, 20, 22, 23], "gym": [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 22], "interfac": [1, 6, 10, 11, 22, 23], "them": [1, 5, 6, 7, 8, 10, 11, 19, 23], "when": [1, 5, 8, 9, 10, 17, 22], "instal": [1, 10, 23], "fancy_gym": [1, 6, 8, 9, 10, 11, 12, 13, 14, 15, 19, 21, 22, 23], "option": [1, 5, 17, 18, 19, 21], "extra": 1, "e": [1, 8, 10, 11, 21, 22], "g": [1, 8, 10, 11, 22], "pip": [1, 21, 23], "all": [1, 5, 6, 9, 10, 19, 21, 23], "regular": [1, 19, 23], "task": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 19, 22], "avaibl": [1, 6, 21], "via": [1, 3, 6, 19, 21, 22, 23], "shimmi": 1, "name": [1, 3, 5, 6, 7, 8, 10, 19], "descript": [1, 3, 5, 6, 7, 19], "action": [1, 3, 5, 6, 7, 8, 9, 10, 11, 14, 15, 19, 20, 22, 23], "dim": 1, "observ": [1, 2, 3, 5, 6, 8, 9, 10, 11, 19, 20, 22, 23], "dm_control": [1, 8, 19], "acrobot": 1, "swingup": 1, "v0": [1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 22, 23], "underactu": 1, "doubl": 1, "pendulum": [1, 9], "torqu": [1, 5, 20], "appli": [1, 5], "second": 1, "joint": [1, 5, 22], "swing": 1, "up": [1, 4, 6, 21], "balanc": 1, "1": [1, 5, 7, 8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "6": [1, 5, 6], "swingup_spars": 1, "similar": 1, "spars": [1, 5], "reward": [1, 3, 5, 8, 9, 10, 11, 13, 15, 19, 22, 23], "achiev": [1, 5, 8, 10], "ball_in_cup": [1, 8, 19], "catch": [1, 8, 19], "planar": 1, "ball": [1, 5], "cup": [1, 5], "where": [1, 2, 3, 6], "receptacl": 1, "must": [1, 6], "2": [1, 3, 5, 7, 8, 9, 10, 11, 13, 22], "8": [1, 5, 15], "cartpol": 1, "cart": 1, "pole": 1, "goal": [1, 3, 5, 10], "i": [1, 2, 5, 6, 8, 9, 10, 11, 13, 15, 17, 18, 19, 22, 23], "an": [1, 5, 6, 7, 8, 10, 17, 18, 19, 20, 22, 23], "unactu": 1, "move": 1, "start": [1, 22], "upright": 1, "5": [1, 3, 5, 8, 10, 11, 14, 15, 19, 22], "balance_spars": 1, "downward": 1, "requir": [1, 2, 3, 5, 6, 8, 10, 19, 20, 22], "two_pol": 1, "extens": 1, "domain": 1, "two": [1, 5], "serial": 1, "connect": 1, "increas": [1, 9], "challeng": [1, 2, 5, 23], "three_pol": 1, "three": [1, 2], "further": [1, 19, 20], "11": [1, 8], "cheetah": 1, "run": [1, 8, 9, 10, 11, 13, 15], "biped": 1, "robot": [1, 2, 5, 6, 20, 23], "The": [1, 2, 3, 5, 6, 8, 10, 11, 17, 18, 19, 20, 22, 23], "proport": 1, "forward": 1, "veloc": [1, 5, 11, 14, 15, 19, 20, 22], "maximum": [1, 15], "speed": 1, "17": 1, "dog": 1, "stand": 1, "focus": [1, 2], "postur": 1, "38": 1, "223": 1, "walk": 1, "coordin": [1, 5], "movement": [1, 8, 10, 13, 17, 18, 20, 22, 23], "trot": 1, "perform": [1, 2, 5], "gait": 1, "combin": 1, "stabil": 1, "fetch": 1, "plai": [1, 5, 6], "involv": [1, 2, 6], "locomot": 1, "object": [1, 5, 6, 20], "interact": [1, 19], "232": 1, "finger": 1, "spin": 1, "rotat": 1, "bodi": 1, "hing": 1, "9": [1, 3], "turn_easi": 1, "align": [1, 5, 20], "tip": 1, "free": [1, 19, 22], "target": [1, 14], "easier": 1, "version": [1, 7, 8, 10, 13, 17, 18, 19, 21, 22], "larger": 1, "12": 1, "turn_hard": 1, "smaller": 1, "difficulti": [1, 23], "fish": [1, 8, 10], "right": [1, 20], "itself": [1, 3], "fluid": 1, "21": [1, 5], "swim": [1, 8, 10], "incorpor": 1, "dynam": [1, 2, 20, 23], "24": 1, "hopper": [1, 5], "One": 1, "leg": 1, "minim": 1, "torso": 1, "height": 1, "4": [1, 5, 6, 7, 9, 11, 15, 22], "15": [1, 5, 14], "hop": 1, "humanoid": 1, "simplifi": 1, "maintain": [1, 5, 19, 23], "67": 1, "specifi": [1, 5, 8, 10, 18], "aim": [1, 2], "high": [1, 3, 14], "horizont": 1, "run_pure_st": 1, "focu": [1, 3], "pure": 1, "state": [1, 15, 19], "55": 1, "humanoid_cmu": 1, "advanc": [1, 5, 6], "cmu": 1, "model": [1, 2], "56": 1, "137": 1, "lqr": 1, "lqr_2_1": 1, "linear": [1, 8, 10, 11, 22], "quadrat": 1, "regul": 1, "mass": 1, "actuat": [1, 2], "posit": [1, 5, 14, 19, 20, 22], "optim": [1, 20], "lqr_6_2": 1, "more": [1, 9, 13, 19, 20, 22, 23], "complex": [1, 2, 3, 5], "manipul": [1, 5, 6, 8, 9], "bring_bal": 1, "bring": 1, "locat": [1, 5], "initi": [1, 5], "variat": [1, 4], "44": 1, "bring_peg": 1, "peg": [1, 6], "insert_bal": 1, "insert": [1, 6], "basket": [1, 5], "insert_peg": 1, "slot": 1, "classic": [1, 4, 20, 23], "invert": 1, "limit": [1, 2, 5], "multipl": [1, 5, 8, 10, 11, 13, 18, 19, 22], "3": [1, 2, 5, 22], "point_mass": 1, "easi": [1, 22, 23], "point": [1, 3, 17, 22], "correspond": 1, "global": 1, "x": [1, 5], "y": [1, 5], "ax": [1, 5, 14], "hard": 1, "random": [1, 5], "gain": [1, 23], "per": [1, 5], "episod": [1, 5, 8, 9, 10, 11, 14, 19, 23], "memoryless": 1, "agent": [1, 2, 3], "quadrup": 1, "four": 1, "78": 1, "escap": 1, "environment": 1, "101": 1, "90": 1, "reacher": [1, 5, 7, 11, 13, 19], "link": [1, 3, 5], "sphere": 1, "stacker": 1, "stack_2": 1, "stack": [1, 9], "box": [1, 4, 6, 11, 20, 23], "correct": [1, 14], "placement": 1, "gripper": 1, "49": 1, "stack_4": 1, "63": 1, "swimmer": 1, "swimmer6": 1, "six": 1, "nose": 1, "insid": 1, "25": [1, 3, 5, 15], "swimmer15": 1, "fifteen": 1, "extend": 1, "14": 1, "61": 1, "walker": [1, 5], "trajectori": [1, 3, 7, 8, 10, 11, 13, 14, 19, 20, 22, 23], "horizon": [1, 3, 5, 6, 7], "dimens": [1, 3, 5, 6, 7, 22], "context": [1, 3, 5, 6, 11, 19, 20, 22], "dm_control_prodmp": 1, "A": [1, 3, 5, 6, 7, 10, 22], "promp": [1, 7, 8, 10, 11, 13, 17, 18, 19, 20, 22, 23], "wrap": [1, 7], "1000": [1, 8, 9, 10, 11, 19, 23], "10": [1, 8, 9, 10, 11, 13, 23], "dm_control_dmp": [1, 19], "dmp": [1, 3, 6, 8, 9, 10, 11, 17, 18, 19, 20, 22, 23], "fanci": [2, 3, 5, 9, 11, 19], "provid": [2, 3, 5, 7, 8, 10, 11, 17, 18, 19, 21], "access": [2, 19, 22, 23], "rang": [2, 5, 8, 9, 10, 11, 13, 15, 19, 22, 23], "environ": [2, 4, 8, 9, 10, 11, 13, 14, 15, 17, 18, 20, 21], "air": 2, "hockei": 2, "close": [2, 5, 6, 8, 10, 11, 15], "gap": 2, "between": [2, 5, 14, 19], "simul": [2, 3, 6], "learn": [2, 3, 5, 6, 11, 19, 20, 23], "real": [2, 14], "world": [2, 10], "applic": 2, "variou": [2, 5, 23], "aspect": 2, "oper": [2, 20], "deal": 2, "disturb": 2, "nois": 2, "safeti": 2, "avail": [2, 5, 19, 22], "through": [2, 11], "allow": [2, 3, 8, 10, 11, 17, 18, 19, 22], "develop": 2, "capabl": [2, 5], "differ": [2, 5, 8, 10, 14, 18, 20], "level": [2, 19], "includ": [2, 5, 9, 17, 18, 23], "hit": [2, 5], "defend": 2, "both": [2, 22, 23], "degre": [2, 5, 23], "freedom": [2, 5], "dof": [2, 5], "seven": [2, 5], "7": [2, 5], "configur": [2, 5, 17, 18, 22], "base": [2, 4, 8, 9, 10, 11, 13, 15, 17, 18, 20, 22, 23], "kuka": 2, "iiwa14": 2, "which": [2, 3, 5, 8, 10, 11, 13, 17], "repres": [2, 20, 22], "higher": [2, 23], "control": [2, 4, 19, 20, 22, 23], "akin": 2, "set": [2, 8, 9, 10, 17, 19, 20, 23], "particip": 2, "strategi": 2, "enabl": [2, 11, 19], "react": 2, "adapt": [2, 4, 5], "within": [2, 5], "final": [2, 5], "phase": 2, "tournament": 2, "test": [2, 19, 21], "comprehens": [2, 5, 23], "game": [2, 5, 6], "scenario": 2, "top": [2, 5, 6], "team": 2, "actual": 2, "system": [2, 5], "For": [2, 5, 8, 10, 13, 22], "detail": [2, 19, 22], "inform": [2, 5, 13, 14, 19], "rule": 2, "stage": 2, "submiss": [2, 23], "pleas": [2, 14, 18, 22], "visit": 2, "offici": 2, "websit": 2, "follow": [2, 8, 10, 11, 22], "7dof": 2, "3dof": 2, "airhockit2023": 2, "foundat": [3, 5, 21, 23], "platform": 3, "explor": [3, 23], "experi": 3, "rl": [3, 5, 23], "algorithm": [3, 5], "design": [3, 4, 5, 6, 20], "simpl": 3, "research": [3, 5, 23], "practition": 3, "fundament": 3, "principl": 3, "without": [3, 19, 22], "dimension": [3, 22], "physic": [3, 8], "simplereach": 3, "reach": [3, 5, 6, 19], "ani": [3, 9, 17, 18, 19], "until": 3, "150": [3, 6], "time": [3, 5, 8, 10, 11, 19, 23], "thi": [3, 5, 6, 8, 9, 10, 11, 14, 19, 20, 22, 23], "space": [3, 5, 11, 20, 22], "precis": [3, 5], "toward": 3, "end": [3, 5], "200": [3, 5, 9], "longsimplereach": 3, "18": [3, 5], "viapointreach": 3, "leverag": [3, 9], "support": [3, 6, 10, 19, 20, 22, 23], "self": [3, 22], "collis": 3, "detect": 3, "onli": [3, 5, 8, 10, 17, 19, 21, 22], "100": [3, 5, 7, 15], "199": 3, "viapoint": 3, "respect": 3, "holereach": [3, 9, 11], "effector": [3, 5], "need": [3, 5, 8, 10, 18, 22], "narrow": 3, "hole": [3, 6], "colld": 3, "wall": [3, 6], "fancy_dmp": [3, 5, 11], "holereacherfixedgo": 3, "fix": [3, 5, 10], "attractor": 3, "30": 3, "add": [4, 8, 10, 19, 22], "coupl": 4, "new": [4, 11, 18, 19, 20, 23], "some": [4, 11, 14, 19], "exist": [4, 6, 8, 10, 11, 17, 18, 19, 22], "while": [4, 5, 15, 19, 20], "other": [4, 8, 10, 19, 22, 23], "were": 4, "build": [4, 22], "u": 4, "from": [4, 5, 6, 8, 9, 10, 14, 19, 20, 22, 23], "ground": 4, "push": [4, 6, 23], "boxpushingdens": [4, 5, 15, 23], "mujoco": [4, 9, 11, 15, 21, 23], "step": [4, 8, 9, 10, 11, 13, 14, 15, 17, 18, 20, 22, 23], "tabl": [4, 23], "tenni": [4, 23], "beer": 4, "pong": 4, "mp": [4, 8, 10, 11, 14, 17, 18, 19, 20, 23], "airhockei": [4, 23], "present": [5, 20, 23], "reinforc": [5, 6, 23], "util": 5, "versatil": 5, "franka": 5, "emika": 5, "panda": [5, 23], "arm": [5, 6], "boast": 5, "orient": 5, "defin": [5, 11, 18, 22], "its": [5, 10], "constrain": 5, "certain": 5, "along": 5, "encompass": 5, "full": [5, 8, 10, 11, 13, 19, 22, 23], "360": 5, "z": 5, "axi": [5, 14], "": [5, 20, 23], "mission": 5, "accuraci": 5, "centimet": 5, "0": [5, 8, 9, 10, 11, 13, 14, 15, 19, 22], "radian": 5, "sine": 5, "cosin": 5, "valu": [5, 9, 14, 19], "angl": 5, "quaternion": 5, "describ": 5, "each": [5, 19], "composit": 5, "function": [5, 9, 11], "serv": 5, "metric": 5, "It": [5, 8, 10, 11, 22], "account": 5, "distanc": 5, "rod": 5, "desir": [5, 15], "penalti": 5, "violat": 5, "well": [5, 19, 22], "cost": 5, "energi": 5, "expenditur": 5, "structur": [5, 6, 8, 10, 11], "purposefulli": 5, "enhanc": [5, 20], "gener": [5, 11, 15, 19, 20, 22, 23], "tempor": 5, "last": [5, 11], "timestep": 5, "spatial": 5, "almost": 5, "enought": 5, "somewhat": 5, "correctli": 5, "custom": [5, 8, 9, 10, 11, 15, 18, 19, 22, 23], "dens": 5, "13": 5, "boxpushingtemporalspars": [5, 11], "boxpushingtemporalspatialspars": 5, "offer": [5, 23], "equip": [5, 6], "respond": 5, "incom": 5, "return": [5, 8, 9, 10, 11, 12, 13, 19, 22], "accur": 5, "oppon": 5, "side": [5, 6], "meter": 5, "65": 5, "compris": [5, 6], "decis": 5, "consid": 5, "successfulli": 5, "complet": [5, 20], "land": 5, "also": [5, 6, 8, 9, 10, 11, 17, 18, 19, 21], "tight": 5, "margin": 5, "20": [5, 11], "reflect": 5, "condit": [5, 15], "whether": [5, 17, 22, 23], "wa": 5, "proxim": 5, "cater": 5, "addit": [5, 17, 18, 19], "overcom": 5, "tabletennis2d": 5, "2d": 5, "350": 5, "19": 5, "tabletennis2dreplan": 5, "replan": [5, 11, 19, 23], "tabletennis4d": [5, 11, 12], "4d": 5, "22": 5, "tabletennis4dreplan": [5, 11], "tabletenniswind": 5, "wind": 5, "effect": [5, 22], "tabletennisgoalswitch": 5, "switch": 5, "tabletenniswindreplan": [5, 11], "upon": [5, 23], "throw": 5, "place": [5, 6], "larg": 5, "establish": 5, "42": [5, 18], "05": [5, 14], "angular": 5, "rel": [5, 22], "bottom": 5, "current": [5, 6, 8, 10, 19, 20, 22], "method": [5, 8, 10, 11, 20, 23], "paramet": [5, 8, 10, 11, 18, 22, 23], "expand": 5, "weight": 5, "basi": [5, 11, 20], "durat": 5, "releas": 5, "implement": [5, 11, 19, 22], "form": 5, "squar": 5, "sum": [5, 11], "across": 5, "penal": 5, "excess": 5, "forc": 5, "encourag": [5, 23], "effici": [5, 6], "t": [5, 11, 14, 15], "befor": 5, "non": [5, 18], "markovian": 5, "compon": [5, 6], "assess": 5, "chosen": [5, 20], "ensur": 5, "fall": 5, "reason": 5, "overal": 5, "specif": [5, 13, 20], "success": 5, "determin": [5, 22], "conclus": 5, "showcas": 5, "abil": 5, "predict": [5, 20], "execut": [5, 11, 19, 20, 23], "popular": 5, "parti": [5, 21], "beerpong": 5, "300": 5, "29": 5, "beerpongstepbas": 5, "beerpongfixedreleas": 5, "modifi": 5, "gymnasium": [5, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 22, 23], "v2": [5, 6, 7, 9, 10, 13, 19], "reacherspars": 5, "same": [5, 8, 10, 11, 17, 18, 19, 22], "longreach": 5, "27": 5, "longreacherspars": 5, "reacher5d": [5, 9, 11, 14, 19], "env": [5, 6, 8, 9, 10, 11, 14, 15, 17, 18, 19, 22, 23], "reacherenv": 5, "reacher5dspars": 5, "reacher7d": 5, "reacher7dspars": 5, "hopperjumpspars": 5, "jump": 5, "250": [5, 8], "16": [5, 9], "hopperjump": 5, "continu": 5, "antjump": 5, "ant": 5, "119": 5, "halfcheetahjump": 5, "halfcheetah": [5, 9], "112": 5, "hopperjumponbox": 5, "hopperthrow": 5, "hopperthrowinbasket": 5, "walker2djump": 5, "walker2d": 5, "depend": [5, 20, 21], "most": 5, "variant": [5, 6, 19, 23], "refer": [5, 6, 7], "fancy_promp": [5, 11, 12, 14, 19, 23], "fancy_prodmp": [5, 11, 12, 15], "dial": 6, "turn": [6, 19], "open": [6, 19, 22], "sourc": [6, 17, 18], "benchmark": [6, 23], "meta": [6, 10], "multi": 6, "50": [6, 7], "divers": 6, "featur": 6, "univers": 6, "tabletop": 6, "sawyer": 6, "varieti": [6, 11], "everydai": 6, "share": 6, "pivot": 6, "reus": 6, "acquir": 6, "relat": 6, "make": [6, 8, 9, 10, 11, 12, 13, 14, 15, 19, 22, 23], "ml1": [6, 19], "standard": [6, 8, 10, 23], "assembli": 6, "assembl": 6, "39": 6, "basketbal": 6, "bin": 6, "pick": [6, 18], "button": [6, 10], "press": [6, 10], "topdown": 6, "down": 6, "perspect": 6, "coffe": 6, "machin": 6, "pull": 6, "lever": 6, "disassembl": 6, "door": 6, "lock": 6, "unlock": 6, "hand": [6, 22], "drawer": 6, "faucet": 6, "hammer": 6, "handl": [6, 14], "out": [6, 23], "back": [6, 11], "backward": 6, "plate": 6, "slide": 6, "unplug": 6, "soccer": 6, "stick": 6, "against": 6, "shelf": 6, "sweep": 6, "contain": 6, "window": 6, "metaworld_promp": [6, 10], "metaworld_prodmp": [6, 19], "now": [6, 10, 11], "lunar": 7, "lander": 7, "lunarland": 7, "we": [7, 8, 10, 11, 18, 19, 20, 21, 22, 23], "farama": [7, 21], "previous": 7, "openai": [7, 9, 19, 23], "doc": 7, "overview": 7, "counterpart": 7, "gym_promp": [7, 13, 19], "continuousmountaincar": 7, "fetchslidedens": 7, "v1": [7, 9, 10], "fetchreachdens": 7, "import": [8, 9, 10, 11, 12, 13, 14, 15, 19, 22, 23], "def": [8, 9, 10, 11, 12, 13, 15, 22], "example_dmc": 8, "env_id": [8, 9, 10, 11, 13, 14, 15], "seed": [8, 9, 10, 11, 13, 14, 15, 19], "iter": [8, 9, 10, 11, 15], "render": [8, 9, 10, 11, 13, 14, 15, 19, 23], "true": [8, 9, 10, 11, 12, 13, 14, 15, 17, 19], "dmc": [8, 9, 21, 23], "ha": [8, 10, 21, 22], "domain_nam": [8, 9], "task_nam": [8, 9, 10], "environment_nam": [8, 9], "arg": [8, 9, 10, 11, 13, 17, 18], "either": [8, 9, 14], "determinist": [8, 9, 10, 11], "behaviour": [8, 9, 10, 11], "number": [8, 9, 10, 11, 13, 15, 19, 22], "rollout": [8, 9, 10, 11], "ob": [8, 9, 10, 11, 13, 15], "reset": [8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "print": [8, 9, 10, 11, 13, 17, 19, 22], "shape": [8, 9, 10, 14, 22], "observation_spac": [8, 9, 10, 22], "action_spac": [8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "ac": [8, 10, 11, 13, 15, 22], "sampl": [8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "mode": [8, 10, 11, 13, 14, 15, 19], "human": [8, 10, 11, 13, 15, 19, 23], "termin": [8, 9, 10, 11, 13, 15, 19, 22, 23], "truncat": [8, 9, 10, 11, 13, 15, 19, 22, 23], "info": [8, 9, 10, 11, 13, 15, 19, 22, 23], "del": [8, 10, 15], "example_custom_dmc_and_mp": 8, "alreadi": [8, 10, 11, 13, 17, 18, 19, 22], "regist": [8, 10, 11, 13, 15, 18, 22, 23], "henc": [8, 10, 11, 19], "adjust": [8, 10, 11], "hyperparamet": [8, 10, 11], "yet": [8, 10, 11, 21, 22], "recommend": [8, 10, 11, 22, 23], "abov": [8, 9, 10, 11, 19], "you": [8, 10, 11, 17, 18, 19, 21, 22, 23], "just": [8, 10, 11, 19], "interest": [8, 10, 11], "chain": [8, 10], "those": [8, 10, 11, 21], "appreci": [8, 10, 11, 23], "pr": [8, 10, 11, 22, 23], "especi": [8, 10, 11], "repo": [8, 10, 11], "http": [8, 10, 11, 21, 23], "github": [8, 10, 11, 21, 23], "com": [8, 10, 11, 21, 23], "alrhub": [8, 10, 11, 21, 23], "accord": [8, 10], "base_env_id": [8, 10, 11, 15], "replac": [8, 10], "your": [8, 10, 14, 22, 23], "inherit": [8, 10], "rawinterfacewrapp": [8, 10, 17, 18, 22], "can": [8, 10, 11, 15, 17, 18, 19, 21, 22, 23], "case": [8, 10, 19, 22], "thei": [8, 10, 11, 20, 21], "suit": [8, 20, 23], "mpwrapper": [8, 10, 11, 15], "trajectory_generator_kwarg": [8, 10, 11, 15], "trajectory_generator_typ": [8, 10, 11, 15], "phase_generator_kwarg": [8, 10, 11, 15, 22], "phase_generator_typ": [8, 10, 11, 15, 22], "controller_kwarg": [8, 10, 11, 14, 15, 22], "controller_typ": [8, 10, 11, 15], "motor": 8, "p_gain": [8, 14, 22], "d_gain": [8, 14, 22], "basis_generator_kwarg": [8, 10, 11, 15, 22], "basis_generator_typ": [8, 10, 11, 15], "zero_rbf": [8, 10, 11], "num_basi": [8, 10, 11, 15, 22], "num_basis_zero_start": [8, 10, 11, 22], "exp": [8, 10, 11, 15], "alpha_phas": [8, 10, 11], "rbf": [8, 10, 11], "make_bb": [8, 10, 15], "black_box_kwarg": [8, 10, 15], "traj_gen_kwarg": [8, 10, 15], "phase_kwarg": [8, 10, 15], "basis_kwarg": [8, 10, 15], "call": [8, 10, 19], "onc": [8, 10, 19, 20], "begin": [8, 10, 19], "everi": [8, 10, 11, 19, 20], "consecut": [8, 10], "none": [8, 10, 11, 17, 18, 19], "possibl": [8, 10, 11], "chang": [8, 10, 11, 19, 22], "nth": [8, 10], "should": [8, 10, 18, 22], "displai": [8, 10], "__name__": [8, 9, 10, 11, 12, 13, 15], "__main__": [8, 9, 10, 11, 12, 13, 15], "disclaim": [8, 10], "afterward": [8, 10], "doe": [8, 10], "affect": [8, 10], "underli": [8, 10, 19], "export": [8, 10], "mujoco_gl": 8, "osmesa": 8, "vision": 8, "integr": [8, 22, 23], "yield": 8, "error": 8, "reach_site_featur": 8, "hybrid": [8, 10, 19], "framework": [8, 9, 10, 20, 22, 23], "dm_control_promp": 8, "becaus": [8, 10], "longer": [8, 19], "combo": 8, "collect": [9, 14, 19, 23], "defaultdict": 9, "numpi": [9, 14, 22], "np": [9, 14, 22], "example_gener": 9, "make_env": 9, "id": [9, 15, 17, 18, 19, 22], "example_async": 9, "n_cpu": 9, "int": [9, 22], "533d": 9, "n_sampl": 9, "800": 9, "vector": 9, "multiprocess": 9, "faster": 9, "Be": 9, "awar": 9, "reduc": 9, "total": [9, 19], "length": [9, 19], "individu": [9, 20], "cpu": 9, "core": 9, "parallel": 9, "tupl": [9, 22], "done": [9, 15], "type": [9, 17, 18, 19, 22], "ndarrai": [9, 22], "asyncvectorenv": 9, "make_rank": 9, "OR": 9, "plot": [9, 12, 14], "zero": [9, 14], "buffer": 9, "list": [9, 17, 18, 19], "would": 9, "than": 9, "request": 9, "num_env": 9, "repeat": 9, "ceil": 9, "append": 9, "f": [9, 14], "do": [9, 22], "threshold": 9, "map": 9, "lambda": [9, 15], "v": 9, "basic": [9, 10, 23], "example_meta": 10, "alwai": [10, 19], "found": [10, 19, 20, 23], "here": [10, 11, 19, 20, 22, 23], "arxiv": 10, "org": 10, "pdf": 10, "1910": 10, "10897": 10, "io": 10, "todo": [10, 14], "work": [10, 14, 19], "due": 10, "issu": [10, 19], "code": 10, "TO": 10, "BE": 10, "fals": [10, 11, 15, 17], "FOR": 10, "THE": 10, "remov": 10, "example_custom_meta_and_mp": 10, "goal_object_change_mp_wrapp": 10, "rais": [10, 19, 22], "valueerror": 10, "bug": 10, "temporari": 10, "workaround": 10, "alter": [10, 11], "mujocoenv": 10, "offscreen": 10, "mai": 10, "behavior": 10, "might": [10, 14], "necessari": [10, 19, 22], "opengl": 10, "ld_preload": 10, "usr": 10, "lib": 10, "x86_64": 10, "linux": 10, "gnu": 10, "libglew": 10, "so": [10, 22], "500": [10, 11], "buttonpress": 10, "example_mp": [11, 13], "env_nam": [11, 13, 15], "black": [11, 20, 23], "equival": 11, "have": [11, 20, 21, 22], "creat": [11, 17, 19, 23], "take": 11, "care": 11, "extern": 11, "render_mod": [11, 23], "els": [11, 13], "raw": [11, 17, 18], "parametr": [11, 20], "give": 11, "sub": [11, 19], "equal": 11, "default": [11, 17, 18, 19, 22], "over": 11, "wise": [11, 19], "aggreg": 11, "example_custom_mp": 11, "argument": [11, 17, 19], "mp_config_overrid": [11, 14, 17, 18], "wai": [11, 14, 19], "mani": 11, "class": [11, 17, 18, 22], "custom_mpwrapp": 11, "mp_config": [11, 22], "weights_scal": [11, 15], "example_fully_custom_mp": 11, "custom_env_id": 11, "custom_env_id_dmp": 11, "custom_env_id_promp": 11, "upgrad": [11, 17, 22, 23], "mp_wrapper": [11, 15, 17, 18, 22], "add_mp_typ": [11, 17, 18], "base_id": [11, 18], "try": [11, 19, 23], "don": 11, "correlcti": 11, "except": [11, 19], "pass": [11, 17], "example_fully_custom_mp_altern": 11, "instead": [11, 17, 18, 20, 22], "mp_arg": 11, "dure": 11, "registr": [11, 18], "main": 11, "prodmp": [11, 15, 17, 18, 19, 20, 22, 23], "boxpushingdensereplan": [11, 15], "obs1": 11, "compare_bases_shap": 12, "env1_id": 12, "env2_id": 12, "env1": 12, "traj_gen": [12, 13], "show_scaled_basi": 12, "env2": 12, "stuff": 13, "look": [13, 19, 22], "boolean": [13, 22], "ordereddict": 14, "matplotlib": 14, "pyplot": 14, "plt": 14, "howev": [14, 19, 22], "verifi": 14, "extract": 14, "below": 14, "w": 14, "po": [14, 15], "vel": [14, 15], "get_trajectori": 14, "base_shap": 14, "actual_po": 14, "len": 14, "actual_vel": 14, "act": 14, "ion": 14, "fig": 14, "figur": 14, "add_subplot": 14, "img": 14, "imshow": 14, "rgb_arrai": 14, "show": [14, 19], "des_po": 14, "des_vel": 14, "enumer": 14, "zip": 14, "tracking_control": 14, "get_act": 14, "current_po": [14, 22], "current_vel": [14, 22], "clip": 14, "low": 14, "set_data": 14, "canva": 14, "draw": 14, "flush_ev": 14, "figsiz": 14, "subplot": 14, "131": 14, "titl": [14, 23], "p1": 14, "c": 14, "c0": 14, "label": 14, "p2": 14, "c1": 14, "xlabel": 14, "gca": 14, "get_legend_handles_label": 14, "by_label": 14, "legend": 14, "kei": [14, 19], "132": 14, "133": 14, "std": 14, "example_run_replanning_env": 15, "example_custom_replanning_env": 15, "box_push": 15, "max_planning_tim": 15, "plan": 15, "replanning_schedul": 15, "trigger": 15, "condition_on_desir": 15, "boundari": [15, 23], "next": 15, "str": [17, 18], "entry_point": [17, 22], "union": [17, 22], "callabl": 17, "black_box": [17, 18], "raw_interface_wrapp": [17, 18], "registri": [17, 18], "defaultmpwrapp": [17, 18], "register_step_bas": 17, "bool": [17, 22], "dict": [17, 18], "kwarg": 17, "If": [17, 19, 21, 22, 23], "want": [17, 21, 23], "uniqu": [17, 18, 20], "identifi": [17, 18], "entri": 17, "srtep": 17, "dictionari": [17, 18, 19], "overrid": [17, 18], "keyword": 17, "constructor": 17, "note": [17, 18], "otherwis": [17, 18], "given": [17, 19, 22], "string": 17, "notat": 17, "warn": 17, "messag": 17, "suggest": 17, "exampl": [17, 18, 19, 22], "To": [17, 18, 19, 23], "myenv": [17, 18], "myenvclass": 17, "my_modul": 17, "expect": 18, "known_mp": 18, "Will": [18, 23], "match": [18, 22], "wish": 18, "one": [18, 22, 23], "alongsid": 18, "custommpwrapp": 18, "param": [18, 23], "prepar": 19, "ad": 19, "namespac": 19, "legaci": [19, 21], "metaworld": [19, 20, 21, 23], "n": 19, "cumul": 19, "part": [19, 22], "mainli": 19, "meant": 19, "debug": 19, "log": 19, "train": 19, "step_act": 19, "output": 19, "step_observ": 19, "intermedi": 19, "step_reward": 19, "trajectory_length": 19, "origin": 19, "In": [19, 22], "miss": 19, "fill": 19, "_": 19, "keep": 19, "mind": 19, "process": 19, "split": 19, "lean": 19, "still": [19, 22], "beta": 19, "feel": [19, 22], "problem": 19, "occur": 19, "directli": [19, 22], "gym_": 19, "again": 19, "conveni": 19, "variabl": 19, "store": 19, "all_movement_primitive_environ": 19, "all_fancy_movement_primitive_environ": 19, "all_gym_movement_primitive_environ": 19, "deepmind": [19, 23], "all_dmc_movement_primitive_environ": 19, "all_metaworld_movement_primitive_environ": 19, "movement_primitive_environments_for_n": 19, "my_custom_namespac": 19, "tradit": 20, "concept": 20, "stochast": 20, "search": 20, "commonli": 20, "produc": 20, "like": [20, 21], "probabilist": [20, 23], "convert": 20, "track": 20, "pd": [20, 23], "tailor": 20, "addition": 20, "special": 20, "overarch": 20, "remain": 20, "polici": 20, "craft": 20, "accommod": 20, "contextu": [20, 22], "At": 20, "onset": 20, "subset": 20, "demand": 20, "virtual": 21, "venv": 21, "3rd": 21, "altern": [21, 23], "poetri": 21, "conda": 21, "few": 21, "choos": 21, "box2d": 21, "jax": 21, "automat": 21, "date": 21, "sinc": 21, "git": 21, "d155d0051630bb365ea6a824e02c66c068947439": 21, "egg": 21, "clone": 21, "repositori": 21, "go": 21, "folder": 21, "cd": 21, "manual": 21, "guid": 22, "explain": 22, "how": 22, "abc": 22, "abstractmethod": 22, "properti": 22, "context_mask": 22, "mask": 22, "filter": 22, "unwant": 22, "unnecessari": 22, "after": 22, "first": 22, "receiv": 22, "arrai": 22, "indic": 22, "ones": 22, "dtype": 22, "float": 22, "exclus": 22, "regardless": 22, "indirectli": 22, "notimplementederror": 22, "overitten": 22, "attribut": 22, "document": 22, "mp_pytorch": 22, "userguid": 22, "anoth": 22, "merg": 22, "num_basis_zero_go": 22, "rough": 22, "outlin": 22, "shown": 22, "simpli": 22, "cool_new_env": 22, "my_custom_mpwrapp": 22, "my_custom_env": 22, "custom_prodmp": 22, "built": 23, "fork": 23, "renown": 23, "librari": 23, "sever": 23, "etc": 23, "With": 23, "straightforward": 23, "transform": 23, "compat": 23, "contribut": 23, "own": 23, "re": 23, "inspir": 23, "assist": 23, "highli": 23, "randomli": 23, "sleep": 23, "metadata": 23, "render_fp": 23, "about": 23, "pypi": 23, "master": 23, "what": 23, "usag": 23, "tune": 23, "public": 23, "softwar": 23, "author": 23, "otto": 23, "fabian": 23, "celik": 23, "onur": 23, "roth": 23, "dominik": 23, "zhou": 23, "hongyi": 23, "abstract": 23, "unifi": 23, "approach": 23, "url": 23, "organ": 23, "autonom": 23, "lab": 23, "alr": 23, "kit": 23}, "objects": {"fancy_gym": [[16, 0, 0, "-", "envs"], [17, 1, 1, "", "register"], [18, 1, 1, "", "upgrade"]]}, "objtypes": {"0": "py:module", "1": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"]}, "titleterms": {"api": [0, 23], "deepmind": [1, 8], "control": [1, 3, 8, 14], "dmc": 1, "step": [1, 3, 5, 6, 7, 19], "base": [1, 3, 5, 6, 7, 19], "environ": [1, 3, 5, 6, 7, 19, 22, 23], "mp": [1, 3, 5, 6, 7, 12, 22], "airhockei": 2, "classic": 3, "fanci": [4, 23], "mujoco": 5, "box": [5, 19], "push": 5, "tabl": 5, "tenni": 5, "beer": 5, "pong": 5, "variat": 5, "exist": 5, "metaworld": [6, 10], "gymnasium": 7, "exampl": [8, 9, 10, 11, 12, 13, 14, 15, 23], "gener": 9, "usag": [9, 19], "movement": 11, "primit": 11, "param": 12, "tune": [12, 14], "openai": 13, "env": [13, 16], "pd": 14, "gain": 14, "replan": 15, "fancy_gym": [16, 17, 18], "regist": 17, "upgrad": 18, "basic": 19, "black": 19, "what": 20, "i": 20, "episod": 20, "rl": 20, "instal": 21, "from": 21, "pypi": 21, "recommend": 21, "master": 21, "creat": 22, "new": 22, "gym": 23, "kei": 23, "featur": 23, "quickstart": 23, "guid": 23, "user": 23, "cite": 23, "project": 23, "icon": 23, "attribut": 23}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"API": [[0, "api"], [23, null]], "DeepMind Control (DMC)": [[1, "deepmind-control-dmc"]], "Step-Based Environments": [[1, "step-based-environments"], [3, "step-based-environments"], [5, "step-based-environments"], [6, "step-based-environments"], [7, "step-based-environments"], [19, "step-based-environments"]], "MP Environments": [[1, "mp-environments"], [3, "mp-environments"], [5, "mp-environments"], [6, "mp-environments"], [7, "mp-environments"]], "AirHockey": [[2, "airhockey"]], "Classic Control": [[3, "classic-control"]], "Fancy": [[4, "fancy"]], "Mujoco": [[5, "mujoco"]], "Box Pushing": [[5, "box-pushing"]], "Table Tennis": [[5, "table-tennis"]], "Beer Pong": [[5, "beer-pong"]], "Variations of existing environments": [[5, "variations-of-existing-environments"]], "Metaworld": [[6, "metaworld"]], "Gymnasium": [[7, "gymnasium"]], "DeepMind Control Examples": [[8, "deepmind-control-examples"]], "General Usage Examples": [[9, "general-usage-examples"]], "Metaworld Examples": [[10, "metaworld-examples"]], "Movement Primitives Examples": [[11, "movement-primitives-examples"]], "MP Params Tuning Example": [[12, "mp-params-tuning-example"]], "OpenAI Envs Examples": [[13, "openai-envs-examples"]], "PD Control Gain Tuning Example": [[14, "pd-control-gain-tuning-example"]], "Replanning Example": [[15, "replanning-example"]], "fancy_gym.envs": [[16, "module-fancy_gym.envs"]], "fancy_gym.register": [[17, "fancy-gym-register"]], "fancy_gym.upgrade": [[18, "fancy-gym-upgrade"]], "Basic Usage": [[19, "basic-usage"]], "Black-Box Environments": [[19, "black-box-environments"]], "What is Episodic RL?": [[20, "what-is-episodic-rl"]], "Installation": [[21, "installation"]], "Installation from PyPI (recommended)": [[21, "installation-from-pypi-recommended"]], "Installation from master": [[21, "installation-from-master"]], "Creating new MP Environments": [[22, "creating-new-mp-environments"]], "Fancy Gym": [[23, "fancy-gym"]], "Key Features": [[23, "key-features"]], "Quickstart Guide": [[23, "quickstart-guide"]], "User Guide": [[23, null]], "Environments": [[23, null]], "Examples": [[23, null]], "Citing the Project": [[23, "citing-the-project"]], "Icon Attribution": [[23, "icon-attribution"]]}, "indexentries": {"fancy_gym.envs": [[16, "module-fancy_gym.envs"]], "module": [[16, "module-fancy_gym.envs"]], "register() (in module fancy_gym)": [[17, "fancy_gym.register"]], "upgrade() (in module fancy_gym)": [[18, "fancy_gym.upgrade"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["api", "envs/dmc", "envs/fancy/airhockey", "envs/fancy/classic_control", "envs/fancy/index", "envs/fancy/mujoco", "envs/meta", "envs/open_ai", "examples/dmc", "examples/general", "examples/metaworld", "examples/movement_primitives", "examples/mp_params_tuning", "examples/open_ai", "examples/pd_control_gain_tuning", "examples/replanning_envs", "generated/fancy_gym.envs", "generated/fancy_gym.register", "generated/fancy_gym.upgrade", "guide/basic_usage", "guide/episodic_rl", "guide/installation", "guide/upgrading_envs", "index"], "filenames": ["api.rst", "envs/dmc.md", "envs/fancy/airhockey.rst", "envs/fancy/classic_control.md", "envs/fancy/index.rst", "envs/fancy/mujoco.md", "envs/meta.md", "envs/open_ai.md", "examples/dmc.rst", "examples/general.rst", "examples/metaworld.rst", "examples/movement_primitives.rst", "examples/mp_params_tuning.rst", "examples/open_ai.rst", "examples/pd_control_gain_tuning.rst", "examples/replanning_envs.rst", "generated/fancy_gym.envs.rst", "generated/fancy_gym.register.rst", "generated/fancy_gym.upgrade.rst", "guide/basic_usage.rst", "guide/episodic_rl.rst", "guide/installation.rst", "guide/upgrading_envs.rst", "index.rst"], "titles": ["API", "DeepMind Control (DMC)", "AirHockey", "Classic Control", "Fancy", "Mujoco", "Metaworld", "Gymnasium", "DeepMind Control Examples", "General Usage Examples", "Metaworld Examples", "Movement Primitives Examples", "MP Params Tuning Example", "OpenAI Envs Examples", "PD Control Gain Tuning Example", "Replanning Example", "fancy_gym.envs", "fancy_gym.register", "fancy_gym.upgrade", "Basic Usage", "What is Episodic RL?", "Installation", "Creating new MP Environments", "Fancy Gym"], "terms": {"These": [1, 2, 3, 5, 7, 20], "ar": [1, 2, 3, 4, 5, 7, 8, 10, 11, 14, 17, 19, 20, 21, 22], "wrapper": [1, 8, 10, 11, 15, 17, 18, 22], "select": [1, 7, 22], "order": 1, "us": [1, 2, 5, 6, 9, 11, 15, 17, 18, 19, 20, 21, 22, 23], "our": [1, 8, 9, 10, 11, 20, 23], "motion": [1, 5, 20], "primit": [1, 8, 10, 13, 17, 18, 20, 22, 23], "gym": [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 22], "interfac": [1, 6, 11, 22, 23], "them": [1, 5, 6, 7, 8, 10, 11, 19, 23], "when": [1, 5, 8, 9, 10, 17, 22], "instal": [1, 10, 23], "fancy_gym": [1, 6, 8, 9, 10, 11, 12, 13, 14, 15, 19, 21, 22, 23], "option": [1, 5, 17, 18, 19, 21], "extra": 1, "e": [1, 8, 10, 11, 21, 22], "g": [1, 8, 10, 11, 22], "pip": [1, 21, 23], "all": [1, 5, 6, 9, 10, 19, 21, 23], "regular": [1, 19, 23], "task": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 19, 22], "avaibl": [1, 6, 21], "via": [1, 3, 6, 19, 21, 22, 23], "shimmi": 1, "name": [1, 3, 5, 6, 7, 8, 10, 19], "descript": [1, 3, 5, 6, 7, 19], "action": [1, 3, 5, 6, 7, 8, 9, 10, 11, 14, 15, 19, 20, 22, 23], "dim": 1, "observ": [1, 2, 3, 5, 6, 8, 9, 10, 11, 19, 20, 22, 23], "dm_control": [1, 8, 19], "acrobot": 1, "swingup": 1, "v0": [1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 22, 23], "underactu": 1, "doubl": 1, "pendulum": [1, 9], "torqu": [1, 5, 20], "appli": [1, 5], "second": 1, "joint": [1, 5, 22], "swing": 1, "up": [1, 4, 6, 21], "balanc": 1, "1": [1, 5, 7, 8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "6": [1, 5, 6], "swingup_spars": 1, "similar": 1, "spars": [1, 5], "reward": [1, 3, 5, 8, 9, 10, 11, 13, 15, 19, 22, 23], "achiev": [1, 5, 8, 10], "ball_in_cup": [1, 8, 19], "catch": [1, 8, 19], "planar": 1, "ball": [1, 5], "cup": [1, 5], "where": [1, 2, 3, 6], "receptacl": 1, "must": [1, 6], "2": [1, 3, 5, 7, 8, 9, 10, 11, 13, 22], "8": [1, 5, 15], "cartpol": 1, "cart": 1, "pole": 1, "goal": [1, 3, 5, 10], "i": [1, 2, 5, 6, 8, 9, 10, 11, 13, 15, 17, 18, 19, 22, 23], "an": [1, 5, 6, 7, 8, 10, 17, 18, 19, 20, 22, 23], "unactu": 1, "move": 1, "start": [1, 22], "upright": 1, "5": [1, 3, 5, 8, 10, 11, 14, 15, 19, 22], "balance_spars": 1, "downward": 1, "requir": [1, 2, 3, 5, 6, 8, 10, 11, 19, 20, 22], "two_pol": 1, "extens": 1, "domain": 1, "two": [1, 5], "serial": 1, "connect": 1, "increas": [1, 9], "challeng": [1, 2, 5, 23], "three_pol": 1, "three": [1, 2], "further": [1, 19, 20], "11": [1, 8], "cheetah": 1, "run": [1, 8, 9, 10, 11, 13, 15], "biped": 1, "robot": [1, 2, 5, 6, 20, 23], "The": [1, 2, 3, 5, 6, 8, 10, 11, 17, 18, 19, 20, 22, 23], "proport": 1, "forward": 1, "veloc": [1, 5, 11, 14, 15, 19, 20, 22], "maximum": [1, 15], "speed": 1, "17": 1, "dog": 1, "stand": 1, "focus": [1, 2], "postur": 1, "38": 1, "223": 1, "walk": 1, "coordin": [1, 5], "movement": [1, 8, 10, 13, 17, 18, 20, 22, 23], "trot": 1, "perform": [1, 2, 5], "gait": 1, "combin": 1, "stabil": 1, "fetch": 1, "plai": [1, 5, 6], "involv": [1, 2, 6], "locomot": 1, "object": [1, 5, 6, 20], "interact": [1, 19], "232": 1, "finger": 1, "spin": 1, "rotat": 1, "bodi": 1, "hing": 1, "9": [1, 3], "turn_easi": 1, "align": [1, 5, 20], "tip": 1, "free": [1, 19, 22], "target": [1, 14], "easier": 1, "version": [1, 7, 8, 10, 13, 17, 18, 19, 21, 22], "larger": 1, "12": 1, "turn_hard": 1, "smaller": 1, "difficulti": [1, 23], "fish": [1, 8], "right": [1, 20], "itself": [1, 3], "fluid": 1, "21": [1, 5], "swim": [1, 8], "incorpor": 1, "dynam": [1, 2, 20, 23], "24": 1, "hopper": [1, 5], "One": 1, "leg": 1, "minim": 1, "torso": 1, "height": 1, "4": [1, 5, 6, 7, 9, 11, 15, 22], "15": [1, 5, 14], "hop": 1, "humanoid": 1, "simplifi": 1, "maintain": [1, 5, 19, 23], "67": 1, "specifi": [1, 5, 8, 10, 18], "aim": [1, 2], "high": [1, 3, 14], "horizont": 1, "run_pure_st": 1, "focu": [1, 3], "pure": 1, "state": [1, 15, 19], "55": 1, "humanoid_cmu": 1, "advanc": [1, 5, 6], "cmu": 1, "model": [1, 2], "56": 1, "137": 1, "lqr": 1, "lqr_2_1": 1, "linear": [1, 8, 10, 11, 22], "quadrat": 1, "regul": 1, "mass": 1, "actuat": [1, 2], "posit": [1, 5, 14, 19, 20, 22], "optim": [1, 20], "lqr_6_2": 1, "more": [1, 9, 13, 19, 20, 22, 23], "complex": [1, 2, 3, 5], "manipul": [1, 5, 6, 8, 9], "bring_bal": 1, "bring": 1, "locat": [1, 5], "initi": [1, 5], "variat": [1, 4], "44": 1, "bring_peg": 1, "peg": [1, 6], "insert_bal": 1, "insert": [1, 6], "basket": [1, 5], "insert_peg": 1, "slot": 1, "classic": [1, 4, 20, 23], "invert": 1, "limit": [1, 2, 5], "multipl": [1, 5, 8, 10, 11, 13, 18, 19, 22], "3": [1, 2, 5, 22], "point_mass": 1, "easi": [1, 22, 23], "point": [1, 3, 17, 22], "correspond": 1, "global": 1, "x": [1, 5], "y": [1, 5], "ax": [1, 5, 14], "hard": 1, "random": [1, 5], "gain": [1, 23], "per": [1, 5], "episod": [1, 5, 8, 9, 10, 11, 14, 19, 23], "memoryless": 1, "agent": [1, 2, 3], "quadrup": 1, "four": 1, "78": 1, "escap": 1, "environment": 1, "101": 1, "90": 1, "reacher": [1, 5, 7, 11, 13, 19], "link": [1, 3, 5], "sphere": 1, "stacker": 1, "stack_2": 1, "stack": [1, 9], "box": [1, 4, 6, 11, 20, 23], "correct": [1, 14], "placement": 1, "gripper": 1, "49": 1, "stack_4": 1, "63": 1, "swimmer": 1, "swimmer6": 1, "six": 1, "nose": 1, "insid": 1, "25": [1, 3, 5, 15], "swimmer15": 1, "fifteen": 1, "extend": 1, "14": 1, "61": 1, "walker": [1, 5], "trajectori": [1, 3, 7, 8, 10, 11, 13, 14, 19, 20, 22, 23], "horizon": [1, 3, 5, 6, 7], "dimens": [1, 3, 5, 6, 7, 22], "context": [1, 3, 5, 6, 11, 19, 20, 22], "dm_control_prodmp": 1, "A": [1, 3, 5, 6, 7, 22], "promp": [1, 7, 8, 10, 11, 13, 17, 18, 19, 20, 22, 23], "wrap": [1, 7], "1000": [1, 8, 9, 10, 11, 19, 23], "10": [1, 8, 9, 10, 11, 13, 23], "dm_control_dmp": [1, 19], "dmp": [1, 3, 6, 8, 9, 10, 11, 17, 18, 19, 20, 22, 23], "fanci": [2, 3, 5, 9, 11, 15, 19], "provid": [2, 3, 5, 7, 8, 10, 11, 17, 18, 19, 21], "access": [2, 19, 22, 23], "rang": [2, 5, 8, 9, 10, 11, 13, 15, 19, 22, 23], "environ": [2, 4, 8, 9, 10, 11, 13, 14, 15, 17, 18, 20, 21], "air": 2, "hockei": 2, "close": [2, 5, 6, 8, 10, 11, 15], "gap": 2, "between": [2, 5, 14, 19], "simul": [2, 3, 6], "learn": [2, 3, 5, 6, 11, 19, 20, 23], "real": [2, 14], "world": [2, 10], "applic": 2, "variou": [2, 5, 23], "aspect": 2, "oper": [2, 20], "deal": 2, "disturb": 2, "nois": 2, "safeti": 2, "avail": [2, 5, 19, 22], "through": [2, 11], "allow": [2, 3, 8, 10, 11, 17, 18, 19, 22], "develop": 2, "capabl": [2, 5], "differ": [2, 5, 8, 14, 18, 20], "level": [2, 19], "includ": [2, 5, 9, 17, 18, 23], "hit": [2, 5], "defend": 2, "both": [2, 22, 23], "degre": [2, 5, 23], "freedom": [2, 5], "dof": [2, 5], "seven": [2, 5], "7": [2, 5], "configur": [2, 5, 17, 18, 22], "base": [2, 4, 8, 9, 10, 11, 13, 15, 17, 18, 20, 22, 23], "kuka": 2, "iiwa14": 2, "which": [2, 3, 5, 8, 10, 11, 13, 17], "repres": [2, 20, 22], "higher": [2, 23], "control": [2, 4, 19, 20, 22, 23], "akin": 2, "set": [2, 8, 9, 10, 17, 19, 20, 23], "particip": 2, "strategi": 2, "enabl": [2, 11, 19], "react": 2, "adapt": [2, 4, 5], "within": [2, 5], "final": [2, 5], "phase": 2, "tournament": 2, "test": [2, 19, 21], "comprehens": [2, 5, 23], "game": [2, 5, 6], "scenario": 2, "top": [2, 5, 6], "team": 2, "actual": 2, "system": [2, 5], "For": [2, 5, 8, 10, 13, 22], "detail": [2, 19, 22], "inform": [2, 5, 13, 14, 19], "rule": 2, "stage": 2, "submiss": [2, 23], "pleas": [2, 14, 18, 22], "visit": 2, "offici": 2, "websit": 2, "follow": [2, 8, 10, 11, 22], "7dof": 2, "3dof": 2, "airhockit2023": 2, "foundat": [3, 5, 21, 23], "platform": 3, "explor": [3, 23], "experi": 3, "rl": [3, 5, 23], "algorithm": [3, 5], "design": [3, 4, 5, 6, 20], "simpl": 3, "research": [3, 5, 23], "practition": 3, "fundament": 3, "principl": 3, "without": [3, 19, 22], "dimension": [3, 22], "physic": 3, "simplereach": 3, "reach": [3, 5, 6, 19], "ani": [3, 9, 17, 18, 19], "until": 3, "150": [3, 6], "time": [3, 5, 8, 10, 11, 19, 23], "thi": [3, 5, 6, 8, 9, 10, 11, 14, 19, 20, 22, 23], "space": [3, 5, 11, 20, 22], "precis": [3, 5], "toward": 3, "end": [3, 5], "200": [3, 5, 9], "longsimplereach": 3, "18": [3, 5], "viapointreach": 3, "leverag": [3, 9], "support": [3, 6, 10, 19, 20, 22, 23], "self": [3, 22], "collis": 3, "detect": 3, "onli": [3, 5, 8, 10, 11, 17, 19, 21, 22], "100": [3, 5, 7, 15], "199": 3, "viapoint": 3, "respect": 3, "holereach": [3, 9, 11], "effector": [3, 5], "need": [3, 5, 8, 10, 18, 22], "narrow": 3, "hole": [3, 6], "colld": 3, "wall": [3, 6], "fancy_dmp": [3, 5, 11], "holereacherfixedgo": 3, "fix": [3, 5], "attractor": 3, "30": 3, "add": [4, 8, 10, 19, 22], "coupl": 4, "new": [4, 11, 18, 19, 20, 23], "some": [4, 11, 14, 19], "exist": [4, 6, 8, 10, 11, 17, 18, 19, 22], "while": [4, 5, 15, 19, 20], "other": [4, 8, 10, 19, 22, 23], "were": 4, "build": [4, 22], "u": 4, "from": [4, 5, 6, 8, 9, 10, 14, 19, 20, 22, 23], "ground": 4, "push": [4, 6, 23], "boxpushingdens": [4, 5, 15, 23], "mujoco": [4, 9, 11, 15, 21, 23], "step": [4, 8, 9, 10, 11, 13, 14, 15, 17, 18, 20, 22, 23], "tabl": [4, 23], "tenni": [4, 23], "beer": 4, "pong": 4, "mp": [4, 8, 10, 11, 14, 17, 18, 19, 20, 23], "airhockei": [4, 23], "present": [5, 20, 23], "reinforc": [5, 6, 23], "util": 5, "versatil": 5, "franka": 5, "emika": 5, "panda": [5, 23], "arm": [5, 6], "boast": 5, "orient": 5, "defin": [5, 11, 18, 22], "its": 5, "constrain": 5, "certain": 5, "along": 5, "encompass": 5, "full": [5, 8, 10, 11, 13, 19, 22, 23], "360": 5, "z": 5, "axi": [5, 14], "": [5, 20, 23], "mission": 5, "accuraci": 5, "centimet": 5, "0": [5, 8, 9, 10, 11, 13, 14, 15, 19, 22], "radian": 5, "sine": 5, "cosin": 5, "valu": [5, 9, 14, 19], "angl": 5, "quaternion": 5, "describ": 5, "each": [5, 19], "composit": 5, "function": [5, 9, 11], "serv": 5, "metric": 5, "It": [5, 8, 10, 11, 22], "account": 5, "distanc": 5, "rod": 5, "desir": [5, 15], "penalti": 5, "violat": 5, "well": [5, 19, 22], "cost": 5, "energi": 5, "expenditur": 5, "structur": [5, 6, 8, 10, 11], "purposefulli": 5, "enhanc": [5, 20], "gener": [5, 11, 15, 19, 20, 22, 23], "tempor": 5, "last": [5, 11], "timestep": 5, "spatial": 5, "almost": 5, "enought": 5, "somewhat": 5, "correctli": 5, "custom": [5, 8, 9, 10, 11, 15, 18, 19, 22, 23], "dens": 5, "13": 5, "boxpushingtemporalspars": [5, 11], "boxpushingtemporalspatialspars": 5, "offer": [5, 23], "equip": [5, 6], "respond": 5, "incom": 5, "return": [5, 8, 9, 10, 11, 12, 13, 19, 22], "accur": 5, "oppon": 5, "side": [5, 6], "meter": 5, "65": 5, "compris": [5, 6], "decis": 5, "consid": 5, "successfulli": 5, "complet": [5, 20], "land": 5, "also": [5, 6, 8, 9, 10, 11, 17, 18, 19, 21], "tight": 5, "margin": 5, "20": [5, 11], "reflect": 5, "condit": [5, 15], "whether": [5, 17, 22, 23], "wa": 5, "proxim": 5, "cater": 5, "addit": [5, 17, 18, 19], "overcom": 5, "tabletennis2d": 5, "2d": 5, "350": 5, "19": 5, "tabletennis2dreplan": 5, "replan": [5, 11, 19, 23], "tabletennis4d": [5, 11, 12], "4d": 5, "22": 5, "tabletennis4dreplan": [5, 11], "tabletenniswind": 5, "wind": 5, "effect": [5, 22], "tabletennisgoalswitch": 5, "switch": 5, "tabletenniswindreplan": [5, 11], "upon": [5, 23], "throw": 5, "place": [5, 6], "larg": 5, "establish": 5, "42": [5, 18], "05": [5, 14], "angular": 5, "rel": [5, 22], "bottom": 5, "current": [5, 6, 8, 10, 19, 20, 22], "method": [5, 8, 10, 11, 20, 23], "paramet": [5, 8, 10, 11, 18, 22, 23], "expand": 5, "weight": 5, "basi": [5, 11, 20], "durat": 5, "releas": 5, "implement": [5, 11, 19, 22], "form": 5, "squar": 5, "sum": [5, 11], "across": 5, "penal": 5, "excess": 5, "forc": 5, "encourag": [5, 23], "effici": [5, 6], "t": [5, 11, 14, 15], "befor": 5, "non": [5, 18], "markovian": 5, "compon": [5, 6], "assess": 5, "chosen": [5, 20], "ensur": 5, "fall": 5, "reason": 5, "overal": 5, "specif": [5, 13, 20], "success": 5, "determin": [5, 22], "conclus": 5, "showcas": 5, "abil": 5, "predict": [5, 20], "execut": [5, 11, 19, 20, 23], "popular": 5, "parti": [5, 21], "beerpong": 5, "300": 5, "29": 5, "beerpongstepbas": 5, "beerpongfixedreleas": 5, "modifi": 5, "gymnasium": [5, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 22, 23], "v2": [5, 6, 7, 9, 10, 13, 19], "reacherspars": 5, "same": [5, 8, 10, 11, 17, 18, 19, 22], "longreach": 5, "27": 5, "longreacherspars": 5, "reacher5d": [5, 9, 11, 14, 19], "env": [5, 6, 8, 9, 10, 11, 14, 15, 17, 18, 19, 22, 23], "reacherenv": 5, "reacher5dspars": 5, "reacher7d": 5, "reacher7dspars": 5, "hopperjumpspars": 5, "jump": 5, "250": [5, 8], "16": [5, 9], "hopperjump": 5, "continu": 5, "antjump": 5, "ant": 5, "119": 5, "halfcheetahjump": 5, "halfcheetah": [5, 9], "112": 5, "hopperjumponbox": 5, "hopperthrow": 5, "hopperthrowinbasket": 5, "walker2djump": 5, "walker2d": 5, "depend": [5, 20, 21], "most": 5, "variant": [5, 6, 19, 23], "refer": [5, 6, 7], "fancy_promp": [5, 11, 12, 14, 19, 23], "fancy_prodmp": [5, 11, 12, 15], "dial": 6, "turn": [6, 19], "open": [6, 19, 22], "sourc": [6, 17, 18], "benchmark": [6, 23], "meta": [6, 10], "multi": 6, "50": [6, 7], "divers": 6, "featur": 6, "univers": 6, "tabletop": 6, "sawyer": 6, "varieti": [6, 11], "everydai": 6, "share": 6, "pivot": 6, "reus": 6, "acquir": 6, "relat": 6, "make": [6, 8, 9, 10, 11, 12, 13, 14, 15, 19, 22, 23], "ml1": [6, 19], "standard": [6, 8, 10, 23], "assembli": 6, "assembl": 6, "39": 6, "basketbal": 6, "bin": 6, "pick": [6, 18], "button": [6, 10], "press": [6, 10], "topdown": 6, "down": 6, "perspect": 6, "coffe": 6, "machin": 6, "pull": 6, "lever": 6, "disassembl": 6, "door": 6, "lock": 6, "unlock": 6, "hand": [6, 22], "drawer": 6, "faucet": 6, "hammer": 6, "handl": [6, 14], "out": [6, 23], "back": [6, 11], "backward": 6, "plate": 6, "slide": 6, "unplug": 6, "soccer": 6, "stick": 6, "against": 6, "shelf": 6, "sweep": 6, "contain": 6, "window": 6, "metaworld_promp": [6, 10], "metaworld_prodmp": [6, 19], "now": [6, 11], "lunar": 7, "lander": 7, "lunarland": 7, "we": [7, 8, 10, 11, 18, 19, 20, 21, 22, 23], "farama": [7, 21], "previous": 7, "openai": [7, 9, 19, 23], "doc": 7, "overview": 7, "counterpart": 7, "gym_promp": [7, 13, 19], "continuousmountaincar": 7, "fetchslidedens": 7, "v1": [7, 9, 10], "fetchreachdens": 7, "import": [8, 9, 10, 11, 12, 13, 14, 15, 19, 22, 23], "def": [8, 9, 10, 11, 12, 13, 15, 22], "example_dmc": 8, "env_id": [8, 9, 10, 11, 13, 14], "seed": [8, 9, 10, 11, 13, 14, 15, 19], "iter": [8, 9, 10, 11, 15], "render": [8, 9, 10, 11, 13, 14, 15, 19, 23], "true": [8, 9, 10, 11, 12, 13, 14, 15, 17, 19], "dmc": [8, 9, 21, 23], "ha": [8, 10, 21, 22], "domain_nam": [8, 9], "task_nam": [8, 9, 10], "environment_nam": [8, 9], "arg": [8, 9, 10, 11, 13, 17, 18], "either": [8, 9, 14], "determinist": [8, 9, 10, 11], "behaviour": [8, 9, 10, 11], "number": [8, 9, 10, 11, 13, 15, 19, 22], "rollout": [8, 9, 10, 11], "render_mod": [8, 9, 10, 11, 13, 15, 23], "human": [8, 9, 10, 11, 13, 15, 19, 23], "els": [8, 9, 10, 11, 13, 15], "none": [8, 9, 10, 11, 13, 15, 17, 18, 19], "ob": [8, 9, 10, 11, 13, 15], "reset": [8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "print": [8, 9, 10, 11, 13, 17, 19, 22], "shape": [8, 9, 10, 14, 22], "observation_spac": [8, 9, 10, 22], "action_spac": [8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "ac": [8, 10, 11, 13, 15, 22], "sampl": [8, 9, 10, 11, 13, 14, 15, 19, 22, 23], "termin": [8, 9, 10, 11, 13, 15, 19, 22, 23], "truncat": [8, 9, 10, 11, 13, 15, 19, 22, 23], "info": [8, 9, 10, 11, 13, 15, 19, 22, 23], "del": [8, 10, 15], "example_custom_dmc_and_mp": 8, "alreadi": [8, 10, 11, 13, 17, 18, 19, 22], "regist": [8, 10, 11, 13, 15, 18, 22, 23], "henc": [8, 10, 11, 19], "adjust": [8, 10, 11], "hyperparamet": [8, 10, 11], "yet": [8, 10, 11, 21, 22], "recommend": [8, 10, 11, 22, 23], "abov": [8, 9, 10, 11, 19], "you": [8, 10, 11, 17, 18, 19, 21, 22, 23], "just": [8, 10, 11, 19], "interest": [8, 10, 11], "chain": [8, 10], "those": [8, 10, 11, 21], "appreci": [8, 10, 11, 23], "pr": [8, 10, 11, 22, 23], "especi": [8, 10, 11], "repo": [8, 10, 11], "http": [8, 10, 11, 21, 23], "github": [8, 10, 11, 21, 23], "com": [8, 10, 11, 21, 23], "alrhub": [8, 10, 11, 21, 23], "accord": [8, 10], "base_env_id": [8, 10, 11, 15], "replac": [8, 10], "your": [8, 10, 14, 22, 23], "inherit": [8, 10], "rawinterfacewrapp": [8, 10, 17, 18, 22], "can": [8, 10, 11, 15, 17, 18, 19, 21, 22, 23], "case": [8, 10, 19, 22], "thei": [8, 10, 11, 20, 21], "suit": [8, 20, 23], "mpwrapper": [8, 10, 11, 15], "trajectory_generator_kwarg": [8, 10, 11, 15], "trajectory_generator_typ": [8, 10, 11, 15], "phase_generator_kwarg": [8, 10, 11, 15, 22], "phase_generator_typ": [8, 10, 11, 15, 22], "controller_kwarg": [8, 10, 11, 14, 15, 22], "controller_typ": [8, 10, 11, 15], "motor": 8, "p_gain": [8, 14, 22], "d_gain": [8, 14, 22], "basis_generator_kwarg": [8, 10, 11, 15, 22], "basis_generator_typ": [8, 10, 11, 15], "zero_rbf": [8, 10, 11], "num_basi": [8, 10, 11, 15, 22], "num_basis_zero_start": [8, 10, 11, 22], "exp": [8, 10, 11, 15], "alpha_phas": [8, 10, 11], "rbf": [8, 10, 11], "base_env": [8, 10, 15], "make_bb": [8, 10, 15], "black_box_kwarg": [8, 10, 15], "traj_gen_kwarg": [8, 10, 15], "phase_kwarg": [8, 10, 15], "basis_kwarg": [8, 10, 15], "call": [8, 10, 11, 19], "onc": [8, 10, 11, 19, 20], "begin": [8, 10, 11, 19], "everi": [8, 10, 11, 19, 20], "consecut": [8, 10, 11], "mode": [8, 10, 11, 14, 19], "possibl": [8, 10, 11], "chang": [8, 10, 11, 19, 22], "nth": [8, 10], "should": [8, 10, 18, 22], "displai": [8, 10], "main": [8, 9, 10, 11, 13, 15], "fals": [8, 9, 10, 11, 15, 17], "disclaim": 8, "vision": 8, "integr": [8, 22, 23], "yield": 8, "error": 8, "reach_site_featur": 8, "hybrid": [8, 10, 19], "framework": [8, 9, 10, 20, 22, 23], "dm_control_promp": 8, "becaus": 8, "longer": [8, 19], "combo": 8, "__name__": [8, 9, 10, 11, 12, 13, 15], "__main__": [8, 9, 10, 11, 12, 13, 15], "collect": [9, 14, 19, 23], "defaultdict": 9, "numpi": [9, 14, 22], "np": [9, 14, 22], "example_gener": 9, "make_env": 9, "id": [9, 15, 17, 18, 19, 22], "example_async": 9, "n_cpu": 9, "int": [9, 22], "533d": 9, "n_sampl": 9, "800": 9, "vector": 9, "multiprocess": 9, "faster": 9, "Be": 9, "awar": 9, "reduc": 9, "total": [9, 19], "length": [9, 19], "individu": [9, 20], "cpu": 9, "core": 9, "parallel": 9, "tupl": [9, 22], "done": 9, "type": [9, 17, 18, 19, 22], "ndarrai": [9, 22], "asyncvectorenv": 9, "make_rank": 9, "OR": 9, "plot": [9, 12, 14], "zero": [9, 14], "buffer": 9, "list": [9, 17, 18, 19], "would": 9, "than": 9, "request": 9, "num_env": 9, "repeat": 9, "ceil": 9, "append": 9, "f": [9, 14], "do": [9, 22], "threshold": 9, "map": 9, "lambda": [9, 15], "v": 9, "basic": [9, 23], "example_meta": 10, "alwai": [10, 19], "found": [10, 19, 20, 23], "here": [10, 11, 19, 20, 22, 23], "arxiv": 10, "org": 10, "pdf": 10, "1910": 10, "10897": 10, "io": 10, "todo": [10, 14], "work": [10, 14, 19], "due": 10, "issu": [10, 19], "code": 10, "example_custom_meta_and_mp": 10, "goal_object_change_mp_wrapp": 10, "might": [10, 14], "necessari": [10, 19, 22], "opengl": 10, "export": 10, "ld_preload": 10, "usr": 10, "lib": 10, "x86_64": 10, "linux": 10, "gnu": 10, "libglew": 10, "so": [10, 22], "500": [10, 11], "example_mp": [11, 13], "env_nam": [11, 13, 15], "black": [11, 20, 23], "equival": 11, "have": [11, 20, 21, 22], "creat": [11, 17, 19, 23], "take": 11, "care": 11, "extern": 11, "raw": [11, 17, 18], "parametr": [11, 20], "give": 11, "sub": [11, 19], "equal": 11, "default": [11, 17, 18, 19, 22], "over": 11, "wise": [11, 19], "aggreg": 11, "example_custom_mp": 11, "argument": [11, 17, 19], "mp_config_overrid": [11, 14, 17, 18], "wai": [11, 14, 19], "mani": 11, "class": [11, 17, 18, 22], "custom_mpwrapp": 11, "mp_config": [11, 22], "weights_scal": [11, 15], "example_fully_custom_mp": 11, "custom_env_id": 11, "custom_env_id_dmp": 11, "custom_env_id_promp": 11, "upgrad": [11, 17, 22, 23], "mp_wrapper": [11, 15, 17, 18, 22], "add_mp_typ": [11, 17, 18], "base_id": [11, 18], "try": [11, 19, 23], "don": 11, "correlcti": 11, "except": [11, 19], "pass": [11, 17], "example_fully_custom_mp_altern": 11, "instead": [11, 17, 18, 20, 22], "mp_arg": 11, "dure": 11, "registr": [11, 18], "prodmp": [11, 15, 17, 18, 19, 20, 22, 23], "boxpushingdensereplan": [11, 15], "alter": 11, "obs1": 11, "compare_bases_shap": 12, "env1_id": 12, "env2_id": 12, "env1": 12, "traj_gen": [12, 13], "show_scaled_basi": 12, "env2": 12, "stuff": 13, "look": [13, 19, 22], "boolean": [13, 22], "ordereddict": 14, "matplotlib": 14, "pyplot": 14, "plt": 14, "howev": [14, 19, 22], "verifi": 14, "extract": 14, "below": 14, "w": 14, "po": [14, 15], "vel": [14, 15], "get_trajectori": 14, "base_shap": 14, "actual_po": 14, "len": 14, "actual_vel": 14, "act": 14, "ion": 14, "fig": 14, "figur": 14, "add_subplot": 14, "img": 14, "imshow": 14, "rgb_arrai": 14, "show": [14, 19], "des_po": 14, "des_vel": 14, "enumer": 14, "zip": 14, "tracking_control": 14, "get_act": 14, "current_po": [14, 22], "current_vel": [14, 22], "clip": 14, "low": 14, "set_data": 14, "canva": 14, "draw": 14, "flush_ev": 14, "figsiz": 14, "subplot": 14, "131": 14, "titl": [14, 23], "p1": 14, "c": 14, "c0": 14, "label": 14, "p2": 14, "c1": 14, "xlabel": 14, "gca": 14, "get_legend_handles_label": 14, "by_label": 14, "legend": 14, "kei": [14, 19], "132": 14, "133": 14, "std": 14, "example_run_replanning_env": 15, "break": 15, "example_custom_replanning_env": 15, "box_push": 15, "max_planning_tim": 15, "plan": 15, "replanning_schedul": 15, "trigger": 15, "condition_on_desir": 15, "boundari": [15, 23], "next": 15, "str": [17, 18], "entry_point": [17, 22], "union": [17, 22], "callabl": 17, "black_box": [17, 18], "raw_interface_wrapp": [17, 18], "registri": [17, 18], "defaultmpwrapp": [17, 18], "register_step_bas": 17, "bool": [17, 22], "dict": [17, 18], "kwarg": 17, "If": [17, 19, 21, 22, 23], "want": [17, 21, 23], "uniqu": [17, 18, 20], "identifi": [17, 18], "entri": 17, "srtep": 17, "dictionari": [17, 18, 19], "overrid": [17, 18], "keyword": 17, "constructor": 17, "note": [17, 18], "otherwis": [17, 18], "given": [17, 19, 22], "string": 17, "notat": 17, "warn": 17, "messag": 17, "suggest": 17, "exampl": [17, 18, 19, 22], "To": [17, 18, 19, 23], "myenv": [17, 18], "myenvclass": 17, "my_modul": 17, "expect": 18, "known_mp": 18, "Will": [18, 23], "match": [18, 22], "wish": 18, "one": [18, 22, 23], "alongsid": 18, "custommpwrapp": 18, "param": [18, 23], "prepar": 19, "ad": 19, "namespac": 19, "legaci": [19, 21], "rais": [19, 22], "metaworld": [19, 20, 21, 23], "n": 19, "cumul": 19, "part": [19, 22], "mainli": 19, "meant": 19, "debug": 19, "log": 19, "train": 19, "step_act": 19, "output": 19, "step_observ": 19, "intermedi": 19, "step_reward": 19, "trajectory_length": 19, "underli": 19, "origin": 19, "In": [19, 22], "miss": 19, "fill": 19, "_": 19, "keep": 19, "mind": 19, "process": 19, "split": 19, "lean": 19, "still": [19, 22], "beta": 19, "feel": [19, 22], "problem": 19, "occur": 19, "directli": [19, 22], "gym_": 19, "again": 19, "conveni": 19, "variabl": 19, "store": 19, "all_movement_primitive_environ": 19, "all_fancy_movement_primitive_environ": 19, "all_gym_movement_primitive_environ": 19, "deepmind": [19, 23], "all_dmc_movement_primitive_environ": 19, "all_metaworld_movement_primitive_environ": 19, "movement_primitive_environments_for_n": 19, "my_custom_namespac": 19, "tradit": 20, "concept": 20, "stochast": 20, "search": 20, "commonli": 20, "produc": 20, "like": [20, 21], "probabilist": [20, 23], "convert": 20, "track": 20, "pd": [20, 23], "tailor": 20, "addition": 20, "special": 20, "overarch": 20, "remain": 20, "polici": 20, "craft": 20, "accommod": 20, "contextu": [20, 22], "At": 20, "onset": 20, "subset": 20, "demand": 20, "virtual": 21, "venv": 21, "3rd": 21, "altern": [21, 23], "poetri": 21, "conda": 21, "few": 21, "choos": 21, "box2d": 21, "jax": 21, "automat": 21, "date": 21, "sinc": 21, "git": 21, "c822f28f582ba1ad49eb5dcf61016566f28003ba": 21, "egg": 21, "clone": 21, "repositori": 21, "go": 21, "folder": 21, "cd": 21, "manual": 21, "guid": 22, "explain": 22, "how": 22, "abc": 22, "abstractmethod": 22, "properti": 22, "context_mask": 22, "mask": 22, "filter": 22, "unwant": 22, "unnecessari": 22, "after": 22, "first": 22, "receiv": 22, "arrai": 22, "indic": 22, "ones": 22, "dtype": 22, "float": 22, "exclus": 22, "regardless": 22, "indirectli": 22, "notimplementederror": 22, "overitten": 22, "attribut": 22, "document": 22, "mp_pytorch": 22, "userguid": 22, "anoth": 22, "merg": 22, "num_basis_zero_go": 22, "rough": 22, "outlin": 22, "shown": 22, "simpli": 22, "cool_new_env": 22, "my_custom_mpwrapp": 22, "my_custom_env": 22, "custom_prodmp": 22, "built": 23, "fork": 23, "renown": 23, "librari": 23, "sever": 23, "etc": 23, "With": 23, "straightforward": 23, "transform": 23, "compat": 23, "contribut": 23, "own": 23, "re": 23, "inspir": 23, "assist": 23, "highli": 23, "randomli": 23, "sleep": 23, "metadata": 23, "render_fp": 23, "about": 23, "pypi": 23, "master": 23, "what": 23, "usag": 23, "tune": 23, "public": 23, "softwar": 23, "author": 23, "otto": 23, "fabian": 23, "celik": 23, "onur": 23, "roth": 23, "dominik": 23, "zhou": 23, "hongyi": 23, "abstract": 23, "unifi": 23, "approach": 23, "url": 23, "organ": 23, "autonom": 23, "lab": 23, "alr": 23, "kit": 23}, "objects": {"fancy_gym": [[16, 0, 0, "-", "envs"], [17, 1, 1, "", "register"], [18, 1, 1, "", "upgrade"]]}, "objtypes": {"0": "py:module", "1": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"]}, "titleterms": {"api": [0, 23], "deepmind": [1, 8], "control": [1, 3, 8, 14], "dmc": 1, "step": [1, 3, 5, 6, 7, 19], "base": [1, 3, 5, 6, 7, 19], "environ": [1, 3, 5, 6, 7, 19, 22, 23], "mp": [1, 3, 5, 6, 7, 12, 22], "airhockei": 2, "classic": 3, "fanci": [4, 23], "mujoco": 5, "box": [5, 19], "push": 5, "tabl": 5, "tenni": 5, "beer": 5, "pong": 5, "variat": 5, "exist": 5, "metaworld": [6, 10], "gymnasium": 7, "exampl": [8, 9, 10, 11, 12, 13, 14, 15, 23], "gener": 9, "usag": [9, 19], "movement": 11, "primit": 11, "param": 12, "tune": [12, 14], "openai": 13, "env": [13, 16], "pd": 14, "gain": 14, "replan": 15, "fancy_gym": [16, 17, 18], "regist": 17, "upgrad": 18, "basic": 19, "black": 19, "what": 20, "i": 20, "episod": 20, "rl": 20, "instal": 21, "from": 21, "pypi": 21, "recommend": 21, "master": 21, "creat": 22, "new": 22, "gym": 23, "kei": 23, "featur": 23, "quickstart": 23, "guid": 23, "user": 23, "cite": 23, "project": 23, "icon": 23, "attribut": 23}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"API": [[0, "api"], [23, null]], "DeepMind Control (DMC)": [[1, "deepmind-control-dmc"]], "Step-Based Environments": [[1, "step-based-environments"], [3, "step-based-environments"], [5, "step-based-environments"], [6, "step-based-environments"], [7, "step-based-environments"], [19, "step-based-environments"]], "MP Environments": [[1, "mp-environments"], [3, "mp-environments"], [5, "mp-environments"], [6, "mp-environments"], [7, "mp-environments"]], "AirHockey": [[2, "airhockey"]], "Classic Control": [[3, "classic-control"]], "Fancy": [[4, "fancy"]], "Mujoco": [[5, "mujoco"]], "Box Pushing": [[5, "box-pushing"]], "Table Tennis": [[5, "table-tennis"]], "Beer Pong": [[5, "beer-pong"]], "Variations of existing environments": [[5, "variations-of-existing-environments"]], "Metaworld": [[6, "metaworld"]], "Gymnasium": [[7, "gymnasium"]], "DeepMind Control Examples": [[8, "deepmind-control-examples"]], "General Usage Examples": [[9, "general-usage-examples"]], "Metaworld Examples": [[10, "metaworld-examples"]], "Movement Primitives Examples": [[11, "movement-primitives-examples"]], "MP Params Tuning Example": [[12, "mp-params-tuning-example"]], "OpenAI Envs Examples": [[13, "openai-envs-examples"]], "PD Control Gain Tuning Example": [[14, "pd-control-gain-tuning-example"]], "Replanning Example": [[15, "replanning-example"]], "fancy_gym.envs": [[16, "module-fancy_gym.envs"]], "fancy_gym.register": [[17, "fancy-gym-register"]], "fancy_gym.upgrade": [[18, "fancy-gym-upgrade"]], "Basic Usage": [[19, "basic-usage"]], "Black-Box Environments": [[19, "black-box-environments"]], "What is Episodic RL?": [[20, "what-is-episodic-rl"]], "Installation": [[21, "installation"]], "Installation from PyPI (recommended)": [[21, "installation-from-pypi-recommended"]], "Installation from master": [[21, "installation-from-master"]], "Creating new MP Environments": [[22, "creating-new-mp-environments"]], "Fancy Gym": [[23, "fancy-gym"]], "Key Features": [[23, "key-features"]], "Quickstart Guide": [[23, "quickstart-guide"]], "User Guide": [[23, null]], "Environments": [[23, null]], "Examples": [[23, null]], "Citing the Project": [[23, "citing-the-project"]], "Icon Attribution": [[23, "icon-attribution"]]}, "indexentries": {"fancy_gym.envs": [[16, "module-fancy_gym.envs"]], "module": [[16, "module-fancy_gym.envs"]], "register() (in module fancy_gym)": [[17, "fancy_gym.register"]], "upgrade() (in module fancy_gym)": [[18, "fancy_gym.upgrade"]]}}) \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 8ce3d31..0f17ef9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,13 +1,17 @@ # This conf.py is in large parts inspired by the oen used by stable-baselines 3 +import toml import datetime project = 'Fancy Gym' author = 'Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou' copyright = f'2020-{datetime.date.today().year}, {author}' -release = '0.2' # The full version, including alpha/beta/rc tags -version = '0.2' # The short X.Y version +pyproject_content = toml.load("../../pyproject.toml") +proj_version = pyproject_content["project"]["version"] + +release = proj_version # The full version, including alpha/beta/rc tags +version = proj_version # The short X.Y version extensions = [ 'myst_parser', @@ -50,4 +54,4 @@ html_context = { } def setup(app): - app.add_css_file("style.css") \ No newline at end of file + app.add_css_file("style.css") diff --git a/docs/source/guide/installation.rst b/docs/source/guide/installation.rst index 0077ba4..1c10973 100644 --- a/docs/source/guide/installation.rst +++ b/docs/source/guide/installation.rst @@ -32,7 +32,7 @@ since they are not avaible on PyPI yet. Install metaworld via .. code:: bash - pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld + pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld Installation from master ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -70,4 +70,4 @@ Metaworld has to be installed manually with .. code:: bash - pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld \ No newline at end of file + pip install metaworld@git+https://github.com/Farama-Foundation/Metaworld.git@c822f28f582ba1ad49eb5dcf61016566f28003ba#egg=metaworld diff --git a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py index 97cde0e..f89e0dd 100644 --- a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py +++ b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py @@ -115,6 +115,7 @@ class AntJumpEnv(AntEnvCustomXML): contact_force_range=contact_force_range, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs) + self.render_active = False def step(self, action): self.current_step += 1 @@ -153,8 +154,15 @@ class AntJumpEnv(AntEnvCustomXML): } truncated = False + if self.render_active and self.render_mode=='human': + self.render() + return obs, reward, terminated, truncated, info + def render(self): + self.render_active = True + return super().render() + def _get_obs(self): return np.append(super()._get_obs(), self.goal) diff --git a/fancy_gym/envs/mujoco/beerpong/beerpong.py b/fancy_gym/envs/mujoco/beerpong/beerpong.py index 802776f..e3af3fc 100644 --- a/fancy_gym/envs/mujoco/beerpong/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/beerpong.py @@ -44,6 +44,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + utils.EzPickle.__init__(self) self._steps = 0 # Small Context -> Easier. Todo: Should we do different versions? # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "beerpong_wo_cup.xml") @@ -89,7 +90,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): observation_space=self.observation_space, **kwargs ) - utils.EzPickle.__init__(self) + self.render_active = False @property def start_pos(self): @@ -169,8 +170,15 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): truncated = False + if self.render_active and self.render_mode=='human': + self.render() + return ob, reward, terminated, truncated, infos + def render(self): + self.render_active = True + return super().render() + def _get_obs(self): theta = self.data.qpos.flat[:7].copy() theta_dot = self.data.qvel.flat[:7].copy() diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index a508d84..5d5c653 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -4,6 +4,7 @@ import numpy as np from gymnasium import utils, spaces from gymnasium.envs.mujoco import MujocoEnv from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance +from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import calculate_jerk_profile, calculate_mean_squared_jerk, calculate_dimensionless_jerk, calculate_maximum_jerk @@ -62,6 +63,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): frame_skip=self.frame_skip, observation_space=self.observation_space, **kwargs) self.action_space = spaces.Box(low=-1, high=1, shape=(7,)) + self.render_active = False def step(self, action): action = 10 * np.clip(action, self.action_space.low, self.action_space.high) @@ -116,8 +118,15 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): terminated = episode_end and infos['is_success'] truncated = episode_end and not infos['is_success'] + if self.render_active and self.render_mode=='human': + self.render() + return obs, reward, terminated, truncated, infos + def render(self): + self.render_active = True + return super().render() + def calculate_smoothness_metrics(self, velocity_profile, dt): """ Calculates the smoothness metrics for the given velocity profile. diff --git a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py index 088f959..24d855d 100644 --- a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py +++ b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py @@ -60,7 +60,11 @@ class HalfCheetahEnvCustomXML(HalfCheetahEnv): default_camera_config=DEFAULT_CAMERA_CONFIG, **kwargs, ) + self.render_active = False + def render(self): + self.render_active = True + return super().render() class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML): """ @@ -120,6 +124,9 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML): 'max_height': self.max_height } + if self.render_active and self.render_mode=='human': + self.render() + return observation, reward, terminated, truncated, info def _get_obs(self): diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py index 96dd3a3..d6e7be3 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py @@ -88,6 +88,12 @@ class HopperEnvCustomXML(HopperEnv): **kwargs, ) + self.render_active = False + + def render(self): + self.render_active = True + return super().render() + class HopperJumpEnv(HopperEnvCustomXML): """ @@ -201,6 +207,10 @@ class HopperJumpEnv(HopperEnvCustomXML): healthy=self.is_healthy, contact_dist=self.contact_dist or 0 ) + + if self.render_active and self.render_mode=='human': + self.render() + return observation, reward, terminated, truncated, info def _get_obs(self): diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py index c0c57c2..b56840b 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py @@ -140,6 +140,9 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML): truncated = self.current_step >= self.max_episode_steps and not terminated + if self.render_active and self.render_mode=='human': + self.render() + return observation, reward, terminated, truncated, info def _get_obs(self): diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py index 7a39cd8..bf9169e 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py @@ -61,6 +61,8 @@ class HopperThrowEnv(HopperEnvCustomXML): exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs) + self.render_active = False + def step(self, action): self.current_step += 1 self.do_simulation(action, self.frame_skip) @@ -94,8 +96,15 @@ class HopperThrowEnv(HopperEnvCustomXML): } truncated = False + if self.render_active and self.render_mode=='human': + self.render() + return observation, reward, terminated, truncated, info + def render(self): + self.render_active = True + return super().render() + def _get_obs(self): return np.append(super()._get_obs(), self.goal) diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index 24ad402..56bbbec 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -68,6 +68,7 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML): reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs) + self.render_active = False def step(self, action): @@ -118,8 +119,15 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML): } truncated = False + if self.render_active and self.render_mode=='human': + self.render() + return observation, reward, terminated, truncated, info + def render(self): + self.render_active = True + return super().render() + def _get_obs(self): return np.append(super()._get_obs(), self.basket_x) diff --git a/fancy_gym/envs/mujoco/reacher/reacher.py b/fancy_gym/envs/mujoco/reacher/reacher.py index f5af7f6..f3901a6 100644 --- a/fancy_gym/envs/mujoco/reacher/reacher.py +++ b/fancy_gym/envs/mujoco/reacher/reacher.py @@ -47,6 +47,8 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): **kwargs ) + self.render_active = False + def step(self, action): self._steps += 1 @@ -77,8 +79,15 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): goal=self.goal if hasattr(self, "goal") else None ) + if self.render_active and self.render_mode=='human': + self.render() + return ob, reward, terminated, truncated, info + def render(self): + self.render_active = True + return super().render() + def distance_reward(self): vec = self.get_body_com("fingertip") - self.get_body_com("target") return -self._reward_weight * np.linalg.norm(vec) diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index e6a14f8..a586305 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -83,6 +83,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): observation_space=self.observation_space, **kwargs) + self.render_active = False + if ctxt_dim == 2: self.context_bounds = CONTEXT_BOUNDS_2DIMS elif ctxt_dim == 4: @@ -170,8 +172,15 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS + if self.render_active and self.render_mode=='human': + self.render() + return self._get_obs(), reward, terminated, truncated, info + def render(self): + self.render_active = True + return super().render() + def _contact_checker(self, id_1, id_2): for coni in range(0, self.data.ncon): con = self.data.contact[coni] diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py index d9085ee..54ab00e 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -79,6 +79,8 @@ class Walker2dEnvCustomXML(Walker2dEnv): **kwargs, ) + self.render_active = False + class Walker2dJumpEnv(Walker2dEnvCustomXML): """ @@ -145,8 +147,15 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML): } truncated = False + if self.render_active and self.render_mode=='human': + self.render() + return observation, reward, terminated, truncated, info + def render(self): + self.render_active = True + return super().render() + def _get_obs(self): return np.append(super()._get_obs(), self.goal) diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py index b06c970..47283a4 100644 --- a/fancy_gym/examples/example_replanning_envs.py +++ b/fancy_gym/examples/example_replanning_envs.py @@ -3,14 +3,14 @@ import fancy_gym def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False): - env = gym.make(env_name) + env = gym.make(env_name, render_mode='human' if render else None) env.reset(seed=seed) for i in range(iterations): while True: ac = env.action_space.sample() obs, reward, terminated, truncated, info = env.step(ac) if render: - env.render(mode="human") + env.render() if terminated or truncated: env.reset() break @@ -38,13 +38,13 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True): 'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, 'condition_on_desired': True} - base_env = gym.make(base_env_id) + base_env = gym.make(base_env_id, render_mode='human' if render else None) env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, seed=seed) if render: - env.render(mode="human") + env.render() obs = env.reset() diff --git a/fancy_gym/examples/examples_dmc.py b/fancy_gym/examples/examples_dmc.py index 2703e01..f02b7d9 100644 --- a/fancy_gym/examples/examples_dmc.py +++ b/fancy_gym/examples/examples_dmc.py @@ -17,7 +17,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T Returns: """ - env = gym.make(env_id) + env = gym.make(env_id, render_mode='human' if render else None) rewards = 0 obs = env.reset(seed=seed) print("observation shape:", env.observation_space.shape) @@ -26,7 +26,7 @@ def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=T for i in range(iterations): ac = env.action_space.sample() if render: - env.render(mode="human") + env.render() obs, reward, terminated, truncated, info = env.step(ac) rewards += reward @@ -84,7 +84,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): # basis_generator_kwargs = {'basis_generator_type': 'rbf', # 'num_basis': 5 # } - base_env = gym.make(base_env_id) + base_env = gym.make(base_env_id, render_mode='human' if render else None) env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, @@ -96,7 +96,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): # It is also possible to change them mode multiple times when # e.g. only every nth trajectory should be displayed. if render: - env.render(mode="human") + env.render() rewards = 0 obs = env.reset() @@ -115,7 +115,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): env.close() del env -def main(render = True): +def main(render = False): # # Standard DMC Suite tasks example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) # diff --git a/fancy_gym/examples/examples_general.py b/fancy_gym/examples/examples_general.py index 9def5b6..aff6b5c 100644 --- a/fancy_gym/examples/examples_general.py +++ b/fancy_gym/examples/examples_general.py @@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True): """ - env = gym.make(env_id) + env = gym.make(env_id, render_mode='human' if render else None) rewards = 0 obs = env.reset(seed=seed) print("Observation shape: ", env.observation_space.shape) @@ -85,7 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16), # do not return values above threshold return *map(lambda v: np.stack(v)[:n_samples], buffer.values()), -def main(render = True): +def main(render = False): # Basic gym task example_general("Pendulum-v1", seed=10, iterations=200, render=render) diff --git a/fancy_gym/examples/examples_metaworld.py b/fancy_gym/examples/examples_metaworld.py index bd87c2b..f8b59cd 100644 --- a/fancy_gym/examples/examples_metaworld.py +++ b/fancy_gym/examples/examples_metaworld.py @@ -2,7 +2,7 @@ import gymnasium as gym import fancy_gym -def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True): +def example_meta(env_id="metaworld/button-press-v2", seed=1, iterations=1000, render=True): """ Example for running a MetaWorld based env in the step based setting. The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always @@ -18,7 +18,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True): Returns: """ - env = gym.make(env_id) + env = gym.make(env_id, render_mode='human' if render else None) rewards = 0 obs = env.reset(seed=seed) print("observation shape:", env.observation_space.shape) @@ -27,9 +27,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True): for i in range(iterations): ac = env.action_space.sample() if render: - # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM - # TODO: Remove this, when Metaworld fixes its interface. - env.render(False) + env.render() obs, reward, terminated, truncated, info = env.step(ac) rewards += reward if terminated or truncated: @@ -81,7 +79,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True): basis_generator_kwargs = {'basis_generator_type': 'rbf', 'num_basis': 5 } - base_env = gym.make(base_env_id) + base_env = gym.make(base_env_id, render_mode='human' if render else None) env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, @@ -93,7 +91,7 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True): # It is also possible to change them mode multiple times when # e.g. only every nth trajectory should be displayed. if render: - env.render(mode="human") + env.render() rewards = 0 obs = env.reset(seed=seed) diff --git a/fancy_gym/examples/examples_open_ai.py b/fancy_gym/examples/examples_open_ai.py index f1688ef..5dbd10e 100644 --- a/fancy_gym/examples/examples_open_ai.py +++ b/fancy_gym/examples/examples_open_ai.py @@ -13,15 +13,13 @@ def example_mp(env_name, seed=1, render=True): Returns: """ - env = gym.make(env_name) + env = gym.make(env_name, render_mode='human' if render else None) returns = 0 obs = env.reset(seed=seed) # number of samples/full trajectories (multiple environment steps) for i in range(10): if render and i % 2 == 0: - env.render(mode="human") - else: env.render() ac = env.action_space.sample() obs, reward, terminated, truncated, info = env.step(ac) diff --git a/pyproject.toml b/pyproject.toml index 20e40b0..f18b987 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fancy_gym" -version = "0.1.4" +version = "0.3.0" description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches." readme = "README.md" authors = [ @@ -26,6 +26,7 @@ classifiers = [ ] dependencies = [ + "toml", "mp_pytorch<=0.1.3", "mujoco==2.3.3", "gymnasium[mujoco]>=0.26.0" diff --git a/setup.py b/setup.py index 2bd077d..2a533fa 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ # We still provide a setup.py for backwards compatability. # But the pyproject.toml should be prefered. +import toml import itertools from pathlib import Path from typing import List @@ -8,6 +9,9 @@ from setuptools import setup, find_packages print('[!] You are currently installing/building fancy_gym via setup.py. This is only provided for backwards-compatability. Please use the pyproject.toml instead.') +pyproject_content = toml.load("pyproject.toml") +project_version = pyproject_content["project"]["version"] + # Environment-specific dependencies for dmc and metaworld extras = { 'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'], @@ -38,7 +42,7 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]: setup( author='Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou', name='fancy_gym', - version='0.1.0', + version=project_version, classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Science/Research', @@ -55,6 +59,7 @@ setup( ], extras_require=extras, install_requires=[ + 'toml', 'mp_pytorch<=0.1.3', 'mujoco==2.3.3', 'gymnasium[mujoco]>=0.26.0'