From 0c7ac838bf9bf5ac3295a8002c5b8b381bf6592c Mon Sep 17 00:00:00 2001
From: Fabian
Date: Thu, 20 Oct 2022 10:10:44 +0200
Subject: [PATCH 001/198] first updates to reacher and env creation

---
 fancy_gym/envs/mujoco/reacher/reacher.py | 34 +++++++++++++++++++-----
 fancy_gym/utils/make_env_helpers.py | 5 +++-
 test/test_fancy_envs.py | 2 +-
 test/utils.py | 1 +
 4 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/fancy_gym/envs/mujoco/reacher/reacher.py b/fancy_gym/envs/mujoco/reacher/reacher.py
index ccd0073..c3c870b 100644
--- a/fancy_gym/envs/mujoco/reacher/reacher.py
+++ b/fancy_gym/envs/mujoco/reacher/reacher.py
@@ -3,6 +3,7 @@ import os
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import MujocoEnv
+from gym.spaces import Box
 
 MAX_EPISODE_STEPS_REACHER = 200
 
@@ -12,7 +13,17 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
     More general version of the gym mujoco Reacher environment
     """
 
-    def __init__(self, sparse: bool = False, n_links: int = 5, reward_weight: float = 1, ctrl_cost_weight: float = 1):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 50,
+    }
+
+    def __init__(self, sparse: bool = False, n_links: int = 5, reward_weight: float = 1, ctrl_cost_weight: float = 1.,
+                 **kwargs):
         utils.EzPickle.__init__(**locals())
 
         self._steps = 0
@@ -25,10 +36,16 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
 
         file_name = f'reacher_{n_links}links.xml'
 
+        # sin, cos, velocity * n_links + goal position (2) and goal distance (3)
+        shape = (self.n_links * 3 + 5,)
+        observation_space = Box(low=-np.inf, high=np.inf, shape=shape, dtype=np.float64)
+
        MujocoEnv.__init__(self,
                           model_path=os.path.join(os.path.dirname(__file__), "assets", file_name),
                           frame_skip=2,
-                           mujoco_bindings="mujoco")
+                           observation_space=observation_space,
+                           **kwargs
+                           )
 
     def step(self, action):
         self._steps += 1
@@ -45,10 +62,14 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
         reward = reward_dist + reward_ctrl + angular_vel
         self.do_simulation(action, self.frame_skip)
 
-        ob = self._get_obs()
-        done = False
+        if self.render_mode == "human":
+            self.render()
 
-        infos = dict(
+        ob = self._get_obs()
+        terminated = False
+        truncated = False
+
+        info = dict(
             reward_dist=reward_dist,
             reward_ctrl=reward_ctrl,
             velocity=angular_vel,
@@ -56,7 +77,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
             goal=self.goal if hasattr(self, "goal") else None
         )
 
-        return ob, reward, done, infos
+        return ob, reward, terminated, truncated, info
 
     def distance_reward(self):
         vec = self.get_body_com("fingertip") - self.get_body_com("target")
@@ -66,6 +87,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
         return -10 * np.square(self.data.qvel.flat[:self.n_links]).sum() if self.sparse else 0.0
 
     def viewer_setup(self):
+        assert self.viewer is not None
         self.viewer.cam.trackbodyid = 0
 
     def reset_model(self):
diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index 5221423..68bb66d 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -9,6 +9,7 @@ from typing import Iterable, Type, Union
 import gym
 import numpy as np
 from gym.envs.registration import register, registry
+from gym.utils import seeding
 
 try:
     from dm_control import suite, manipulation
@@ -88,7 +89,9 @@ def make(env_id: str, seed: int, **kwargs):
     else:
         env = make_gym(env_id, seed, **kwargs)
 
-    env.seed(seed)
+    np_random, _ = seeding.np_random(seed)
+    env.np_random = np_random
+    # env.seed(seed)
     env.action_space.seed(seed)
     env.observation_space.seed(seed)
 
diff --git
a/test/test_fancy_envs.py b/test/test_fancy_envs.py index 9acd696..7b7d5ca 100644 --- a/test/test_fancy_envs.py +++ b/test/test_fancy_envs.py @@ -6,7 +6,7 @@ import pytest from test.utils import run_env, run_env_determinism -CUSTOM_IDS = [spec.id for spec in gym.envs.registry.all() if +CUSTOM_IDS = [id for id, spec in gym.envs.registry.items() if "fancy_gym" in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] CUSTOM_MP_IDS = itertools.chain(*fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) SEED = 1 diff --git a/test/utils.py b/test/utils.py index 7ed8d61..88b56bc 100644 --- a/test/utils.py +++ b/test/utils.py @@ -24,6 +24,7 @@ def run_env(env_id, iterations=None, seed=0, render=False): actions = [] dones = [] obs = env.reset() + print(obs.dtype) verify_observations(obs, env.observation_space, "reset()") iterations = iterations or (env.spec.max_episode_steps or 1) From fbe3ef4a4b219a7cb0b5fcab81a6b57e5f9c38ef Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 12 Jan 2023 17:21:56 +0100 Subject: [PATCH 002/198] updated examples to new api, --- fancy_gym/examples/examples_dmc.py | 8 ++--- fancy_gym/examples/examples_general.py | 13 ++++--- fancy_gym/examples/examples_metaworld.py | 9 +++-- .../examples/examples_movement_primitives.py | 12 +++---- fancy_gym/examples/examples_open_ai.py | 4 +-- fancy_gym/examples/pd_control_gain_tuning.py | 2 +- setup.py | 8 +++-- test/test_dmc_envs.py | 36 ++++++++++--------- test/test_fancy_envs.py | 4 ++- test/test_gym_envs.py | 4 +-- test/utils.py | 29 ++++++++------- 11 files changed, 71 insertions(+), 58 deletions(-) diff --git a/fancy_gym/examples/examples_dmc.py b/fancy_gym/examples/examples_dmc.py index 75648b7..243bd70 100644 --- a/fancy_gym/examples/examples_dmc.py +++ b/fancy_gym/examples/examples_dmc.py @@ -26,10 +26,10 @@ def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True): ac = env.action_space.sample() if render: env.render(mode="human") - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) rewards += reward - if done: + if terminated or truncated: print(env_id, rewards) rewards = 0 obs = env.reset() @@ -102,10 +102,10 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): # number of samples/full trajectories (multiple environment steps) for i in range(iterations): ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) rewards += reward - if done: + if terminated or truncated: print(base_env_id, rewards) rewards = 0 obs = env.reset() diff --git a/fancy_gym/examples/examples_general.py b/fancy_gym/examples/examples_general.py index 1a89e30..383c4cf 100644 --- a/fancy_gym/examples/examples_general.py +++ b/fancy_gym/examples/examples_general.py @@ -1,6 +1,6 @@ from collections import defaultdict -import gym +import gymnasium as gym import numpy as np import fancy_gym @@ -29,13 +29,13 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True): # number of environment steps for i in range(iterations): - obs, reward, done, info = env.step(env.action_space.sample()) + obs, reward, terminated, truncated, info = env.step(env.action_space.sample()) rewards += reward if render: env.render() - if done: + if terminated or truncated: print(rewards) rewards = 0 obs = env.reset() @@ -69,12 +69,15 @@ def example_async(env_id="HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samp # this would generate more samples than requested if n_samples % 
num_envs != 0 repeat = int(np.ceil(n_samples / env.num_envs)) for i in range(repeat): - obs, reward, done, info = env.step(env.action_space.sample()) + obs, reward, terminated, truncated, info = env.step(env.action_space.sample()) buffer['obs'].append(obs) buffer['reward'].append(reward) - buffer['done'].append(done) + buffer['terminated'].append(terminated) + buffer['truncated'].append(truncated) buffer['info'].append(info) rewards += reward + + done = terminated or truncated if np.any(done): print(f"Reward at iteration {i}: {rewards[done]}") rewards[done] = 0 diff --git a/fancy_gym/examples/examples_metaworld.py b/fancy_gym/examples/examples_metaworld.py index 0fa7066..0c38bff 100644 --- a/fancy_gym/examples/examples_metaworld.py +++ b/fancy_gym/examples/examples_metaworld.py @@ -29,9 +29,9 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM # TODO: Remove this, when Metaworld fixes its interface. env.render(False) - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) rewards += reward - if done: + if terminated or truncated: print(env_id, rewards) rewards = 0 obs = env.reset() @@ -103,10 +103,10 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): # number of samples/full trajectories (multiple environment steps) for i in range(iterations): ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) rewards += reward - if done: + if terminated or truncated: print(base_env_id, rewards) rewards = 0 obs = env.reset() @@ -131,4 +131,3 @@ if __name__ == '__main__': # # # Custom MetaWorld task example_custom_dmc_and_mp(seed=10, iterations=1, render=render) - diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index da7c94d..6a8ad20 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -41,11 +41,11 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True # This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal # to the return of a trajectory. Default is the sum over the step-wise rewards. 
- obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) # Aggregated returns returns += reward - if done: + if terminated or truncated: print(reward) obs = env.reset() @@ -79,10 +79,10 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render # number of samples/full trajectories (multiple environment steps) for i in range(iterations): ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) returns += reward - if done: + if terminated or truncated: print(i, reward) obs = env.reset() @@ -145,10 +145,10 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): # number of samples/full trajectories (multiple environment steps) for i in range(iterations): ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) rewards += reward - if done: + if terminated or truncated: print(rewards) rewards = 0 obs = env.reset() diff --git a/fancy_gym/examples/examples_open_ai.py b/fancy_gym/examples/examples_open_ai.py index a4a162d..30ac1dd 100644 --- a/fancy_gym/examples/examples_open_ai.py +++ b/fancy_gym/examples/examples_open_ai.py @@ -24,10 +24,10 @@ def example_mp(env_name, seed=1, render=True): else: env.render(mode=None) ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) returns += reward - if done: + if terminated or truncated: print(returns) obs = env.reset() diff --git a/fancy_gym/examples/pd_control_gain_tuning.py b/fancy_gym/examples/pd_control_gain_tuning.py index 407bfa1..4cfae39 100644 --- a/fancy_gym/examples/pd_control_gain_tuning.py +++ b/fancy_gym/examples/pd_control_gain_tuning.py @@ -34,7 +34,7 @@ fig.show() for t, pos_vel in enumerate(zip(pos, vel)): actions = env.tracking_controller.get_action(pos_vel[0], pos_vel[1], env.current_vel, env.current_pos) actions = np.clip(actions, env.env.action_space.low, env.env.action_space.high) - _, _, _, _ = env.env.step(actions) + env.env.step(actions) if t % 15 == 0: img.set_data(env.env.render(mode="rgb_array")) fig.canvas.draw() diff --git a/setup.py b/setup.py index 1148e85..4a8b480 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,10 @@ extras = { "dmc": ["dm_control>=1.0.1"], "metaworld": ["metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld", 'mujoco-py<2.2,>=2.1', - 'scipy' + 'scipy', + 'gym>=0.15.4', ], + "mujoco": ["gymnasium[mujoco]"], } # All dependencies @@ -18,7 +20,7 @@ extras["all"] = list(set(itertools.chain.from_iterable(map(lambda group: extras[ setup( author='Fabian Otto, Onur Celik', name='fancy_gym', - version='0.2', + version='0.3', classifiers=[ # Python 3.7 is minimally supported "Programming Language :: Python :: 3", @@ -29,7 +31,7 @@ setup( ], extras_require=extras, install_requires=[ - 'gym[mujoco]<0.25.0,>=0.24.0', + 'gymnasium', 'mp_pytorch @ git+https://github.com/ALRhub/MP_PyTorch.git@main' ], packages=[package for package in find_packages() if package.startswith("fancy_gym")], diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py index 410f3c1..71b27a3 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc_envs.py @@ -1,39 +1,43 @@ from itertools import chain +from typing import Callable +import gymnasium as gym import pytest from dm_control import suite, manipulation import fancy_gym from test.utils import run_env, run_env_determinism -SUITE_IDS = [f'dmc:{env}-{task}' for env, task 
in suite.ALL_TASKS if env != "lqr"] -MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] +# SUITE_IDS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] +# MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] +DM_CONTROL_IDS = [spec.id for spec in gym.envs.registry.values() if + not isinstance(spec.entry_point, Callable) and spec.entry_point.startswith('dm_control/')] DMC_MP_IDS = chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) SEED = 1 -@pytest.mark.parametrize('env_id', SUITE_IDS) -def test_step_suite_functionality(env_id: str): +@pytest.mark.parametrize('env_id', DM_CONTROL_IDS) +def test_step_dm_control_functionality(env_id: str): """Tests that suite step environments run without errors using random actions.""" run_env(env_id) -@pytest.mark.parametrize('env_id', SUITE_IDS) -def test_step_suite_determinism(env_id: str): +@pytest.mark.parametrize('env_id', DM_CONTROL_IDS) +def test_step_dm_control_determinism(env_id: str): """Tests that for step environments identical seeds produce identical trajectories.""" run_env_determinism(env_id, SEED) -@pytest.mark.parametrize('env_id', MANIPULATION_IDS) -def test_step_manipulation_functionality(env_id: str): - """Tests that manipulation step environments run without errors using random actions.""" - run_env(env_id) - - -@pytest.mark.parametrize('env_id', MANIPULATION_IDS) -def test_step_manipulation_determinism(env_id: str): - """Tests that for step environments identical seeds produce identical trajectories.""" - run_env_determinism(env_id, SEED) +# @pytest.mark.parametrize('env_id', MANIPULATION_IDS) +# def test_step_manipulation_functionality(env_id: str): +# """Tests that manipulation step environments run without errors using random actions.""" +# run_env(env_id) +# +# +# @pytest.mark.parametrize('env_id', MANIPULATION_IDS) +# def test_step_manipulation_determinism(env_id: str): +# """Tests that for step environments identical seeds produce identical trajectories.""" +# run_env_determinism(env_id, SEED) @pytest.mark.parametrize('env_id', DMC_MP_IDS) diff --git a/test/test_fancy_envs.py b/test/test_fancy_envs.py index 7b7d5ca..02208ce 100644 --- a/test/test_fancy_envs.py +++ b/test/test_fancy_envs.py @@ -1,12 +1,14 @@ import itertools +from typing import Callable import fancy_gym -import gym +import gymnasium as gym import pytest from test.utils import run_env, run_env_determinism CUSTOM_IDS = [id for id, spec in gym.envs.registry.items() if + not isinstance(spec.entry_point, Callable) and "fancy_gym" in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] CUSTOM_MP_IDS = itertools.chain(*fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) SEED = 1 diff --git a/test/test_gym_envs.py b/test/test_gym_envs.py index dae5944..20b089d 100644 --- a/test/test_gym_envs.py +++ b/test/test_gym_envs.py @@ -1,12 +1,12 @@ from itertools import chain -import gym +import gymnasium as gym import pytest import fancy_gym from test.utils import run_env, run_env_determinism -GYM_IDS = [spec.id for spec in gym.envs.registry.all() if +GYM_IDS = [spec.id for spec in gym.envs.registry.values() if "fancy_gym" not in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] GYM_MP_IDS = chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) SEED = 1 diff --git a/test/utils.py b/test/utils.py index 88b56bc..a57e58e 100644 --- a/test/utils.py +++ b/test/utils.py @@ -1,4 +1,4 @@ 
-import gym
+import gymnasium as gym
 import numpy as np
 
 from fancy_gym import make
@@ -15,16 +15,16 @@ def run_env(env_id, iterations=None, seed=0, render=False):
         seed: random seeding
         render: Render the episode
 
-    Returns: observations, rewards, dones, actions
+    Returns: observations, rewards, terminations, truncations, actions
 
     """
     env: gym.Env = make(env_id, seed=seed)
     rewards = []
     observations = []
     actions = []
-    dones = []
-    obs = env.reset()
-    print(obs.dtype)
+    terminations = []
+    truncations = []
+    obs, _ = env.reset()
     verify_observations(obs, env.observation_space, "reset()")
 
     iterations = iterations or (env.spec.max_episode_steps or 1)
@@ -36,26 +36,28 @@ def run_env(env_id, iterations=None, seed=0, render=False):
         ac = env.action_space.sample()
         actions.append(ac)
         # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
-        obs, reward, done, info = env.step(ac)
+        obs, reward, terminated, truncated, info = env.step(ac)
 
         verify_observations(obs, env.observation_space, "step()")
         verify_reward(reward)
-        verify_done(done)
+        verify_done(terminated)
+        verify_done(truncated)
 
         rewards.append(reward)
-        dones.append(done)
+        terminations.append(terminated)
+        truncations.append(truncated)
 
         if render:
             env.render("human")
 
-        if done:
+        if terminated or truncated:
             break
 
-    assert done, "Done flag is not True after end of episode."
+    assert terminated or truncated, "Termination or truncation flag is not True after end of episode."
     observations.append(obs)
     env.close()
     del env
-    return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
+    return np.array(observations), np.array(rewards), np.array(terminations), np.array(truncations), np.array(actions)
 
 
 def run_env_determinism(env_id: str, seed: int):
@@ -63,11 +65,12 @@ def run_env_determinism(env_id: str, seed: int):
     traj2 = run_env(env_id, seed=seed)
     # Iterate over two trajectories, which should have the same state and action sequence
     for i, time_step in enumerate(zip(*traj1, *traj2)):
-        obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+        obs1, rwd1, term1, trunc1, ac1, obs2, rwd2, term2, trunc2, ac2 = time_step
         assert np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match."
         assert np.array_equal(ac1, ac2), f"Actions [{i}] {ac1} and {ac2} do not match."
         assert np.array_equal(rwd1, rwd2), f"Rewards [{i}] {rwd1} and {rwd2} do not match."
-        assert np.array_equal(done1, done2), f"Dones [{i}] {done1} and {done2} do not match."
+        assert np.array_equal(term1, term2), f"Terminateds [{i}] {term1} and {term2} do not match."
+        assert np.array_equal(trunc1, trunc2), f"Truncateds [{i}] {trunc1} and {trunc2} do not match."
def verify_observations(obs, observation_space: gym.Space, obs_type="reset()"): From ed724046f3e3e097313501d4e69bdfe1f2b51469 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 12 Jan 2023 17:22:45 +0100 Subject: [PATCH 003/198] updated custom tasks to new api --- fancy_gym/black_box/black_box_wrapper.py | 20 +-- fancy_gym/black_box/raw_interface_wrapper.py | 2 +- fancy_gym/envs/__init__.py | 2 +- .../base_reacher/base_reacher.py | 22 +-- .../base_reacher/base_reacher_direct.py | 7 +- .../base_reacher/base_reacher_torque.py | 7 +- .../hole_reacher/hole_reacher.py | 23 +-- .../simple_reacher/simple_reacher.py | 24 +-- .../viapoint_reacher/viapoint_reacher.py | 26 +--- fancy_gym/envs/mujoco/ant_jump/ant_jump.py | 20 +-- fancy_gym/envs/mujoco/beerpong/beerpong.py | 37 +++-- .../mujoco/beerpong/deprecated/beerpong.py | 44 ++---- .../half_cheetah_jump/half_cheetah_jump.py | 17 ++- .../envs/mujoco/hopper_jump/hopper_jump.py | 9 +- .../mujoco/hopper_jump/hopper_jump_on_box.py | 32 ++-- .../envs/mujoco/hopper_throw/hopper_throw.py | 34 ++--- .../hopper_throw/hopper_throw_in_basket.py | 34 ++--- fancy_gym/envs/mujoco/reacher/reacher.py | 6 +- .../mujoco/walker_2d_jump/walker_2d_jump.py | 36 ++--- fancy_gym/meta/__init__.py | 2 +- fancy_gym/open_ai/__init__.py | 2 +- fancy_gym/utils/make_env_helpers.py | 144 +++++++++++------- fancy_gym/utils/time_aware_observation.py | 62 +------- 23 files changed, 249 insertions(+), 363 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 68e2177..ee41479 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -1,8 +1,9 @@ -from typing import Tuple, Optional +from typing import Tuple, Optional, Dict, Any -import gym +import gymnasium as gym import numpy as np -from gym import spaces +from gymnasium import spaces +from gymnasium.core import ObsType from mp_pytorch.mp.mp_interfaces import MPInterface from fancy_gym.black_box.controller.base_controller import BaseController @@ -140,7 +141,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): for t, (pos, vel) in enumerate(zip(trajectory, velocity)): step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high) - obs, c_reward, done, info = self.env.step(c_action) + obs, c_reward, terminated, truncated, info = self.env.step(c_action) rewards[t] = c_reward if self.verbose >= 2: @@ -155,8 +156,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, - t + 1 + self.current_traj_steps): + if terminated or truncated or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, + t + 1 + self.current_traj_steps): break infos.update({k: v[:t] for k, v in infos.items()}) @@ -171,13 +172,14 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos['trajectory_length'] = t + 1 trajectory_return = self.reward_aggregation(rewards[:t + 1]) - return self.observation(obs), trajectory_return, done, infos + return self.observation(obs), trajectory_return, terminated, truncated, infos def render(self, **kwargs): """Only set render options here, such that they can be used during the rollout. 
This only needs to be called once""" self.render_kwargs = kwargs - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self.current_traj_steps = 0 - return super(BlackBoxWrapper, self).reset() + return super(BlackBoxWrapper, self).reset(seed=seed, options=options) diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py index 02945a1..b1a6aaa 100644 --- a/fancy_gym/black_box/raw_interface_wrapper.py +++ b/fancy_gym/black_box/raw_interface_wrapper.py @@ -1,6 +1,6 @@ from typing import Union, Tuple -import gym +import gymnasium as gym import numpy as np from mp_pytorch.mp.mp_interfaces import MPInterface diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 9f0299e..b0999e5 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -1,7 +1,7 @@ from copy import deepcopy import numpy as np -from gym import register +from gymnasium import register from . import classic_control, mujoco from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv diff --git a/fancy_gym/envs/classic_control/base_reacher/base_reacher.py b/fancy_gym/envs/classic_control/base_reacher/base_reacher.py index f2ba135..f0e0a3e 100644 --- a/fancy_gym/envs/classic_control/base_reacher/base_reacher.py +++ b/fancy_gym/envs/classic_control/base_reacher/base_reacher.py @@ -1,10 +1,10 @@ -from typing import Union, Tuple, Optional +from typing import Union, Tuple, Optional, Any, Dict -import gym +import gymnasium as gym import numpy as np -from gym import spaces -from gym.core import ObsType -from gym.utils import seeding +from gymnasium import spaces +from gymnasium.core import ObsType +from gymnasium.utils import seeding from fancy_gym.envs.classic_control.utils import intersect @@ -69,10 +69,14 @@ class BaseReacherEnv(gym.Env): def current_vel(self): return self._angle_velocity.copy() - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: # Sample only orientation of first link, i.e. the arm is always straight. 
- if self.random_start: + try: + random_start = options.get('random_start', self.random_start) + except AttributeError: + random_start = self.random_start + if random_start: first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) self._start_pos = self._joint_angles.copy() @@ -84,7 +88,7 @@ class BaseReacherEnv(gym.Env): self._update_joints() self._steps = 0 - return self._get_obs().copy() + return self._get_obs().copy(), {} def _update_joints(self): """ diff --git a/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py b/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py index ab21b39..6878922 100644 --- a/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py +++ b/fancy_gym/envs/classic_control/base_reacher/base_reacher_direct.py @@ -1,5 +1,5 @@ import numpy as np -from gym import spaces +from gymnasium import spaces from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv @@ -32,6 +32,7 @@ class BaseReacherDirectEnv(BaseReacherEnv): reward, info = self._get_reward(action) self._steps += 1 - done = self._terminate(info) + terminated = self._terminate(info) + truncated = False - return self._get_obs().copy(), reward, done, info + return self._get_obs().copy(), reward, terminated, truncated, info diff --git a/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py b/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py index 7364948..c9a7d4f 100644 --- a/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py +++ b/fancy_gym/envs/classic_control/base_reacher/base_reacher_torque.py @@ -1,5 +1,5 @@ import numpy as np -from gym import spaces +from gymnasium import spaces from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv @@ -31,6 +31,7 @@ class BaseReacherTorqueEnv(BaseReacherEnv): reward, info = self._get_reward(action) self._steps += 1 - done = False + terminated = False + truncated = False - return self._get_obs().copy(), reward, done, info + return self._get_obs().copy(), reward, terminated, truncated, info diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py index 5563ea6..c3e5020 100644 --- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py +++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py @@ -1,9 +1,10 @@ -from typing import Union, Optional, Tuple +from typing import Union, Optional, Tuple, Any, Dict -import gym +import gymnasium as gym import matplotlib.pyplot as plt import numpy as np -from gym.core import ObsType +from gymnasium import spaces +from gymnasium.core import ObsType from matplotlib import patches from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv @@ -40,7 +41,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): [np.inf] # env steps, because reward start after n steps TODO: Maybe ]) # self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) if rew_fct == "simple": from fancy_gym.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward @@ -54,8 +55,8 @@ class HoleReacherEnv(BaseReacherDirectEnv): else: raise ValueError("Unknown reward function 
{}".format(rew_fct)) - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self._generate_hole() self._set_patches() self.reward_function.reset() @@ -225,14 +226,4 @@ class HoleReacherEnv(BaseReacherDirectEnv): self.fig.gca().add_patch(hole_floor) -if __name__ == "__main__": - env = HoleReacherEnv(5) - env.reset() - - for i in range(10000): - ac = env.action_space.sample() - obs, rew, done, info = env.step(ac) - env.render() - if done: - env.reset() diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index 9b03147..4ef25ea 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -1,9 +1,9 @@ -from typing import Iterable, Union, Optional, Tuple +from typing import Iterable, Union, Optional, Tuple, Any, Dict import matplotlib.pyplot as plt import numpy as np -from gym import spaces -from gym.core import ObsType +from gymnasium import spaces +from gymnasium.core import ObsType from fancy_gym.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv @@ -42,11 +42,10 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): # def start_pos(self): # return self._start_pos - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self._generate_goal() - - return super().reset() + return super().reset(seed=seed, options=options) def _get_reward(self, action: np.ndarray): diff = self.end_effector - self._goal @@ -128,14 +127,3 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): self.fig.canvas.draw() self.fig.canvas.flush_events() - -if __name__ == "__main__": - env = SimpleReacherEnv(5) - env.reset() - for i in range(200): - ac = env.action_space.sample() - obs, rew, done, info = env.step(ac) - - env.render() - if done: - break diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py index f3412ac..ba5efd2 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -1,9 +1,10 @@ -from typing import Iterable, Union, Tuple, Optional +from typing import Iterable, Union, Tuple, Optional, Any, Dict -import gym +import gymnasium as gym import matplotlib.pyplot as plt import numpy as np -from gym.core import ObsType +from gymnasium import spaces +from gymnasium.core import ObsType from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv @@ -34,16 +35,16 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps ]) - self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) # @property # def start_pos(self): # return self._start_pos - def reset(self, *, seed: Optional[int] = None, return_info: bool = 
False, - options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self._generate_goal() - return super().reset() + return super().reset(seed=seed, options=options) def _generate_goal(self): # TODO: Maybe improve this later, this can yield quite a lot of invalid settings @@ -185,14 +186,3 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): plt.pause(0.01) -if __name__ == "__main__": - - env = ViaPointReacherEnv(5) - env.reset() - - for i in range(10000): - ac = env.action_space.sample() - obs, rew, done, info = env.step(ac) - env.render() - if done: - env.reset() diff --git a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py index 9311ae1..fbf0804 100644 --- a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py +++ b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py @@ -1,8 +1,8 @@ -from typing import Tuple, Union, Optional +from typing import Tuple, Union, Optional, Any, Dict import numpy as np -from gym.core import ObsType -from gym.envs.mujoco.ant_v4 import AntEnv +from gymnasium.core import ObsType +from gymnasium.envs.mujoco.ant_v4 import AntEnv MAX_EPISODE_STEPS_ANTJUMP = 200 @@ -61,9 +61,10 @@ class AntJumpEnv(AntEnv): costs = ctrl_cost + contact_cost - done = bool(height < 0.3) # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle + terminated = bool( + height < 0.3) # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle - if self.current_step == MAX_EPISODE_STEPS_ANTJUMP or done: + if self.current_step == MAX_EPISODE_STEPS_ANTJUMP or terminated: # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled # height_reward = -10 * (np.linalg.norm(self.max_height - self.goal)) height_reward = -10 * np.linalg.norm(self.max_height - self.goal) @@ -80,19 +81,20 @@ class AntJumpEnv(AntEnv): 'max_height': self.max_height, 'goal': self.goal } + truncated = False - return obs, reward, done, info + return obs, reward, terminated, truncated, info def _get_obs(self): return np.append(super()._get_obs(), self.goal) - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self.current_step = 0 self.max_height = 0 # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE self.goal = self.np_random.uniform(1.0, 2.5, 1) - return super().reset() + return super().reset(seed=seed, options=options) # reset_model had to be implemented in every env to make it deterministic def reset_model(self): diff --git a/fancy_gym/envs/mujoco/beerpong/beerpong.py b/fancy_gym/envs/mujoco/beerpong/beerpong.py index 368425d..6a37e66 100644 --- a/fancy_gym/envs/mujoco/beerpong/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/beerpong.py @@ -1,9 +1,10 @@ import os -from typing import Optional +from typing import Optional, Any, Dict, Tuple import numpy as np -from gym import utils -from gym.envs.mujoco import MujocoEnv +from gymnasium import utils +from gymnasium.core import ObsType +from gymnasium.envs.mujoco import MujocoEnv MAX_EPISODE_STEPS_BEERPONG = 300 FIXED_RELEASE_STEP = 62 # empirically evaluated for frame_skip=2! 
@@ -30,7 +31,7 @@ CUP_COLLISION_OBJ = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "c class BeerPongEnv(MujocoEnv, utils.EzPickle): - def __init__(self): + def __init__(self, **kwargs): self._steps = 0 # Small Context -> Easier. Todo: Should we do different versions? # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "beerpong_wo_cup.xml") @@ -65,7 +66,13 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): self.ball_in_cup = False self.dist_ground_cup = -1 # distance floor to cup if first floor contact - MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=1, mujoco_bindings="mujoco") + MujocoEnv.__init__( + self, + self.xml_path, + frame_skip=1, + observation_space=self.observation_space, + **kwargs + ) utils.EzPickle.__init__(self) @property @@ -76,7 +83,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): def start_vel(self): return self._start_vel - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self.dists = [] self.dists_final = [] self.action_costs = [] @@ -86,7 +94,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): self.ball_cup_contact = False self.ball_in_cup = False self.dist_ground_cup = -1 # distance floor to cup if first floor contact - return super().reset() + return super().reset(seed=seed, options=options) def reset_model(self): init_pos_all = self.init_qpos.copy() @@ -128,11 +136,11 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): if not crash: reward, reward_infos = self._get_reward(applied_action) is_collided = reward_infos['is_collided'] # TODO: Remove if self collision does not make a difference - done = is_collided + terminated = is_collided self._steps += 1 else: reward = -30 - done = True + terminated = True reward_infos = {"success": False, "ball_pos": np.zeros(3), "ball_vel": np.zeros(3), "is_collided": False} infos = dict( @@ -142,7 +150,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): q_vel=self.data.qvel[0:7].ravel().copy(), sim_crash=crash, ) infos.update(reward_infos) - return ob, reward, done, infos + + truncated = False + + return ob, reward, terminated, truncated, infos def _get_obs(self): theta = self.data.qpos.flat[:7].copy() @@ -258,9 +269,9 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv): return super(BeerPongEnvStepBasedEpisodicReward, self).step(a) else: reward = 0 - done = True + terminated, truncated = True, False while self._steps < MAX_EPISODE_STEPS_BEERPONG: - obs, sub_reward, done, infos = super(BeerPongEnvStepBasedEpisodicReward, self).step( + obs, sub_reward, terminated, truncated, infos = super(BeerPongEnvStepBasedEpisodicReward, self).step( np.zeros(a.shape)) reward += sub_reward - return obs, reward, done, infos + return obs, reward, terminated, truncated, infos diff --git a/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py b/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py index 015e887..2fc98ba 100644 --- a/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py @@ -2,8 +2,8 @@ import os import mujoco_py.builder import numpy as np -from gym import utils -from gym.envs.mujoco import MujocoEnv +from gymnasium import utils +from gymnasium.envs.mujoco import MujocoEnv from fancy_gym.envs.mujoco.beerpong.deprecated.beerpong_reward_staged import BeerPongReward @@ -90,11 +90,11 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): if 
not crash:
             reward, reward_infos = self.reward_function.compute_reward(self, applied_action)
             is_collided = reward_infos['is_collided']
-            done = is_collided or self._steps == self.ep_length - 1
+            terminated = is_collided or self._steps == self.ep_length - 1
             self._steps += 1
         else:
             reward = -30
-            done = True
+            terminated = True
             reward_infos = {"success": False, "ball_pos": np.zeros(3), "ball_vel": np.zeros(3), "is_collided": False}
 
         infos = dict(
@@ -104,7 +104,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
             q_vel=self.sim.data.qvel[0:7].ravel().copy(), sim_crash=crash,
         )
         infos.update(reward_infos)
-        return ob, reward, done, infos
+        truncated = False
+        return ob, reward, terminated, truncated, infos
 
     def _get_obs(self):
         theta = self.sim.data.qpos.flat[:7]
@@ -143,16 +144,16 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
             return super(BeerPongEnvStepBasedEpisodicReward, self).step(a)
         else:
             reward = 0
-            done = False
-            while not done:
-                sub_ob, sub_reward, done, sub_infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
-                    np.zeros(a.shape))
+            terminated, truncated = False, False
+            while not (terminated or truncated):
+                sub_ob, sub_reward, terminated, truncated, sub_infos = super(BeerPongEnvStepBasedEpisodicReward,
+                                                                             self).step(np.zeros(a.shape))
                 reward += sub_reward
                 infos = sub_infos
             ob = sub_ob
             ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
             # internal steps and thus, the observation also needs to be set correctly
-            return ob, reward, done, infos
+            return ob, reward, terminated, truncated, infos
 
 
 # class BeerBongEnvStepBased(BeerBongEnv):
@@ -186,27 +187,3 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
 #         ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
 #         # internal steps and thus, the observation also needs to be set correctly
 #         return ob, reward, done, infos
-
-
-if __name__ == "__main__":
-    env = BeerPongEnv(frame_skip=2)
-    env.seed(0)
-    # env = BeerBongEnvStepBased(frame_skip=2)
-    # env = BeerBongEnvStepBasedEpisodicReward(frame_skip=2)
-    # env = BeerBongEnvFixedReleaseStep(frame_skip=2)
-    import time
-
-    env.reset()
-    env.render("human")
-    for i in range(600):
-        # ac = 10 * env.action_space.sample()
-        ac = 0.05 * np.ones(7)
-        obs, rew, d, info = env.step(ac)
-        env.render("human")
-
-        if d:
-            print('reward:', rew)
-            print('RESETTING')
-            env.reset()
-            time.sleep(1)
-    env.close()
diff --git a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
index e0a5982..853c5e7 100644
--- a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
+++ b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
@@ -1,9 +1,9 @@
 import os
-from typing import Tuple, Union, Optional
+from typing import Tuple, Union, Optional, Any, Dict
 
 import numpy as np
-from gym.core import ObsType
-from gym.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
+from gymnasium.core import ObsType
+from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
 
 MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
 
@@ -44,7 +44,8 @@ class HalfCheetahJumpEnv(HalfCheetahEnv):
         ## Didnt use fell_over, because base env also has no done condition - Paul and Marc
         # fell_over = abs(self.sim.data.qpos[2]) > 2.5 # how to figure out if the cheetah fell over? -> 2.5 oke?
         # TODO: Should a fall over be checked here?
- done = False + terminated = False + truncated = False ctrl_cost = self.control_cost(action) costs = ctrl_cost @@ -63,17 +64,17 @@ class HalfCheetahJumpEnv(HalfCheetahEnv): 'max_height': self.max_height } - return observation, reward, done, info + return observation, reward, terminated, truncated, info def _get_obs(self): return np.append(super()._get_obs(), self.goal) - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self.max_height = 0 self.current_step = 0 self.goal = self.np_random.uniform(1.1, 1.6, 1) # 1.1 1.6 - return super().reset() + return super().reset(seed=seed, options=options) # overwrite reset_model to make it deterministic def reset_model(self): diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py index da9ac4d..8ee4b11 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py @@ -1,7 +1,7 @@ import os import numpy as np -from gym.envs.mujoco.hopper_v4 import HopperEnv +from gymnasium.envs.mujoco.hopper_v4 import HopperEnv MAX_EPISODE_STEPS_HOPPERJUMP = 250 @@ -73,7 +73,7 @@ class HopperJumpEnv(HopperEnv): self.do_simulation(action, self.frame_skip) height_after = self.get_body_com("torso")[2] - #site_pos_after = self.data.get_site_xpos('foot_site') + # site_pos_after = self.data.get_site_xpos('foot_site') site_pos_after = self.data.site('foot_site').xpos self.max_height = max(height_after, self.max_height) @@ -88,7 +88,8 @@ class HopperJumpEnv(HopperEnv): ctrl_cost = self.control_cost(action) costs = ctrl_cost - done = False + terminated = False + truncated = False goal_dist = np.linalg.norm(site_pos_after - self.goal) if self.contact_dist is None and self.contact_with_floor: @@ -115,7 +116,7 @@ class HopperJumpEnv(HopperEnv): healthy=self.is_healthy, contact_dist=self.contact_dist or 0 ) - return observation, reward, done, info + return observation, reward, terminated, truncated, info def _get_obs(self): # goal_dist = self.data.get_site_xpos('foot_site') - self.goal diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py index f9834bd..a31e8ee 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py @@ -1,7 +1,9 @@ import os +from typing import Optional, Dict, Any, Tuple import numpy as np -from gym.envs.mujoco.hopper_v4 import HopperEnv +from gymnasium.core import ObsType +from gymnasium.envs.mujoco.hopper_v4 import HopperEnv MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250 @@ -74,10 +76,10 @@ class HopperJumpOnBoxEnv(HopperEnv): costs = ctrl_cost - done = fell_over or self.hopper_on_box + terminated = fell_over or self.hopper_on_box - if self.current_step >= self.max_episode_steps or done: - done = False + if self.current_step >= self.max_episode_steps or terminated: + done = False # TODO why are we doing this??? 
max_height = self.max_height.copy()
             min_distance = self.min_distance.copy()
@@ -122,12 +124,14 @@ class HopperJumpOnBoxEnv(HopperEnv):
             'goal': self.box_x,
         }
 
-        return observation, reward, done, info
+        truncated = False
+        return observation, reward, terminated, truncated, info
 
     def _get_obs(self):
         return np.append(super()._get_obs(), self.box_x)
 
-    def reset(self):
+    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \
+            -> Tuple[ObsType, Dict[str, Any]]:
 
         self.max_height = 0
         self.min_distance = 5000
@@ -136,7 +140,7 @@ class HopperJumpOnBoxEnv(HopperEnv):
         if self.context:
             self.box_x = self.np_random.uniform(1, 3, 1)
             self.model.body("box").pos = [self.box_x[0], 0, 0]
-        return super().reset()
+        return super().reset(seed=seed, options=options)
 
     # overwrite reset_model to make it deterministic
     def reset_model(self):
@@ -151,20 +155,5 @@ class HopperJumpOnBoxEnv(HopperEnv):
         observation = self._get_obs()
         return observation
 
-if __name__ == '__main__':
-    render_mode = "human"  # "human" or "partial" or "final"
-    env = HopperJumpOnBoxEnv()
-    obs = env.reset()
-    for i in range(2000):
-        # objective.load_result("/tmp/cma")
-        # test with random actions
-        ac = env.action_space.sample()
-        obs, rew, d, info = env.step(ac)
-        if i % 10 == 0:
-            env.render(mode=render_mode)
-        if d:
-            print('After ', i, ' steps, done: ', d)
-            env.reset()
-
-    env.close()
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py
index e69cea6..ed2bf96 100644
--- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py
+++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py
@@ -1,8 +1,9 @@
 import os
-from typing import Optional
+from typing import Optional, Any, Dict, Tuple
 
 import numpy as np
-from gym.envs.mujoco.hopper_v4 import HopperEnv
+from gymnasium.core import ObsType
+from gymnasium.envs.mujoco.hopper_v4 import HopperEnv
 
 MAX_EPISODE_STEPS_HOPPERTHROW = 250
 
@@ -56,14 +57,14 @@ class HopperThrowEnv(HopperEnv):
         # done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself?
- Paul and Marc ball_landed = bool(self.get_body_com("ball")[2] <= 0.05) - done = ball_landed + terminated = ball_landed ctrl_cost = self.control_cost(action) costs = ctrl_cost rewards = 0 - if self.current_step >= self.max_episode_steps or done: + if self.current_step >= self.max_episode_steps or terminated: distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \ self._forward_reward_weight * ball_pos_after healthy_reward = 0 if self.context else self.healthy_reward * self.current_step @@ -78,16 +79,18 @@ class HopperThrowEnv(HopperEnv): '_steps': self.current_step, 'goal': self.goal, } + truncated = False - return observation, reward, done, info + return observation, reward, terminated, truncated, info def _get_obs(self): return np.append(super()._get_obs(), self.goal) - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self.current_step = 0 self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1) # 0.5 8.0 - return super().reset() + return super().reset(seed=seed, options=options) # overwrite reset_model to make it deterministic def reset_model(self): @@ -103,20 +106,3 @@ class HopperThrowEnv(HopperEnv): return observation -if __name__ == '__main__': - render_mode = "human" # "human" or "partial" or "final" - env = HopperThrowEnv() - obs = env.reset() - - for i in range(2000): - # objective.load_result("/tmp/cma") - # test with random actions - ac = env.action_space.sample() - obs, rew, d, info = env.step(ac) - if i % 10 == 0: - env.render(mode=render_mode) - if d: - print('After ', i, ' steps, done: ', d) - env.reset() - - env.close() diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index 76ef861..439a677 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -1,8 +1,9 @@ import os -from typing import Optional +from typing import Optional, Any, Dict, Tuple import numpy as np -from gym.envs.mujoco.hopper_v4 import HopperEnv +from gymnasium.envs.mujoco.hopper_v4 import HopperEnv +from gymnasium.core import ObsType MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250 @@ -72,7 +73,7 @@ class HopperThrowInBasketEnv(HopperEnv): self.ball_in_basket = True ball_landed = self.get_body_com("ball")[2] <= 0.05 - done = bool(ball_landed or is_in_basket) + terminated = bool(ball_landed or is_in_basket) rewards = 0 @@ -80,7 +81,7 @@ class HopperThrowInBasketEnv(HopperEnv): costs = ctrl_cost - if self.current_step >= self.max_episode_steps or done: + if self.current_step >= self.max_episode_steps or terminated: if is_in_basket: if not self.context: @@ -101,13 +102,16 @@ class HopperThrowInBasketEnv(HopperEnv): info = { 'ball_pos': ball_pos[0], } + truncated = False - return observation, reward, done, info + return observation, reward, terminated, truncated, info def _get_obs(self): return np.append(super()._get_obs(), self.basket_x) - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: + if self.max_episode_steps == 10: # We have to initialize this here, because the spec is only added after creating the env. 
self.max_episode_steps = self.spec.max_episode_steps @@ -117,7 +121,7 @@ class HopperThrowInBasketEnv(HopperEnv): if self.context: self.basket_x = self.np_random.uniform(low=3, high=7, size=1) self.model.body("basket_ground").pos[:] = [self.basket_x[0], 0, 0] - return super().reset() + return super().reset(seed=seed, options=options) # overwrite reset_model to make it deterministic def reset_model(self): @@ -134,20 +138,4 @@ class HopperThrowInBasketEnv(HopperEnv): return observation -if __name__ == '__main__': - render_mode = "human" # "human" or "partial" or "final" - env = HopperThrowInBasketEnv() - obs = env.reset() - for i in range(2000): - # objective.load_result("/tmp/cma") - # test with random actions - ac = env.action_space.sample() - obs, rew, d, info = env.step(ac) - if i % 10 == 0: - env.render(mode=render_mode) - if d: - print('After ', i, ' steps, done: ', d) - env.reset() - - env.close() diff --git a/fancy_gym/envs/mujoco/reacher/reacher.py b/fancy_gym/envs/mujoco/reacher/reacher.py index c3c870b..8f5f893 100644 --- a/fancy_gym/envs/mujoco/reacher/reacher.py +++ b/fancy_gym/envs/mujoco/reacher/reacher.py @@ -1,9 +1,9 @@ import os import numpy as np -from gym import utils -from gym.envs.mujoco import MujocoEnv -from gym.spaces import Box +from gymnasium import utils +from gymnasium.envs.mujoco import MujocoEnv +from gymnasium.spaces import Box MAX_EPISODE_STEPS_REACHER = 200 diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py index ed663d2..cc9f2b4 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -1,8 +1,9 @@ import os -from typing import Optional +from typing import Optional, Any, Dict, Tuple import numpy as np -from gym.envs.mujoco.walker2d_v4 import Walker2dEnv +from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv +from gymnasium.core import ObsType MAX_EPISODE_STEPS_WALKERJUMP = 300 @@ -54,13 +55,13 @@ class Walker2dJumpEnv(Walker2dEnv): self.max_height = max(height, self.max_height) - done = bool(height < 0.2) + terminated = bool(height < 0.2) ctrl_cost = self.control_cost(action) costs = ctrl_cost rewards = 0 - if self.current_step >= self.max_episode_steps or done: - done = True + if self.current_step >= self.max_episode_steps or terminated: + terminated = True height_goal_distance = -10 * (np.linalg.norm(self.max_height - self.goal)) healthy_reward = self.healthy_reward * self.current_step @@ -73,17 +74,19 @@ class Walker2dJumpEnv(Walker2dEnv): 'max_height': self.max_height, 'goal': self.goal, } + truncated = False - return observation, reward, done, info + return observation, reward, terminated, truncated, info def _get_obs(self): return np.append(super()._get_obs(), self.goal) - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: self.current_step = 0 self.max_height = 0 self.goal = self.np_random.uniform(1.5, 2.5, 1) # 1.5 3.0 - return super().reset() + return super().reset(seed=seed, options=options) # overwrite reset_model to make it deterministic def reset_model(self): @@ -98,20 +101,3 @@ class Walker2dJumpEnv(Walker2dEnv): observation = self._get_obs() return observation - -if __name__ == '__main__': - render_mode = "human" # "human" or "partial" or "final" - env = Walker2dJumpEnv() - obs = env.reset() - - for i in 
range(6000): - # test with random actions - ac = env.action_space.sample() - obs, rew, d, info = env.step(ac) - if i % 10 == 0: - env.render(mode=render_mode) - if d: - print('After ', i, ' steps, done: ', d) - env.reset() - - env.close() diff --git a/fancy_gym/meta/__init__.py b/fancy_gym/meta/__init__.py index 04438f4..f30684f 100644 --- a/fancy_gym/meta/__init__.py +++ b/fancy_gym/meta/__init__.py @@ -1,6 +1,6 @@ from copy import deepcopy -from gym import register +from gymnasium import register from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \ object_change_mp_wrapper diff --git a/fancy_gym/open_ai/__init__.py b/fancy_gym/open_ai/__init__.py index 3e0b770..ab15cb2 100644 --- a/fancy_gym/open_ai/__init__.py +++ b/fancy_gym/open_ai/__init__.py @@ -1,6 +1,6 @@ from copy import deepcopy -from gym import register +from gymnasium import register from . import mujoco from .deprecated_needs_gym_robotics import robotics diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 68bb66d..50aa38f 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -1,18 +1,17 @@ import logging -import re import uuid from collections.abc import MutableMapping from copy import deepcopy from math import ceil from typing import Iterable, Type, Union -import gym +import gymnasium as gym import numpy as np -from gym.envs.registration import register, registry -from gym.utils import seeding +from gymnasium.envs.registration import register, registry try: from dm_control import suite, manipulation + from shimmy.dm_control_compatibility import EnvType except ImportError: pass @@ -83,15 +82,20 @@ def make(env_id: str, seed: int, **kwargs): if framework == 'metaworld': # MetaWorld environment env = make_metaworld(env_id, seed, **kwargs) - elif framework == 'dmc': - # DeepMind Control environment - env = make_dmc(env_id, seed, **kwargs) + # elif framework == 'dmc': + # Deprecated: With shimmy gym now has native support for deepmind envs + # # DeepMind Control environment + # env = make_dmc(env_id, seed, **kwargs) else: env = make_gym(env_id, seed, **kwargs) - np_random, _ = seeding.np_random(seed) - env.np_random = np_random - # env.seed(seed) + # try: + env.reset(seed=seed) + # except TypeError: + # # Support for older gym envs that do not have seeding + # # env.seed(seed) + # np_random, _ = seeding.np_random(seed) + # env.np_random = np_random env.action_space.seed(seed) env.observation_space.seed(seed) @@ -161,7 +165,7 @@ def make_bb( traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(env.action_space.shape).item()) if black_box_kwargs.get('duration') is None: - black_box_kwargs['duration'] = env.spec.max_episode_steps * env.dt + black_box_kwargs['duration'] = get_env_duration(env) if phase_kwargs.get('tau') is None: phase_kwargs['tau'] = black_box_kwargs['duration'] @@ -180,6 +184,24 @@ def make_bb( return bb_env +def get_env_duration(env: gym.Env): + try: + # TODO Remove if this is in the compatibility class + duration = env.spec.max_episode_steps * env.dt + except (AttributeError, TypeError) as e: + logging.error(f'Attributes env.spec.max_episode_steps and env.dt are not available. ' + f'Assuming you are using dm_control. 
Please make sure you have ran ' + f'"pip install shimmy[dm_control]" for that.') + if env.env_type is EnvType.COMPOSER: + max_episode_steps = ceil(env.unwrapped._time_limit / env.dt) + elif env.env_type is EnvType.RL_CONTROL: + max_episode_steps = int(env.unwrapped._step_limit) + else: + raise e + duration = max_episode_steps * env.control_timestep() + return duration + + def make_bb_env_helper(**kwargs): """ Helper function for registering a black box gym environment. @@ -229,52 +251,53 @@ def make_bb_env_helper(**kwargs): basis_kwargs=basis_kwargs, **kwargs, seed=seed) -def make_dmc( - env_id: str, - seed: int = None, - visualize_reward: bool = True, - time_limit: Union[None, float] = None, - **kwargs -): - if not re.match(r"\w+-\w+", env_id): - raise ValueError("env_id does not have the following structure: 'domain_name-task_name'") - domain_name, task_name = env_id.split("-") - - if task_name.endswith("_vision"): - # TODO - raise ValueError("The vision interface for manipulation tasks is currently not supported.") - - if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL: - raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.') - - # env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1' - gym_id = uuid.uuid4().hex + '-v1' - - task_kwargs = {'random': seed} - if time_limit is not None: - task_kwargs['time_limit'] = time_limit - - # create task - # Accessing private attribute because DMC does not expose time_limit or step_limit. - # Only the current time_step/time as well as the control_timestep can be accessed. - if domain_name == "manipulation": - env = manipulation.load(environment_name=task_name, seed=seed) - max_episode_steps = ceil(env._time_limit / env.control_timestep()) - else: - env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs, - visualize_reward=visualize_reward, environment_kwargs=kwargs) - max_episode_steps = int(env._step_limit) - - register( - id=gym_id, - entry_point='fancy_gym.dmc.dmc_wrapper:DMCWrapper', - kwargs={'env': lambda: env}, - max_episode_steps=max_episode_steps, - ) - - env = gym.make(gym_id) - env.seed(seed) - return env +# Deprecated: With shimmy gym now has native support for deepmind envs +# def make_dmc( +# env_id: str, +# seed: int = None, +# visualize_reward: bool = True, +# time_limit: Union[None, float] = None, +# **kwargs +# ): +# if not re.match(r"\w+-\w+", env_id): +# raise ValueError("env_id does not have the following structure: 'domain_name-task_name'") +# domain_name, task_name = env_id.split("-") +# +# if task_name.endswith("_vision"): +# # TODO +# raise ValueError("The vision interface for manipulation tasks is currently not supported.") +# +# if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL: +# raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.') +# +# # env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1' +# gym_id = uuid.uuid4().hex + '-v1' +# +# task_kwargs = {'random': seed} +# if time_limit is not None: +# task_kwargs['time_limit'] = time_limit +# +# # create task +# # Accessing private attribute because DMC does not expose time_limit or step_limit. +# # Only the current time_step/time as well as the control_timestep can be accessed. 
+# if domain_name == "manipulation": +# env = manipulation.load(environment_name=task_name, seed=seed) +# max_episode_steps = ceil(env._time_limit / env.control_timestep()) +# else: +# env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs, +# visualize_reward=visualize_reward, environment_kwargs=kwargs) +# max_episode_steps = int(env._step_limit) +# +# register( +# id=gym_id, +# entry_point='fancy_gym.dmc.dmc_wrapper:DMCWrapper', +# kwargs={'env': lambda: env}, +# max_episode_steps=max_episode_steps, +# ) +# +# env = gym.make(gym_id) +# env.seed(seed) +# return env def make_metaworld(env_id: str, seed: int, **kwargs): @@ -288,12 +311,17 @@ def make_metaworld(env_id: str, seed: int, **kwargs): # New argument to use global seeding _env.seeded_rand_vec = True + max_episode_steps = _env.max_path_length + + # TODO remove this as soon as there is support for the new API + _env = gym.wrappers.EnvCompatibility(_env) + gym_id = uuid.uuid4().hex + '-v1' register( id=gym_id, entry_point=lambda: _env, - max_episode_steps=_env.max_path_length, + max_episode_steps=max_episode_steps, ) # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py index b2cbc78..192138d 100644 --- a/fancy_gym/utils/time_aware_observation.py +++ b/fancy_gym/utils/time_aware_observation.py @@ -1,45 +1,11 @@ -""" -Adapted from: https://github.com/openai/gym/blob/907b1b20dd9ac0cba5803225059b9c6673702467/gym/wrappers/time_aware_observation.py -License: MIT -Copyright (c) 2016 OpenAI (https://openai.com) - -Wrapper for adding time aware observations to environment observation. -""" -import gym +import gymnasium as gym import numpy as np -from gym.spaces import Box -class TimeAwareObservation(gym.ObservationWrapper): - """Augment the observation with the current time step in the episode. - - The observation space of the wrapped environment is assumed to be a flat :class:`Box`. - In particular, pixel observations are not supported. This wrapper will append the current timestep - within the current episode to the observation. - - Example: - >>> import gym - >>> env = gym.make('CartPole-v1') - >>> env = TimeAwareObservation(env) - >>> env.reset() - array([ 0.03810719, 0.03522411, 0.02231044, -0.01088205, 0. ]) - >>> env.step(env.action_space.sample())[0] - array([ 0.03881167, -0.16021058, 0.0220928 , 0.28875574, 1. ]) - """ +class TimeAwareObservation(gym.wrappers.TimeAwareObservation): def __init__(self, env: gym.Env): - """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box` - observation space. - - Args: - env: The environment to apply the wrapper - """ super().__init__(env) - assert isinstance(env.observation_space, Box) - low = np.append(self.observation_space.low, 0.0) - high = np.append(self.observation_space.high, 1.0) - self.observation_space = Box(low, high, dtype=self.observation_space.dtype) - self.t = 0 self._max_episode_steps = env.spec.max_episode_steps def observation(self, observation): @@ -52,27 +18,3 @@ class TimeAwareObservation(gym.ObservationWrapper): The observation with the time step appended to """ return np.append(observation, self.t / self._max_episode_steps) - - def step(self, action): - """Steps through the environment, incrementing the time step. - - Args: - action: The action to take - - Returns: - The environment's step using the action. 
- """ - self.t += 1 - return super().step(action) - - def reset(self, **kwargs): - """Reset the environment setting the time to zero. - - Args: - **kwargs: Kwargs to apply to env.reset() - - Returns: - The reset environment - """ - self.t = 0 - return super().reset(**kwargs) From 9ebc021ae0529b7481ff9e9bc1cbc368a587ba18 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 12 Jan 2023 17:23:56 +0100 Subject: [PATCH 004/198] updated dm_control envs to use shimmy --- fancy_gym/dmc/__init__.py | 36 ++++++++++--------- fancy_gym/dmc/dmc_wrapper.py | 22 ++++++------ .../dmc/manipulation/reach_site/mp_wrapper.py | 2 +- fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py | 2 +- fancy_gym/dmc/suite/cartpole/mp_wrapper.py | 2 +- fancy_gym/dmc/suite/reacher/mp_wrapper.py | 2 +- 6 files changed, 34 insertions(+), 32 deletions(-) diff --git a/fancy_gym/dmc/__init__.py b/fancy_gym/dmc/__init__.py index 22ae47f..29bd354 100644 --- a/fancy_gym/dmc/__init__.py +++ b/fancy_gym/dmc/__init__.py @@ -1,14 +1,16 @@ from copy import deepcopy +from gymnasium.wrappers import FlattenObservation + from . import manipulation, suite ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} -from gym.envs.registration import register +from gymnasium.envs.registration import register DEFAULT_BB_DICT_ProMP = { "name": 'EnvName', - "wrappers": [], + "wrappers": [FlattenObservation], "trajectory_generator_kwargs": { 'trajectory_generator_type': 'promp' }, @@ -29,7 +31,7 @@ DEFAULT_BB_DICT_ProMP = { DEFAULT_BB_DICT_DMP = { "name": 'EnvName', - "wrappers": [], + "wrappers": [FlattenObservation], "trajectory_generator_kwargs": { 'trajectory_generator_type': 'dmp' }, @@ -49,7 +51,7 @@ DEFAULT_BB_DICT_DMP = { # DeepMind Control Suite (DMC) kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch" +kwargs_dict_bic_dmp['name'] = f"dm_control/ball_in_cup-catch-v0" kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper) # bandwidth_factor=2 kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2 @@ -62,7 +64,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0") kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch" +kwargs_dict_bic_promp['name'] = f"dm_control/ball_in_cup-catch-v0" kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper) register( id=f'dmc_ball_in_cup-catch_promp-v0', @@ -72,7 +74,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0") kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy" +kwargs_dict_reacher_easy_dmp['name'] = f"dm_control/reacher-easy-v0" kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper) # bandwidth_factor=2 kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2 @@ -86,7 +88,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0") kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy" +kwargs_dict_reacher_easy_promp['name'] = f"dm_control/reacher-easy-v0" kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 register( @@ -97,7 +99,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0") 
kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard" +kwargs_dict_reacher_hard_dmp['name'] = f"dm_control/reacher-hard-v0" kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper) # bandwidth_factor = 2 kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2 @@ -111,7 +113,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0") kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard" +kwargs_dict_reacher_hard_promp['name'] = f"dm_control/reacher-hard-v0" kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 register( @@ -126,7 +128,7 @@ _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"] for _task in _dmc_cartpole_tasks: _env_id = f'dmc_cartpole-{_task}_dmp-v0' kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}" + kwargs_dict_cartpole_dmp['name'] = f"dm_control/cartpole-{_task}-v0" kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper) # bandwidth_factor = 2 kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2 @@ -143,7 +145,7 @@ for _task in _dmc_cartpole_tasks: _env_id = f'dmc_cartpole-{_task}_promp-v0' kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_cartpole_promp['name'] = f"dmc:cartpole-{_task}" + kwargs_dict_cartpole_promp['name'] = f"dm_control/cartpole-{_task}-v0" kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper) kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10 @@ -156,7 +158,7 @@ for _task in _dmc_cartpole_tasks: ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles" +kwargs_dict_cartpole2poles_dmp['name'] = f"dm_control/cartpole-two_poles-v0" kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper) # bandwidth_factor = 2 kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2 @@ -173,7 +175,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_cartpole2poles_promp['name'] = f"dmc:cartpole-two_poles" +kwargs_dict_cartpole2poles_promp['name'] = f"dm_control/cartpole-two_poles-v0" kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper) kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10 @@ -187,7 +189,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles" +kwargs_dict_cartpole3poles_dmp['name'] = f"dm_control/cartpole-three_poles-v0" kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper) # bandwidth_factor = 2 kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2 @@ -204,7 +206,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP) 
-kwargs_dict_cartpole3poles_promp['name'] = f"dmc:cartpole-three_poles" +kwargs_dict_cartpole3poles_promp['name'] = f"dm_control/cartpole-three_poles-v0" kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper) kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10 @@ -219,7 +221,7 @@ ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # DeepMind Manipulation kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_mani_reach_site_features_dmp['name'] = f"dmc:manipulation-reach_site_features" +kwargs_dict_mani_reach_site_features_dmp['name'] = f"dm_control/reach_site_features-v0" kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper) kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2 # TODO: weight scale 50, but goal scale 0.1 @@ -233,7 +235,7 @@ register( ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0") kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_mani_reach_site_features_promp['name'] = f"dmc:manipulation-reach_site_features" +kwargs_dict_mani_reach_site_features_promp['name'] = f"dm_control/reach_site_features-v0" kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper) kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity' diff --git a/fancy_gym/dmc/dmc_wrapper.py b/fancy_gym/dmc/dmc_wrapper.py index b1522c3..d1e5f0d 100644 --- a/fancy_gym/dmc/dmc_wrapper.py +++ b/fancy_gym/dmc/dmc_wrapper.py @@ -3,15 +3,15 @@ # Copyright (c) 2020 Denis Yarats import collections from collections.abc import MutableMapping -from typing import Any, Dict, Tuple, Optional, Union, Callable +from typing import Any, Dict, Tuple, Optional, Union, Callable, SupportsFloat -import gym +import gymnasium as gym import numpy as np from dm_control import composer from dm_control.rl import control from dm_env import specs -from gym import spaces -from gym.core import ObsType +from gymnasium import spaces +from gymnasium.core import ObsType, ActType def _spec_to_box(spec): @@ -100,23 +100,23 @@ class DMCWrapper(gym.Env): self._action_space.seed(seed) self._observation_space.seed(seed) - def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]: + def step(self, action: ActType) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict[str, Any]]: assert self._action_space.contains(action) extra = {'internal_state': self._env.physics.get_state().copy()} - time_step = self._env.step(action) reward = time_step.reward or 0. 
- done = time_step.last() + terminated = False + truncated = time_step.last() and time_step.discount > 0 obs = self._get_obs(time_step) extra['discount'] = time_step.discount - return obs, reward, done, extra + return obs, reward, terminated, truncated, extra - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: + def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ + -> Tuple[ObsType, Dict[str, Any]]: time_step = self._env.reset() obs = self._get_obs(time_step) - return obs + return obs, {} def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False, segmentation=False, scene_option=None, render_flag_overrides=None): diff --git a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py index f64ac4a..908cee1 100644 --- a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py +++ b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py @@ -35,4 +35,4 @@ class MPWrapper(RawInterfaceWrapper): @property def dt(self) -> Union[float, int]: - return self.env.dt + return self.env.control_timestep() diff --git a/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py b/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py index dc6a539..94f9041 100644 --- a/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py +++ b/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py @@ -31,4 +31,4 @@ class MPWrapper(RawInterfaceWrapper): @property def dt(self) -> Union[float, int]: - return self.env.dt + return self.env.control_timestep() diff --git a/fancy_gym/dmc/suite/cartpole/mp_wrapper.py b/fancy_gym/dmc/suite/cartpole/mp_wrapper.py index 7edd51f..85afa83 100644 --- a/fancy_gym/dmc/suite/cartpole/mp_wrapper.py +++ b/fancy_gym/dmc/suite/cartpole/mp_wrapper.py @@ -35,7 +35,7 @@ class MPWrapper(RawInterfaceWrapper): @property def dt(self) -> Union[float, int]: - return self.env.dt + return self.env.control_timestep() class TwoPolesMPWrapper(MPWrapper): diff --git a/fancy_gym/dmc/suite/reacher/mp_wrapper.py b/fancy_gym/dmc/suite/reacher/mp_wrapper.py index 5ac52e5..2d0aee5 100644 --- a/fancy_gym/dmc/suite/reacher/mp_wrapper.py +++ b/fancy_gym/dmc/suite/reacher/mp_wrapper.py @@ -30,4 +30,4 @@ class MPWrapper(RawInterfaceWrapper): @property def dt(self) -> Union[float, int]: - return self.env.dt + return self.env.control_timestep() From ec2063aa0b026b2e5792ccf80e0c9400c2deb7c0 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 12 Jan 2023 17:36:33 +0100 Subject: [PATCH 005/198] updated tests for dm_control --- test/test_dmc_envs.py | 14 ++++++++------ test/utils.py | 8 ++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py index 71b27a3..53119af 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc_envs.py @@ -11,21 +11,23 @@ from test.utils import run_env, run_env_determinism # SUITE_IDS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] # MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] DM_CONTROL_IDS = [spec.id for spec in gym.envs.registry.values() if - not isinstance(spec.entry_point, Callable) and spec.entry_point.startswith('dm_control/')] -DMC_MP_IDS = chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) + spec.id.startswith('dm_control/') + and 'compatibility-env-v0' not in spec.id + and 'lqr-lqr' not in spec.id] +DM_control_MP_IDS = 
chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) SEED = 1 @pytest.mark.parametrize('env_id', DM_CONTROL_IDS) def test_step_dm_control_functionality(env_id: str): """Tests that suite step environments run without errors using random actions.""" - run_env(env_id) + run_env(env_id, 1000) @pytest.mark.parametrize('env_id', DM_CONTROL_IDS) def test_step_dm_control_determinism(env_id: str): """Tests that for step environments identical seeds produce identical trajectories.""" - run_env_determinism(env_id, SEED) + run_env_determinism(env_id, SEED, 1000) # @pytest.mark.parametrize('env_id', MANIPULATION_IDS) @@ -40,13 +42,13 @@ def test_step_dm_control_determinism(env_id: str): # run_env_determinism(env_id, SEED) -@pytest.mark.parametrize('env_id', DMC_MP_IDS) +@pytest.mark.parametrize('env_id', DM_control_MP_IDS) def test_bb_dmc_functionality(env_id: str): """Tests that black box environments run without errors using random actions.""" run_env(env_id) -@pytest.mark.parametrize('env_id', DMC_MP_IDS) +@pytest.mark.parametrize('env_id', DM_control_MP_IDS) def test_bb_dmc_determinism(env_id: str): """Tests that for black box environment identical seeds produce identical trajectories.""" run_env_determinism(env_id, SEED) diff --git a/test/utils.py b/test/utils.py index a57e58e..56f739f 100644 --- a/test/utils.py +++ b/test/utils.py @@ -53,16 +53,16 @@ def run_env(env_id, iterations=None, seed=0, render=False): if terminated or truncated: break - assert terminated or truncated, "Termination or truncation flag is not True after end of episode." + assert terminated or truncated, f"Termination or truncation flag is not True after {i + 1} iterations." observations.append(obs) env.close() del env return np.array(observations), np.array(rewards), np.array(terminations), np.array(truncations), np.array(actions) -def run_env_determinism(env_id: str, seed: int): - traj1 = run_env(env_id, seed=seed) - traj2 = run_env(env_id, seed=seed) +def run_env_determinism(env_id: str, seed: int, iterations: int = None): + traj1 = run_env(env_id, iterations=iterations, seed=seed) + traj2 = run_env(env_id, iterations=iterations, seed=seed) # Iterate over two trajectories, which should have the same state and action sequence for i, time_step in enumerate(zip(*traj1, *traj2)): obs1, rwd1, term1, trunc1, ac1, obs2, rwd2, term2, trunc2, ac2 = time_step From c53924d9fc6a1628342b25b5bf951f5ef8a99cf2 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 17 Jan 2023 08:27:29 +0100 Subject: [PATCH 006/198] updated to new API, so tests still failing --- .../classic_control/base_reacher/base_reacher.py | 6 +----- .../classic_control/hole_reacher/hole_reacher.py | 10 ++++++---- fancy_gym/utils/env_compatibility.py | 11 +++++++++++ fancy_gym/utils/make_env_helpers.py | 10 ++++++---- test/test_dmc_envs.py | 6 +++--- test/test_fancy_envs.py | 4 ++-- test/test_gym_envs.py | 10 ++++++++-- test/test_metaworld_envs.py | 3 +-- test/utils.py | 16 +++++++++++----- 9 files changed, 49 insertions(+), 27 deletions(-) create mode 100644 fancy_gym/utils/env_compatibility.py diff --git a/fancy_gym/envs/classic_control/base_reacher/base_reacher.py b/fancy_gym/envs/classic_control/base_reacher/base_reacher.py index f0e0a3e..18305fd 100644 --- a/fancy_gym/envs/classic_control/base_reacher/base_reacher.py +++ b/fancy_gym/envs/classic_control/base_reacher/base_reacher.py @@ -55,7 +55,6 @@ class BaseReacherEnv(gym.Env): self.fig = None self._steps = 0 - self.seed() @property def dt(self) -> Union[float, int]: @@ -72,6 +71,7 @@ class 
BaseReacherEnv(gym.Env): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: # Sample only orientation of first link, i.e. the arm is always straight. + super(BaseReacherEnv, self).reset(seed=seed, options=options) try: random_start = options.get('random_start', self.random_start) except AttributeError: @@ -128,10 +128,6 @@ class BaseReacherEnv(gym.Env): def _terminate(self, info) -> bool: raise NotImplementedError - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - def close(self): super(BaseReacherEnv, self).close() del self.fig diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py index c3e5020..0ed03f2 100644 --- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py +++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py @@ -57,11 +57,16 @@ class HoleReacherEnv(BaseReacherDirectEnv): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: + + # initialize seed here as the random goal needs to be generated before the super reset() + gym.Env.reset(self, seed=seed, options=options) + self._generate_hole() self._set_patches() self.reward_function.reset() - return super().reset() + # do not provide seed to avoid setting it twice + return super(HoleReacherEnv, self).reset(options=options) def _get_reward(self, action: np.ndarray) -> (float, dict): return self.reward_function.get_reward(self) @@ -224,6 +229,3 @@ class HoleReacherEnv(BaseReacherDirectEnv): self.fig.gca().add_patch(left_block) self.fig.gca().add_patch(right_block) self.fig.gca().add_patch(hole_floor) - - - diff --git a/fancy_gym/utils/env_compatibility.py b/fancy_gym/utils/env_compatibility.py new file mode 100644 index 0000000..a278451 --- /dev/null +++ b/fancy_gym/utils/env_compatibility.py @@ -0,0 +1,11 @@ +import gymnasium as gym + + +class EnvCompatibility(gym.wrappers.EnvCompatibility): + def __getattr__(self, item): + """Propagate only non-existent properties to wrapped env.""" + if item.startswith('_'): + raise AttributeError("attempted to get missing private attribute '{}'".format(item)) + if item in self.__dict__: + return getattr(self, item) + return getattr(self.env, item) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 50aa38f..eb7b49c 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -3,12 +3,14 @@ import uuid from collections.abc import MutableMapping from copy import deepcopy from math import ceil -from typing import Iterable, Type, Union +from typing import Iterable, Type, Union, Optional import gymnasium as gym import numpy as np from gymnasium.envs.registration import register, registry +from fancy_gym.utils.env_compatibility import EnvCompatibility + try: from dm_control import suite, manipulation from shimmy.dm_control_compatibility import EnvType @@ -186,9 +188,9 @@ def make_bb( def get_env_duration(env: gym.Env): try: - # TODO Remove if this is in the compatibility class duration = env.spec.max_episode_steps * env.dt except (AttributeError, TypeError) as e: + # TODO Remove if this information is in the compatibility class logging.error(f'Attributes env.spec.max_episode_steps and env.dt are not available. ' f'Assuming you are using dm_control. 
Please make sure you have ran ' f'"pip install shimmy[dm_control]" for that.') @@ -300,7 +302,7 @@ def make_bb_env_helper(**kwargs): # return env -def make_metaworld(env_id: str, seed: int, **kwargs): +def make_metaworld(env_id: str, seed: int, render_mode: Optional[str] = None, **kwargs): if env_id not in metaworld.ML1.ENV_NAMES: raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.') @@ -314,7 +316,7 @@ def make_metaworld(env_id: str, seed: int, **kwargs): max_episode_steps = _env.max_path_length # TODO remove this as soon as there is support for the new API - _env = gym.wrappers.EnvCompatibility(_env) + _env = EnvCompatibility(_env, render_mode) gym_id = uuid.uuid4().hex + '-v1' diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py index 53119af..266a12f 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc_envs.py @@ -14,20 +14,20 @@ DM_CONTROL_IDS = [spec.id for spec in gym.envs.registry.values() if spec.id.startswith('dm_control/') and 'compatibility-env-v0' not in spec.id and 'lqr-lqr' not in spec.id] -DM_control_MP_IDS = chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) +DM_control_MP_IDS = list(chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) SEED = 1 @pytest.mark.parametrize('env_id', DM_CONTROL_IDS) def test_step_dm_control_functionality(env_id: str): """Tests that suite step environments run without errors using random actions.""" - run_env(env_id, 1000) + run_env(env_id, 5000, wrappers=[gym.wrappers.FlattenObservation]) @pytest.mark.parametrize('env_id', DM_CONTROL_IDS) def test_step_dm_control_determinism(env_id: str): """Tests that for step environments identical seeds produce identical trajectories.""" - run_env_determinism(env_id, SEED, 1000) + run_env_determinism(env_id, SEED, 5000, wrappers=[gym.wrappers.FlattenObservation]) # @pytest.mark.parametrize('env_id', MANIPULATION_IDS) diff --git a/test/test_fancy_envs.py b/test/test_fancy_envs.py index 02208ce..898cc08 100644 --- a/test/test_fancy_envs.py +++ b/test/test_fancy_envs.py @@ -1,4 +1,4 @@ -import itertools +from itertools import chain from typing import Callable import fancy_gym @@ -10,7 +10,7 @@ from test.utils import run_env, run_env_determinism CUSTOM_IDS = [id for id, spec in gym.envs.registry.items() if not isinstance(spec.entry_point, Callable) and "fancy_gym" in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] -CUSTOM_MP_IDS = itertools.chain(*fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) +CUSTOM_MP_IDS = list(chain(*fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) SEED = 1 diff --git a/test/test_gym_envs.py b/test/test_gym_envs.py index 20b089d..76b5c85 100644 --- a/test/test_gym_envs.py +++ b/test/test_gym_envs.py @@ -1,4 +1,6 @@ +import re from itertools import chain +from typing import Callable import gymnasium as gym import pytest @@ -7,8 +9,12 @@ import fancy_gym from test.utils import run_env, run_env_determinism GYM_IDS = [spec.id for spec in gym.envs.registry.values() if - "fancy_gym" not in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] -GYM_MP_IDS = chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) + not isinstance(spec.entry_point, Callable) and + "fancy_gym" not in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point + and 'jax' not in spec.id.lower() + and not re.match(r'GymV2.Environment', spec.id) + ] +GYM_MP_IDS = list(chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) SEED = 1 diff --git 
a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py index 768958d..55de621 100644 --- a/test/test_metaworld_envs.py +++ b/test/test_metaworld_envs.py @@ -8,8 +8,7 @@ from test.utils import run_env, run_env_determinism METAWORLD_IDS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] -METAWORLD_MP_IDS = chain(*fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) -print(METAWORLD_MP_IDS) +METAWORLD_MP_IDS = list(chain(*fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) SEED = 1 diff --git a/test/utils.py b/test/utils.py index 56f739f..51f0c37 100644 --- a/test/utils.py +++ b/test/utils.py @@ -1,9 +1,12 @@ +from typing import List, Type + import gymnasium as gym import numpy as np from fancy_gym import make -def run_env(env_id, iterations=None, seed=0, render=False): +def run_env(env_id: str, iterations: int = None, seed: int = 0, wrappers: List[Type[gym.Wrapper]] = [], + render: bool = False): """ Example for running a DMC based env in the step based setting. The env_id has to be specified as `dmc:domain_name-task_name` or @@ -13,12 +16,15 @@ def run_env(env_id, iterations=None, seed=0, render=False): env_id: Either `dmc:domain_name-task_name` or `dmc:manipulation-environment_name` iterations: Number of rollout steps to run seed: random seeding + wrappers: List of Wrappers to apply to the environment render: Render the episode Returns: observations, rewards, terminations, truncations, actions """ env: gym.Env = make(env_id, seed=seed) + for w in wrappers: + env = w(env) rewards = [] observations = [] actions = [] @@ -60,13 +66,13 @@ def run_env(env_id, iterations=None, seed=0, render=False): return np.array(observations), np.array(rewards), np.array(terminations), np.array(truncations), np.array(actions) -def run_env_determinism(env_id: str, seed: int, iterations: int = None): - traj1 = run_env(env_id, iterations=iterations, seed=seed) - traj2 = run_env(env_id, iterations=iterations, seed=seed) +def run_env_determinism(env_id: str, seed: int, iterations: int = None, wrappers: List[Type[gym.Wrapper]] = []): + traj1 = run_env(env_id, iterations=iterations, seed=seed, wrappers=wrappers) + traj2 = run_env(env_id, iterations=iterations, seed=seed, wrappers=wrappers) # Iterate over two trajectories, which should have the same state and action sequence for i, time_step in enumerate(zip(*traj1, *traj2)): obs1, rwd1, term1, trunc1, ac1, obs2, rwd2, term2, trunc2, ac2 = time_step - assert np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match." + assert np.allclose(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match." assert np.array_equal(ac1, ac2), f"Actions [{i}] {ac1} and {ac2} do not match." assert np.array_equal(rwd1, rwd2), f"Rewards [{i}] {rwd1} and {rwd2} do not match." assert np.array_equal(term1, term2), f"Terminateds [{i}] {term1} and {term2} do not match." 
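For illustration, the updated test helpers can be driven like this (a sketch
only; the environment id and iteration count are placeholders, and the
FlattenObservation wrapper is passed because the dm_control environments
expose dictionary observations):

    import gymnasium as gym
    from test.utils import run_env, run_env_determinism

    # Roll out random actions and collect the trajectory arrays.
    run_env('dm_control/reacher-easy-v0', iterations=1000, seed=1,
            wrappers=[gym.wrappers.FlattenObservation])
    # Two runs with the same seed must produce identical trajectories.
    run_env_determinism('dm_control/reacher-easy-v0', seed=1, iterations=1000,
                        wrappers=[gym.wrappers.FlattenObservation])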
From b69523931853aaaba7c05cd34c81bf460292898e Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 15 May 2023 16:32:45 +0200
Subject: [PATCH 007/198] num_basis is not implemented; tests are expected to
 fail

---
 test/test_black_box.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index 5ade1ae..1b9e8e2 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -158,7 +158,9 @@ def test_context_space(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapp
 
 @pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('num_dof', [0, 1, 2, 5])
-@pytest.mark.parametrize('num_basis', [0, 1, 2, 5])
+@pytest.mark.parametrize('num_basis', [
+    pytest.param(0, marks=pytest.mark.xfail(reason="Basis Length 0 is not yet implemented.")),
+    1, 2, 5])
 @pytest.mark.parametrize('learn_tau', [True, False])
 @pytest.mark.parametrize('learn_delay', [True, False])
 def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: bool, learn_delay: bool):

From 9de1257e1c323e47b118728e122dddfdded097ce Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 15 May 2023 16:55:53 +0200
Subject: [PATCH 008/198] Updating gym(nasium) and metaworld; stricter versions

---
 setup.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 5993519..ea6331b 100644
--- a/setup.py
+++ b/setup.py
@@ -7,9 +7,11 @@ from setuptools import setup, find_packages
 # Environment-specific dependencies for dmc and metaworld
 extras = {
     "dmc": ["dm_control>=1.0.1"],
-    "metaworld": ["metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld",
+    "metaworld": ["metaworld @ git+https://github.com/rlworkgroup/metaworld.git@3ced29c8cee6445386eba32e92870d664ad5e6e3#egg=metaworld",
                   'mujoco-py<2.2,>=2.1',
-                  'scipy'
+                  'gym>=0.15.4',
+                  'numpy>=1.18',
+                  'scipy>=1.4.1',
                   ],
 }
 
@@ -30,7 +32,7 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]:
 setup(
     author='Fabian Otto, Onur Celik',
     name='fancy_gym',
-    version='0.2',
+    version='0.3',
     classifiers=[
         'Development Status :: 3 - Alpha',
         'Intended Audience :: Science/Research',
@@ -46,7 +48,7 @@ setup(
     ],
     extras_require=extras,
     install_requires=[
-        'gym[mujoco]<0.25.0,>=0.24.1',
+        'gymnasium>=0.26.0',
         'mp_pytorch<=0.1.3'
     ],
     packages=[package for package in find_packages() if package.startswith("fancy_gym")],
From 2cbfff7919049502fe4224c8cdaa2a3752c1f55b Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 15 May 2023 17:09:52 +0200
Subject: [PATCH 009/198] Added box2d as a dependency

---
 setup.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/setup.py b/setup.py
index ea6331b..52aba06 100644
--- a/setup.py
+++ b/setup.py
@@ -6,25 +6,25 @@ from setuptools import setup, find_packages
 
 # Environment-specific dependencies for dmc and metaworld
 extras = {
-    "dmc": ["dm_control>=1.0.1"],
-    "metaworld": ["metaworld @ git+https://github.com/rlworkgroup/metaworld.git@3ced29c8cee6445386eba32e92870d664ad5e6e3#egg=metaworld",
+    'dmc': ['dm_control>=1.0.1'],
+    'metaworld': ['metaworld @ git+https://github.com/rlworkgroup/metaworld.git@3ced29c8cee6445386eba32e92870d664ad5e6e3#egg=metaworld',
                   'mujoco-py<2.2,>=2.1',
                   ],
+    'box2d': ['gymnasium[box2d]>=0.26.0'],
 }
 
 # All dependencies
 all_groups = set(extras.keys())
-extras["all"] = list(set(itertools.chain.from_iterable(map(lambda group: extras[group], all_groups))))
+extras["all"] = list(set(itertools.chain.from_iterable(
+    map(lambda group: extras[group], all_groups))))
 
 
 def find_package_data(extensions_to_include: List[str]) -> List[str]:
     envs_dir = Path("fancy_gym/envs/mujoco")
     package_data_paths = []
     for extension in extensions_to_include:
-        package_data_paths.extend([str(path)[10:] for path in envs_dir.rglob(extension)])
+        package_data_paths.extend([str(path)[10:]
+                                   for path in envs_dir.rglob(extension)])
 
     return package_data_paths
 
@@ -51,7 +51,8 @@ setup(
         'gymnasium>=0.26.0',
         'mp_pytorch<=0.1.3'
     ],
-    packages=[package for package in find_packages() if package.startswith("fancy_gym")],
+    packages=[package for package in find_packages(
+    ) if package.startswith("fancy_gym")],
     package_data={
         "fancy_gym": find_package_data(extensions_to_include=["*.stl", "*.xml"])
     },
From 1c092d01bde3b2888766f2d034215080df6d95e5 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 15 May 2023 17:11:14 +0200
Subject: [PATCH 010/198] Optional Dependency: PyTest

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 52aba06..f40c7c3 100644
--- a/setup.py
+++ b/setup.py
@@ -11,6 +11,7 @@ extras = {
         'mujoco-py<2.2,>=2.1',
     ],
     'box2d': ['gymnasium[box2d]>=0.26.0'],
+    'testing': ['pytest'],
 }
 
 # All dependencies
From 826f22b18e605e0354a97f966dda0c7fceab2292 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 15 May 2023 17:11:57 +0200
Subject: [PATCH 011/198] Update URL for Metaworld (rlworkgroup -> Farama)

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f40c7c3..5720ca1 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ from setuptools import setup, find_packages
 # Environment-specific dependencies for dmc and metaworld
 extras = {
     'dmc': ['dm_control>=1.0.1'],
-    'metaworld': ['metaworld @ git+https://github.com/rlworkgroup/metaworld.git@3ced29c8cee6445386eba32e92870d664ad5e6e3#egg=metaworld',
+    'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@3ced29c8cee6445386eba32e92870d664ad5e6e3#egg=metaworld',
                   'mujoco-py<2.2,>=2.1',
                   ],
     'box2d': ['gymnasium[box2d]>=0.26.0'],
From a559f92562b2461b565a3be5f62bb1d7dd6ffefc Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 18 May 2023 17:31:40 +0200
Subject: [PATCH 012/198] Adapted test to new gym interface

In previous gym versions, executing a step returned

    obs, reward, done, info = env.step(...)

With the switch to gymnasium this has changed to

    obs, reward, terminated, truncated, info = env.step(...)

We also made the code a bit more self-explanatory.
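For reference, a full episode loop under the new interface looks like this
(a minimal sketch; 'Reacher5d-v0' stands in for any registered env id, and
the tuple return of reset() follows the gymnasium convention):

    import fancy_gym

    env = fancy_gym.make('Reacher5d-v0', seed=1)
    obs, info = env.reset()
    done = False
    while not done:
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        # An episode now ends when either flag is set.
        done = terminated or truncated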
---
 test/test_black_box.py             |  25 ++++---
 test/test_metaworld_envs.py        |   4 --
 test/test_replanning_sequencing.py | 109 ++++++++++++++++-------------
 3 files changed, 78 insertions(+), 60 deletions(-)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index 1b9e8e2..7d33a30 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -94,11 +94,13 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]
                            {'phase_generator_type': 'exp'},
                            {'basis_generator_type': basis_generator_type})
     env.reset()
-    info_keys = list(env.step(env.action_space.sample())[3].keys())
+    _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample())
+    info_keys = list(info.keys())
 
     env_step = fancy_gym.make(env_id, SEED)
     env_step.reset()
-    info_keys_step = env_step.step(env_step.action_space.sample())[3].keys()
+    _obs, _reward, _terminated, _truncated, info = env_step.step(env_step.action_space.sample())
+    info_keys_step = info.keys()
 
     assert all(e in info_keys for e in info_keys_step)
     assert 'trajectory_length' in info_keys
@@ -122,7 +124,8 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):
 
     for _ in range(5):
         env.reset()
-        length = env.step(env.action_space.sample())[3]['trajectory_length']
+        _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample())
+        length = info['trajectory_length']
 
         assert length == env.spec.max_episode_steps
@@ -138,7 +141,8 @@ def test_aggregation(mp_type: str, reward_aggregation: Callable[[np.ndarray], fl
                            {'basis_generator_type': basis_generator_type})
     env.reset()
     # ToyEnv only returns 1 as reward
-    assert env.step(env.action_space.sample())[1] == reward_aggregation(np.ones(50, ))
+    _obs, reward, _terminated, _truncated, _info = env.step(env.action_space.sample())
+    assert reward == reward_aggregation(np.ones(50, ))
@@ -250,6 +254,8 @@ def test_learn_tau(mp_type: str, tau: float):
         assert np.all(vel[:tau_time_steps - 2] != vel[-1])
 # #
+
+
 @pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
 @pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75])
 def test_learn_delay(mp_type: str, delay: float):
@@ -292,6 +298,8 @@ def test_learn_delay(mp_type: str, delay: float):
         assert np.all(vel[max(1, delay_time_steps)] != vel[0])
 # #
+
+
 @pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
 @pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1])
 @pytest.mark.parametrize('delay', [0.25, 0.5, 0.75, 1])
@@ -312,15 +320,16 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float):
     if env.spec.max_episode_steps * env.dt < delay + tau:
         return
 
-    d = True
+    done = True
     for i in range(5):
-        if d:
+        if done:
             env.reset()
 
         action = env.action_space.sample()
         action[0] = tau
         action[1] = delay
-        obs, r, d, info = env.step(action)
+        _obs, _reward, terminated, truncated, info = env.step(action)
+        done = terminated or truncated
 
         length = info['trajectory_length']
         assert length == env.spec.max_episode_steps
@@ -345,4 +354,4 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float):
         active_pos = pos[delay_time_steps: joint_time_steps - 1]
         active_vel = vel[delay_time_steps: joint_time_steps - 2]
         assert np.all(active_pos != pos[-1]) and np.all(active_pos != pos[0])
-        assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0])
\ No newline at end of file
+        assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0])
diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py
index 77d0c35..55de621 100644
---
a/test/test_metaworld_envs.py +++ b/test/test_metaworld_envs.py @@ -8,11 +8,7 @@ from test.utils import run_env, run_env_determinism METAWORLD_IDS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] -<<<<<<< HEAD -METAWORLD_MP_IDS = chain(*fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) -======= METAWORLD_MP_IDS = list(chain(*fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) ->>>>>>> 47-update-to-new-gym-api SEED = 1 diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index 9d04d02..b76d6a9 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -79,13 +79,14 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter # This also verifies we are not adding the TimeAwareObservationWrapper twice assert env.observation_space == env_step.observation_space - d = True + done = True for i in range(25): - if d: + if done: env.reset() action = env.action_space.sample() - obs, r, d, info = env.step(action) + _obs, _reward, terminated, truncated, info = env.step(action) + done = terminated or truncated length = info['trajectory_length'] @@ -112,7 +113,7 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra if add_time_aware_wrapper_before: wrappers += [TimeAwareObservation] - replanning_schedule = lambda c_pos, c_vel, obs, c_action, t: t % replanning_time == 0 + def replanning_schedule(c_pos, c_vel, obs, c_action, t): return t % replanning_time == 0 basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' phase_generator_type = 'exp' if 'dmp' in mp_type else 'linear' @@ -134,18 +135,20 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra # Make 3 episodes, total steps depend on the replanning steps for i in range(3 * episode_steps): action = env.action_space.sample() - obs, r, d, info = env.step(action) + _obs, _reward, terminated, truncated, info = env.step(action) + done = terminated or truncated length = info['trajectory_length'] - if d: + if done: # Check if number of steps until termination match the replanning interval - print(d, (i + 1), episode_steps) + print(done, (i + 1), episode_steps) assert (i + 1) % episode_steps == 0 env.reset() assert replanning_schedule(None, None, None, None, length) + @pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) @pytest.mark.parametrize('sub_segment_steps', [5, 10]) @@ -167,13 +170,16 @@ def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_s }, seed=SEED) _ = env.reset() - d = False + done = False planning_times = 0 - while not d: - _, _, d, _ = env.step(env.action_space.sample()) + while not done: + action = env.action_space.sample() + _obs, _reward, terminated, truncated, _info = env.step(action) + done = terminated or truncated planning_times += 1 assert planning_times == max_planning_times + @pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) @pytest.mark.parametrize('sub_segment_steps', [5, 10]) @@ -196,15 +202,17 @@ def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_se }, seed=SEED) _ = env.reset() - d = False + done = False planning_times = 0 - while not d: + while not done: action = env.action_space.sample() action[0] = tau - _, _, d, info = env.step(action) + _obs, _reward, terminated, truncated, _info = 
env.step(action) + done = terminated or truncated planning_times += 1 assert planning_times == max_planning_times + @pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) @pytest.mark.parametrize('sub_segment_steps', [5, 10]) @@ -213,26 +221,27 @@ def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_ basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], - {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, - 'max_planning_times': max_planning_times, - 'verbose': 2}, - {'trajectory_generator_type': mp_type, - }, - {'controller_type': 'motor'}, - {'phase_generator_type': phase_generator_type, - 'learn_tau': False, - 'learn_delay': True - }, - {'basis_generator_type': basis_generator_type, - }, - seed=SEED) + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': True + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) _ = env.reset() - d = False + done = False planning_times = 0 - while not d: + while not done: action = env.action_space.sample() action[0] = delay - _, _, d, info = env.step(action) + _obs, _reward, terminated, truncated, info = env.step(action) + done = terminated or truncated delay_time_steps = int(np.round(delay / env.dt)) pos = info['positions'].flatten() @@ -256,6 +265,7 @@ def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_ assert planning_times == max_planning_times + @pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('max_planning_times', [1, 2, 3]) @pytest.mark.parametrize('sub_segment_steps', [5, 10, 15]) @@ -266,27 +276,28 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], - {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, - 'max_planning_times': max_planning_times, - 'verbose': 2}, - {'trajectory_generator_type': mp_type, - }, - {'controller_type': 'motor'}, - {'phase_generator_type': phase_generator_type, - 'learn_tau': True, - 'learn_delay': True - }, - {'basis_generator_type': basis_generator_type, - }, - seed=SEED) + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': True, + 'learn_delay': True + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) _ = env.reset() - d = False + done = False planning_times = 0 - while not d: + while not done: action = env.action_space.sample() action[0] = tau action[1] = delay - _, _, d, info = env.step(action) + _obs, _reward, terminated, truncated, info = env.step(action) + done = terminated or truncated delay_time_steps = int(np.round(delay / env.dt)) @@ -306,6 +317,7 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i assert 
planning_times == max_planning_times
 
+
 @pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
 @pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
 @pytest.mark.parametrize('sub_segment_steps', [5, 10])
@@ -327,7 +339,8 @@ def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_
                             },
                             seed=SEED)
     _ = env.reset()
-    d = False
     for i in range(max_planning_times):
-        _, _, d, _ = env.step(env.action_space.sample())
-    assert d
+        action = env.action_space.sample()
+        _obs, _reward, terminated, truncated, _info = env.step(action)
+        done = terminated or truncated
+    assert done

From 1e62da0833af963f188dc86e9a10fcb61528ec36 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 18 May 2023 18:03:42 +0200
Subject: [PATCH 013/198] Use shimmy as a binding for dmc, also 'testing'
 separate from 'all'

pip install .[all] will no longer install components only required for
testing. pip install .[testing] will also install all components
required to run all tests.
---
 setup.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 7afd526..c477a8a 100644
--- a/setup.py
+++ b/setup.py
@@ -6,13 +6,12 @@ from setuptools import setup, find_packages
 
 # Environment-specific dependencies for dmc and metaworld
 extras = {
-    'dmc': ['dm_control>=1.0.1'],
+    'dmc': ['dm_control>=1.0.1', 'shimmy[dm-control]'],
     'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@3ced29c8cee6445386eba32e92870d664ad5e6e3#egg=metaworld',
                   'mujoco-py<2.2,>=2.1',
                   ],
     'box2d': ['gymnasium[box2d]>=0.26.0'],
-    'testing': ['pytest'],
-    "mujoco": ["gymnasium[mujoco]"],
+    'mujoco': ['gymnasium[mujoco]>0.26.0'],
 }
 
 # All dependencies
@@ -20,6 +19,8 @@ all_groups = set(extras.keys())
 extras["all"] = list(set(itertools.chain.from_iterable(
     map(lambda group: extras[group], all_groups))))
 
+extras['testing'] = extras["all"] + ['pytest']
+
 
 def find_package_data(extensions_to_include: List[str]) -> List[str]:
     envs_dir = Path("fancy_gym/envs/mujoco")
From d4a8306b9d13df859045f4ff2b2dccd54461f114 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 18 May 2023 19:07:19 +0200
Subject: [PATCH 014/198] Bug Fix: PyTests still used binding to old gym

---
 test/test_black_box.py             | 6 +++---
 test/test_replanning_sequencing.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index 1b9e8e2..ef397f1 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -1,11 +1,11 @@
 from itertools import chain
 from typing import Tuple, Type, Union, Optional, Callable
 
-import gym
+import gymnasium as gym
 import numpy as np
 import pytest
-from gym import register
-from gym.core import ActType, ObsType
+from gymnasium import register
+from gymnasium.core import ActType, ObsType
 
 import fancy_gym
 from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index b76d6a9..1fd5a84 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -2,11 +2,11 @@ from itertools import chain
 from types import FunctionType
 from typing import Tuple, Type, Union, Optional
 
-import gym
+import gymnasium as gym
 import numpy as np
 import pytest
-from gym import register
-from gym.core import ActType, ObsType
+from gymnasium import register
+from gymnasium.core import ActType, ObsType
 
 import fancy_gym
 from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
From 0dfd8e18e42cb757303dbb67292750e2fddbfa23 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 18 May 2023 19:08:11 +0200
Subject: [PATCH 015/198] Fixed Bug: env_id was broken when the prefix is
 handled by another library (e.g. 'dmc:...' by shimmy)

---
 fancy_gym/utils/make_env_helpers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index 709a4f7..ff76ca2 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -77,13 +77,13 @@ def make(env_id: str, seed: int, **kwargs):
 
     if ':' in env_id:
         split_id = env_id.split(':')
-        framework, env_id = split_id[-2:]
+        framework, framework_env_id = split_id[-2:]
     else:
         framework = None
 
     if framework == 'metaworld':
         # MetaWorld environment
-        env = make_metaworld(env_id, seed, **kwargs)
+        env = make_metaworld(framework_env_id, seed, **kwargs)
     # elif framework == 'dmc':
         # Deprecated: With shimmy gym now has native support for deepmind envs
     #     # DeepMind Control environment
     #     env = make_dmc(env_id, seed, **kwargs)
From 21d5ebb442b46924125be6cac788c7ba53e04641 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 18 May 2023 19:13:35 +0200
Subject: [PATCH 016/198] Fixed Bug: ToyEnv did not follow new gym spec

---
 test/test_black_box.py             | 6 ++++--
 test/test_replanning_sequencing.py | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index ef397f1..c408079 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -32,10 +32,12 @@ class ToyEnv(gym.Env):
 
     def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
               options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]:
-        return np.array([-1])
+        obs, options = np.array([-1]), {}
+        return obs, options
 
     def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
-        return np.array([-1]), 1, False, {}
+        obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
+        return obs, reward, terminated, truncated, info
 
     def render(self, mode="human"):
         pass
diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index 1fd5a84..1a35f99 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -26,10 +26,12 @@ class ToyEnv(gym.Env):
 
     def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
              options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]:
-        return np.array([-1])
+        obs, options = np.array([-1]), {}
+        return obs, options
 
     def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
-        return np.array([-1]), 1, False, {}
+        obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
+        return obs, reward, terminated, truncated, info
 
     def render(self, mode="human"):
         pass
From 07de6550250358ea3034bf4974272a3c354d9163 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 18 May 2023 19:50:19 +0200
Subject: [PATCH 017/198] Allow dmc envs to be accessed via dmc:...
(for backwards compatibility)

---
 fancy_gym/utils/make_env_helpers.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index ff76ca2..f5b5459 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -84,10 +84,11 @@ def make(env_id: str, seed: int, **kwargs):
     if framework == 'metaworld':
         # MetaWorld environment
         env = make_metaworld(framework_env_id, seed, **kwargs)
-    # elif framework == 'dmc':
-    # Deprecated: With shimmy gym now has native support for deepmind envs
-    #    # DeepMind Control environment
-    #    env = make_dmc(env_id, seed, **kwargs)
+    elif framework == 'dmc':
+        # DeepMind Control environment
+        # ensures legacy compatibility:
+        # shimmy expects dm_control/..., while we used dmc:... in the past
+        env = make_gym('dm_control/'+framework_env_id, seed, **kwargs)
     else:
         env = make_gym(env_id, seed, **kwargs)

From bf3ed8a06c4164e93ba5f3ec70683fe51fecd5db Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Fri, 19 May 2023 13:59:54 +0200
Subject: [PATCH 018/198] Fix: TimeAwareObservation did not support float64
 and tried to access a forbidden private attribute.

---
 fancy_gym/utils/time_aware_observation.py | 59 ++++++++++++++++++++---
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py
index 192138d..c6b16f1 100644
--- a/fancy_gym/utils/time_aware_observation.py
+++ b/fancy_gym/utils/time_aware_observation.py
@@ -1,20 +1,65 @@
+from gymnasium.spaces import Box
 import gymnasium as gym
 import numpy as np


-class TimeAwareObservation(gym.wrappers.TimeAwareObservation):
+class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorArgs):
+    """Augment the observation with the current time step in the episode.

-    def __init__(self, env: gym.Env):
-        super().__init__(env)
-        self._max_episode_steps = env.spec.max_episode_steps
+    The observation space of the wrapped environment is assumed to be a flat :class:`Box`.
+    In particular, pixel observations are not supported. This wrapper will append the current timestep within the current episode to the observation.
+    The timestep will be indicated as a number between 0 and 1.
+    """
+
+    def __init__(self, env: gym.Env, enforce_dtype_float32=False):
+        """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box` observation space.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
+        gym.utils.RecordConstructorArgs.__init__(self)
+        gym.ObservationWrapper.__init__(self, env)
+        assert isinstance(env.observation_space, Box)
+        if enforce_dtype_float32:
+            assert env.observation_space.dtype == np.float32,
+            'TimeAwareObservation was given an environment with a dtype!=np.float32 ('+str(env.observation_space.dtype)+'). This requirement can be removed by setting enforce_dtype_float32=False.'
+        dtype = env.observation_space.dtype
+        low = np.append(self.observation_space.low, 0.0)
+        high = np.append(self.observation_space.high, np.inf)
+        self.observation_space = Box(low, high, dtype=dtype)
+        self.is_vector_env = getattr(env, "is_vector_env", False)

     def observation(self, observation):
-        """Adds to the observation with the current time step normalized with max steps.
+        """Adds to the observation with the current time step.
Args: observation: The observation to add the time step to Returns: - The observation with the time step appended to + The observation with the time step appended to (relative to total number of steps) """ - return np.append(observation, self.t / self._max_episode_steps) + return np.append(observation, self.t / getattr(self.env, '_max_episode_steps') + + def step(self, action): + """Steps through the environment, incrementing the time step. + + Args: + action: The action to take + + Returns: + The environment's step using the action. + """ + self.t += 1 + return super().step(action) + + def reset(self, **kwargs): + """Reset the environment setting the time to zero. + + Args: + **kwargs: Kwargs to apply to env.reset() + + Returns: + The reset environment + """ + self.t=0 + return super().reset(**kwargs) From e75ab89a376a3152c614eadd80d0e95b87a54f8a Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 19 May 2023 14:01:31 +0200 Subject: [PATCH 019/198] Ported box_pushing to new mujoco bindings --- fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 275bba1..2408404 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -13,6 +13,7 @@ MAX_EPISODE_STEPS_BOX_PUSHING = 100 BOX_POS_BOUND = np.array([[0.3, -0.45, -0.01], [0.6, 0.45, -0.01]]) + class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): """ franka box pushing environment @@ -41,8 +42,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): self._episode_energy = 0. MujocoEnv.__init__(self, model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"), - frame_skip=self.frame_skip, - mujoco_bindings="mujoco") + frame_skip=self.frame_skip) self.action_space = spaces.Box(low=-1, high=1, shape=(7,)) def step(self, action): @@ -246,7 +246,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): old_err_norm = err_norm - ### get Jacobian by mujoco + # get Jacobian by mujoco self.data.qpos[:7] = q mujoco.mj_forward(self.model, self.data) @@ -280,6 +280,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): return q + class BoxPushingDense(BoxPushingEnvBase): def __init__(self, frame_skip: int = 10): super(BoxPushingDense, self).__init__(frame_skip=frame_skip) @@ -295,7 +296,7 @@ class BoxPushingDense(BoxPushingEnvBase): energy_cost = -0.0005 * np.sum(np.square(action)) reward = joint_penalty + tcp_box_dist_reward + \ - box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost + box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost rod_inclined_angle = rotation_distance(rod_quat, self._desired_rod_quat) if rod_inclined_angle > np.pi / 4: @@ -303,6 +304,7 @@ class BoxPushingDense(BoxPushingEnvBase): return reward + class BoxPushingTemporalSparse(BoxPushingEnvBase): def __init__(self, frame_skip: int = 10): super(BoxPushingTemporalSparse, self).__init__(frame_skip=frame_skip) @@ -331,6 +333,7 @@ class BoxPushingTemporalSparse(BoxPushingEnvBase): return reward + class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase): def __init__(self, frame_skip: int = 10): From e70a56a7e310a6ef546f4cdcd481caf716053dd0 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 19 May 2023 14:04:19 +0200 Subject: [PATCH 020/198] Fixed: Typos --- fancy_gym/utils/time_aware_observation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py index c6b16f1..c1aea7f 100644 --- a/fancy_gym/utils/time_aware_observation.py +++ b/fancy_gym/utils/time_aware_observation.py @@ -21,8 +21,8 @@ class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorAr gym.ObservationWrapper.__init__(self, env) assert isinstance(env.observation_space, Box) if enforce_dtype_float32: - assert env.observation_space.dtype == np.float32, - 'TimeAwareObservation was given an environment with a dtype!=np.float32 ('+str(env.observation_space.dtype)+'). This requirement can be removed by setting enforce_dtype_float32=False.' + assert env.observation_space.dtype == np.float32, 'TimeAwareObservation was given an environment with a dtype!=np.float32 ('+str( + env.observation_space.dtype)+'). This requirement can be removed by setting enforce_dtype_float32=False.' dtype = env.observation_space.dtype low = np.append(self.observation_space.low, 0.0) high = np.append(self.observation_space.high, np.inf) @@ -38,7 +38,7 @@ class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorAr Returns: The observation with the time step appended to (relative to total number of steps) """ - return np.append(observation, self.t / getattr(self.env, '_max_episode_steps') + return np.append(observation, self.t / getattr(self.env, '_max_episode_steps')) def step(self, action): """Steps through the environment, incrementing the time step. @@ -61,5 +61,5 @@ class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorAr Returns: The reset environment """ - self.t=0 + self.t = 0 return super().reset(**kwargs) From 3bbf101e86af565ff1d6724e7cba4daa01caf8fc Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 19 May 2023 14:53:04 +0200 Subject: [PATCH 021/198] Fixed: test_black_box.py still used old gym specs in two places --- test/test_black_box.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index c408079..74985ac 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -12,7 +12,7 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from fancy_gym.utils.time_aware_observation import TimeAwareObservation SEED = 1 -ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch', 'metaworld:reach-v2', 'Reacher-v2'] +ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2'] WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper, fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) @@ -229,14 +229,15 @@ def test_learn_tau(mp_type: str, tau: float): {'basis_generator_type': basis_generator_type, }, seed=SEED) - d = True + done = True for i in range(5): - if d: + if done: env.reset() action = env.action_space.sample() action[0] = tau - obs, r, d, info = env.step(action) + _obs, _reward, terminated, truncated, info = env.step(action) + done = terminated or truncated length = info['trajectory_length'] assert length == env.spec.max_episode_steps @@ -274,14 +275,15 @@ def test_learn_delay(mp_type: str, delay: float): {'basis_generator_type': basis_generator_type, }, seed=SEED) - d = True + done = True for i in range(5): - if d: + if done: env.reset() action = env.action_space.sample() action[0] = delay - obs, r, d, info = env.step(action) + _obs, _reward, 
terminated, truncated, info = env.step(action)
+        done = terminated or truncated

         length = info['trajectory_length']

     assert length == env.spec.max_episode_steps

From bc9d4cf19db8922c9bac48cce105797b6bf2be67 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Fri, 19 May 2023 14:54:13 +0200
Subject: [PATCH 022/198] More verbose errors for run_env_determinism

---
 test/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/utils.py b/test/utils.py
index 0855f37..2402f98 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -76,7 +76,7 @@ def run_env_determinism(env_id: str, seed: int, iterations: int = None, wrappers
     for i, time_step in enumerate(zip(*traj1, *traj2)):
         obs1, rwd1, term1, trunc1, ac1, obs2, rwd2, term2, trunc2, ac2 = time_step
         assert np.allclose(
-            obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match."
+            obs1, obs2), f"Observations [{i}] {obs1} ({obs1.shape}) and {obs2} ({obs2.shape}) do not match."
         assert np.array_equal(
             ac1, ac2), f"Actions [{i}] {ac1} and {ac2} do not match."
         assert np.array_equal(

From 1c002a235b2e9f2ac7f0797a970d480956ed2760 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Fri, 19 May 2023 15:17:28 +0200
Subject: [PATCH 023/198] Warning no longer applicable

---
 fancy_gym/utils/make_env_helpers.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index f5b5459..42096f3 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -200,10 +200,6 @@ def get_env_duration(env: gym.Env):
     try:
         duration = env.spec.max_episode_steps * env.dt
     except (AttributeError, TypeError) as e:
-        # TODO Remove if this information is in the compatibility class
-        logging.error(f'Attributes env.spec.max_episode_steps and env.dt are not available. '
-                      f'Assuming you are using dm_control. Please make sure you have ran '
-                      f'"pip install shimmy[dm_control]" for that.')
         if env.env_type is EnvType.COMPOSER:
             max_episode_steps = ceil(env.unwrapped._time_limit / env.dt)
         elif env.env_type is EnvType.RL_CONTROL:

From dabfc7cafe3facb4e260b91c2f2df5ba3c7ac667 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Fri, 19 May 2023 15:18:14 +0200
Subject: [PATCH 024/198] Adapted Mujoco Envs to new gymnasium spec

Gymnasium Mujoco Envs no longer allow overriding the used xml_file.
We therefore implement intermediate classes that reimplement this feature.
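Each of these intermediate classes follows the same pattern: re-accept an
xml_file argument, construct the observation space by hand, and pass both
explicitly to MujocoEnv.__init__. A minimal sketch of the pattern, assuming
gymnasium's MujocoEnv signature with an explicit observation_space (as used
in the diffs below); the CustomXMLEnv name and the 17-dimensional shape are
illustrative only:

    import numpy as np
    from gymnasium import utils
    from gymnasium.envs.mujoco import MujocoEnv
    from gymnasium.spaces import Box


    class CustomXMLEnv(MujocoEnv, utils.EzPickle):
        # Re-exposes xml_file, which gymnasium's stock mujoco envs no longer accept.
        def __init__(self, xml_file, frame_skip=5, **kwargs):
            utils.EzPickle.__init__(self, xml_file, frame_skip, **kwargs)
            # Newer gymnasium versions require the observation space up front.
            observation_space = Box(low=-np.inf, high=np.inf,
                                    shape=(17,), dtype=np.float64)
            MujocoEnv.__init__(self, xml_file, frame_skip,
                               observation_space=observation_space, **kwargs)

Environment-specific subclasses (e.g. the jump variants) then only override
rewards and observations, exactly as before.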
--- .../half_cheetah_jump/half_cheetah_jump.py | 60 ++++++++++++- .../envs/mujoco/hopper_jump/hopper_jump.py | 88 ++++++++++++++++++- .../envs/mujoco/hopper_throw/hopper_throw.py | 6 +- .../hopper_throw/hopper_throw_in_basket.py | 9 +- .../mujoco/walker_2d_jump/walker_2d_jump.py | 68 +++++++++++++- 5 files changed, 212 insertions(+), 19 deletions(-) diff --git a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py index 853c5e7..f4bc677 100644 --- a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py +++ b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py @@ -3,12 +3,66 @@ from typing import Tuple, Union, Optional, Any, Dict import numpy as np from gymnasium.core import ObsType -from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv +from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv, DEFAULT_CAMERA_CONFIG + +from gymnasium import utils +from gymnasium.envs.mujoco import MujocoEnv +from gymnasium.spaces import Box MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100 -class HalfCheetahJumpEnv(HalfCheetahEnv): +class HalfCheetahEnvCustomXML(HalfCheetahEnv): + + def __init__( + self, + xml_file, + forward_reward_weight=1.0, + ctrl_cost_weight=0.1, + reset_noise_scale=0.1, + exclude_current_positions_from_observation=True, + **kwargs, + ): + utils.EzPickle.__init__( + self, + xml_file, + forward_reward_weight, + ctrl_cost_weight, + reset_noise_scale, + exclude_current_positions_from_observation, + **kwargs, + ) + + self._forward_reward_weight = forward_reward_weight + + self._ctrl_cost_weight = ctrl_cost_weight + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + + MujocoEnv.__init__( + self, + xml_file, + 5, + observation_space=observation_space, + default_camera_config=DEFAULT_CAMERA_CONFIG, + **kwargs, + ) + + +class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML): """ _ctrl_cost_weight 0.1 -> 0.0 """ @@ -41,7 +95,7 @@ class HalfCheetahJumpEnv(HalfCheetahEnv): height_after = self.get_body_com("torso")[2] self.max_height = max(height_after, self.max_height) - ## Didnt use fell_over, because base env also has no done condition - Paul and Marc + # Didnt use fell_over, because base env also has no done condition - Paul and Marc # fell_over = abs(self.sim.data.qpos[2]) > 2.5 # how to figure out if the cheetah fell over? -> 2.5 oke? # TODO: Should a fall over be checked here? 
terminated = False diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py index 8ee4b11..53d9265 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py @@ -1,12 +1,92 @@ import os import numpy as np -from gymnasium.envs.mujoco.hopper_v4 import HopperEnv +from gymnasium.envs.mujoco.hopper_v4 import HopperEnv, DEFAULT_CAMERA_CONFIG + +from gymnasium import utils +from gymnasium.envs.mujoco import MujocoEnv +from gymnasium.spaces import Box MAX_EPISODE_STEPS_HOPPERJUMP = 250 -class HopperJumpEnv(HopperEnv): +class HopperEnvCustomXML(HopperEnv): + """ + Initialization changes to normal Hopper: + - terminate_when_unhealthy: True -> False + - healthy_reward: 1.0 -> 2.0 + - healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf')) + - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf')) + - exclude_current_positions_from_observation: True -> False + """ + + def __init__( + self, + xml_file, + forward_reward_weight=1.0, + ctrl_cost_weight=1e-3, + healthy_reward=1.0, + terminate_when_unhealthy=True, + healthy_state_range=(-100.0, 100.0), + healthy_z_range=(0.7, float("inf")), + healthy_angle_range=(-0.2, 0.2), + reset_noise_scale=5e-3, + exclude_current_positions_from_observation=True, + **kwargs, + ): + xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file) + utils.EzPickle.__init__( + self, + xml_file, + forward_reward_weight, + ctrl_cost_weight, + healthy_reward, + terminate_when_unhealthy, + healthy_state_range, + healthy_z_range, + healthy_angle_range, + reset_noise_scale, + exclude_current_positions_from_observation, + **kwargs + ) + + self._forward_reward_weight = forward_reward_weight + + self._ctrl_cost_weight = ctrl_cost_weight + + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + + self._healthy_state_range = healthy_state_range + self._healthy_z_range = healthy_z_range + self._healthy_angle_range = healthy_angle_range + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64 + ) + + MujocoEnv.__init__( + self, + xml_file, + 4, + observation_space=observation_space, + default_camera_config=DEFAULT_CAMERA_CONFIG, + **kwargs, + ) + + +class HopperJumpEnv(HopperEnvCustomXML): """ Initialization changes to normal Hopper: - terminate_when_unhealthy: True -> False @@ -141,8 +221,8 @@ class HopperJumpEnv(HopperEnv): noise_high[5] = 0.785 qpos = ( - self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) + - self.init_qpos + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) + + self.init_qpos ) qvel = ( # self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) + diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py index ed2bf96..bb38c88 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py @@ -3,12 +3,12 @@ from typing import Optional, Any, Dict, Tuple import numpy as np from gymnasium.core import ObsType -from gymnasium.envs.mujoco.hopper_v4 import HopperEnv +from 
fancy_gym.envs.mujoco.hopper_jump.hopper_jump import HopperEnvCustomXML MAX_EPISODE_STEPS_HOPPERTHROW = 250 -class HopperThrowEnv(HopperEnv): +class HopperThrowEnv(HopperEnvCustomXML): """ Initialization changes to normal Hopper: - healthy_reward: 1.0 -> 0.0 -> 0.1 @@ -104,5 +104,3 @@ class HopperThrowEnv(HopperEnv): observation = self._get_obs() return observation - - diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index 439a677..6d49dcb 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -2,13 +2,13 @@ import os from typing import Optional, Any, Dict, Tuple import numpy as np -from gymnasium.envs.mujoco.hopper_v4 import HopperEnv +from fancy_gym.envs.mujoco.hopper_jump.hopper_jump import HopperEnvCustomXML from gymnasium.core import ObsType MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250 -class HopperThrowInBasketEnv(HopperEnv): +class HopperThrowInBasketEnv(HopperEnvCustomXML): """ Initialization changes to normal Hopper: - healthy_reward: 1.0 -> 0.0 @@ -66,7 +66,7 @@ class HopperThrowInBasketEnv(HopperEnv): is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size / 2) and ball_pos[1] <= basket_pos[1] + ( - self.basket_size / 2) + self.basket_size / 2) is_in_basket_z = ball_pos[2] < 0.1 is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z if is_in_basket: @@ -136,6 +136,3 @@ class HopperThrowInBasketEnv(HopperEnv): observation = self._get_obs() return observation - - - diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py index cc9f2b4..7c358fa 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -2,9 +2,13 @@ import os from typing import Optional, Any, Dict, Tuple import numpy as np -from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv +from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv, DEFAULT_CAMERA_CONFIG from gymnasium.core import ObsType +from gymnasium import utils +from gymnasium.envs.mujoco import MujocoEnv +from gymnasium.spaces import Box + MAX_EPISODE_STEPS_WALKERJUMP = 300 @@ -12,6 +16,67 @@ MAX_EPISODE_STEPS_WALKERJUMP = 300 # to the same structure as the Hopper, where the angles are randomized (->contexts) and the agent should jump as height # as possible, while landing at a specific target position +class Walker2dEnvCustomXML(Walker2dEnv): + def __init__( + self, + xml_file, + forward_reward_weight=1.0, + ctrl_cost_weight=1e-3, + healthy_reward=1.0, + terminate_when_unhealthy=True, + healthy_z_range=(0.8, 2.0), + healthy_angle_range=(-1.0, 1.0), + reset_noise_scale=5e-3, + exclude_current_positions_from_observation=True, + **kwargs, + ): + utils.EzPickle.__init__( + self, + xml_file, + forward_reward_weight, + ctrl_cost_weight, + healthy_reward, + terminate_when_unhealthy, + healthy_z_range, + healthy_angle_range, + reset_noise_scale, + exclude_current_positions_from_observation, + **kwargs, + ) + + self._forward_reward_weight = forward_reward_weight + self._ctrl_cost_weight = ctrl_cost_weight + + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + + self._healthy_z_range = healthy_z_range + self._healthy_angle_range = healthy_angle_range + + self._reset_noise_scale = 
reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + + MujocoEnv.__init__( + self, + xml_file, + 4, + observation_space=observation_space, + default_camera_config=DEFAULT_CAMERA_CONFIG, + **kwargs, + ) + class Walker2dJumpEnv(Walker2dEnv): """ @@ -100,4 +165,3 @@ class Walker2dJumpEnv(Walker2dEnv): observation = self._get_obs() return observation - From d6df6779a1603549c426d1bdeb20aaa14c22936c Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 27 May 2023 11:39:47 +0200 Subject: [PATCH 025/198] Auto convert output spaces.Dict to Box for BB-Envs --- fancy_gym/utils/make_env_helpers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 42096f3..d3642a4 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -8,6 +8,7 @@ from typing import Iterable, Type, Union, Optional import gymnasium as gym import numpy as np from gymnasium.envs.registration import register, registry +from gymnasium.wrappers import FlattenObservation from fancy_gym.utils.env_compatibility import EnvCompatibility @@ -165,6 +166,10 @@ def make_bb( env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) + # BB expects a spaces.Box to be exposed, need to convert for dict-observations + if type(env.observation_space) == gym.spaces.dict.Dict: + env = FlattenObservation(env) + traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(env.action_space.shape).item()) if black_box_kwargs.get('duration') is None: From a8a67601ca868a4c90dcbc56099cafb0298f618e Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 27 May 2023 12:48:45 +0200 Subject: [PATCH 026/198] Fix: NameError: name 'd' is not defined --- test/test_replanning_sequencing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index 1a35f99..f2c598e 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -92,7 +92,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter length = info['trajectory_length'] - if not d: + if not done: assert length == np.round(action[0] / env.dt) assert length == np.round(env.traj_gen.tau.numpy() / env.dt) else: From 29b458c7df946e7d91f59644b892847cc617b769 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 27 May 2023 12:53:57 +0200 Subject: [PATCH 027/198] Fix: hopper_jump_on_box still used unpatched HopperEnv --- fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py index a31e8ee..7dab661 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py @@ -3,12 +3,13 @@ from typing import Optional, Dict, Any, Tuple import numpy as np from gymnasium.core import ObsType -from gymnasium.envs.mujoco.hopper_v4 import HopperEnv +from fancy_gym.envs.mujoco.hopper_jump.hopper_jump import HopperEnvCustomXML + MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250 -class 
HopperJumpOnBoxEnv(HopperEnv): +class HopperJumpOnBoxEnv(HopperEnvCustomXML): """ Initialization changes to normal Hopper: - healthy_reward: 1.0 -> 0.01 -> 0.001 @@ -153,6 +154,3 @@ class HopperJumpOnBoxEnv(HopperEnv): observation = self._get_obs() return observation - - - From dbd7c37da558eda79b6cc7bc6812456760017d59 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 27 May 2023 12:54:30 +0200 Subject: [PATCH 028/198] Also support old gym Box as observation_space (backwards compat) --- fancy_gym/utils/time_aware_observation.py | 29 ++++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py index c1aea7f..12c3762 100644 --- a/fancy_gym/utils/time_aware_observation.py +++ b/fancy_gym/utils/time_aware_observation.py @@ -1,4 +1,5 @@ -from gymnasium.spaces import Box +from gymnasium.spaces import Box, Dict +from gym.spaces import Box as OldBox import gymnasium as gym import numpy as np @@ -6,27 +7,37 @@ import numpy as np class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): """Augment the observation with the current time step in the episode. - The observation space of the wrapped environment is assumed to be a flat :class:`Box`. - In particular, pixel observations are not supported. This wrapper will append the current timestep within the current episode to the observation. - The timestep will be indicated as a number between 0 and 1. + The observation space of the wrapped environment is assumed to be a flat :class:`Box` or flattable :class:`Dict`. + In particular, pixel observations are not supported. This wrapper will append the current progress within the current episode to the observation. + The progress will be indicated as a number between 0 and 1. """ def __init__(self, env: gym.Env, enforce_dtype_float32=False): - """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box` observation space. + """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box` or flattable :class:`Dict` observation space. Args: env: The environment to apply the wrapper """ gym.utils.RecordConstructorArgs.__init__(self) gym.ObservationWrapper.__init__(self, env) - assert isinstance(env.observation_space, Box) + allowed_classes = [Box, OldBox, Dict] if enforce_dtype_float32: assert env.observation_space.dtype == np.float32, 'TimeAwareObservation was given an environment with a dtype!=np.float32 ('+str( env.observation_space.dtype)+'). This requirement can be removed by setting enforce_dtype_float32=False.' dtype = env.observation_space.dtype - low = np.append(self.observation_space.low, 0.0) - high = np.append(self.observation_space.high, np.inf) - self.observation_space = Box(low, high, dtype=dtype) + + assert env.observation_space.__class__ in allowed_classes, str(env.observation_space)+' is not supported. 
Only Box or Dict' + + low = np.append(env.observation_space.low, 0.0) + high = np.append(env.observation_space.high, 1.0) + + if env.observation_space.__class__ in [Box, OldBox]: + self.observation_space = Box(low, high, dtype=dtype) + else: + import pdb + pdb.set_trace() + exit + self.is_vector_env = getattr(env, "is_vector_env", False) def observation(self, observation): From 110a8a9c0c37c11554202bae8070ad9909c4f4ae Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 27 May 2023 12:55:46 +0200 Subject: [PATCH 029/198] Fix: MujocoEnv no longer supports manual assignment of mujoco_bindings --- .../mujoco/table_tennis/table_tennis_env.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 7fb5e9f..872aa75 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -22,6 +22,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): """ 7 DoF table tennis environment """ + def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, goal_switching_step: int = None, enable_artificial_wind: bool = False): @@ -52,9 +53,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): MujocoEnv.__init__(self, model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"), - frame_skip=frame_skip, - mujoco_bindings="mujoco") - + frame_skip=frame_skip,) + if ctxt_dim == 2: self.context_bounds = CONTEXT_BOUNDS_2DIMS elif ctxt_dim == 4: @@ -83,11 +83,11 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): unstable_simulation = False if self._steps == self._goal_switching_step and self.np_random.uniform() < 0.5: - new_goal_pos = self._generate_goal_pos(random=True) - new_goal_pos[1] = -new_goal_pos[1] - self._goal_pos = new_goal_pos - self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) - mujoco.mj_forward(self.model, self.data) + new_goal_pos = self._generate_goal_pos(random=True) + new_goal_pos[1] = -new_goal_pos[1] + self._goal_pos = new_goal_pos + self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) + mujoco.mj_forward(self.model, self.data) for _ in range(self.frame_skip): if self._enable_artificial_wind: @@ -102,7 +102,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): if not self._hit_ball: self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \ - self._contact_checker(self._ball_contact_id, self._bat_back_id) + self._contact_checker(self._ball_contact_id, self._bat_back_id) if not self._hit_ball: ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id) if ball_land_on_floor_no_hit: @@ -130,7 +130,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): reward = -25 if unstable_simulation else self._get_reward(self._terminated) land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \ - if self._ball_landing_pos is not None else 10. + if self._ball_landing_pos is not None else 10. 
return self._get_obs(), reward, self._terminated, { "hit_ball": self._hit_ball, @@ -202,7 +202,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): if not self._hit_ball: return 0.2 * (1 - np.tanh(min_r_b_dist**2)) if self._ball_landing_pos is None: - min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1)) + min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:, :2] - self._goal_pos[:2], axis=1)) return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2)) min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2]) over_net_bonus = int(self._ball_landing_pos[0] < 0) @@ -231,11 +231,11 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0)) invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ - violate_high_bound_error + violate_low_bound_error + violate_high_bound_error + violate_low_bound_error return -invalid_penalty def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs): - obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj + obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj penalty = self._get_traj_invalid_penalty(action, pos_traj) return obs, penalty, True, { "hit_ball": [False], @@ -249,7 +249,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): @staticmethod def check_traj_validity(action, pos_traj, vel_traj): time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \ - or action[1] > delay_bound[1] or action[1] < delay_bound[0] + or action[1] > delay_bound[1] or action[1] < delay_bound[0] if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low): return False, pos_traj, vel_traj return True, pos_traj, vel_traj From 1fddeb838b87acf40ff2fe74909ab8877552d96f Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 27 May 2023 13:06:19 +0200 Subject: [PATCH 030/198] Forgot to make it derive from Walker2dEnvCustomXML instead of original Walker2dEnv --- fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py index 7c358fa..fda511e 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -78,7 +78,7 @@ class Walker2dEnvCustomXML(Walker2dEnv): ) -class Walker2dJumpEnv(Walker2dEnv): +class Walker2dJumpEnv(Walker2dEnvCustomXML): """ healthy reward 1.0 -> 0.005 -> 0.0025 not from alex penalty 10 -> 0 not from alex From 38cb5e17505a2b783b9718569f169ac686703219 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 13:33:51 +0200 Subject: [PATCH 031/198] Converted mujoco env 'hopper_jump' to new coordinate convention New versions of mujoco no longer allow global coordinates. We therefore convert them to local ones. The original files are kept as reference. 
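The conversion applied in this and the following two patches follows one
rule: under the local convention, a body's pos is expressed relative to its
parent body instead of the world origin. A minimal sketch of that rule,
assuming purely translated (unrotated) parent frames; a real MJCF conversion
additionally has to fold in parent rotations, which this ignores:

    import numpy as np

    def global_to_local(parent_global_pos, child_global_pos):
        # With unrotated parent frames, the local offset is a plain difference.
        return np.asarray(child_global_pos) - np.asarray(parent_global_pos)

    # e.g. a foot at world height 0.1 under a torso at world height 1.25
    print(global_to_local([0.0, 0.0, 1.25], [0.0, 0.0, 0.1]))  # -> [ 0.    0.   -1.15]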
--- .../assets/hopper_jump.before_convert.xml | 52 +++++++++++++ .../mujoco/hopper_jump/assets/hopper_jump.xml | 77 +++++++++---------- .../hopper_jump_on_box.before_convert.xml | 51 ++++++++++++ .../hopper_jump/assets/hopper_jump_on_box.xml | 75 +++++++++--------- 4 files changed, 178 insertions(+), 77 deletions(-) create mode 100644 fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.before_convert.xml create mode 100644 fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml diff --git a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.before_convert.xml b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.before_convert.xml new file mode 100644 index 0000000..3348bab --- /dev/null +++ b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.before_convert.xml @@ -0,0 +1,52 @@ + + + + + + + + diff --git a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.xml b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.xml index 3348bab..fb1b978 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.xml +++ b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump.xml @@ -1,52 +1,51 @@ - - - - - - - diff --git a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml new file mode 100644 index 0000000..69d78ff --- /dev/null +++ b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml @@ -0,0 +1,51 @@ + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml index 69d78ff..b66c3ca 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml +++ b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml @@ -1,51 +1,50 @@ - - - - - - - \ No newline at end of file + From c06fbee728fc0f9002e30212a36c1d5a35081d0b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 13:37:40 +0200 Subject: [PATCH 032/198] Converted mujoco env 'hopper_throw' to new coordinate convention New versions of mujoco no longer allow global coordinates. We therefore convert them to local ones. The original files are kept as reference. 
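Since the pre-conversion files are kept as reference, the rewrite can be
sanity-checked by loading both models and comparing world-frame body
positions after a forward pass. A sketch of such a check, assuming the
repository layout below and a mujoco release old enough to still parse the
global-coordinate reference file (newer releases, per the commit message,
reject it at load time):

    import mujoco
    import numpy as np

    base = 'fancy_gym/envs/mujoco/hopper_throw/assets/'
    ref = mujoco.MjModel.from_xml_path(base + 'hopper_throw.before_convert.xml')
    new = mujoco.MjModel.from_xml_path(base + 'hopper_throw.xml')
    ref_data, new_data = mujoco.MjData(ref), mujoco.MjData(new)

    # Fill in world-frame body positions (xpos) for the default pose.
    mujoco.mj_forward(ref, ref_data)
    mujoco.mj_forward(new, new_data)

    # A pure coordinate-convention rewrite must leave world-frame positions unchanged.
    assert np.allclose(ref_data.xpos, new_data.xpos, atol=1e-6)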
--- .../assets/hopper_throw.before_convert.xml | 56 +++++ .../hopper_throw/assets/hopper_throw.xml | 82 +++--- .../hopper_throw_in_basket.before_convert.xml | 132 ++++++++++ .../assets/hopper_throw_in_basket.xml | 233 +++++++++--------- 4 files changed, 343 insertions(+), 160 deletions(-) create mode 100644 fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml create mode 100644 fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml diff --git a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml new file mode 100644 index 0000000..1c39602 --- /dev/null +++ b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml @@ -0,0 +1,56 @@ + + + + + + + + diff --git a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.xml b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.xml index 1c39602..fd17979 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.xml +++ b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.xml @@ -1,56 +1,54 @@ - - - - - - - diff --git a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml new file mode 100644 index 0000000..b4f0342 --- /dev/null +++ b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml @@ -0,0 +1,132 @@ + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml index b4f0342..655b056 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml +++ b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml @@ -1,132 +1,129 @@ - - - - - - - \ No newline at end of file + From ff382a29225ad1767748c2dae49ed08b1c2adae6 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 13:40:33 +0200 Subject: [PATCH 033/198] Converted mujoco env 'walker2d' to new coordinate convention New versions of mujoco no longer allow global coordinates. We therefore convert them to local ones. The original files are kept as reference. 
--- .../assets/walker2d.before_convert.xml | 64 +++++++++++++ .../mujoco/walker_2d_jump/assets/walker2d.xml | 96 +++++++++---------- 2 files changed, 110 insertions(+), 50 deletions(-) create mode 100644 fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml b/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml new file mode 100644 index 0000000..f3bcbd1 --- /dev/null +++ b/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml @@ -0,0 +1,64 @@ + + + + + + + diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.xml b/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.xml index f3bcbd1..96621c7 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.xml +++ b/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.xml @@ -1,64 +1,60 @@ - - - - + + From 5e7f027ea01171554a5e5be9b03daed9bb74bb9b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 18:34:49 +0200 Subject: [PATCH 034/198] Fixed Bug: walker_2d_jump was missing obsvation_space attribute --- fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py index fda511e..fe8d0b2 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -68,6 +68,8 @@ class Walker2dEnvCustomXML(Walker2dEnv): low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 ) + self.observation_space = observation_space + MujocoEnv.__init__( self, xml_file, From ddf6fd73b2a25c550aa70881938903b842f1872d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 18:40:03 +0200 Subject: [PATCH 035/198] Fixed: hopper_jump returned observations bigger then observation_space --- fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py index 53d9265..0da71db 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py @@ -69,11 +69,11 @@ class HopperEnvCustomXML(HopperEnv): if exclude_current_positions_from_observation: observation_space = Box( - low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64 + low=-np.inf, high=np.inf, shape=(13,), dtype=np.float64 ) else: observation_space = Box( - low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64 + low=-np.inf, high=np.inf, shape=(14,), dtype=np.float64 ) MujocoEnv.__init__( From 42003a3f9af32788dea8aedb1857bc98f4985ac6 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 18:47:41 +0200 Subject: [PATCH 036/198] Allow custom XML-files for ant_env --- fancy_gym/envs/mujoco/ant_jump/ant_jump.py | 73 +++++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py index fbf0804..b228195 100644 --- a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py +++ b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py @@ -2,7 +2,10 @@ from typing import Tuple, Union, Optional, Any, Dict import numpy as np from gymnasium.core import ObsType -from gymnasium.envs.mujoco.ant_v4 import AntEnv +from gymnasium.envs.mujoco.ant_v4 import AntEnv, DEFAULT_CAMERA_CONFIG +from gymnasium import utils +from gymnasium.envs.mujoco 
import MujocoEnv +from gymnasium.spaces import Box MAX_EPISODE_STEPS_ANTJUMP = 200 @@ -12,8 +15,74 @@ MAX_EPISODE_STEPS_ANTJUMP = 200 # to the same structure as the Hopper, where the angles are randomized (->contexts) and the agent should jump as heigh # as possible, while landing at a specific target position +class AntEnvCustomXML(AntEnv): + def __init__( + self, + xml_file="ant.xml", + ctrl_cost_weight=0.5, + use_contact_forces=False, + contact_cost_weight=5e-4, + healthy_reward=1.0, + terminate_when_unhealthy=True, + healthy_z_range=(0.2, 1.0), + contact_force_range=(-1.0, 1.0), + reset_noise_scale=0.1, + exclude_current_positions_from_observation=True, + **kwargs, + ): + utils.EzPickle.__init__( + self, + xml_file, + ctrl_cost_weight, + use_contact_forces, + contact_cost_weight, + healthy_reward, + terminate_when_unhealthy, + healthy_z_range, + contact_force_range, + reset_noise_scale, + exclude_current_positions_from_observation, + **kwargs, + ) -class AntJumpEnv(AntEnv): + self._ctrl_cost_weight = ctrl_cost_weight + self._contact_cost_weight = contact_cost_weight + + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + self._healthy_z_range = healthy_z_range + + self._contact_force_range = contact_force_range + + self._reset_noise_scale = reset_noise_scale + + self._use_contact_forces = use_contact_forces + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + obs_shape = 27 + if not exclude_current_positions_from_observation: + obs_shape += 2 + if use_contact_forces: + obs_shape += 84 + + observation_space = Box( + low=-np.inf, high=np.inf, shape=(obs_shape,), dtype=np.float64 + ) + + MujocoEnv.__init__( + self, + xml_file, + 5, + observation_space=observation_space, + default_camera_config=DEFAULT_CAMERA_CONFIG, + **kwargs, + ) + + +class AntJumpEnv(AntEnvCustomXML): """ Initialization changes to normal Ant: - healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc From 40d2409c2611265ed49c4ef5959c2f260c35f1bb Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 18:48:13 +0200 Subject: [PATCH 037/198] Fixed: ant_jump returned observations bigger then observation_space --- fancy_gym/envs/mujoco/ant_jump/ant_jump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py index b228195..14ab625 100644 --- a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py +++ b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py @@ -62,7 +62,7 @@ class AntEnvCustomXML(AntEnv): exclude_current_positions_from_observation ) - obs_shape = 27 + obs_shape = 27 + 1 if not exclude_current_positions_from_observation: obs_shape += 2 if use_contact_forces: From f07b8a26ac2f93a37d355fb2b8ff136dd6b6f21d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 10 Jun 2023 18:49:02 +0200 Subject: [PATCH 038/198] Made some assertions more verbose for easier debugging --- test/test_black_box.py | 4 ++-- test/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index 74985ac..bfde2fb 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -124,12 +124,12 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): {'phase_generator_type': 'exp'}, {'basis_generator_type': basis_generator_type}) - for _ in range(5): + for i in range(5): env.reset() _obs, _reward, _terminated, _truncated, info = 
env.step(env.action_space.sample()) length = info['trajectory_length'] - assert length == env.spec.max_episode_steps + assert length == env.spec.max_episode_steps, f'Expcted total simulation length ({length}) to be equal to spec.max_episode_steps ({env.spec.max_episode_steps}), but was not during test nr. {i}' @pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) diff --git a/test/utils.py b/test/utils.py index 2402f98..86e82a2 100644 --- a/test/utils.py +++ b/test/utils.py @@ -76,7 +76,7 @@ def run_env_determinism(env_id: str, seed: int, iterations: int = None, wrappers for i, time_step in enumerate(zip(*traj1, *traj2)): obs1, rwd1, term1, trunc1, ac1, obs2, rwd2, term2, trunc2, ac2 = time_step assert np.allclose( - obs1, obs2), f"Observations [{i}] {obs1} ({obs1.shape}) and {obs2} ({obs2.shape}) do not match." + obs1, obs2), f"Observations [{i}] {obs1} ({obs1.shape}) and {obs2} ({obs2.shape}) do not match: Biggest difference is {np.abs(obs1-obs2).max()} at index {np.abs(obs1-obs2).argmax()}." assert np.array_equal( ac1, ac2), f"Actions [{i}] {ac1} and {ac2} do not match." assert np.array_equal( @@ -89,7 +89,7 @@ def run_env_determinism(env_id: str, seed: int, iterations: int = None, wrappers def verify_observations(obs, observation_space: gym.Space, obs_type="reset()"): assert observation_space.contains(obs), \ - f"Observation {obs} received from {obs_type} not contained in observation space {observation_space}." + f"Observation {obs} ({obs.shape}) received from {obs_type} not contained in observation space {observation_space}." def verify_reward(reward): From ef64b0c21c4b0b49070c00298c500e8e3224f62e Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 11 Jun 2023 11:08:46 +0200 Subject: [PATCH 039/198] Adding/fixing obs space definitions and metadata for various envs --- fancy_gym/envs/mujoco/beerpong/beerpong.py | 17 +++++++++++++++-- .../mujoco/box_pushing/box_pushing_env.py | 16 +++++++++++++++- .../half_cheetah_jump/half_cheetah_jump.py | 4 ++-- .../envs/mujoco/hopper_jump/hopper_jump.py | 19 ++++++++++--------- .../mujoco/hopper_jump/hopper_jump_on_box.py | 11 +++++++++++ .../envs/mujoco/hopper_throw/hopper_throw.py | 11 +++++++++++ .../hopper_throw/hopper_throw_in_basket.py | 12 ++++++++++++ .../mujoco/table_tennis/table_tennis_env.py | 15 ++++++++++++++- .../mujoco/walker_2d_jump/walker_2d_jump.py | 4 ++-- 9 files changed, 92 insertions(+), 17 deletions(-) diff --git a/fancy_gym/envs/mujoco/beerpong/beerpong.py b/fancy_gym/envs/mujoco/beerpong/beerpong.py index 6a37e66..1f35bce 100644 --- a/fancy_gym/envs/mujoco/beerpong/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/beerpong.py @@ -5,6 +5,7 @@ import numpy as np from gymnasium import utils from gymnasium.core import ObsType from gymnasium.envs.mujoco import MujocoEnv +from gymnasium.spaces import Box MAX_EPISODE_STEPS_BEERPONG = 300 FIXED_RELEASE_STEP = 62 # empirically evaluated for frame_skip=2! @@ -31,6 +32,14 @@ CUP_COLLISION_OBJ = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "c class BeerPongEnv(MujocoEnv, utils.EzPickle): + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + } + def __init__(self, **kwargs): self._steps = 0 # Small Context -> Easier. Todo: Should we do different versions? 
@@ -66,6 +75,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): self.ball_in_cup = False self.dist_ground_cup = -1 # distance floor to cup if first floor contact + self.observation_space = Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64 + ) + MujocoEnv.__init__( self, self.xml_path, @@ -208,13 +221,13 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0, 0 action_cost = 1e-4 * np.mean(action_cost) reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \ - action_cost - ground_contact_dist_coeff * self.dist_ground_cup ** 2 + action_cost - ground_contact_dist_coeff * self.dist_ground_cup ** 2 # release step punishment min_time_bound = 0.1 max_time_bound = 1.0 release_time = self.release_step * self.dt release_time_rew = int(release_time < min_time_bound) * (-30 - 10 * (release_time - min_time_bound) ** 2) + \ - int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2) + int(release_time > max_time_bound) * (-30 - 10 * (release_time - max_time_bound) ** 2) reward += release_time_rew success = self.ball_in_cup else: diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 2408404..65db553 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -27,6 +27,14 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): 3. time-spatial-depend sparse reward """ + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + } + def __init__(self, frame_skip: int = 10): utils.EzPickle.__init__(**locals()) self._steps = 0 @@ -40,9 +48,15 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): self._desired_rod_quat = desired_rod_quat self._episode_energy = 0. 
+ + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64 + ) + MujocoEnv.__init__(self, model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"), - frame_skip=self.frame_skip) + frame_skip=self.frame_skip, + observation_space=self.observation_space) self.action_space = spaces.Box(low=-1, high=1, shape=(7,)) def step(self, action): diff --git a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py index f4bc677..4ef2757 100644 --- a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py +++ b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py @@ -45,11 +45,11 @@ class HalfCheetahEnvCustomXML(HalfCheetahEnv): if exclude_current_positions_from_observation: observation_space = Box( - low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 ) else: observation_space = Box( - low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + low=-np.inf, high=np.inf, shape=(19,), dtype=np.float64 ) MujocoEnv.__init__( diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py index 0da71db..f7936c7 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py @@ -67,20 +67,21 @@ class HopperEnvCustomXML(HopperEnv): exclude_current_positions_from_observation ) - if exclude_current_positions_from_observation: - observation_space = Box( - low=-np.inf, high=np.inf, shape=(13,), dtype=np.float64 - ) - else: - observation_space = Box( - low=-np.inf, high=np.inf, shape=(14,), dtype=np.float64 - ) + if not hasattr(self, 'observation_space'): + if exclude_current_positions_from_observation: + self.observation_space = Box( + low=-np.inf, high=np.inf, shape=(15,), dtype=np.float64 + ) + else: + self.observation_space = Box( + low=-np.inf, high=np.inf, shape=(16,), dtype=np.float64 + ) MujocoEnv.__init__( self, xml_file, 4, - observation_space=observation_space, + observation_space=self.observation_space, default_camera_config=DEFAULT_CAMERA_CONFIG, **kwargs, ) diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py index 7dab661..60d387a 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py @@ -4,6 +4,7 @@ from typing import Optional, Dict, Any, Tuple import numpy as np from gymnasium.core import ObsType from fancy_gym.envs.mujoco.hopper_jump.hopper_jump import HopperEnvCustomXML +from gymnasium import spaces MAX_EPISODE_STEPS_HOPPERJUMPONBOX = 250 @@ -36,6 +37,16 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML): self.hopper_on_box = False self.context = context self.box_x = 1 + + if exclude_current_positions_from_observation: + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(13,), dtype=np.float64 + ) + else: + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(14,), dtype=np.float64 + ) + xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file) super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale, diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py index bb38c88..2dd82b2 100644 --- 
a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py @@ -4,6 +4,7 @@ from typing import Optional, Any, Dict, Tuple import numpy as np from gymnasium.core import ObsType from fancy_gym.envs.mujoco.hopper_jump.hopper_jump import HopperEnvCustomXML +from gymnasium import spaces MAX_EPISODE_STEPS_HOPPERTHROW = 250 @@ -37,6 +38,16 @@ class HopperThrowEnv(HopperEnvCustomXML): self.max_episode_steps = max_episode_steps self.context = context self.goal = 0 + + if not hasattr(self, 'observation_space'): + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + else: + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(19,), dtype=np.float64 + ) + super().__init__(xml_file=xml_file, forward_reward_weight=forward_reward_weight, ctrl_cost_weight=ctrl_cost_weight, diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index 6d49dcb..be6b81a 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -4,6 +4,8 @@ from typing import Optional, Any, Dict, Tuple import numpy as np from fancy_gym.envs.mujoco.hopper_jump.hopper_jump import HopperEnvCustomXML from gymnasium.core import ObsType +from gymnasium import spaces + MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250 @@ -43,6 +45,16 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML): self.context = context self.penalty = penalty self.basket_x = 5 + + if exclude_current_positions_from_observation: + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + else: + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(19,), dtype=np.float64 + ) + xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file) super().__init__(xml_file=xml_file, forward_reward_weight=forward_reward_weight, diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 872aa75..a5d67c0 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -23,6 +23,14 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): 7 DoF table tennis environment """ + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + } + def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, goal_switching_step: int = None, enable_artificial_wind: bool = False): @@ -51,9 +59,14 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self._artificial_force = 0. 
+ self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(9,), dtype=np.float64 + ) + MujocoEnv.__init__(self, model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"), - frame_skip=frame_skip,) + frame_skip=frame_skip, + observation_space=self.observation_space) if ctxt_dim == 2: self.context_bounds = CONTEXT_BOUNDS_2DIMS diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py index fe8d0b2..127719c 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -61,11 +61,11 @@ class Walker2dEnvCustomXML(Walker2dEnv): if exclude_current_positions_from_observation: observation_space = Box( - low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 ) else: observation_space = Box( - low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + low=-np.inf, high=np.inf, shape=(19,), dtype=np.float64 ) self.observation_space = observation_space From 6f1a36d18c2149e0ce138f7b2788332d9a888c54 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 11 Jun 2023 12:30:56 +0200 Subject: [PATCH 040/198] Adding required render_fps metadata to custom envs --- fancy_gym/dmc/dmc_wrapper.py | 2 ++ fancy_gym/envs/mujoco/beerpong/beerpong.py | 1 + fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py | 1 + fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py | 1 + 4 files changed, 5 insertions(+) diff --git a/fancy_gym/dmc/dmc_wrapper.py b/fancy_gym/dmc/dmc_wrapper.py index d1e5f0d..b4f2292 100644 --- a/fancy_gym/dmc/dmc_wrapper.py +++ b/fancy_gym/dmc/dmc_wrapper.py @@ -62,6 +62,8 @@ class DMCWrapper(gym.Env): env: Callable[[], Union[composer.Environment, control.Environment]], ): + raise Exception('The fancy_gym dmc-wrapper is deprecated; shimmy should be used instead.') + # TODO: Currently this is required to be a function because dmc does not allow to copy composers environments self._env = env() diff --git a/fancy_gym/envs/mujoco/beerpong/beerpong.py b/fancy_gym/envs/mujoco/beerpong/beerpong.py index 1f35bce..8e2f9fc 100644 --- a/fancy_gym/envs/mujoco/beerpong/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/beerpong.py @@ -38,6 +38,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): "rgb_array", "depth_array", ], + "render_fps": 100 } def __init__(self, **kwargs): diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 65db553..3efcf3f 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -33,6 +33,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): "rgb_array", "depth_array", ], + "render_fps": 50 } def __init__(self, frame_skip: int = 10): diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index a5d67c0..ddf5022 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -29,6 +29,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): "rgb_array", "depth_array", ], + "render_fps": 125 } def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, From 80de15fd14f8f38c64f8e1a7deb1d0d3eb04f827 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 11 Jun 2023 13:43:10 +0200 Subject: [PATCH 041/198] Fix: Return for invalid trajectories did not follow new gym spec --- 
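Note on the convention adopted here (a sketch, not part of the commit;
'Pendulum-v1' is only a placeholder id): with Gymnasium >=0.26, `env.step()`
returns five values instead of four, and callers derive the old `done` flag
themselves:

    import gymnasium as gym

    env = gym.make('Pendulum-v1')
    obs, info = env.reset(seed=1)
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    done = terminated or truncated  # episode is over either way
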
fancy_gym/black_box/black_box_wrapper.py | 6 +++--- fancy_gym/black_box/raw_interface_wrapper.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index f5677d6..a097b09 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -158,9 +158,9 @@ class BlackBoxWrapper(gym.ObservationWrapper): done = False if not traj_is_valid: - obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity, - self.return_context_observation) - return self.observation(obs), trajectory_return, done, infos + obs, trajectory_return, terminated, truncated, infos = self.env.invalid_traj_callback(action, position, velocity, + self.return_context_observation) + return self.observation(obs), trajectory_return, terminated, truncated, infos self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py index bf6e67d..78f1f8c 100644 --- a/fancy_gym/black_box/raw_interface_wrapper.py +++ b/fancy_gym/black_box/raw_interface_wrapper.py @@ -108,7 +108,8 @@ class RawInterfaceWrapper(gym.Wrapper): Returns: obs: artificial observation if the trajectory is invalid, by default a zero vector reward: artificial reward if the trajectory is invalid, by default 0 - done: artificial done if the trajectory is invalid, by default True + terminated: artificial terminated if the trajectory is invalid, by default True + truncated: artificial truncated if the trajectory is invalid, by default False info: artificial info if the trajectory is invalid, by default empty dict """ - return np.zeros(1), 0, True, {} \ No newline at end of file + return np.zeros(1), 0, True, False, {} From 4921cc4b0b3df0fd4c1fe3ed1e678e5346507e45 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 11 Jun 2023 13:45:44 +0200 Subject: [PATCH 042/198] Fix: Some envs had wrong obs space shapes and did not follow new gym spec --- fancy_gym/envs/mujoco/beerpong/beerpong.py | 2 +- .../mujoco/box_pushing/box_pushing_env.py | 12 ++++++---- .../mujoco/hopper_jump/hopper_jump_on_box.py | 8 ++++--- .../mujoco/table_tennis/table_tennis_env.py | 22 +++++++++++++------ 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/fancy_gym/envs/mujoco/beerpong/beerpong.py b/fancy_gym/envs/mujoco/beerpong/beerpong.py index 8e2f9fc..fd1a5dc 100644 --- a/fancy_gym/envs/mujoco/beerpong/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/beerpong.py @@ -77,7 +77,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): self.dist_ground_cup = -1 # distance floor to cup if first floor contact self.observation_space = Box( - low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64 + low=-np.inf, high=np.inf, shape=(29,), dtype=np.float64 ) MujocoEnv.__init__( diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 3efcf3f..4fafd44 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -1,8 +1,8 @@ import os import numpy as np -from gym import utils, spaces -from gym.envs.mujoco import MujocoEnv +from gymnasium import utils, spaces +from gymnasium.envs.mujoco import MujocoEnv from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import 
q_max, q_min, q_dot_max, q_torque_max
 from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat
 
@@ -51,7 +51,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
         self._episode_energy = 0.
 
         self.observation_space = spaces.Box(
-            low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64
+            low=-np.inf, high=np.inf, shape=(28,), dtype=np.float64
         )
 
         MujocoEnv.__init__(self,
@@ -103,7 +103,11 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
             'is_success': True if episode_end and box_goal_pos_dist < 0.05 and box_goal_quat_dist < 0.5 else False,
             'num_steps': self._steps
         }
-        return obs, reward, episode_end, infos
+
+        terminated = episode_end and infos['is_success']
+        truncated = episode_end and not infos['is_success']
+
+        return obs, reward, terminated, truncated, infos
 
     def reset_model(self):
         # reset box to initial position

diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py
index 60d387a..c8c15c3 100644
--- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py
+++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py
@@ -40,11 +40,11 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
 
         if exclude_current_positions_from_observation:
             self.observation_space = spaces.Box(
-                low=-np.inf, high=np.inf, shape=(13,), dtype=np.float64
+                low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
             )
         else:
             self.observation_space = spaces.Box(
-                low=-np.inf, high=np.inf, shape=(14,), dtype=np.float64
+                low=-np.inf, high=np.inf, shape=(13,), dtype=np.float64
             )
 
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
@@ -136,7 +136,9 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
             'goal': self.box_x,
         }
 
-        return observation, reward, terminated, info
+        truncated = self.current_step >= self.max_episode_steps and not terminated
+
+        return observation, reward, terminated, truncated, info
 
     def _get_obs(self):
         return np.append(super()._get_obs(), self.box_x)

diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
index ddf5022..55aa77c 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@@ -1,8 +1,8 @@
 import os
 
 import numpy as np
-from gym import utils, spaces
-from gym.envs.mujoco import MujocoEnv
+from gymnasium import utils, spaces
+from gymnasium.envs.mujoco import MujocoEnv
 
 from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force
 from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound
@@ -60,9 +60,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
 
         self._artificial_force = 0.
 
-        self.observation_space = spaces.Box(
-            low=-np.inf, high=np.inf, shape=(9,), dtype=np.float64
-        )
+        if not hasattr(self, 'observation_space'):
+            self.observation_space = spaces.Box(
+                low=-np.inf, high=np.inf, shape=(19,), dtype=np.float64
+            )
 
         MujocoEnv.__init__(self,
                            model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"),
@@ -146,7 +147,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
 
         land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \
             if self._ball_landing_pos is not None else 10.
-        return self._get_obs(), reward, self._terminated, {
+        info = {
             "hit_ball": self._hit_ball,
             "ball_returned_success": self._ball_return_success,
             "land_dist_error": land_dist_err,
@@ -154,6 +155,10 @@
             "num_steps": self._steps,
         }
 
+        terminated, truncated = self._terminated, False
+
+        return self._get_obs(), reward, terminated, truncated, info
+
     def _contact_checker(self, id_1, id_2):
         for coni in range(0, self.data.ncon):
             con = self.data.contact[coni]
@@ -251,7 +256,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
     def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs):
         obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])])  # 0 for invalid traj
         penalty = self._get_traj_invalid_penalty(action, pos_traj)
-        return obs, penalty, True, {
+        return obs, penalty, True, False, {
             "hit_ball": [False],
             "ball_returned_success": [False],
             "land_dist_error": [10.],
@@ -271,6 +276,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
 
 class TableTennisWind(TableTennisEnv):
     def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4):
+        self.observation_space = spaces.Box(
+            low=-np.inf, high=np.inf, shape=(22,), dtype=np.float64
+        )
         super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, enable_artificial_wind=True)
 
     def _get_obs(self):

From e44b0ed9edaa4f7764449b07a2ca77c134d6a52a Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 11 Jun 2023 13:46:38 +0200
Subject: [PATCH 043/198] Fix: Version specification now required for dmc envs

---
 test/test_replanning_sequencing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index f2c598e..5edfa7a 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -13,7 +13,7 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 from fancy_gym.utils.time_aware_observation import TimeAwareObservation
 
 SEED = 1
-ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch', 'metaworld:reach-v2', 'Reacher-v2']
+ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2']
 WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper,
             fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper]
 ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())

From 2ad42f4132a928f1e39181f0da429d7407b02969 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 11 Jun 2023 13:47:38 +0200
Subject: [PATCH 044/198] Fix: Minor bugs in time aware obs wrapper

---
 fancy_gym/utils/time_aware_observation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py
index 12c3762..a042438 100644
--- a/fancy_gym/utils/time_aware_observation.py
+++ b/fancy_gym/utils/time_aware_observation.py
@@ -28,10 +28,10 @@ class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorAr
         assert env.observation_space.__class__ in allowed_classes, str(env.observation_space)+' is not supported. 
Only Box or Dict'
 
-        low = np.append(env.observation_space.low, 0.0)
-        high = np.append(env.observation_space.high, 1.0)
-
         if env.observation_space.__class__ in [Box, OldBox]:
+            low = np.append(env.observation_space.low, 0.0)
+            high = np.append(env.observation_space.high, 1.0)
+
             self.observation_space = Box(low, high, dtype=dtype)
         else:
             import pdb
@@ -49,7 +49,7 @@
         Returns:
             The observation with the time step appended to (relative to total number of steps)
         """
-        return np.append(observation, self.t / getattr(self.env, '_max_episode_steps'))
+        return np.append(observation, self.t / self.env.spec.max_episode_steps)
 
     def step(self, action):
         """Steps through the environment, incrementing the time step.

From abeb963b4e4bedf8f668d17a9f355a75e25e5978 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 11 Jun 2023 17:37:32 +0200
Subject: [PATCH 045/198] Little hack to make envs work that don't expose the max_episode_steps in their spec

---
 fancy_gym/utils/make_env_helpers.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index 778e5d8..7f1878e 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -93,6 +93,14 @@ def make(env_id: str, seed: int, **kwargs):
     else:
         env = make_gym(env_id, seed, **kwargs)
 
+    if env.spec.max_episode_steps is None:
+        # Hack: Some envs violate the gym spec in that they don't correctly expose the maximum episode steps
+        # Gymnasium disallows accessing private attributes, so we have to get creative to read the internal values
+        # TODO: Remove this, when all supported envs correctly implement this themselves
+        unwrapped = env.unwrapped if hasattr(env, 'unwrapped') else env
+        if hasattr(unwrapped, '_max_episode_steps'):
+            env.spec.max_episode_steps = unwrapped.__getattribute__('_max_episode_steps')
+
     # try:
     env.reset(seed=seed)
     # except TypeError:

From a23b44752e58fcd64ad4c757d4c98c2776cffd28 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 11 Jun 2023 17:38:16 +0200
Subject: [PATCH 046/198] Implement support for Dict spaces for time_aware_observation-wrapper

---
 fancy_gym/utils/time_aware_observation.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py
index a042438..61290dd 100644
--- a/fancy_gym/utils/time_aware_observation.py
+++ b/fancy_gym/utils/time_aware_observation.py
@@ -2,6 +2,7 @@ from gymnasium.spaces import Box, Dict
 from gym.spaces import Box as OldBox
 import gymnasium as gym
 import numpy as np
+import copy
 
 
 class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorArgs):
@@ -24,19 +25,21 @@ class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorAr
         if enforce_dtype_float32:
             assert env.observation_space.dtype == np.float32, 'TimeAwareObservation was given an environment with a dtype!=np.float32 ('+str(
                 env.observation_space.dtype)+'). This requirement can be removed by setting enforce_dtype_float32=False.'
-        dtype = env.observation_space.dtype
-
         assert env.observation_space.__class__ in allowed_classes, str(env.observation_space)+' is not supported. 
Only Box or Dict' if env.observation_space.__class__ in [Box, OldBox]: + dtype = env.observation_space.dtype + low = np.append(env.observation_space.low, 0.0) high = np.append(env.observation_space.high, 1.0) self.observation_space = Box(low, high, dtype=dtype) else: - import pdb - pdb.set_trace() - exit + spaces = copy.copy(env.observation_space.spaces) + dtype = np.float64 + spaces['time_awareness'] = Box(0, 1, dtype=dtype) + + self.observation_space = Dict(spaces) self.is_vector_env = getattr(env, "is_vector_env", False) From c9467c0e062a486fef7159a5ae3da06329c46166 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 11 Jun 2023 17:39:04 +0200 Subject: [PATCH 047/198] Upgrading metaworld (fixed bug that affected us) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c477a8a..92e0d72 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ from setuptools import setup, find_packages # Environment-specific dependencies for dmc and metaworld extras = { 'dmc': ['dm_control>=1.0.1', 'shimmy[dm-control]'], - 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@3ced29c8cee6445386eba32e92870d664ad5e6e3#egg=metaworld', + 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@43abf981b97c01669af898833a740fb63605b8ac#egg=metaworld', 'mujoco-py<2.2,>=2.1', ], 'box2d': ['gymnasium[box2d]>=0.26.0'], From 9605f2e56c8a44d670f3316fd4d39946da759b99 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 11 Jun 2023 18:05:50 +0200 Subject: [PATCH 048/198] Fix: Test for dmc still referenced lib directly (not via shimmy) --- test/test_dmc_envs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py index 266a12f..3888f59 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc_envs.py @@ -3,7 +3,6 @@ from typing import Callable import gymnasium as gym import pytest -from dm_control import suite, manipulation import fancy_gym from test.utils import run_env, run_env_determinism From fbba12903491706a582307685cc9e08d558f8c8d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 18 Jun 2023 11:51:01 +0200 Subject: [PATCH 049/198] Fix: Need to supply seed to reset in tests --- test/test_black_box.py | 14 +++++++------- test/test_replanning_sequencing.py | 17 +++++++++-------- test/utils.py | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index bfde2fb..678eaa9 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -78,7 +78,7 @@ def test_missing_local_state(mp_type: str): {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, {'basis_generator_type': basis_generator_type}) - env.reset() + env.reset(seed=SEED) with pytest.raises(NotImplementedError): env.step(env.action_space.sample()) @@ -95,7 +95,7 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]] {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, {'basis_generator_type': basis_generator_type}) - env.reset() + env.reset(seed=SEED) _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample()) info_keys = list(info.keys()) @@ -125,7 +125,7 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): {'basis_generator_type': basis_generator_type}) for i in range(5): - env.reset() + env.reset(seed=SEED) _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample()) length = info['trajectory_length'] @@ -141,7 +141,7 @@ def 
test_aggregation(mp_type: str, reward_aggregation: Callable[[np.ndarray], fl {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, {'basis_generator_type': basis_generator_type}) - env.reset() + env.reset(seed=SEED) # ToyEnv only returns 1 as reward _obs, reward, _terminated, _truncated, _info = env.step(env.action_space.sample()) assert reward == reward_aggregation(np.ones(50, )) @@ -232,7 +232,7 @@ def test_learn_tau(mp_type: str, tau: float): done = True for i in range(5): if done: - env.reset() + env.reset(seed=SEED) action = env.action_space.sample() action[0] = tau @@ -278,7 +278,7 @@ def test_learn_delay(mp_type: str, delay: float): done = True for i in range(5): if done: - env.reset() + env.reset(seed=SEED) action = env.action_space.sample() action[0] = delay @@ -327,7 +327,7 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): done = True for i in range(5): if done: - env.reset() + env.reset(seed=SEED) action = env.action_space.sample() action[0] = tau action[1] = delay diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index 5edfa7a..6e4a760 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -7,6 +7,7 @@ import numpy as np import pytest from gymnasium import register from gymnasium.core import ActType, ObsType +from gymnasium import spaces import fancy_gym from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper @@ -85,7 +86,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter for i in range(25): if done: - env.reset() + env.reset(seed=SEED) action = env.action_space.sample() _obs, _reward, terminated, truncated, info = env.step(action) done = terminated or truncated @@ -131,7 +132,7 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra # This also verifies we are not adding the TimeAwareObservationWrapper twice assert env.observation_space == env_step.observation_space - env.reset() + env.reset(seed=SEED) episode_steps = env_step.spec.max_episode_steps // replanning_time # Make 3 episodes, total steps depend on the replanning steps @@ -146,7 +147,7 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra # Check if number of steps until termination match the replanning interval print(done, (i + 1), episode_steps) assert (i + 1) % episode_steps == 0 - env.reset() + env.reset(seed=SEED) assert replanning_schedule(None, None, None, None, length) @@ -171,7 +172,7 @@ def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_s {'basis_generator_type': basis_generator_type, }, seed=SEED) - _ = env.reset() + _ = env.reset(seed=SEED) done = False planning_times = 0 while not done: @@ -203,7 +204,7 @@ def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_se {'basis_generator_type': basis_generator_type, }, seed=SEED) - _ = env.reset() + _ = env.reset(seed=SEED) done = False planning_times = 0 while not done: @@ -236,7 +237,7 @@ def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_ {'basis_generator_type': basis_generator_type, }, seed=SEED) - _ = env.reset() + _ = env.reset(seed=SEED) done = False planning_times = 0 while not done: @@ -291,7 +292,7 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i {'basis_generator_type': basis_generator_type, }, seed=SEED) - _ = env.reset() + _ = env.reset(seed=SEED) done = False planning_times = 0 while not done: @@ -340,7 +341,7 @@ def 
test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_
                              {'basis_generator_type': basis_generator_type,
                               },
                              seed=SEED)
-    _ = env.reset()
+    _ = env.reset(seed=SEED)
     for i in range(max_planning_times):
         action = env.action_space.sample()
         _obs, _reward, terminated, truncated, _info = env.step(action)

diff --git a/test/utils.py b/test/utils.py
index 86e82a2..157f840 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -30,7 +30,7 @@ def run_env(env_id: str, iterations: int = None, seed: int = 0, wrappers: List[T
     actions = []
     terminations = []
     truncations = []
-    obs, _ = env.reset()
+    obs, _ = env.reset(seed=seed)
     verify_observations(obs, env.observation_space, "reset()")
 
     iterations = iterations or (env.spec.max_episode_steps or 1)

From f44f01b478c7f21c4bf555308679c5f1263ead78 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 18 Jun 2023 11:52:35 +0200
Subject: [PATCH 050/198] Fix: Allow observation space dict in test_replanning

---
 test/test_replanning_sequencing.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index 6e4a760..6425ef8 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -80,7 +80,10 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter
     assert env.learn_sub_trajectories
     assert env.traj_gen.learn_tau
     # This also verifies we are not adding the TimeAwareObservationWrapper twice
-    assert env.observation_space == env_step.observation_space
+    if env.observation_space.__class__ in [spaces.Dict]:
+        assert spaces.flatten_space(env.observation_space) == env_step.observation_space
+    else:
+        assert env.observation_space == env_step.observation_space
 
     done = True
 
@@ -130,7 +133,10 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra
     assert env.do_replanning
     assert callable(env.replanning_schedule)
     # This also verifies we are not adding the TimeAwareObservationWrapper twice
-    assert env.observation_space == env_step.observation_space
+    if env.observation_space.__class__ in [spaces.Dict]:
+        assert spaces.flatten_space(env.observation_space) == env_step.observation_space
+    else:
+        assert env.observation_space == env_step.observation_space
 
     env.reset(seed=SEED)

From 49ac9c378571e78c26833a25edbd3f24fb62c0a1 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 18 Jun 2023 11:53:10 +0200
Subject: [PATCH 051/198] Fix: Don't throw errors for envs that do not expose a max_episode_steps.

---
 test/test_black_box.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index 678eaa9..61926cf 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -115,6 +115,11 @@
 @pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
 def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):
+    if not env.spec.max_episode_steps:
+        # Not all envs expose a max_episode_steps.
+        # To use those with MPs, they could be put in a time_limit-wrapper. 
+ return True + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env_id, wrapper_class = env_wrap From 60a4cf11d613faeeaf21cf9bf470bf6478fd11a3 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 18 Jun 2023 12:10:01 +0200 Subject: [PATCH 052/198] Fix: Some envs used no longer existing binding to mujoco --- fancy_gym/envs/mujoco/beerpong/beerpong.py | 8 +++++--- fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py | 14 ++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fancy_gym/envs/mujoco/beerpong/beerpong.py b/fancy_gym/envs/mujoco/beerpong/beerpong.py index fd1a5dc..802776f 100644 --- a/fancy_gym/envs/mujoco/beerpong/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/beerpong.py @@ -7,6 +7,8 @@ from gymnasium.core import ObsType from gymnasium.envs.mujoco import MujocoEnv from gymnasium.spaces import Box +import mujoco + MAX_EPISODE_STEPS_BEERPONG = 300 FIXED_RELEASE_STEP = 62 # empirically evaluated for frame_skip=2! @@ -61,9 +63,9 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): self.repeat_action = 2 # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this self.model = None - self.geom_id = lambda x: self._mujoco_bindings.mj_name2id(self.model, - self._mujoco_bindings.mjtObj.mjOBJ_GEOM, - x) + self.geom_id = lambda x: mujoco.mj_name2id(self.model, + mujoco.mjtObj.mjOBJ_GEOM, + x) # for reward calculation self.dists = [] diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py index f7936c7..b77cab1 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py @@ -7,6 +7,8 @@ from gymnasium import utils from gymnasium.envs.mujoco import MujocoEnv from gymnasium.spaces import Box +import mujoco + MAX_EPISODE_STEPS_HOPPERJUMP = 250 @@ -244,12 +246,12 @@ class HopperJumpEnv(HopperEnvCustomXML): # floor_geom_id = self.model.geom_name2id('floor') # foot_geom_id = self.model.geom_name2id('foot_geom') # TODO: do this properly over a sensor in the xml file, see dmc hopper - floor_geom_id = self._mujoco_bindings.mj_name2id(self.model, - self._mujoco_bindings.mjtObj.mjOBJ_GEOM, - 'floor') - foot_geom_id = self._mujoco_bindings.mj_name2id(self.model, - self._mujoco_bindings.mjtObj.mjOBJ_GEOM, - 'foot_geom') + floor_geom_id = mujoco.mj_name2id(self.model, + mujoco.mjtObj.mjOBJ_GEOM, + 'floor') + foot_geom_id = mujoco.mj_name2id(self.model, + mujoco.mjtObj.mjOBJ_GEOM, + 'foot_geom') for i in range(self.data.ncon): contact = self.data.contact[i] collision = contact.geom1 == floor_geom_id and contact.geom2 == foot_geom_id From b032dec5fe597d5802adcb71dfc72119eb2bb128 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 18 Jun 2023 14:23:59 +0200 Subject: [PATCH 053/198] Better handling of envs without defined max_steps --- fancy_gym/utils/make_env_helpers.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 7f1878e..848c083 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -8,9 +8,10 @@ from typing import Iterable, Type, Union, Optional import gymnasium as gym import numpy as np from gymnasium.envs.registration import register, registry -from gymnasium.wrappers import FlattenObservation +from gymnasium.wrappers import TimeLimit from fancy_gym.utils.env_compatibility import EnvCompatibility +from fancy_gym.utils.wrappers import FlattenObservation try: from 
dm_control import suite, manipulation @@ -31,7 +32,7 @@ from fancy_gym.black_box.factory.controller_factory import get_controller from fancy_gym.black_box.factory.phase_generator_factory import get_phase_generator from fancy_gym.black_box.factory.trajectory_generator_factory import get_trajectory_generator from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper -from fancy_gym.utils.time_aware_observation import TimeAwareObservation +from fancy_gym.utils.wrappers import TimeAwareObservation from fancy_gym.utils.utils import nested_update @@ -114,7 +115,7 @@ def make(env_id: str, seed: int, **kwargs): return env -def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs): +def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, fallback_max_steps=None, **kwargs): """ Helper function for creating a wrapped gym environment using MPs. It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is @@ -130,6 +131,8 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1 """ # _env = gym.make(env_id) _env = make(env_id, seed, **kwargs) + if fallback_max_steps: + _env = ensure_finite_time(_env, fallback_max_steps) has_black_box_wrapper = False for w in wrappers: # only wrap the environment if not BlackBoxWrapper, e.g. for vision @@ -144,7 +147,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1 def make_bb( env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping, controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed: int = 1, - **kwargs): + fallback_max_steps: int = None, **kwargs): """ This can also be used standalone for manually building a custom DMP environment. 
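    A hypothetical minimal call, using generator/controller types that appear in
    this series' tests ('promp' + 'motor' + 'exp' + 'rbf'; the env id and wrapper
    are placeholders):

        env = make_bb('Reacher5d-v0', wrappers=[MPWrapper], black_box_kwargs={},
                      traj_gen_kwargs={'trajectory_generator_type': 'promp'},
                      controller_kwargs={'controller_type': 'motor'},
                      phase_kwargs={'phase_generator_type': 'exp'},
                      basis_kwargs={'basis_generator_type': 'rbf'},
                      seed=1, fallback_max_steps=200)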
Args: @@ -172,7 +175,7 @@ def make_bb( # Add as first wrapper in order to alter observation wrappers.insert(0, TimeAwareObservation) - env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) + env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, fallback_max_steps=fallback_max_steps, **kwargs) # BB expects a spaces.Box to be exposed, need to convert for dict-observations if type(env.observation_space) == gym.spaces.dict.Dict: @@ -209,6 +212,15 @@ def make_bb( return bb_env +def ensure_finite_time(env: gym.Env, fallback_max_steps=500): + cur_limit = env.spec.max_episode_steps + if not cur_limit: + if hasattr(env.unwrapped, 'max_path_length'): + return TimeLimit(env, env.unwrapped.__getattribute__('max_path_length')) + return TimeLimit(env, fallback_max_steps) + return env + + def get_env_duration(env: gym.Env): try: duration = env.spec.max_episode_steps * env.dt From 9ade0dcdc4dfaf5ea2f5e7f424741f2d2f5bebb5 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 18 Jun 2023 14:25:20 +0200 Subject: [PATCH 054/198] Fix: Make wrappers work with BB and Dict-Space --- fancy_gym/utils/wrappers.py | 127 +++++++++++++++++++++++++++++ test/test_black_box.py | 11 +-- test/test_replanning_sequencing.py | 37 +++++---- 3 files changed, 150 insertions(+), 25 deletions(-) create mode 100644 fancy_gym/utils/wrappers.py diff --git a/fancy_gym/utils/wrappers.py b/fancy_gym/utils/wrappers.py new file mode 100644 index 0000000..03542c7 --- /dev/null +++ b/fancy_gym/utils/wrappers.py @@ -0,0 +1,127 @@ +from gymnasium.spaces import Box, Dict, flatten, flatten_space +from gym.spaces import Box as OldBox +import gymnasium as gym +import numpy as np +import copy + + +class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): + """Augment the observation with the current time step in the episode. + + The observation space of the wrapped environment is assumed to be a flat :class:`Box` or flattable :class:`Dict`. + In particular, pixel observations are not supported. This wrapper will append the current progress within the current episode to the observation. + The progress will be indicated as a number between 0 and 1. + """ + + def __init__(self, env: gym.Env, enforce_dtype_float32=False): + """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box` or flattable :class:`Dict` observation space. + + Args: + env: The environment to apply the wrapper + """ + gym.utils.RecordConstructorArgs.__init__(self) + gym.ObservationWrapper.__init__(self, env) + allowed_classes = [Box, OldBox, Dict] + if enforce_dtype_float32: + assert env.observation_space.dtype == np.float32, 'TimeAwareObservation was given an environment with a dtype!=np.float32 ('+str( + env.observation_space.dtype)+'). This requirement can be removed by setting enforce_dtype_float32=False.' + assert env.observation_space.__class__ in allowed_classes, str(env.observation_space)+' is not supported. 
Only Box or Dict' + + if env.observation_space.__class__ in [Box, OldBox]: + dtype = env.observation_space.dtype + + low = np.append(env.observation_space.low, 0.0) + high = np.append(env.observation_space.high, 1.0) + + self.observation_space = Box(low, high, dtype=dtype) + else: + spaces = copy.copy(env.observation_space.spaces) + dtype = np.float64 + spaces['time_awareness'] = Box(0, 1, dtype=dtype) + + self.observation_space = Dict(spaces) + + self.is_vector_env = getattr(env, "is_vector_env", False) + + def observation(self, observation): + """Adds to the observation with the current time step. + + Args: + observation: The observation to add the time step to + + Returns: + The observation with the time step appended to (relative to total number of steps) + """ + if self.observation_space.__class__ in [Box, OldBox]: + return np.append(observation, self.t / self.env.spec.max_episode_steps) + else: + obs = copy.copy(observation) + obs['time_awareness'] = self.t / self.env.spec.max_episode_steps + return obs + + def step(self, action): + """Steps through the environment, incrementing the time step. + + Args: + action: The action to take + + Returns: + The environment's step using the action. + """ + self.t += 1 + return super().step(action) + + def reset(self, **kwargs): + """Reset the environment setting the time to zero. + + Args: + **kwargs: Kwargs to apply to env.reset() + + Returns: + The reset environment + """ + self.t = 0 + return super().reset(**kwargs) + + +class FlattenObservation(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): + """Observation wrapper that flattens the observation. + + Example: + >>> import gymnasium as gym + >>> from gymnasium.wrappers import FlattenObservation + >>> env = gym.make("CarRacing-v2") + >>> env.observation_space.shape + (96, 96, 3) + >>> env = FlattenObservation(env) + >>> env.observation_space.shape + (27648,) + >>> obs, _ = env.reset() + >>> obs.shape + (27648,) + """ + + def __init__(self, env: gym.Env): + """Flattens the observations of an environment. + + Args: + env: The environment to apply the wrapper + """ + gym.utils.RecordConstructorArgs.__init__(self) + gym.ObservationWrapper.__init__(self, env) + + self.observation_space = flatten_space(env.observation_space) + + def observation(self, observation): + """Flattens an observation. 
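+
+        For a Dict observation this produces a single flat array. If flattening
+        the whole input fails (e.g. because it is a batch of observations from a
+        vectorized env), the except-branch below flattens each entry on its own.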
+ + Args: + observation: The observation to flatten + + Returns: + The flattened observation + """ + try: + return flatten(self.env.observation_space, observation) + except: + return np.array([flatten(self.env.observation_space, observation[i]) for i in range(len(observation))]) diff --git a/test/test_black_box.py b/test/test_black_box.py index 61926cf..1492958 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -9,7 +9,7 @@ from gymnasium.core import ActType, ObsType import fancy_gym from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper -from fancy_gym.utils.time_aware_observation import TimeAwareObservation +from fancy_gym.utils.wrappers import TimeAwareObservation SEED = 1 ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2'] @@ -17,6 +17,8 @@ WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) +MAX_STEPS_FALLBACK = 500 + class Object(object): pass @@ -115,11 +117,6 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]] @pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS)) def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): - if not env.spec.max_episode_steps: - # Not all envs expose a max_episode_steps. - # To use those with MPs, they could be put in a time_limit-wrapper. - return True - basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env_id, wrapper_class = env_wrap @@ -127,7 +124,7 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': basis_generator_type}) + {'basis_generator_type': basis_generator_type}, fallback_max_steps=MAX_STEPS_FALLBACK) for i in range(5): env.reset(seed=SEED) diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index 6425ef8..f219bbb 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -11,7 +11,8 @@ from gymnasium import spaces import fancy_gym from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper -from fancy_gym.utils.time_aware_observation import TimeAwareObservation +from fancy_gym.utils.wrappers import TimeAwareObservation +from fancy_gym.utils.make_env_helpers import ensure_finite_time SEED = 1 ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2'] @@ -19,6 +20,8 @@ WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) +MAX_STEPS_FALLBACK = 100 + class ToyEnv(gym.Env): observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64) @@ -64,7 +67,7 @@ def setup(): def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]], add_time_aware_wrapper_before: bool): env_id, wrapper_class = env_wrap - env_step = TimeAwareObservation(fancy_gym.make(env_id, SEED)) + env_step = TimeAwareObservation(ensure_finite_time(fancy_gym.make(env_id, SEED), MAX_STEPS_FALLBACK)) wrappers = [wrapper_class] # has time aware 
wrapper @@ -75,15 +78,14 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}, seed=SEED) + {'basis_generator_type': 'rbf'}, seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) assert env.learn_sub_trajectories + assert env.spec.max_episode_steps + assert env_step.spec.max_episode_steps assert env.traj_gen.learn_tau # This also verifies we are not adding the TimeAwareObservationWrapper twice - if env.observation_space.__class__ in [spaces.Dict]: - assert spaces.flatten_space(env.observation_space) == env_step.observation_space - else: - assert env.observation_space == env_step.observation_space + assert spaces.flatten_space(env_step.observation_space) == spaces.flatten_space(env.observation_space) done = True @@ -112,7 +114,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]], add_time_aware_wrapper_before: bool, replanning_time: int): env_id, wrapper_class = env_wrap - env_step = TimeAwareObservation(fancy_gym.make(env_id, SEED)) + env_step = TimeAwareObservation(ensure_finite_time(fancy_gym.make(env_id, SEED), MAX_STEPS_FALLBACK)) wrappers = [wrapper_class] # has time aware wrapper @@ -128,15 +130,14 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': phase_generator_type}, - {'basis_generator_type': basis_generator_type}, seed=SEED) + {'basis_generator_type': basis_generator_type}, seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) assert env.do_replanning + assert env.spec.max_episode_steps + assert env_step.spec.max_episode_steps assert callable(env.replanning_schedule) # This also verifies we are not adding the TimeAwareObservationWrapper twice - if env.observation_space.__class__ in [spaces.Dict]: - assert spaces.flatten_space(env.observation_space) == env_step.observation_space - else: - assert env.observation_space == env_step.observation_space + assert spaces.flatten_space(env_step.observation_space) == spaces.flatten_space(env.observation_space) env.reset(seed=SEED) @@ -177,7 +178,7 @@ def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_s }, {'basis_generator_type': basis_generator_type, }, - seed=SEED) + seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -209,7 +210,7 @@ def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_se }, {'basis_generator_type': basis_generator_type, }, - seed=SEED) + seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -242,7 +243,7 @@ def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_ }, {'basis_generator_type': basis_generator_type, }, - seed=SEED) + seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -297,7 +298,7 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i }, {'basis_generator_type': basis_generator_type, }, - seed=SEED) + seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -346,7 +347,7 @@ def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_ }, {'basis_generator_type': 
basis_generator_type, }, - seed=SEED) + seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) _ = env.reset(seed=SEED) for i in range(max_planning_times): action = env.action_space.sample() From f8ad65b790c5a1cdc6e485807d3780a0be670a23 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 18 Jun 2023 14:25:59 +0200 Subject: [PATCH 055/198] Remove old file --- fancy_gym/utils/time_aware_observation.py | 79 ----------------------- 1 file changed, 79 deletions(-) delete mode 100644 fancy_gym/utils/time_aware_observation.py diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py deleted file mode 100644 index 61290dd..0000000 --- a/fancy_gym/utils/time_aware_observation.py +++ /dev/null @@ -1,79 +0,0 @@ -from gymnasium.spaces import Box, Dict -from gym.spaces import Box as OldBox -import gymnasium as gym -import numpy as np -import copy - - -class TimeAwareObservation(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): - """Augment the observation with the current time step in the episode. - - The observation space of the wrapped environment is assumed to be a flat :class:`Box` or flattable :class:`Dict`. - In particular, pixel observations are not supported. This wrapper will append the current progress within the current episode to the observation. - The progress will be indicated as a number between 0 and 1. - """ - - def __init__(self, env: gym.Env, enforce_dtype_float32=False): - """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box` or flattable :class:`Dict` observation space. - - Args: - env: The environment to apply the wrapper - """ - gym.utils.RecordConstructorArgs.__init__(self) - gym.ObservationWrapper.__init__(self, env) - allowed_classes = [Box, OldBox, Dict] - if enforce_dtype_float32: - assert env.observation_space.dtype == np.float32, 'TimeAwareObservation was given an environment with a dtype!=np.float32 ('+str( - env.observation_space.dtype)+'). This requirement can be removed by setting enforce_dtype_float32=False.' - assert env.observation_space.__class__ in allowed_classes, str(env.observation_space)+' is not supported. Only Box or Dict' - - if env.observation_space.__class__ in [Box, OldBox]: - dtype = env.observation_space.dtype - - low = np.append(env.observation_space.low, 0.0) - high = np.append(env.observation_space.high, 1.0) - - self.observation_space = Box(low, high, dtype=dtype) - else: - spaces = copy.copy(env.observation_space.spaces) - dtype = np.float64 - spaces['time_awareness'] = Box(0, 1, dtype=dtype) - - self.observation_space = Dict(spaces) - - self.is_vector_env = getattr(env, "is_vector_env", False) - - def observation(self, observation): - """Adds to the observation with the current time step. - - Args: - observation: The observation to add the time step to - - Returns: - The observation with the time step appended to (relative to total number of steps) - """ - return np.append(observation, self.t / self.env.spec.max_episode_steps) - - def step(self, action): - """Steps through the environment, incrementing the time step. - - Args: - action: The action to take - - Returns: - The environment's step using the action. - """ - self.t += 1 - return super().step(action) - - def reset(self, **kwargs): - """Reset the environment setting the time to zero. 
- - Args: - **kwargs: Kwargs to apply to env.reset() - - Returns: - The reset environment - """ - self.t = 0 - return super().reset(**kwargs) From b6089c4b83f9f565a55b3207a313e92aec38c9d7 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 18 Jun 2023 15:52:17 +0200 Subject: [PATCH 056/198] ugly_hack_to_mitigate_metaworld_bug --- test/test_black_box.py | 2 +- test/test_replanning_sequencing.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index 1492958..53b4434 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -17,7 +17,7 @@ WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) -MAX_STEPS_FALLBACK = 500 +MAX_STEPS_FALLBACK = 100 class Object(object): diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index f219bbb..49c0218 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -20,7 +20,7 @@ WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) -MAX_STEPS_FALLBACK = 100 +MAX_STEPS_FALLBACK = 50 class ToyEnv(gym.Env): @@ -155,10 +155,21 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra print(done, (i + 1), episode_steps) assert (i + 1) % episode_steps == 0 env.reset(seed=SEED) + ugly_hack_to_mitigate_metaworld_bug(env) assert replanning_schedule(None, None, None, None, length) +def ugly_hack_to_mitigate_metaworld_bug(env): + head = env + try: + for i in range(16): + head.curr_path_length = 0 + head = head.env + except: + pass + + @pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) @pytest.mark.parametrize('sub_segment_steps', [5, 10]) From 7354257f8e45e73b003b9ad5a907e01ad6478b60 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 18 Jun 2023 17:47:54 +0200 Subject: [PATCH 057/198] Bug mitigation for metaworld refactored and extended --- test/test_black_box.py | 3 +++ test/test_replanning_sequencing.py | 13 ++----------- test/utils.py | 10 ++++++++++ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index 53b4434..3f87375 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -10,6 +10,7 @@ from gymnasium.core import ActType, ObsType import fancy_gym from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from fancy_gym.utils.wrappers import TimeAwareObservation +from test.utils import ugly_hack_to_mitigate_metaworld_bug SEED = 1 ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2'] @@ -128,6 +129,7 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): for i in range(5): env.reset(seed=SEED) + ugly_hack_to_mitigate_metaworld_bug(env) # TODO: Remove, when metaworld fixed it upstream _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample()) length = info['trajectory_length'] @@ -330,6 +332,7 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): for i in range(5): if done: env.reset(seed=SEED) + 
ugly_hack_to_mitigate_metaworld_bug(env) action = env.action_space.sample() action[0] = tau action[1] = delay diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index 49c0218..e38fbd5 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -13,6 +13,7 @@ import fancy_gym from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from fancy_gym.utils.wrappers import TimeAwareObservation from fancy_gym.utils.make_env_helpers import ensure_finite_time +from test.utils import ugly_hack_to_mitigate_metaworld_bug SEED = 1 ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2'] @@ -155,21 +156,11 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra print(done, (i + 1), episode_steps) assert (i + 1) % episode_steps == 0 env.reset(seed=SEED) - ugly_hack_to_mitigate_metaworld_bug(env) + ugly_hack_to_mitigate_metaworld_bug(env) # TODO: Remove, when metaworld fixed it upstream assert replanning_schedule(None, None, None, None, length) -def ugly_hack_to_mitigate_metaworld_bug(env): - head = env - try: - for i in range(16): - head.curr_path_length = 0 - head = head.env - except: - pass - - @pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) @pytest.mark.parametrize('sub_segment_steps', [5, 10]) diff --git a/test/utils.py b/test/utils.py index 157f840..782b151 100644 --- a/test/utils.py +++ b/test/utils.py @@ -100,3 +100,13 @@ def verify_reward(reward): def verify_done(done): assert isinstance( done, bool), f"Returned {done} as done flag, expected bool." + + +def ugly_hack_to_mitigate_metaworld_bug(env): + head = env + try: + for i in range(16): + head.curr_path_length = 0 + head = head.env + except: + pass From 9af42112b6ea1dfff686e0f5237c68c91e3b1d10 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 24 Jun 2023 10:37:35 +0200 Subject: [PATCH 058/198] Version bump (made very breaking changes) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 92e0d72..a38fb77 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]: setup( author='Fabian Otto, Onur Celik', name='fancy_gym', - version='0.3', + version='0.4', classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Science/Research', From 0fe56c4c0e6788c7c830a63462944cfe77d0ca57 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 24 Jun 2023 11:01:07 +0200 Subject: [PATCH 059/198] Stricter dependency versions --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index a38fb77..aa5319f 100644 --- a/setup.py +++ b/setup.py @@ -6,12 +6,12 @@ from setuptools import setup, find_packages # Environment-specific dependencies for dmc and metaworld extras = { - 'dmc': ['dm_control>=1.0.1', 'shimmy[dm-control]'], + 'dmc': ['dm_control>=1.0.1', 'shimmy[dm-control]', 'Shimmy==1.0.0'], 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@43abf981b97c01669af898833a740fb63605b8ac#egg=metaworld', - 'mujoco-py<2.2,>=2.1', + 'mujoco-py<2.2,>=2.1', 'gym>=0.15.4' ], 'box2d': ['gymnasium[box2d]>=0.26.0'], - 'mujoco': ['gymnasium[mujoco]>0.26.0'], + 'mujoco': ['mujoco==2.3.5', 'gymnasium[mujoco]>0.26.0'], } # All dependencies From 55c6cff6902d04a583d64db70230b05e0cd50b4c Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 24 Jun 2023 11:03:40 
+0200 Subject: [PATCH 060/198] Fix: PyPi had troubles finding a compatible version for dm-control --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index aa5319f..ccc0643 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from setuptools import setup, find_packages # Environment-specific dependencies for dmc and metaworld extras = { - 'dmc': ['dm_control>=1.0.1', 'shimmy[dm-control]', 'Shimmy==1.0.0'], + 'dmc': ['dm-control==1.0.12', 'shimmy[dm-control]', 'Shimmy==1.0.0'], 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@43abf981b97c01669af898833a740fb63605b8ac#egg=metaworld', 'mujoco-py<2.2,>=2.1', 'gym>=0.15.4' ], From bc0dcb76422178e7ec19bfcc101f29b927dff72f Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 24 Jun 2023 11:43:40 +0200 Subject: [PATCH 061/198] Fixed: Tried importing old dependency --- fancy_gym/utils/make_env_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 848c083..c8d195b 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -14,7 +14,7 @@ from fancy_gym.utils.env_compatibility import EnvCompatibility from fancy_gym.utils.wrappers import FlattenObservation try: - from dm_control import suite, manipulation + import shimmy from shimmy.dm_control_compatibility import EnvType except ImportError: pass From e8119798c34d7fb9df1d538523eac232ac241df7 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 24 Jun 2023 11:52:59 +0200 Subject: [PATCH 062/198] Updating some dependencies --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ccc0643..e56310a 100644 --- a/setup.py +++ b/setup.py @@ -6,12 +6,12 @@ from setuptools import setup, find_packages # Environment-specific dependencies for dmc and metaworld extras = { - 'dmc': ['dm-control==1.0.12', 'shimmy[dm-control]', 'Shimmy==1.0.0'], + 'dmc': ['dm-control==1.0.13', 'shimmy[dm-control]', 'Shimmy==1.0.0'], 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@43abf981b97c01669af898833a740fb63605b8ac#egg=metaworld', 'mujoco-py<2.2,>=2.1', 'gym>=0.15.4' ], 'box2d': ['gymnasium[box2d]>=0.26.0'], - 'mujoco': ['mujoco==2.3.5', 'gymnasium[mujoco]>0.26.0'], + 'mujoco': ['mujoco==2.3.6', 'gymnasium[mujoco]>0.26.0'], } # All dependencies From ad7ddd3b28ceff2698f94e9dac548bea81bc49ce Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 24 Jun 2023 12:24:52 +0200 Subject: [PATCH 063/198] Fix: A missing ',' caused very weird bugs... --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e56310a..2e96d80 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( ], extras_require=extras, install_requires=[ - 'gymnasium>=0.26.0' + 'gymnasium>=0.26.0', 'mp_pytorch<=0.1.3' ], packages=[package for package in find_packages( From 55f84da5e1c6289e5b03611634b6c8a2bbbf631d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 27 Jun 2023 21:39:03 +0200 Subject: [PATCH 064/198] A fancy icon for fancy gym --- README.md | 24 +++++++------ icon.svg | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 10 deletions(-) create mode 100644 icon.svg diff --git a/README.md b/README.md index 4db4f69..6988f38 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,17 @@ -# Fancy Gym +

+[centered HTML header added here: "icon.svg" logo and a "Fancy Gym" title; the raw HTML tags were lost in text extraction]
-`fancy_gym` offers a large variety of reinforcement learning environments under the unifying interface -of [OpenAI gym](https://gymlibrary.dev/). We provide support (under the OpenAI gym interface) for the benchmark suites -[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) -(DMC) and [Metaworld](https://meta-world.github.io/). If those are not sufficient and you want to create your own custom -gym environments, use [this guide](https://www.gymlibrary.dev/content/environment_creation/). We highly appreciate it, if -you would then submit a PR for this environment to become part of `fancy_gym`. -In comparison to existing libraries, we additionally support to control agents with movement primitives, such as Dynamic -Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP). +`fancy_gym` offers a large variety of reinforcement learning environments under the unifying interface of [Gymnasium](https://gymnasium.farama.org/). + +We provide support (under the Gymnasium interface) for the benchmark suites [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) (DMC) and [Metaworld](https://meta-world.github.io/). If those are not sufficient and you want to create your own custom gym environments, use [this guide] https://www.gymlibrary.dev/content/environment_creation/). We highly appreciate it, if you would then submit a PR for this environment to become part of `fancy_gym`. + +In comparison to existing libraries, we additionally support to control agents with movement primitives, such as Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP). ## Movement Primitive Environments (Episode-Based/Black-Box Environments) @@ -73,7 +77,7 @@ for i in range(1000): if done: obs = env.reset() -``` +``` When using `dm_control` tasks we expect the `env_id` to be specified as `dmc:domain_name-task_name` or for manipulation tasks as `dmc:manipulation-environment_name`. For `metaworld` tasks, we require the structure `metaworld:env_id-v2`, our diff --git a/icon.svg b/icon.svg new file mode 100644 index 0000000..64ec435 --- /dev/null +++ b/icon.svg @@ -0,0 +1,101 @@ + + + + From 1601a87cb1422a0e57b76c062a0dfae115dd373c Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 27 Jun 2023 21:43:22 +0200 Subject: [PATCH 065/198] Added attribution for the icon. --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 6988f38..8cc343f 100644 --- a/README.md +++ b/README.md @@ -232,3 +232,6 @@ for i in range(5): rewards = 0 obs = env.reset() ``` + +### Icon Attribution +The icon is based on the [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) icon as can be found [here](https://gymnasium.farama.org/_static/img/gymnasium_black.svg). 
\ No newline at end of file From de118a31136ee2477a3d9e146d51ccd4b812c180 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 28 Jun 2023 19:42:41 +0200 Subject: [PATCH 066/198] Down to 20 failing tests (more mitigations of metaworld bug) --- test/test_replanning_sequencing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index e38fbd5..a8eb4b5 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -93,6 +93,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter for i in range(25): if done: env.reset(seed=SEED) + ugly_hack_to_mitigate_metaworld_bug(env) # TODO: Remove, when metaworld fixed it upstream action = env.action_space.sample() _obs, _reward, terminated, truncated, info = env.step(action) done = terminated or truncated From eb1d145dbd1097a524bc0bf467b41ae26ec874a8 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 28 Jun 2023 20:25:50 +0200 Subject: [PATCH 067/198] Fix Test: Delay was not supplied to mp-generator --- test/test_black_box.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index 3f87375..139b1c2 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -286,7 +286,7 @@ def test_learn_delay(mp_type: str, delay: float): action = env.action_space.sample() action[0] = delay - _obs, _reward, terminated, truncated, info = env.step(env.action_space.sample()) + _obs, _reward, terminated, truncated, info = env.step(action) done = terminated or truncated length = info['trajectory_length'] From bf1eb496f6a92e86f8414fc77b2365f7ace480c7 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 28 Jun 2023 22:20:49 +0200 Subject: [PATCH 068/198] Make Icon Attribution h2 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8cc343f..c99d32b 100644 --- a/README.md +++ b/README.md @@ -233,5 +233,5 @@ for i in range(5): obs = env.reset() ``` -### Icon Attribution -The icon is based on the [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) icon as can be found [here](https://gymnasium.farama.org/_static/img/gymnasium_black.svg). \ No newline at end of file +## Icon Attribution +The icon is based on the [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) icon as can be found [here](https://gymnasium.farama.org/_static/img/gymnasium_black.svg). 
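Taken together, the dependency patches 059-063 above leave the extras table in `setup.py` roughly as follows. This is a reader-facing reconstruction, not part of any diff; note that the mujoco pin is adjusted again in patch 069 below and the dm-control pin is relaxed in patch 072.

```python
# Approximate state of the extras after PATCH 063 (reconstruction; the exact state is in the diffs above).
extras = {
    'dmc': ['dm-control==1.0.13', 'shimmy[dm-control]', 'Shimmy==1.0.0'],
    'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@43abf981b97c01669af898833a740fb63605b8ac#egg=metaworld',
                  'mujoco-py<2.2,>=2.1', 'gym>=0.15.4'],
    'box2d': ['gymnasium[box2d]>=0.26.0'],
    'mujoco': ['mujoco==2.3.6', 'gymnasium[mujoco]>0.26.0'],
}
# Users opt into a benchmark suite via the matching extra, e.g.:
#   pip install fancy_gym[dmc,metaworld]
```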
From cdabd3f478a599a64ae8476277a4df764d2d3287 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 7 Jul 2023 10:48:17 +0200 Subject: [PATCH 069/198] Downgrading mujoco 2.3.6 -> 2.3.3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2e96d80..c0766b7 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ extras = { 'mujoco-py<2.2,>=2.1', 'gym>=0.15.4' ], 'box2d': ['gymnasium[box2d]>=0.26.0'], - 'mujoco': ['mujoco==2.3.6', 'gymnasium[mujoco]>0.26.0'], + 'mujoco': ['mujoco==2.3.3', 'gymnasium[mujoco]>0.26.0'], } # All dependencies From 14a95eb5ca6cb4d8870a6448d04b8881e5efe256 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 7 Jul 2023 11:51:06 +0200 Subject: [PATCH 070/198] Fixed Typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c99d32b..24108f2 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ `fancy_gym` offers a large variety of reinforcement learning environments under the unifying interface of [Gymnasium](https://gymnasium.farama.org/). -We provide support (under the Gymnasium interface) for the benchmark suites [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) (DMC) and [Metaworld](https://meta-world.github.io/). If those are not sufficient and you want to create your own custom gym environments, use [this guide] https://www.gymlibrary.dev/content/environment_creation/). We highly appreciate it, if you would then submit a PR for this environment to become part of `fancy_gym`. +We provide support (under the Gymnasium interface) for the benchmark suites [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) (DMC) and [Metaworld](https://meta-world.github.io/). If those are not sufficient and you want to create your own custom gym environments, use [this guide](https://www.gymlibrary.dev/content/environment_creation/). We highly appreciate it, if you would then submit a PR for this environment to become part of `fancy_gym`. In comparison to existing libraries, we additionally support to control agents with movement primitives, such as Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP). 
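Since the README now advertises the Gymnasium interface, the rollout loop it implies uses the `terminated`/`truncated` pair of the new step API instead of a single `done` flag. A minimal sketch, assuming only that the fancy_gym environments are registered on import (the id `Reacher5d-v0` is one registered elsewhere in this series; any registered id works the same way):

```python
import gymnasium as gym
import fancy_gym  # noqa: F401 -- importing fancy_gym registers its environments

env = gym.make('Reacher5d-v0')
obs, info = env.reset(seed=1)
for _ in range(200):
    # Sample a random action; a trained policy would go here.
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```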
From 27f8335a0d02c82f0deb63dfbdfe4dc31a635e60 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 7 Jul 2023 13:11:52 +0200 Subject: [PATCH 071/198] Remove dependence of wrapper on old gym --- fancy_gym/utils/wrappers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fancy_gym/utils/wrappers.py b/fancy_gym/utils/wrappers.py index 03542c7..7526269 100644 --- a/fancy_gym/utils/wrappers.py +++ b/fancy_gym/utils/wrappers.py @@ -1,5 +1,8 @@ from gymnasium.spaces import Box, Dict, flatten, flatten_space -from gym.spaces import Box as OldBox +try: + from gym.spaces import Box as OldBox +except ImportError: + OldBox = None import gymnasium as gym import numpy as np import copy From 67a5d197d9b1d7d39bbdc539d34de39e6afa56bb Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 7 Jul 2023 18:16:13 +0200 Subject: [PATCH 072/198] Weaker version requirement for dm-control --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c0766b7..0f8fd5d 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from setuptools import setup, find_packages # Environment-specific dependencies for dmc and metaworld extras = { - 'dmc': ['dm-control==1.0.13', 'shimmy[dm-control]', 'Shimmy==1.0.0'], + 'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'], 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@43abf981b97c01669af898833a740fb63605b8ac#egg=metaworld', 'mujoco-py<2.2,>=2.1', 'gym>=0.15.4' ], From ffbada2311403814297e656729124cbf760481d1 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 14 Jul 2023 14:28:31 +0200 Subject: [PATCH 073/198] Started implementing new fancy registry --- fancy_gym/envs/registry.py | 138 +++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 fancy_gym/envs/registry.py diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py new file mode 100644 index 0000000..e57e447 --- /dev/null +++ b/fancy_gym/envs/registry.py @@ -0,0 +1,138 @@ +from fancy_gym.utils.make_env_helpers import make_bb +from fancy_gym.utils.utils import nested_update + +from gymnasium import register as gym_register +from gymnasium import gym_make + +import copy + +_BB_DEFAULTS = { + 'ProMP': { + "wrappers": [], + "trajectory_generator_kwargs": { + 'trajectory_generator_type': 'promp' + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear' + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": 1.0, + "d_gains": 0.1, + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1, + 'basis_bandwidth_factor': 3.0, + }, + "black_box_kwargs": { + } + }, + 'DMP': { + "wrappers": [], + "trajectory_generator_kwargs": { + 'trajectory_generator_type': 'dmp' + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'exp' + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": 1.0, + "d_gains": 0.1, + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'rbf', + 'num_basis': 5 + }, + "black_box_kwargs": { + } + }, + 'ProDMP': { + "wrappers": [], + "trajectory_generator_kwargs": { + 'trajectory_generator_type': 'prodmp', + 'duration': 2.0, + 'weights_scale': 1.0, + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'exp', + 'tau': 1.5, + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": 1.0, + "d_gains": 0.1, + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'prodmp', + 'alpha': 10, + 'num_basis': 5, + }, + "black_box_kwargs": { + } + } +} 
+ +KNOWN_MPS = list(_BB_DEFAULTS.keys()) +ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {mp_type: [] for mp_type in KNOWN_MPS} + + +def register( + id, + entry_point, + register_step_based=True, # TODO: Detect + add_mp_types=KNOWN_MPS, + override_mp_config={}, + **kwargs +): + if register_step_based: + gym_register(id=id, entry_point=entry_point, **kwargs) + register_mps(id, override_mp_config, add_mp_types) + + +def register_mps(id, add_mp_types=KNOWN_MPS): + for mp_type in add_mp_types: + register_mp(id, mp_type) + + +def register_mp(id, mp_type): + assert mp_type in KNOWN_MPS, 'Unknown mp_type' + assert id not in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.' + parts = id.split('-') + assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.' + fancy_id = '-'.join(parts[:-1]+[mp_type, parts[-1]]) + register( + id=fancy_id, + entry_point=bb_env_constructor, + kwargs={ + 'underlying_id': id, + 'mp_type': mp_type + } + ) + ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id) + + +def bb_env_constructor(underlying_id, mp_type, step_based_kwargs={}, mp_config_override={}): + underlying_env = gym_make(underlying_id, **step_based_kwargs) + env_metadata = underlying_env.metadata + + config = copy.deepcopy(_BB_DEFAULTS[mp_type]) + metadata_config = env_metadata.get('mp_config', {}) + nested_update(config, metadata_config) + nested_update(config, mp_config_override) + + wrappers = config.pop("wrappers") + + traj_gen_kwargs = config.pop("trajectory_generator_kwargs", {}) + black_box_kwargs = config.pop('black_box_kwargs', {}) + contr_kwargs = config.pop("controller_kwargs", {}) + phase_kwargs = config.pop("phase_generator_kwargs", {}) + basis_kwargs = config.pop("basis_generator_kwargs", {}) + + return make_bb(underlying_env, wrappers=wrappers, + black_box_kwargs=black_box_kwargs, + traj_gen_kwargs=traj_gen_kwargs, controller_kwargs=contr_kwargs, + phase_kwargs=phase_kwargs, + basis_kwargs=basis_kwargs, **config) From 6c90f8ade24612eca25bb1c51dffb26e9fcba02b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 14 Jul 2023 14:29:08 +0200 Subject: [PATCH 074/198] Getting rid of some old code --- fancy_gym/utils/make_env_helpers.py | 251 +++------------------------- 1 file changed, 20 insertions(+), 231 deletions(-) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index c8d195b..4dc1f6b 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -1,11 +1,17 @@ -import logging +from fancy_gym.utils.wrappers import TimeAwareObservation +from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper +from fancy_gym.black_box.factory.trajectory_generator_factory import get_trajectory_generator +from fancy_gym.black_box.factory.phase_generator_factory import get_phase_generator +from fancy_gym.black_box.factory.controller_factory import get_controller +from fancy_gym.black_box.factory.basis_generator_factory import get_basis_generator +from fancy_gym.black_box.black_box_wrapper import BlackBoxWrapper import uuid from collections.abc import MutableMapping -from copy import deepcopy from math import ceil from typing import Iterable, Type, Union, Optional import gymnasium as gym +from gymnasium import make import numpy as np from gymnasium.envs.registration import register, registry from gymnasium.wrappers import TimeLimit @@ -25,128 +31,37 @@ except Exception: # catch Exception as Import error does not catch 
missing mujoco-py pass -import fancy_gym -from fancy_gym.black_box.black_box_wrapper import BlackBoxWrapper -from fancy_gym.black_box.factory.basis_generator_factory import get_basis_generator -from fancy_gym.black_box.factory.controller_factory import get_controller -from fancy_gym.black_box.factory.phase_generator_factory import get_phase_generator -from fancy_gym.black_box.factory.trajectory_generator_factory import get_trajectory_generator -from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper -from fancy_gym.utils.wrappers import TimeAwareObservation -from fancy_gym.utils.utils import nested_update - -def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs): - """ - TODO: Do we need this? - Generate a callable to create a new gym environment with a given seed. - The rank is added to the seed and can be used for example when using vector environments. - E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments - with seeds 123 through 130. - Hence, testing environments should be seeded with a value which is offset by the number of training environments. - Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns - - Args: - env_id: name of the environment - seed: seed for deterministic behaviour - rank: environment rank for deterministic over multiple seeds behaviour - return_callable: If True returns a callable to create the environment instead of the environment itself. - - Returns: - - """ - - def f(): - return make(env_id, seed + rank, **kwargs) - - return f if return_callable else f() - - -def make(env_id: str, seed: int, **kwargs): - """ - Converts an env_id to an environment with the gym API. - This also works for DeepMind Control Suite environments that are wrapped using the DMCWrapper, they can be - specified with "dmc:domain_name-task_name" - Analogously, metaworld tasks can be created as "metaworld:env_id-v2". - - Args: - env_id: spec or env_id for gym tasks, external environments require a domain specification - **kwargs: Additional kwargs for the constructor such as pixel observations, etc. - - Returns: Gym environment - - """ - - if ':' in env_id: - split_id = env_id.split(':') - framework, framework_env_id = split_id[-2:] - else: - framework = None - - if framework == 'metaworld': - # MetaWorld environment - env = make_metaworld(framework_env_id, seed, **kwargs) - elif framework == 'dmc': - # DeepMind Control environment - # ensures legacy compatability: - # shimmy expects dm_controll/..., while we used dmc:... 
in the past - env = make_gym('dm_control/'+framework_env_id, seed, **kwargs) - else: - env = make_gym(env_id, seed, **kwargs) - - if not env.spec.max_episode_steps == None: - # Hack: Some envs violate the gym spec in that they don't correctly expose the maximum episode steps - # Gymnasium disallows accessing private attributes, so we have to get creative to read the internal values - # TODO: Remove this, when all supported envs correctly implement this themselves - unwrapped = env.unwrapped if hasattr(env, 'unwrapped') else env - if hasattr(unwrapped, '_max_episode_steps'): - env.spec.max_episode_steps = unwrapped.__getattribute__('_max_episode_steps') - - # try: - env.reset(seed=seed) - # except TypeError: - # # Support for older gym envs that do not have seeding - # # env.seed(seed) - # np_random, _ = seeding.np_random(seed) - # env.np_random = np_random - env.action_space.seed(seed) - env.observation_space.seed(seed) - - return env - - -def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, fallback_max_steps=None, **kwargs): +def _make_wrapped_env(env: gym.Env, wrappers: Iterable[Type[gym.Wrapper]], seed=1, fallback_max_steps=None, **kwargs): """ Helper function for creating a wrapped gym environment using MPs. It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is provided to expose the interface for MPs. Args: - env_id: name of the environment + env: base environemnt to wrap wrappers: list of wrappers (at least an RawInterfaceWrapper), seed: seed of environment Returns: gym environment with all specified wrappers applied """ - # _env = gym.make(env_id) - _env = make(env_id, seed, **kwargs) if fallback_max_steps: - _env = ensure_finite_time(_env, fallback_max_steps) + env = ensure_finite_time(env, fallback_max_steps) has_black_box_wrapper = False for w in wrappers: # only wrap the environment if not BlackBoxWrapper, e.g. for vision if issubclass(w, RawInterfaceWrapper): has_black_box_wrapper = True - _env = w(_env) + env = w(env) if not has_black_box_wrapper: raise ValueError("A RawInterfaceWrapper is required in order to leverage movement primitive environments.") - return _env + return env def make_bb( - env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping, - controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed: int = 1, + env: Union[gym.Env, str], wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping, + controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, fallback_max_steps: int = None, **kwargs): """ This can also be used standalone for manually building a custom DMP environment. 
@@ -155,7 +70,7 @@ def make_bb( basis_kwargs: kwargs for the basis generator phase_kwargs: kwargs for the phase generator controller_kwargs: kwargs for the tracking controller - env_id: base_env_name, + env: step based environment (or environment id), wrappers: list of wrappers (at least an RawInterfaceWrapper), seed: seed of environment traj_gen_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP @@ -175,7 +90,10 @@ def make_bb( # Add as first wrapper in order to alter observation wrappers.insert(0, TimeAwareObservation) - env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, fallback_max_steps=fallback_max_steps, **kwargs) + if isinstance(env, str): + env = make(env) + + env = _make_wrapped_env(env=env, wrappers=wrappers, fallback_max_steps=fallback_max_steps, **kwargs) # BB expects a spaces.Box to be exposed, need to convert for dict-observations if type(env.observation_space) == gym.spaces.dict.Dict: @@ -235,104 +153,6 @@ def get_env_duration(env: gym.Env): return duration -def make_bb_env_helper(**kwargs): - """ - Helper function for registering a black box gym environment. - Args: - **kwargs: expects at least the following: - { - "name": base environment name. - "wrappers": list of wrappers (at least an BlackBoxWrapper is required), - "traj_gen_kwargs": { - "trajectory_generator_type": type_of_your_movement_primitive, - non default arguments for the movement primitive instance - ... - } - "controller_kwargs": { - "controller_type": type_of_your_controller, - non default arguments for the tracking_controller instance - ... - }, - "basis_generator_kwargs": { - "basis_generator_type": type_of_your_basis_generator, - non default arguments for the basis generator instance - ... - }, - "phase_generator_kwargs": { - "phase_generator_type": type_of_your_phase_generator, - non default arguments for the phase generator instance - ... 
- }, - } - - Returns: MP wrapped gym env - - """ - seed = kwargs.pop("seed", None) - wrappers = kwargs.pop("wrappers") - - traj_gen_kwargs = kwargs.pop("trajectory_generator_kwargs", {}) - black_box_kwargs = kwargs.pop('black_box_kwargs', {}) - contr_kwargs = kwargs.pop("controller_kwargs", {}) - phase_kwargs = kwargs.pop("phase_generator_kwargs", {}) - basis_kwargs = kwargs.pop("basis_generator_kwargs", {}) - - return make_bb(env_id=kwargs.pop("name"), wrappers=wrappers, - black_box_kwargs=black_box_kwargs, - traj_gen_kwargs=traj_gen_kwargs, controller_kwargs=contr_kwargs, - phase_kwargs=phase_kwargs, - basis_kwargs=basis_kwargs, **kwargs, seed=seed) - - -# Deprecated: With shimmy gym now has native support for deepmind envs -# def make_dmc( -# env_id: str, -# seed: int = None, -# visualize_reward: bool = True, -# time_limit: Union[None, float] = None, -# **kwargs -# ): -# if not re.match(r"\w+-\w+", env_id): -# raise ValueError("env_id does not have the following structure: 'domain_name-task_name'") -# domain_name, task_name = env_id.split("-") -# -# if task_name.endswith("_vision"): -# # TODO -# raise ValueError("The vision interface for manipulation tasks is currently not supported.") -# -# if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL: -# raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.') -# -# # env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1' -# gym_id = uuid.uuid4().hex + '-v1' -# -# task_kwargs = {'random': seed} -# if time_limit is not None: -# task_kwargs['time_limit'] = time_limit -# -# # create task -# # Accessing private attribute because DMC does not expose time_limit or step_limit. -# # Only the current time_step/time as well as the control_timestep can be accessed. -# if domain_name == "manipulation": -# env = manipulation.load(environment_name=task_name, seed=seed) -# max_episode_steps = ceil(env._time_limit / env.control_timestep()) -# else: -# env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs, -# visualize_reward=visualize_reward, environment_kwargs=kwargs) -# max_episode_steps = int(env._step_limit) -# -# register( -# id=gym_id, -# entry_point='fancy_gym.dmc.dmc_wrapper:DMCWrapper', -# kwargs={'env': lambda: env}, -# max_episode_steps=max_episode_steps, -# ) -# -# env = gym.make(gym_id) -# env.seed(seed) -# return env - - def make_metaworld(env_id: str, seed: int, render_mode: Optional[str] = None, **kwargs): if env_id not in metaworld.ML1.ENV_NAMES: raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.') @@ -362,37 +182,6 @@ def make_metaworld(env_id: str, seed: int, render_mode: Optional[str] = None, ** return env -def make_gym(env_id, seed, **kwargs): - """ - Create - Args: - env_id: - seed: - **kwargs: - - Returns: - - """ - # Getting the existing keywords to allow for nested dict updates for BB envs - # gym only allows for non nested updates. 
- try: - all_kwargs = deepcopy(registry.get(env_id).kwargs) - except AttributeError as e: - logging.error(f'The gym environment with id {env_id} could not been found.') - raise e - nested_update(all_kwargs, kwargs) - kwargs = all_kwargs - - # Add seed to kwargs for bb environments to pass seed to step environments - all_bb_envs = sum(fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values(), []) - if env_id in all_bb_envs: - kwargs.update({"seed": seed}) - - # Gym - env = gym.make(env_id, **kwargs) - return env - - def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]): """ When using DMC check if a manually specified time limit matches the trajectory duration the MP receives. From f375a6e4df20996af3cd88345c5ba607a6bc8253 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 14 Jul 2023 14:31:36 +0200 Subject: [PATCH 075/198] Ported classic_control envs to fancy registry --- fancy_gym/envs/__init__.py | 295 ++++-------------- .../hole_reacher/hole_reacher.py | 32 +- .../simple_reacher/simple_reacher.py | 28 +- .../viapoint_reacher/viapoint_reacher.py | 27 +- 4 files changed, 152 insertions(+), 230 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 65a82dc..2de5d10 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -1,7 +1,8 @@ from copy import deepcopy import numpy as np -from gymnasium import register +from gymnasium import register as gym_register +from .registry import register, ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS from . import classic_control, mujoco from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv @@ -17,84 +18,12 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \ - BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING + BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \ - MAX_EPISODE_STEPS_TABLE_TENNIS - -ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} - -DEFAULT_BB_DICT_ProMP = { - "name": 'EnvName', - "wrappers": [], - "trajectory_generator_kwargs": { - 'trajectory_generator_type': 'promp' - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear' - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1, - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1, - 'basis_bandwidth_factor': 3.0, - }, - "black_box_kwargs": { - } -} - -DEFAULT_BB_DICT_DMP = { - "name": 'EnvName', - "wrappers": [], - "trajectory_generator_kwargs": { - 'trajectory_generator_type': 'dmp' - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'exp' - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1, - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'rbf', - 'num_basis': 5 - } -} - -DEFAULT_BB_DICT_ProDMP = { - "name": 'EnvName', - "wrappers": [], - "trajectory_generator_kwargs": { - 'trajectory_generator_type': 'prodmp', - 'duration': 2.0, - 'weights_scale': 1.0, - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'exp', - 'tau': 1.5, - }, - "controller_kwargs": { - 
'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1, - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'prodmp', - 'alpha': 10, - 'num_basis': 5, - }, - "black_box_kwargs": { - } -} + MAX_EPISODE_STEPS_TABLE_TENNIS # Classic Control -## Simple Reacher +# Simple Reacher register( id='SimpleReacher-v0', entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv', @@ -113,8 +42,7 @@ register( } ) -## Viapoint Reacher - +# Viapoint Reacher register( id='ViaPointReacher-v0', entry_point='fancy_gym.envs.classic_control:ViaPointReacherEnv', @@ -126,7 +54,7 @@ register( } ) -## Hole Reacher +# Hole Reacher register( id='HoleReacher-v0', entry_point='fancy_gym.envs.classic_control:HoleReacherEnv', @@ -145,9 +73,9 @@ register( # Mujoco -## Mujoco Reacher +# Mujoco Reacher for _dims in [5, 7]: - register( + gym_register( id=f'Reacher{_dims}d-v0', entry_point='fancy_gym.envs.mujoco:ReacherEnv', max_episode_steps=MAX_EPISODE_STEPS_REACHER, @@ -156,7 +84,7 @@ for _dims in [5, 7]: } ) - register( + gym_register( id=f'Reacher{_dims}dSparse-v0', entry_point='fancy_gym.envs.mujoco:ReacherEnv', max_episode_steps=MAX_EPISODE_STEPS_REACHER, @@ -167,7 +95,7 @@ for _dims in [5, 7]: } ) -register( +gym_register( id='HopperJumpSparse-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, @@ -176,7 +104,7 @@ register( } ) -register( +gym_register( id='HopperJump-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, @@ -188,43 +116,43 @@ register( } ) -register( +gym_register( id='AntJump-v0', entry_point='fancy_gym.envs.mujoco:AntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, ) -register( +gym_register( id='HalfCheetahJump-v0', entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, ) -register( +gym_register( id='HopperJumpOnBox-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, ) -register( +gym_register( id='HopperThrow-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, ) -register( +gym_register( id='HopperThrowInBasket-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, ) -register( +gym_register( id='Walker2DJump-v0', entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, ) -register( +gym_register( id='BeerPong-v0', entry_point='fancy_gym.envs.mujoco:BeerPongEnv', max_episode_steps=MAX_EPISODE_STEPS_BEERPONG, @@ -232,7 +160,7 @@ register( # Box pushing environments with different rewards for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: - register( + gym_register( id='BoxPushing{}-v0'.format(reward_type), entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, @@ -240,7 +168,7 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: # Here we use the same reward as in BeerPong-v0, but now consider after the release, # only one time step, i.e. 
we simulate until the end of th episode -register( +gym_register( id='BeerPongStepBased-v0', entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward', max_episode_steps=FIXED_RELEASE_STEP, @@ -248,7 +176,7 @@ register( # Table Tennis environments for ctxt_dim in [2, 4]: - register( + gym_register( id='TableTennis{}D-v0'.format(ctxt_dim), entry_point='fancy_gym.envs.mujoco:TableTennisEnv', max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, @@ -258,13 +186,13 @@ for ctxt_dim in [2, 4]: } ) -register( +gym_register( id='TableTennisWind-v0', entry_point='fancy_gym.envs.mujoco:TableTennisWind', max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, ) -register( +gym_register( id='TableTennisGoalSwitching-v0', entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching', max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, @@ -276,98 +204,13 @@ register( # movement Primitive Environments -## Simple Reacher -_versions = ["SimpleReacher-v0", "LongSimpleReacher-v0"] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}DMP-{_name[1]}' - kwargs_dict_simple_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_simple_reacher_dmp['wrappers'].append(classic_control.simple_reacher.MPWrapper) - kwargs_dict_simple_reacher_dmp['controller_kwargs']['p_gains'] = 0.6 - kwargs_dict_simple_reacher_dmp['controller_kwargs']['d_gains'] = 0.075 - kwargs_dict_simple_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50 - kwargs_dict_simple_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2 - kwargs_dict_simple_reacher_dmp['name'] = f"{_v}" - register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_simple_reacher_dmp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) +# Simple Reacher [DONE] - _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper) - kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6 - kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075 - kwargs_dict_simple_reacher_promp['name'] = _v - register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_simple_reacher_promp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +# Viapoint reacher [DONE] -# Viapoint reacher -kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_via_point_reacher_dmp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper) -kwargs_dict_via_point_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity' -kwargs_dict_via_point_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50 -kwargs_dict_via_point_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -kwargs_dict_via_point_reacher_dmp['name'] = "ViaPointReacher-v0" -register( - id='ViaPointReacherDMP-v0', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - # max_episode_steps=1, - kwargs=kwargs_dict_via_point_reacher_dmp -) -ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") +# Hole Reacher [DONE] -kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) -kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper) -kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' -kwargs_dict_via_point_reacher_promp['name'] = 
"ViaPointReacher-v0" -register( - id="ViaPointReacherProMP-v0", - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_via_point_reacher_promp -) -ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") - -## Hole Reacher -_versions = ["HoleReacher-v0"] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}DMP-{_name[1]}' - kwargs_dict_hole_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_hole_reacher_dmp['wrappers'].append(classic_control.hole_reacher.MPWrapper) - kwargs_dict_hole_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity' - # TODO: Before it was weight scale 50 and goal scale 0.1. We now only have weight scale and thus set it to 500. Check - kwargs_dict_hole_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 - kwargs_dict_hole_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2.5 - kwargs_dict_hole_reacher_dmp['name'] = _v - register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - # max_episode_steps=1, - kwargs=kwargs_dict_hole_reacher_dmp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - - _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper) - kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2 - kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' - kwargs_dict_hole_reacher_promp['name'] = f"{_v}" - register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_hole_reacher_promp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## ReacherNd +# ReacherNd _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"] for _v in _versions: _name = _v.split("-") @@ -376,7 +219,7 @@ for _v in _versions: kwargs_dict_reacher_dmp['wrappers'].append(mujoco.reacher.MPWrapper) kwargs_dict_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_reacher_dmp['name'] = _v - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, @@ -388,14 +231,14 @@ for _v in _versions: kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper) kwargs_dict_reacher_promp['name'] = _v - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_reacher_promp ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ######################################################################################################################## -## Beerpong ProMP +# Beerpong ProMP _versions = ['BeerPong-v0'] for _v in _versions: _name = _v.split("-") @@ -408,14 +251,14 @@ for _v in _versions: kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 kwargs_dict_bp_promp['name'] = _v - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_bp_promp ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) -### BP with Fixed release +# BP with Fixed release _versions = ["BeerPongStepBased-v0", 'BeerPong-v0'] for _v in _versions: if _v != 'BeerPong-v0': 
@@ -431,7 +274,7 @@ for _v in _versions: kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 kwargs_dict_bp_promp['name'] = _v - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_bp_promp @@ -439,7 +282,7 @@ for _v in _versions: ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ######################################################################################################################## -## Table Tennis needs to be fixed according to Zhou's implementation +# Table Tennis needs to be fixed according to Zhou's implementation # TODO: Add later when finished # ######################################################################################################################## @@ -452,7 +295,7 @@ for _v in _versions: # kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) # kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper) # kwargs_dict_ant_jump_promp['name'] = _v -# register( +# gym_register( # id=_env_id, # entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', # kwargs=kwargs_dict_ant_jump_promp @@ -469,7 +312,7 @@ for _v in _versions: # kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) # kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper) # kwargs_dict_halfcheetah_jump_promp['name'] = _v -# register( +# gym_register( # id=_env_id, # entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', # kwargs=kwargs_dict_halfcheetah_jump_promp @@ -479,7 +322,7 @@ for _v in _versions: # ######################################################################################################################## -## HopperJump +# HopperJump _versions = ['HopperJump-v0', 'HopperJumpSparse-v0', # 'HopperJumpOnBox-v0', 'HopperThrow-v0', 'HopperThrowInBasket-v0' ] @@ -490,7 +333,7 @@ for _v in _versions: kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper) kwargs_dict_hopper_jump_promp['name'] = _v - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_hopper_jump_promp @@ -499,7 +342,7 @@ for _v in _versions: # ######################################################################################################################## -## Box Pushing +# Box Pushing _versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0'] for _v in _versions: _name = _v.split("-") @@ -509,9 +352,9 @@ for _v in _versions: kwargs_dict_box_pushing_promp['name'] = _v kwargs_dict_box_pushing_promp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]) kwargs_dict_box_pushing_promp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]) - kwargs_dict_box_pushing_promp['basis_generator_kwargs']['basis_bandwidth_factor'] = 2 # 3.5, 4 to try + kwargs_dict_box_pushing_promp['basis_generator_kwargs']['basis_bandwidth_factor'] = 2 # 3.5, 4 to try - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_box_pushing_promp @@ -535,16 +378,16 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] 
= 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t: t % 25 == 0 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desired'] = True - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_box_pushing_prodmp ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) -## Table Tennis +# Table Tennis _versions = ['TableTennis2D-v0', 'TableTennis4D-v0', 'TableTennisWind-v0', 'TableTennisGoalSwitching-v0'] for _v in _versions: _name = _v.split("-") @@ -565,7 +408,7 @@ for _v in _versions: kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 1 kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_goal'] = 1 kwargs_dict_tt_promp['black_box_kwargs']['verbose'] = 2 - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_tt_promp @@ -595,7 +438,7 @@ for _v in _versions: kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_tt_prodmp @@ -624,8 +467,8 @@ for _v in _versions: kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3 - kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 50 == 0 - register( + kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t: t % 50 == 0 + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_tt_prodmp @@ -640,16 +483,16 @@ for _v in _versions: # kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) # kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper) # kwargs_dict_walker2d_jump_promp['name'] = _v -# register( +# gym_register( # id=_env_id, # entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', # kwargs=kwargs_dict_walker2d_jump_promp # ) # ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) -### Depricated, we will not provide non random starts anymore +# Depricated, we will not provide non random starts anymore """ -register( +gym_register( id='SimpleReacher-v1', entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv', max_episode_steps=200, @@ -659,7 +502,7 @@ register( } ) -register( +gym_register( id='LongSimpleReacher-v1', entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv', max_episode_steps=200, @@ -668,7 +511,7 @@ register( "random_start": False } ) -register( +gym_register( id='HoleReacher-v1', entry_point='fancy_gym.envs.classic_control:HoleReacherEnv', max_episode_steps=200, @@ -683,7 +526,7 @@ register( "collision_penalty": 100, } ) -register( +gym_register( id='HoleReacher-v2', entry_point='fancy_gym.envs.classic_control:HoleReacherEnv', max_episode_steps=200, @@ -700,7 +543,7 @@ register( ) # CtxtFree are v0, Contextual are v1 
-register( +gym_register( id='AntJump-v0', entry_point='fancy_gym.envs.mujoco:AntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, @@ -710,7 +553,7 @@ register( } ) # CtxtFree are v0, Contextual are v1 -register( +gym_register( id='HalfCheetahJump-v0', entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, @@ -719,7 +562,7 @@ register( "context": False } ) -register( +gym_register( id='HopperJump-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, @@ -732,12 +575,12 @@ register( """ -### Deprecated used for CorL paper +# Deprecated used for CorL paper """ _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] for i in _vs: _env_id = f'ALRReacher{i}-v0' - register( + gym_register( id=_env_id, entry_point='fancy_gym.envs.mujoco:ReacherEnv', max_episode_steps=200, @@ -750,7 +593,7 @@ for i in _vs: ) _env_id = f'ALRReacherSparse{i}-v0' - register( + gym_register( id=_env_id, entry_point='fancy_gym.envs.mujoco:ReacherEnv', max_episode_steps=200, @@ -764,7 +607,7 @@ for i in _vs: _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] for i in _vs: _env_id = f'ALRReacher{i}ProMP-v0' - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper', kwargs={ @@ -787,7 +630,7 @@ for i in _vs: ) _env_id = f'ALRReacherSparse{i}ProMP-v0' - register( + gym_register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper', kwargs={ @@ -809,7 +652,7 @@ for i in _vs: } ) - register( + gym_register( id='HopperJumpOnBox-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, @@ -818,7 +661,7 @@ for i in _vs: "context": False } ) - register( + gym_register( id='HopperThrow-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, @@ -827,7 +670,7 @@ for i in _vs: "context": False } ) - register( + gym_register( id='HopperThrowInBasket-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, @@ -836,7 +679,7 @@ for i in _vs: "context": False } ) - register( + gym_register( id='Walker2DJump-v0', entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, @@ -845,12 +688,12 @@ for i in _vs: "context": False } ) - register(id='TableTennis2DCtxt-v1', + gym_register(id='TableTennis2DCtxt-v1', entry_point='fancy_gym.envs.mujoco:TTEnvGym', max_episode_steps=MAX_EPISODE_STEPS, kwargs={'ctxt_dim': 2, 'fixed_goal': True}) - register( + gym_register( id='BeerPong-v0', entry_point='fancy_gym.envs.mujoco:BeerBongEnv', max_episode_steps=300, diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py index 0ed03f2..1fdf464 100644 --- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py +++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py @@ -8,11 +8,41 @@ from gymnasium.core import ObsType from matplotlib import patches from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv +from . 
import MPWrapper MAX_EPISODE_STEPS_HOLEREACHER = 200 class HoleReacherEnv(BaseReacherDirectEnv): + + metadata = { + 'mp_config': { + 'ProMP': { + 'wrappers': [MPWrapper], + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + 'trajectory_generator_kwargs': { + 'weight_scale': 2, + }, + }, + 'DMP': { + 'wrappers': [MPWrapper], + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + 'trajectory_generator_kwargs': { + # TODO: Before it was weight scale 50 and goal scale 0.1. We now only have weight scale and thus set it to 500. Check + 'weight_scale': 500, + }, + 'phase_generator_kwargs': { + 'alpha_phase': 2.5, + }, + }, + 'ProDMP': {}, + } + } + def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"): @@ -166,7 +196,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): # all points that are above the hole r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( - line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) + line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) # check if any of those points are below surface nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index 4ef25ea..bb72848 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -6,6 +6,7 @@ from gymnasium import spaces from gymnasium.core import ObsType from fancy_gym.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv +from . import MPWrapper class SimpleReacherEnv(BaseReacherTorqueEnv): @@ -15,6 +16,32 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): towards the end of the trajectory. """ + metadata = { + 'mp_config': { + 'ProMP': { + 'wrappers': [MPWrapper], + 'controller_kwargs': { + 'p_gains': 0.6, + 'd_gains': 0.075, + }, + }, + 'DMP': { + 'wrappers': [MPWrapper], + 'controller_kwargs': { + 'p_gains': 0.6, + 'd_gains': 0.075, + }, + 'trajectory_generator_kwargs': { + 'weight_scale': 50, + }, + 'phase_generator_kwargs': { + 'alpha_phase': 2, + }, + }, + 'ProDMP': {}, + } + } + def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True, allow_self_collision: bool = False, ): super().__init__(n_links, random_start, allow_self_collision) @@ -126,4 +153,3 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): self.fig.canvas.draw() self.fig.canvas.flush_events() - diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py index ba5efd2..d0d04fb 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -7,10 +7,35 @@ from gymnasium import spaces from gymnasium.core import ObsType from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv +from . 
import MPWrapper class ViaPointReacherEnv(BaseReacherDirectEnv): + metadata = { + 'mp_config': { + 'ProMP': { + 'wrappers': [MPWrapper], + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + }, + 'DMP': { + 'wrappers': [MPWrapper], + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + 'trajectory_generator_kwargs': { + 'weight_scale': 50, + }, + 'phase_generator_kwargs': { + 'alpha_phase': 2, + }, + }, + 'ProDMP': {}, + } + } + def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None, target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000): @@ -184,5 +209,3 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') plt.pause(0.01) - - From 30bafd7a4f3ef3cf47ab1ac86cea7092a41d0d15 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 14 Jul 2023 15:58:58 +0200 Subject: [PATCH 076/198] Allow skipping merge with default bb config --- fancy_gym/envs/registry.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index e57e447..e7c2f09 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -114,12 +114,15 @@ def register_mp(id, mp_type): ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id) -def bb_env_constructor(underlying_id, mp_type, step_based_kwargs={}, mp_config_override={}): - underlying_env = gym_make(underlying_id, **step_based_kwargs) +def bb_env_constructor(underlying_id, mp_type, mp_config_override={}, **kwargs): + underlying_env = gym_make(underlying_id, **kwargs) env_metadata = underlying_env.metadata - config = copy.deepcopy(_BB_DEFAULTS[mp_type]) - metadata_config = env_metadata.get('mp_config', {}) + metadata_config = copy.deepcopy(env_metadata.get('mp_config', {}).get(mp_type, {})) + global_inherit_defaults = env_metadata.get('mp_config', {}).get('inherit_defaults', True) + inherit_defaults = metadata_config.pop('inherit_defaults', global_inherit_defaults) + + config = copy.deepcopy(_BB_DEFAULTS[mp_type]) if inherit_defaults else {} nested_update(config, metadata_config) nested_update(config, mp_config_override) From 9fa932d2bb112e210a05ea731fcdb3098ff8bf52 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Thu, 20 Jul 2023 10:33:39 +0200 Subject: [PATCH 077/198] minor refactoring --- fancy_gym/utils/make_env_helpers.py | 37 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 4dc1f6b..ec4a0c1 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -15,6 +15,7 @@ from gymnasium import make import numpy as np from gymnasium.envs.registration import register, registry from gymnasium.wrappers import TimeLimit +from gymnasium import make as gym_make from fancy_gym.utils.env_compatibility import EnvCompatibility from fancy_gym.utils.wrappers import FlattenObservation @@ -32,7 +33,7 @@ except Exception: pass -def _make_wrapped_env(env: gym.Env, wrappers: Iterable[Type[gym.Wrapper]], seed=1, fallback_max_steps=None, **kwargs): +def _make_wrapped_env(env: gym.Env, wrappers: Iterable[Type[gym.Wrapper]], seed=1, fallback_max_steps=None): """ Helper function for creating a wrapped gym environment using MPs. 
It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is @@ -62,7 +63,7 @@ def _make_wrapped_env(env: gym.Env, wrappers: Iterable[Type[gym.Wrapper]], seed= def make_bb( env: Union[gym.Env, str], wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping, controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, - fallback_max_steps: int = None, **kwargs): + time_limit: int, fallback_max_steps: int = None): """ This can also be used standalone for manually building a custom DMP environment. Args: @@ -78,7 +79,7 @@ def make_bb( Returns: DMP wrapped gym env """ - _verify_time_limit(traj_gen_kwargs.get("duration"), kwargs.get("time_limit")) + _verify_time_limit(traj_gen_kwargs.get("duration"), time_limit) learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories') do_replanning = black_box_kwargs.get('replanning_schedule') @@ -93,7 +94,7 @@ def make_bb( if isinstance(env, str): env = make(env) - env = _make_wrapped_env(env=env, wrappers=wrappers, fallback_max_steps=fallback_max_steps, **kwargs) + env = _make_wrapped_env(env=env, wrappers=wrappers, fallback_max_steps=fallback_max_steps) # BB expects a spaces.Box to be exposed, need to convert for dict-observations if type(env.observation_space) == gym.spaces.dict.Dict: @@ -153,6 +154,34 @@ def get_env_duration(env: gym.Env): return duration +def make(env_id: str, **kwargs): + """ + Converts an env_id to an environment with the gym API. + This also works for DeepMind Control Suite environments that are wrapped using the DMCWrapper, they can be + specified with "dmc/domain_name-task_name" + Analogously, metaworld tasks can be created as "metaworld/env_id-v2". + Args: + env_id: spec or env_id for gym tasks, external environments require a domain specification + **kwargs: Additional kwargs for the constructor such as pixel observations, etc. 
+    Returns: Gym environment
+    """
+
+    if env_id.startswith('metaworld'):
+        env = make_metaworld(env_id.replace('metaworld', '')[1:], **kwargs)
+    else:
+        env = gym_make(env_id, **kwargs)
+
+    if env.spec.max_episode_steps is None:
+        # Hack: Some envs violate the gym spec in that they don't correctly expose the maximum episode steps
+        # Gymnasium disallows accessing private attributes, so we have to get creative to read the internal values
+        # TODO: Remove this, when all supported envs correctly implement this themselves
+        unwrapped = env.unwrapped if hasattr(env, 'unwrapped') else env
+        if hasattr(unwrapped, '_max_episode_steps'):
+            env.spec.max_episode_steps = unwrapped.__getattribute__('_max_episode_steps')
+
+    return env
+
+
 def make_metaworld(env_id: str, seed: int, render_mode: Optional[str] = None, **kwargs):
     if env_id not in metaworld.ML1.ENV_NAMES:
         raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.')

From f6e1718c1a59c248bb42e2eb975570757a91c12e Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 20 Jul 2023 10:34:38 +0200
Subject: [PATCH 078/198] metadata.mp_config now expected in MP_wrapper
 (implementing Fabian's feedback)

---
 fancy_gym/envs/registry.py | 51 ++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index e7c2f09..4d09acc 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -1,10 +1,30 @@
+from typing import Tuple, Union
+
+import copy
+import importlib
+import numpy as np
 from fancy_gym.utils.make_env_helpers import make_bb
 from fancy_gym.utils.utils import nested_update

 from gymnasium import register as gym_register
 from gymnasium import gym_make
-import copy
+
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class DefaultMPWrapper(RawInterfaceWrapper):
+    @property
+    def context_mask(self):
+        return np.full(self.env.observation_space.shape, True)
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_pos
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.current_vel
+

 _BB_DEFAULTS = {
     'ProMP': {
@@ -82,22 +102,26 @@ ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {mp_type: [] for mp_type in KNOWN_MP
 def register(
         id,
         entry_point,
+        mp_wrapper=DefaultMPWrapper,
         register_step_based=True,  # TODO: Detect
         add_mp_types=KNOWN_MPS,
-        override_mp_config={},
         **kwargs
 ):
+    if not callable(mp_wrapper):  # mp_wrapper can be given as a String (same notation as for entry_point)
+        mod_name, attr_name = mp_wrapper.split(":")
+        mod = importlib.import_module(mod_name)
+        mp_wrapper = getattr(mod, attr_name)
     if register_step_based:
         gym_register(id=id, entry_point=entry_point, **kwargs)
-    register_mps(id, override_mp_config, add_mp_types)
+    register_mps(id, mp_wrapper, add_mp_types)


-def register_mps(id, add_mp_types=KNOWN_MPS):
+def register_mps(id, mp_wrapper, add_mp_types=KNOWN_MPS):
     for mp_type in add_mp_types:
-        register_mp(id, mp_type)
+        register_mp(id, mp_wrapper, mp_type)


-def register_mp(id, mp_type):
+def register_mp(id, mp_wrapper, mp_type):
     assert mp_type in KNOWN_MPS, 'Unknown mp_type'
     assert id not in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.'
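
    # A minimal usage sketch for this registration API (illustrative only;
    # 'MyReacher-v0', 'my_pkg.envs:MyReacherEnv' and 'my_pkg.envs:MyMPWrapper'
    # are hypothetical names):
    #
    #     from fancy_gym.envs.registry import register
    #
    #     register(
    #         id='MyReacher-v0',
    #         entry_point='my_pkg.envs:MyReacherEnv',
    #         mp_wrapper='my_pkg.envs:MyMPWrapper',  # a class or a "module:attr" string
    #         max_episode_steps=200,
    #     )
    #
    # Besides the step-based 'MyReacher-v0', this also registers the MP variants
    # 'MyReacher-ProMP-v0', 'MyReacher-DMP-v0' and 'MyReacher-ProDMP-v0'.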
parts = id.split('-') @@ -108,14 +132,16 @@ def register_mp(id, mp_type): entry_point=bb_env_constructor, kwargs={ 'underlying_id': id, + 'mp_wrapper': mp_wrapper, 'mp_type': mp_type } ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id) -def bb_env_constructor(underlying_id, mp_type, mp_config_override={}, **kwargs): - underlying_env = gym_make(underlying_id, **kwargs) +def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, **kwargs): + raw_underlying_env = gym_make(underlying_id, **kwargs) + underlying_env = mp_wrapper(raw_underlying_env) env_metadata = underlying_env.metadata metadata_config = copy.deepcopy(env_metadata.get('mp_config', {}).get(mp_type, {})) @@ -134,8 +160,11 @@ def bb_env_constructor(underlying_id, mp_type, mp_config_override={}, **kwargs): phase_kwargs = config.pop("phase_generator_kwargs", {}) basis_kwargs = config.pop("basis_generator_kwargs", {}) - return make_bb(underlying_env, wrappers=wrappers, + return make_bb(underlying_env, + wrappers=wrappers, black_box_kwargs=black_box_kwargs, - traj_gen_kwargs=traj_gen_kwargs, controller_kwargs=contr_kwargs, + traj_gen_kwargs=traj_gen_kwargs, + controller_kwargs=contr_kwargs, phase_kwargs=phase_kwargs, - basis_kwargs=basis_kwargs, **config) + basis_kwargs=basis_kwargs, + **config) From 9d03542282f2e4267aabd796e731e028cae83180 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Thu, 20 Jul 2023 10:56:30 +0200 Subject: [PATCH 079/198] Move mp_config out of metadata and onto MPWrappers --- .../hole_reacher/hole_reacher.py | 28 ------------------- .../hole_reacher/mp_wrapper.py | 24 ++++++++++++++++ .../simple_reacher/mp_wrapper.py | 22 +++++++++++++++ .../simple_reacher/simple_reacher.py | 26 ----------------- .../viapoint_reacher/mp_wrapper.py | 20 +++++++++++++ .../viapoint_reacher/viapoint_reacher.py | 24 ---------------- fancy_gym/envs/registry.py | 10 +++---- 7 files changed, 71 insertions(+), 83 deletions(-) diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py index 1fdf464..c9e0a61 100644 --- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py +++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py @@ -15,34 +15,6 @@ MAX_EPISODE_STEPS_HOLEREACHER = 200 class HoleReacherEnv(BaseReacherDirectEnv): - metadata = { - 'mp_config': { - 'ProMP': { - 'wrappers': [MPWrapper], - 'controller_kwargs': { - 'controller_type': 'velocity', - }, - 'trajectory_generator_kwargs': { - 'weight_scale': 2, - }, - }, - 'DMP': { - 'wrappers': [MPWrapper], - 'controller_kwargs': { - 'controller_type': 'velocity', - }, - 'trajectory_generator_kwargs': { - # TODO: Before it was weight scale 50 and goal scale 0.1. We now only have weight scale and thus set it to 500. 
Check - 'weight_scale': 500, - }, - 'phase_generator_kwargs': { - 'alpha_phase': 2.5, - }, - }, - 'ProDMP': {}, - } - } - def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"): diff --git a/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py b/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py index d160b5c..c8e6dcc 100644 --- a/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py +++ b/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py @@ -7,6 +7,30 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': { + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + 'trajectory_generator_kwargs': { + 'weight_scale': 2, + }, + }, + 'DMP': { + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + 'trajectory_generator_kwargs': { + # TODO: Before it was weight scale 50 and goal scale 0.1. We now only have weight scale and thus set it to 500. Check + 'weight_scale': 500, + }, + 'phase_generator_kwargs': { + 'alpha_phase': 2.5, + }, + }, + 'ProDMP': {}, + } + @property def context_mask(self): return np.hstack([ diff --git a/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py b/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py index 6d1fda1..2ee3cd1 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py +++ b/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py @@ -7,6 +7,28 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': { + 'controller_kwargs': { + 'p_gains': 0.6, + 'd_gains': 0.075, + }, + }, + 'DMP': { + 'controller_kwargs': { + 'p_gains': 0.6, + 'd_gains': 0.075, + }, + 'trajectory_generator_kwargs': { + 'weight_scale': 50, + }, + 'phase_generator_kwargs': { + 'alpha_phase': 2, + }, + }, + 'ProDMP': {}, + } + @property def context_mask(self): return np.hstack([ diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index bb72848..5c63cf8 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -16,32 +16,6 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): towards the end of the trajectory. 
""" - metadata = { - 'mp_config': { - 'ProMP': { - 'wrappers': [MPWrapper], - 'controller_kwargs': { - 'p_gains': 0.6, - 'd_gains': 0.075, - }, - }, - 'DMP': { - 'wrappers': [MPWrapper], - 'controller_kwargs': { - 'p_gains': 0.6, - 'd_gains': 0.075, - }, - 'trajectory_generator_kwargs': { - 'weight_scale': 50, - }, - 'phase_generator_kwargs': { - 'alpha_phase': 2, - }, - }, - 'ProDMP': {}, - } - } - def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True, allow_self_collision: bool = False, ): super().__init__(n_links, random_start, allow_self_collision) diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py b/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py index 47da749..c07b651 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py @@ -7,6 +7,26 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': { + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + }, + 'DMP': { + 'controller_kwargs': { + 'controller_type': 'velocity', + }, + 'trajectory_generator_kwargs': { + 'weight_scale': 50, + }, + 'phase_generator_kwargs': { + 'alpha_phase': 2, + }, + }, + 'ProDMP': {}, + } + @property def context_mask(self): return np.hstack([ diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py index d0d04fb..febccc7 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -12,30 +12,6 @@ from . import MPWrapper class ViaPointReacherEnv(BaseReacherDirectEnv): - metadata = { - 'mp_config': { - 'ProMP': { - 'wrappers': [MPWrapper], - 'controller_kwargs': { - 'controller_type': 'velocity', - }, - }, - 'DMP': { - 'wrappers': [MPWrapper], - 'controller_kwargs': { - 'controller_type': 'velocity', - }, - 'trajectory_generator_kwargs': { - 'weight_scale': 50, - }, - 'phase_generator_kwargs': { - 'alpha_phase': 2, - }, - }, - 'ProDMP': {}, - } - } - def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None, target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000): diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index 4d09acc..0172eaa 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -142,14 +142,14 @@ def register_mp(id, mp_wrapper, mp_type): def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, **kwargs): raw_underlying_env = gym_make(underlying_id, **kwargs) underlying_env = mp_wrapper(raw_underlying_env) - env_metadata = underlying_env.metadata - metadata_config = copy.deepcopy(env_metadata.get('mp_config', {}).get(mp_type, {})) - global_inherit_defaults = env_metadata.get('mp_config', {}).get('inherit_defaults', True) - inherit_defaults = metadata_config.pop('inherit_defaults', global_inherit_defaults) + mp_config = underlying_env.get('mp_config', {}) + active_mp_config = copy.deepcopy(mp_config.get(mp_type, {})) + global_inherit_defaults = mp_config.get('inherit_defaults', True) + inherit_defaults = active_mp_config.pop('inherit_defaults', global_inherit_defaults) config = copy.deepcopy(_BB_DEFAULTS[mp_type]) if inherit_defaults else {} - nested_update(config, metadata_config) + nested_update(config, 
active_mp_config) nested_update(config, mp_config_override) wrappers = config.pop("wrappers") From 17d370e2ba9d36416f568c2ae588e08390eb3668 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Thu, 20 Jul 2023 11:44:04 +0200 Subject: [PATCH 080/198] Allow overriding mp_config during register and make (also better errors for DefaultMPWrapper) --- fancy_gym/envs/registry.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index 0172eaa..9e37fcc 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -15,14 +15,21 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class DefaultMPWrapper(RawInterfaceWrapper): @property def context_mask(self): + # If the env already defines a context_mask, we will use that + if hasattr(self.env, 'context_mask'): + return self.env.context_mask + + # Otherwise we will use the whole observation as the context. (Write a custom MPWrapper to change this behavior) return np.full(self.env.observation_space.shape, True) @property def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + assert hasattr(self.env, 'current_pos'), 'DefaultMPWrapper was unable to access env.current_pos. Please write a custom MPWrapper (recommended) or expose this attribute directly.' return self.env.current_pos @property def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + assert hasattr(self.env, 'current_vel'), 'DefaultMPWrapper was unable to access env.current_vel. Please write a custom MPWrapper (recommended) or expose this attribute directly.' return self.env.current_vel @@ -105,6 +112,7 @@ def register( mp_wrapper=DefaultMPWrapper, register_step_based=True, # TODO: Detect add_mp_types=KNOWN_MPS, + mp_config_override={}, **kwargs ): if not callable(mp_wrapper): # mp_wrapper can be given as a String (same notation as for entry_point) @@ -113,15 +121,15 @@ def register( mp_wrapper = getattr(mod, attr_name) if register_step_based: gym_register(id=id, entry_point=entry_point, **kwargs) - register_mps(id, mp_wrapper, add_mp_types) + register_mps(id, mp_wrapper, add_mp_types, mp_config_override) -def register_mps(id, mp_wrapper, add_mp_types=KNOWN_MPS): +def register_mps(id, mp_wrapper, add_mp_types=KNOWN_MPS, mp_config_override={}): for mp_type in add_mp_types: - register_mp(id, mp_wrapper, mp_type) + register_mp(id, mp_wrapper, mp_type, mp_config_override.get(mp_type, {})) -def register_mp(id, mp_wrapper, mp_type): +def register_mp(id, mp_wrapper, mp_type, mp_config_override={}): assert mp_type in KNOWN_MPS, 'Unknown mp_type' assert id not in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.' 
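
    # Sketch of the resulting override precedence (hypothetical 'MyEnv' names,
    # for illustration): the _BB_DEFAULTS for the mp_type are nested_update'd
    # with the wrapper's mp_config, then with the register()-time override,
    # then with the make()-time override, so the make()-time value wins:
    #
    #     register(id='MyEnv-v0', entry_point=MyEnv,
    #              mp_config_override={'ProMP': {'controller_kwargs': {'p_gains': 1.0}}})
    #
    #     env = gymnasium.make('MyEnv-ProMP-v0',
    #                          mp_config_override={'controller_kwargs': {'p_gains': 2.0}})
    #     # -> the ProMP variant is built with p_gains == 2.0
    #
    # Note: the register()-time override is keyed by mp_type, while the
    # make()-time override is passed directly for the variant being built.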
parts = id.split('-') @@ -133,13 +141,14 @@ def register_mp(id, mp_wrapper, mp_type): kwargs={ 'underlying_id': id, 'mp_wrapper': mp_wrapper, - 'mp_type': mp_type + 'mp_type': mp_type, + '_mp_config_override_register': mp_config_override } ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id) -def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, **kwargs): +def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, _mp_config_override_register={}, **kwargs): raw_underlying_env = gym_make(underlying_id, **kwargs) underlying_env = mp_wrapper(raw_underlying_env) @@ -150,6 +159,7 @@ def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={} config = copy.deepcopy(_BB_DEFAULTS[mp_type]) if inherit_defaults else {} nested_update(config, active_mp_config) + nested_update(config, _mp_config_override_register) nested_update(config, mp_config_override) wrappers = config.pop("wrappers") From 8d26cccc35d5b35a1f4101bcdcccfd9df0b3ddf1 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Thu, 20 Jul 2023 11:44:50 +0200 Subject: [PATCH 081/198] ported mp_config for mujoco/box_pushing --- fancy_gym/envs/mujoco/reacher/mp_wrapper.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fancy_gym/envs/mujoco/reacher/mp_wrapper.py b/fancy_gym/envs/mujoco/reacher/mp_wrapper.py index 0464640..d47737a 100644 --- a/fancy_gym/envs/mujoco/reacher/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/reacher/mp_wrapper.py @@ -7,6 +7,16 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': {}, + 'DMP': { + 'phase_generator_kwargs': { + 'alpha_phase': 2, + }, + }, + 'ProDMP': {}, + } + @property def context_mask(self): return np.concatenate([[False] * self.n_links, # cos From 1b061b2a378f45fa0ff99ec4d72bae4ecfdef256 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Thu, 20 Jul 2023 11:45:32 +0200 Subject: [PATCH 082/198] ported mp_config for mujoco/box_pushing --- fancy_gym/envs/mujoco/box_pushing/__init__.py | 2 +- .../envs/mujoco/box_pushing/mp_wrapper.py | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/fancy_gym/envs/mujoco/box_pushing/__init__.py b/fancy_gym/envs/mujoco/box_pushing/__init__.py index c5e6d2f..d683024 100644 --- a/fancy_gym/envs/mujoco/box_pushing/__init__.py +++ b/fancy_gym/envs/mujoco/box_pushing/__init__.py @@ -1 +1 @@ -from .mp_wrapper import MPWrapper +from .mp_wrapper import MPWrapper, ReplanMPWrapper diff --git a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py index 09b2d65..03121f9 100644 --- a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py @@ -6,6 +6,19 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': { + 'controller_kwargs': { + 'p_gains': 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]), + 'd_gains': 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]), + }, + 'basis_generator_kwargs': { + 'basis_bandwidth_factor': 2 # 3.5, 4 to try + } + }, + 'DMP': {}, + 'ProDMP': {}, + } # Random x goal + random init pos @property @@ -27,3 +40,33 @@ class MPWrapper(RawInterfaceWrapper): @property def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: return self.data.qvel[:7].copy() + + +class ReplanMPWrapper(MPWrapper): + mp_config = { + 'ProMP': {}, + 'DMP': {}, + 'ProDMP': { + 
'controller_kwargs': {
+                'p_gains': 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]),
+                'd_gains': 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]),
+            },
+            'trajectory_generator_kwargs': {
+                'weights_scale': 0.3,
+                'goal_scale': 0.3,
+                'auto_scale_basis': True,
+                'goal_offset': 1.0,
+                'disable_goal': True,
+            },
+            'basis_generator_kwargs': {
+                'num_basis': 5,
+                'basis_bandwidth_factor': 3,
+            },
+            'phase_generator_kwargs': {
+                'alpha_phase': 3,
+            },
+            'black_box_kwargs': {
+                'max_planning_times': 4,
+                'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
+                'condition_on_desired': True,
+            }
+        }
+    }

From dbf2be1006015e72f167b1114713ed7a4a3d4265 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Thu, 20 Jul 2023 11:45:53 +0200
Subject: [PATCH 083/198] refactoring env registration wip

---
 fancy_gym/envs/__init__.py | 160 ++++++++++---------------------------
 1 file changed, 40 insertions(+), 120 deletions(-)

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index 2de5d10..62fe5b7 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -5,9 +5,14 @@ from gymnasium import register as gym_register
 from .registry import register, ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS

 from . import classic_control, mujoco
-from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
 from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
+from .classic_control.simple_reacher import MPWrapper as MPWrapper_SimpleReacher
+from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
+from .classic_control.hole_reacher import MPWrapper as MPWrapper_HoleReacher
 from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
+from .classic_control.viapoint_reacher import MPWrapper as MPWrapper_ViaPointReacher
+from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
+from .mujoco.reacher.mp_wrapper import MPWrapper as MPWrapper_Reacher
 from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
 from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG, FIXED_RELEASE_STEP
 from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
@@ -15,7 +20,6 @@ from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
 from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
 from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
-from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
 from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \
     BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING
@@ -26,7 +30,8 @@ from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWin
 # Simple Reacher
 register(
     id='SimpleReacher-v0',
-    entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
+    entry_point=SimpleReacherEnv,
+    mp_wrapper=MPWrapper_SimpleReacher,
     max_episode_steps=200,
     kwargs={
         "n_links": 2,
@@ -35,7 +40,8 @@ register(

 register(
     id='LongSimpleReacher-v0',
-    entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
+    entry_point=SimpleReacherEnv,
+    mp_wrapper=MPWrapper_SimpleReacher,
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -45,7 +51,8 @@ register(
 # Viapoint Reacher
 register(
     id='ViaPointReacher-v0',
-
entry_point='fancy_gym.envs.classic_control:ViaPointReacherEnv', + entry_point=ViaPointReacherEnv, + mp_wrapper=MPWrapper_ViaPointReacher, max_episode_steps=200, kwargs={ "n_links": 5, @@ -57,7 +64,8 @@ register( # Hole Reacher register( id='HoleReacher-v0', - entry_point='fancy_gym.envs.classic_control:HoleReacherEnv', + entry_point=HoleReacherEnv, + mp_wrapper=MPWrapper_HoleReacher, max_episode_steps=200, kwargs={ "n_links": 5, @@ -74,39 +82,44 @@ register( # Mujoco # Mujoco Reacher -for _dims in [5, 7]: - gym_register( - id=f'Reacher{_dims}d-v0', - entry_point='fancy_gym.envs.mujoco:ReacherEnv', +for dims in [5, 7]: + register( + id=f'Reacher{dims}d-v0', + entry_point=ReacherEnv, + mp_wrapper=MPWrapper_Reacher, max_episode_steps=MAX_EPISODE_STEPS_REACHER, kwargs={ - "n_links": _dims, + "n_links": dims, } ) - gym_register( - id=f'Reacher{_dims}dSparse-v0', - entry_point='fancy_gym.envs.mujoco:ReacherEnv', + register( + id=f'Reacher{dims}dSparse-v0', + entry_point=ReacherEnv, + mp_wrapper=MPWrapper_Reacher, max_episode_steps=MAX_EPISODE_STEPS_REACHER, kwargs={ "sparse": True, 'reward_weight': 200, - "n_links": _dims, + "n_links": dims, } ) -gym_register( + +register( id='HopperJumpSparse-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', + mp_wrapper=mujoco.hopper_jump.MPWrapper, max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ "sparse": True, } ) -gym_register( +register( id='HopperJump-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', + mp_wrapper=mujoco.hopper_jump.MPWrapper, max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ "sparse": False, @@ -160,9 +173,18 @@ gym_register( # Box pushing environments with different rewards for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: - gym_register( + register( id='BoxPushing{}-v0'.format(reward_type), entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), + mp_wrapper=mujoco.box_pushing.MPWrapper, + max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, + ) + + register( + id='BoxPushing{}Replan-v0'.format(reward_type), + entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), + mp_wrapper=mujoco.box_pushing.ReplanMPWrapper, + register_step_based=False, max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, ) @@ -202,42 +224,6 @@ gym_register( ) -# movement Primitive Environments - -# Simple Reacher [DONE] - -# Viapoint reacher [DONE] - -# Hole Reacher [DONE] - -# ReacherNd -_versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}DMP-{_name[1]}' - kwargs_dict_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_reacher_dmp['wrappers'].append(mujoco.reacher.MPWrapper) - kwargs_dict_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2 - kwargs_dict_reacher_dmp['name'] = _v - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - # max_episode_steps=1, - kwargs=kwargs_dict_reacher_dmp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - - _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper) - kwargs_dict_reacher_promp['name'] = _v - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_reacher_promp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) 
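
# The ProDMP replanning configs above and below all use a schedule callback
# with the signature shown in their lambdas. A reusable sketch (the helper
# name 'replan_every' is illustrative):
#
#     def replan_every(k):
#         # Called during rollout with the current desired position/velocity,
#         # the observation, the last parameter vector and the step count t.
#         return lambda pos, vel, obs, action, t: t % k == 0
#
#     # e.g. 'replanning_schedule': replan_every(25) triggers a replan every
#     # 25 environment steps, as in the box-pushing config above.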
-######################################################################################################################## # Beerpong ProMP _versions = ['BeerPong-v0'] for _v in _versions: @@ -321,72 +307,6 @@ for _v in _versions: # # ######################################################################################################################## - -# HopperJump -_versions = ['HopperJump-v0', 'HopperJumpSparse-v0', - # 'HopperJumpOnBox-v0', 'HopperThrow-v0', 'HopperThrowInBasket-v0' - ] -# TODO: Check if all environments work with the same MPWrapper -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper) - kwargs_dict_hopper_jump_promp['name'] = _v - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_hopper_jump_promp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# ######################################################################################################################## - -# Box Pushing -_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0'] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_box_pushing_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_box_pushing_promp['wrappers'].append(mujoco.box_pushing.MPWrapper) - kwargs_dict_box_pushing_promp['name'] = _v - kwargs_dict_box_pushing_promp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]) - kwargs_dict_box_pushing_promp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]) - kwargs_dict_box_pushing_promp['basis_generator_kwargs']['basis_bandwidth_factor'] = 2 # 3.5, 4 to try - - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_box_pushing_promp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' - kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) - kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) - kwargs_dict_box_pushing_prodmp['name'] = _v - kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]) - kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]) - kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3 - kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 - kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True - kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 - kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['disable_goal'] = True - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5 - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 - kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t: t % 25 == 0 - 
kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desired'] = True - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_box_pushing_prodmp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) - # Table Tennis _versions = ['TableTennis2D-v0', 'TableTennis4D-v0', 'TableTennisWind-v0', 'TableTennisGoalSwitching-v0'] for _v in _versions: From 83d5d39b426efcc55d3f759748f8dbee0b56b5bb Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Thu, 20 Jul 2023 11:48:19 +0200 Subject: [PATCH 084/198] removed deprecated envs --- fancy_gym/envs/__init__.py | 230 ------------------------------------- 1 file changed, 230 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 62fe5b7..7c83d49 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -394,233 +394,3 @@ for _v in _versions: kwargs=kwargs_dict_tt_prodmp ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) -# -# ## Walker2DJump -# _versions = ['Walker2DJump-v0'] -# for _v in _versions: -# _name = _v.split("-") -# _env_id = f'{_name[0]}ProMP-{_name[1]}' -# kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) -# kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper) -# kwargs_dict_walker2d_jump_promp['name'] = _v -# gym_register( -# id=_env_id, -# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', -# kwargs=kwargs_dict_walker2d_jump_promp -# ) -# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# Depricated, we will not provide non random starts anymore -""" -gym_register( - id='SimpleReacher-v1', - entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 2, - "random_start": False - } -) - -gym_register( - id='LongSimpleReacher-v1', - entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False - } -) -gym_register( - id='HoleReacher-v1', - entry_point='fancy_gym.envs.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": None, - "collision_penalty": 100, - } -) -gym_register( - id='HoleReacher-v2', - entry_point='fancy_gym.envs.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": 2, - "collision_penalty": 1, - } -) - -# CtxtFree are v0, Contextual are v1 -gym_register( - id='AntJump-v0', - entry_point='fancy_gym.envs.mujoco:AntJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -gym_register( - id='HalfCheetahJump-v0', - entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - "context": False - } -) -gym_register( - id='HopperJump-v0', - entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, - "context": False, - "healthy_reward": 1.0 - } -) - -""" - -# Deprecated used 
for CorL paper -""" -_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] -for i in _vs: - _env_id = f'ALRReacher{i}-v0' - gym_register( - id=_env_id, - entry_point='fancy_gym.envs.mujoco:ReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 0, - "n_links": 5, - "balance": False, - '_ctrl_cost_weight': i - } - ) - - _env_id = f'ALRReacherSparse{i}-v0' - gym_register( - id=_env_id, - entry_point='fancy_gym.envs.mujoco:ReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 5, - "balance": False, - '_ctrl_cost_weight': i - } - ) - _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] -for i in _vs: - _env_id = f'ALRReacher{i}ProMP-v0' - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"{_env_id.replace('ProMP', '')}", - "wrappers": [mujoco.reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 4, - "policy_type": "motor", - # "weights_scale": 5, - "n_zero_basis": 1, - "zero_start": True, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } - ) - - _env_id = f'ALRReacherSparse{i}ProMP-v0' - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"{_env_id.replace('ProMP', '')}", - "wrappers": [mujoco.reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 4, - "policy_type": "motor", - # "weights_scale": 5, - "n_zero_basis": 1, - "zero_start": True, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } - ) - - gym_register( - id='HopperJumpOnBox-v0', - entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - "context": False - } - ) - gym_register( - id='HopperThrow-v0', - entry_point='fancy_gym.envs.mujoco:HopperThrowEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, - "context": False - } - ) - gym_register( - id='HopperThrowInBasket-v0', - entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - "context": False - } - ) - gym_register( - id='Walker2DJump-v0', - entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, - "context": False - } - ) - gym_register(id='TableTennis2DCtxt-v1', - entry_point='fancy_gym.envs.mujoco:TTEnvGym', - max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim': 2, 'fixed_goal': True}) - - gym_register( - id='BeerPong-v0', - entry_point='fancy_gym.envs.mujoco:BeerBongEnv', - max_episode_steps=300, - kwargs={ - "rndm_goal": False, - "cup_goal_pos": [0.1, -2.0], - "frame_skip": 2 - } - ) -""" From 64e6ac532302b62b98d78b4c04d09974d3d0dbf1 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 10:03:18 +0200 Subject: [PATCH 085/198] ported mp_config for mujoco/beerpong --- fancy_gym/envs/mujoco/beerpong/mp_wrapper.py | 37 ++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py index 17a11e1..452ee05 100644 --- a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py +++ 
b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py @@ -6,6 +6,23 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': { + 'phase_generator_kwargs': { + 'learn_tau': True + }, + 'controller_kwargs': { + 'p_gains': np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), + 'd_gains': np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), + }, + 'basis_generator_kwargs': { + 'num_basis': 2, + 'num_basis_zero_start': 2, + }, + }, + 'DMP': {}, + 'ProDMP': {}, + } @property def context_mask(self) -> np.ndarray: @@ -39,3 +56,23 @@ class MPWrapper(RawInterfaceWrapper): xyz[-1] = 0.840 self.model.body_pos[self.cup_table_id] = xyz return self.get_observation_from_step(self.get_obs()) + + +class MPWrapper_FixedRelease(MPWrapper): + mp_config = { + 'ProMP': { + 'phase_generator_kwargs': { + 'tau': 0.62, + }, + 'controller_kwargs': { + 'p_gains': np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), + 'd_gains': np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), + }, + 'basis_generator_kwargs': { + 'num_basis': 2, + 'num_basis_zero_start': 2, + }, + }, + 'DMP': {}, + 'ProDMP': {}, + } From 9ba3fa9dbc477d6466ca47cbec0a01273c4bca45 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 10:25:01 +0200 Subject: [PATCH 086/198] ported mp_config for mujoco/table_tennis --- .../envs/mujoco/table_tennis/mp_wrapper.py | 102 +++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py index e33ed6c..3e5a464 100644 --- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py @@ -7,6 +7,53 @@ from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, j class TT_MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': { + 'phase_generator_kwargs': { + 'learn_tau': False, + 'learn_delay': False, + 'tau_bound': [0.8, 1.5], + 'delay_bound': [0.05, 0.15], + }, + 'controller_kwargs': { + 'p_gains': 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), + 'd_gains': 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]), + }, + 'basis_generator_kwargs': { + 'num_basis': 3, + 'num_basis_zero_start': 1, + 'num_basis_zero_goal': 1, + }, + 'black_box_kwargs': { + 'verbose': 2, + }, + }, + 'DMP': {}, + 'ProDMP': { + 'phase_generator_kwargs': { + 'learn_tau': True, + 'learn_delay': True, + 'tau_bound': [0.8, 1.5], + 'delay_bound': [0.05, 0.15], + 'alpha_phase': 3, + }, + 'controller_kwargs': { + 'p_gains': 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), + 'd_gains': 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]), + }, + 'basis_generator_kwargs': { + 'num_basis': 3, + 'alpha': 25, + 'basis_bandwidth_factor': 3, + }, + 'trajectory_generator_kwargs': { + 'weights_scale': 0.7, + 'auto_scale_basis': True, + 'relative_goal': True, + 'disable_goal': True, + }, + }, + } # Random x goal + random init pos @property @@ -16,7 +63,7 @@ class TT_MPWrapper(RawInterfaceWrapper): [False] * 7, # joints velocity [True] * 2, # position ball x, y [False] * 1, # position ball z - #[True] * 3, # velocity ball x, y, z + # [True] * 3, # velocity ball x, y, z [True] * 2, # target landing position # [True] * 1, # time ]) @@ -39,7 +86,42 @@ class TT_MPWrapper(RawInterfaceWrapper): return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]: return self.get_invalid_traj_step_return(action, pos_traj, return_contextual_obs) + +class TT_MPWrapper_Replan(TT_MPWrapper): 
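+    # Note on the ProDMP config below: with learn_tau and learn_delay enabled,
+    # the first entries of the predicted parameter vector set the trajectory
+    # duration (tau) and the start delay, constrained to tau_bound and
+    # delay_bound, before the remaining entries parameterize the basis weights.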
+ mp_config = { + 'ProMP': {}, + 'DMP': {}, + 'ProDMP': { + 'phase_generator_kwargs': { + 'learn_tau': True, + 'learn_delay': True, + 'tau_bound': [0.8, 1.5], + 'delay_bound': [0.05, 0.15], + 'alpha_phase': 3, + }, + 'controller_kwargs': { + 'p_gains': 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), + 'd_gains': 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]), + }, + 'basis_generator_kwargs': { + 'num_basis': 2, + 'alpha': 25, + 'basis_bandwidth_factor': 3, + }, + 'trajectory_generator_kwargs': { + 'auto_scale_basis': True, + 'goal_offset': 1.0, + }, + 'black_box_kwargs': { + 'max_planning_times': 3, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % 50 == 0, + }, + }, + } + + class TTVelObs_MPWrapper(TT_MPWrapper): + # Will inherit mp_config from TT_MPWrapper @property def context_mask(self): @@ -51,4 +133,20 @@ class TTVelObs_MPWrapper(TT_MPWrapper): [True] * 3, # velocity ball x, y, z [True] * 2, # target landing position # [True] * 1, # time - ]) \ No newline at end of file + ]) + + +class TTVelObs_MPWrapper_Replan(TT_MPWrapper_Replan): + # Will inherit mp_config from TT_MPWrapper_Replan + + @property + def context_mask(self): + return np.hstack([ + [False] * 7, # joints position + [False] * 7, # joints velocity + [True] * 2, # position ball x, y + [False] * 1, # position ball z + [True] * 3, # velocity ball x, y, z + [True] * 2, # target landing position + # [True] * 1, # time + ]) From a069aaac652ac1ba0661705cbc055a6bb45b6f8b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 10:25:28 +0200 Subject: [PATCH 087/198] Removed old manual registration of mp-envs (port to fancy registry) --- fancy_gym/envs/__init__.py | 268 +++++++++++-------------------------- 1 file changed, 79 insertions(+), 189 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 7c83d49..09e4e9b 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -15,6 +15,8 @@ from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER from .mujoco.reacher.mp_wrapper import MPWrapper as MPWrapper_Reacher from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG, FIXED_RELEASE_STEP +from .mujoco.beerpong.mp_wrapper import MPWrapper as MPWrapper_Beerpong +from .mujoco.beerpong.mp_wrapper import MPWrapper_FixedRelease as MPWrapper_Beerpong_FixedRelease from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX @@ -25,6 +27,10 @@ from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTempo BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \ MAX_EPISODE_STEPS_TABLE_TENNIS +from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper as MPWrapper_TableTennis +from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper_Replan as MPWrapper_TableTennis_Replan +from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper as MPWrapper_TableTennis_VelObs +from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper_Replan as MPWrapper_TableTennis_VelObs_Replan # Classic Control # Simple Reacher @@ -129,46 +135,73 @@ register( } ) -gym_register( +# TODO: Add [MPs] later when finished (old TODO I moved here during refactor) +register( 
id='AntJump-v0', entry_point='fancy_gym.envs.mujoco:AntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, + add_mp_types=[], ) -gym_register( +register( id='HalfCheetahJump-v0', entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + add_mp_types=[], ) -gym_register( +register( id='HopperJumpOnBox-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, + add_mp_types=[], ) -gym_register( +register( id='HopperThrow-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, + add_mp_types=[], ) -gym_register( +register( id='HopperThrowInBasket-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, + add_mp_types=[], ) -gym_register( +register( id='Walker2DJump-v0', entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, + add_mp_types=[], ) -gym_register( +register( # [MPDone id='BeerPong-v0', entry_point='fancy_gym.envs.mujoco:BeerPongEnv', + mp_wrapper=MPWrapper_Beerpong, max_episode_steps=MAX_EPISODE_STEPS_BEERPONG, + add_mp_types=['ProMP'], +) + +# Here we use the same reward as in BeerPong-v0, but now consider after the release, +# only one time step, i.e. we simulate until the end of th episode +register( + id='BeerPongStepBased-v0', + entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward', + mp_wrapper=MPWrapper_Beerpong_FixedRelease, + max_episode_steps=FIXED_RELEASE_STEP, + add_mp_types=['ProMP'], +) + +register( + id='BeerPongFixedRelease-v0', + entry_point='fancy_gym.envs.mujoco:BeerPongEnv', + mp_wrapper=MPWrapper_Beerpong_FixedRelease, + max_episode_steps=FIXED_RELEASE_STEP, + add_mp_types=['ProMP'], ) # Box pushing environments with different rewards @@ -188,209 +221,66 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, ) -# Here we use the same reward as in BeerPong-v0, but now consider after the release, -# only one time step, i.e. 
we simulate until the end of th episode -gym_register( - id='BeerPongStepBased-v0', - entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward', - max_episode_steps=FIXED_RELEASE_STEP, -) - # Table Tennis environments for ctxt_dim in [2, 4]: - gym_register( + register( id='TableTennis{}D-v0'.format(ctxt_dim), entry_point='fancy_gym.envs.mujoco:TableTennisEnv', + mp_wrapper=MPWrapper_TableTennis, max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, + add_mp_types=['ProMP', 'ProDMP'], kwargs={ "ctxt_dim": ctxt_dim, 'frame_skip': 4, } ) -gym_register( + register( + id='TableTennis{}DReplan-v0'.format(ctxt_dim), + entry_point='fancy_gym.envs.mujoco:TableTennisEnv', + mp_wrapper=MPWrapper_TableTennis, + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, + add_mp_types=['ProDMP'], + kwargs={ + "ctxt_dim": ctxt_dim, + 'frame_skip': 4, + } + ) + +register( id='TableTennisWind-v0', entry_point='fancy_gym.envs.mujoco:TableTennisWind', + mp_wrapper=MPWrapper_TableTennis_VelObs, + add_mp_types=['ProMP', 'ProDMP'], max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, ) -gym_register( +register( + id='TableTennisWindReplan-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisWind', + mp_wrapper=MPWrapper_TableTennis_VelObs_Replan, + add_mp_types=['ProDMP'], + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, +) + +register( id='TableTennisGoalSwitching-v0', entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching', + mp_wrapper=MPWrapper_TableTennis, + add_mp_types=['ProMP', 'ProDMP'], max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, kwargs={ 'goal_switching_step': 99 } ) - -# Beerpong ProMP -_versions = ['BeerPong-v0'] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper) - kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True - kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]) - kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) - kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 - kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 - kwargs_dict_bp_promp['name'] = _v - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_bp_promp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# BP with Fixed release -_versions = ["BeerPongStepBased-v0", 'BeerPong-v0'] -for _v in _versions: - if _v != 'BeerPong-v0': - _name = _v.split("-") - _env_id = f'{_name[0]}ProMP-{_name[1]}' - else: - _env_id = 'BeerPongFixedReleaseProMP-v0' - kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper) - kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62 - kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]) - kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) - kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 - kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 - kwargs_dict_bp_promp['name'] = _v - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_bp_promp - ) - 
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) -######################################################################################################################## - -# Table Tennis needs to be fixed according to Zhou's implementation - -# TODO: Add later when finished -# ######################################################################################################################## -# -# ## AntJump -# _versions = ['AntJump-v0'] -# for _v in _versions: -# _name = _v.split("-") -# _env_id = f'{_name[0]}ProMP-{_name[1]}' -# kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) -# kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper) -# kwargs_dict_ant_jump_promp['name'] = _v -# gym_register( -# id=_env_id, -# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', -# kwargs=kwargs_dict_ant_jump_promp -# ) -# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) -# -# ######################################################################################################################## -# -# ## HalfCheetahJump -# _versions = ['HalfCheetahJump-v0'] -# for _v in _versions: -# _name = _v.split("-") -# _env_id = f'{_name[0]}ProMP-{_name[1]}' -# kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) -# kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper) -# kwargs_dict_halfcheetah_jump_promp['name'] = _v -# gym_register( -# id=_env_id, -# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', -# kwargs=kwargs_dict_halfcheetah_jump_promp -# ) -# ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) -# -# ######################################################################################################################## - -# Table Tennis -_versions = ['TableTennis2D-v0', 'TableTennis4D-v0', 'TableTennisWind-v0', 'TableTennisGoalSwitching-v0'] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_tt_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - if _v == 'TableTennisWind-v0': - kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) - else: - kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) - kwargs_dict_tt_promp['name'] = _v - kwargs_dict_tt_promp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) - kwargs_dict_tt_promp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) - kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = False - kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = False - kwargs_dict_tt_promp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] - kwargs_dict_tt_promp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] - kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3 - kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 1 - kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_goal'] = 1 - kwargs_dict_tt_promp['black_box_kwargs']['verbose'] = 2 - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_tt_promp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}ProDMP-{_name[1]}' - kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) - if _v == 'TableTennisWind-v0': - 
kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) - else: - kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) - kwargs_dict_tt_prodmp['name'] = _v - kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) - kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.7 - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['relative_goal'] = True - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['disable_goal'] = True - kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] - kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] - kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True - kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True - kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 3 - kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. - kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 - kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 - gym_register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_tt_prodmp - ) - ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) - -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' - kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) - if _v == 'TableTennisWind-v0': - kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) - else: - kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) - kwargs_dict_tt_prodmp['name'] = _v - kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) - kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = False - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 - kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] - kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] - kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True - kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True - kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 2 - kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. 
-    kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
-    kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
-    kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3
-    kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t: t % 50 == 0
-    gym_register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_tt_prodmp
-    )
-    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)

From ce34df181458cbd11e87dde707b946794ec87c44 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 23 Jul 2023 11:05:45 +0200
Subject: [PATCH 088/198] No longer export old custom make

---
 fancy_gym/__init__.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
index f6f690a..32308fa 100644
--- a/fancy_gym/__init__.py
+++ b/fancy_gym/__init__.py
@@ -1,5 +1,6 @@
 from fancy_gym import dmc, meta, open_ai
-from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
+from fancy_gym.utils.make_env_helpers import make_bb
+from .envs.registry import register
 from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 # Convenience function for all MP environments
 from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
@@ -7,7 +8,5 @@ from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS

 ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
-    key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
-    ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
+    key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
     for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}

From 21a9c0f8a316749f17606da831ef7952957684df Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 23 Jul 2023 11:06:08 +0200
Subject: [PATCH 089/198] Fixed Bug leading to infinite recursion when making
 mp envs

---
 fancy_gym/envs/registry.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 9e37fcc..abaa295 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -7,7 +7,7 @@ from fancy_gym.utils.make_env_helpers import make_bb
 from fancy_gym.utils.utils import nested_update

 from gymnasium import register as gym_register
-from gymnasium import gym_make
+from gymnasium import make as gym_make

 from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper

@@ -135,7 +135,7 @@ def register_mp(id, mp_wrapper, mp_type, mp_config_override={}):
     parts = id.split('-')
     assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.'
fancy_id = '-'.join(parts[:-1]+[mp_type, parts[-1]]) - register( + gym_register( id=fancy_id, entry_point=bb_env_constructor, kwargs={ From b0f7dc6c7cd5d38ca8593f08e725ca47544f2ca4 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 11:13:15 +0200 Subject: [PATCH 090/198] test/utils.py must use gym.make now (was fancy_gym.make) --- test/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/utils.py b/test/utils.py index 782b151..501d15e 100644 --- a/test/utils.py +++ b/test/utils.py @@ -2,7 +2,7 @@ from typing import List, Type import gymnasium as gym import numpy as np -from fancy_gym import make +from gymnasium import make def run_env(env_id: str, iterations: int = None, seed: int = 0, wrappers: List[Type[gym.Wrapper]] = [], From eb9b6e1e22a4211834d627f9783dc7abdc554208 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 12:20:49 +0200 Subject: [PATCH 091/198] test_black_box.py should use vanilla env.make --- test/test_black_box.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index 139b1c2..76bd73e 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -4,7 +4,7 @@ from typing import Tuple, Type, Union, Optional, Callable import gymnasium as gym import numpy as np import pytest -from gymnasium import register +from gymnasium import register, make from gymnasium.core import ActType, ObsType import fancy_gym @@ -13,7 +13,7 @@ from fancy_gym.utils.wrappers import TimeAwareObservation from test.utils import ugly_hack_to_mitigate_metaworld_bug SEED = 1 -ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2'] +ENV_IDS = ['Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2'] WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper, fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) @@ -102,7 +102,7 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]] _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample()) info_keys = list(info.keys()) - env_step = fancy_gym.make(env_id, SEED) + env_step = make(env_id) env_step.reset() _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample()) info_keys_step = info.keys() @@ -161,7 +161,7 @@ def test_context_space(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapp {'phase_generator_type': 'exp'}, {'basis_generator_type': 'rbf'}) # check if observation space matches with the specified mask values which are true - env_step = fancy_gym.make(env_id, SEED) + env_step = make(env_id) wrapper = wrapper_class(env_step) assert env.observation_space.shape == wrapper.context_mask[wrapper.context_mask].shape @@ -231,8 +231,9 @@ def test_learn_tau(mp_type: str, tau: float): 'learn_delay': False }, {'basis_generator_type': basis_generator_type, - }, seed=SEED) + }) + env.reset(seed=SEED) done = True for i in range(5): if done: @@ -277,8 +278,9 @@ def test_learn_delay(mp_type: str, delay: float): 'learn_delay': True }, {'basis_generator_type': basis_generator_type, - }, seed=SEED) + }) + env.reset(seed=SEED) done = True for i in range(5): if done: @@ -323,7 +325,9 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): 'learn_delay': True }, {'basis_generator_type': basis_generator_type, 
- }, seed=SEED) + }) + + env.reset(seed=SEED) if env.spec.max_episode_steps * env.dt < delay + tau: return From 3e586a14076d43088785c3d762d7c40fdc77ad79 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 12:21:18 +0200 Subject: [PATCH 092/198] Added tests for new fancy registry --- test/test_fancy_registry.py | 69 +++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 test/test_fancy_registry.py diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py new file mode 100644 index 0000000..0862243 --- /dev/null +++ b/test/test_fancy_registry.py @@ -0,0 +1,69 @@ +from typing import Tuple, Type, Union, Optional, Callable + +import gymnasium as gym +import numpy as np +import pytest +from gymnasium import make +from gymnasium.core import ActType, ObsType + +import fancy_gym +from fancy_gym import register + +ENV_IDS = ['Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2'] + + +class Object(object): + pass + + +class ToyEnv(gym.Env): + observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64) + action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64) + dt = 0.02 + + def __init__(self, a: int = 0, b: float = 0.0, c: list = [], d: dict = {}, e: Object = Object()): + self.a, self.b, self.c, self.d, self.e = a, b, c, d, e + + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, + options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]: + obs, options = np.array([-1]), {} + return obs, options + + def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: + obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {} + return obs, reward, terminated, truncated, info + + def render(self, mode="human"): + pass + + +@pytest.fixture(scope="session", autouse=True) +def setup(): + register( + id=f'toy2-v0', + entry_point='test.test_black_box:ToyEnv', + max_episode_steps=50, + ) + + +@pytest.mark.parametrize('env_id', ENV_IDS) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) +def test_make_mp(env_id: str, mp_type: str): + parts = id.split('-') + assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.' + fancy_id = '-'.join(parts[:-1]+[mp_type, parts[-1]]) + + make(fancy_id) + + +def test_make_raw_toy(): + make('toy2-v0') + + +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) +def test_make_mp_toy(mp_type: str): + parts = id.split('-') + assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.' + fancy_id = '-'.join(['toy2', mp_type, 'v0']) + + make(fancy_id) From 99a02b83477c946876e438e3fb7501f6ae8c1d7b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 12:21:34 +0200 Subject: [PATCH 093/198] Started work on new adapter & mp_config port for metaworld --- fancy_gym/meta/__init__.py | 6 +++ fancy_gym/meta/metaworld_adapter.py | 78 +++++++++++++++++++++++++++++ fancy_gym/utils/make_env_helpers.py | 62 +---------------------- 3 files changed, 86 insertions(+), 60 deletions(-) create mode 100644 fancy_gym/meta/metaworld_adapter.py diff --git a/fancy_gym/meta/__init__.py b/fancy_gym/meta/__init__.py index 2bcbfd8..ec07a45 100644 --- a/fancy_gym/meta/__init__.py +++ b/fancy_gym/meta/__init__.py @@ -1,3 +1,5 @@ +from typing import Iterable, Type, Union, Optional + from copy import deepcopy from gymnasium import register @@ -5,6 +7,10 @@ from gymnasium import register from . 
import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \ object_change_mp_wrapper +from . import metaworld_adapter + +metaworld_adapter.register_all_ML1() + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} # MetaWorld diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py new file mode 100644 index 0000000..5404e5e --- /dev/null +++ b/fancy_gym/meta/metaworld_adapter.py @@ -0,0 +1,78 @@ +import numpy as np +from gymnasium import register as gym_register +from fancy_gym import register + +import uuid + +import gymnasium as gym +import numpy as np + +from fancy_gym.utils.env_compatibility import EnvCompatibility + +try: + import metaworld +except Exception: + # catch Exception as Import error does not catch missing mujoco-py + # TODO: Print info? + pass + + +class MujocoMapSpacesWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): + def __init__(self, env: gym.Env): + gym.utils.RecordConstructorArgs.__init__(self) + gym.Wrapper.__init__(self, env) + + eos = env.observation_space + eas = env.observation_space + + Obs_Space_Class = getattr(gym.spaces, str(eos.__class__).split("'")[1].split('.')[-1]) + Act_Space_Class = getattr(gym.spaces, str(eas.__class__).split("'")[1].split('.')[-1]) + + self.observation_space = Obs_Space_Class(low=eos.low, high=eos.high, dtype=eos.dtype) + self.action_space = Act_Space_Class(low=eas.low, high=eas.high, dtype=eas.dtype) + + +def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] = None, **kwargs): + if underlying_id not in metaworld.ML1.ENV_NAMES: + raise ValueError(f'Specified environment "{underlying_id}" not present in metaworld ML1.') + + _env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[underlying_id + "-goal-observable"](seed=seed, **kwargs) + + # setting this avoids generating the same initialization after each reset + _env._freeze_rand_vec = False + # New argument to use global seeding + _env.seeded_rand_vec = True + + max_episode_steps = _env.max_path_length + + # TODO remove this as soon as there is support for the new API + _env = EnvCompatibility(_env, render_mode) + + gym_id = '_metaworld_compat_' + uuid.uuid4().hex + '-v0' + + gym_register( + id=gym_id, + entry_point=lambda: _env, + max_episode_steps=max_episode_steps, + ) + + # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld + env = gym.make(gym_id, disable_env_checker=True) + env = MujocoMapSpacesWrapper(env) + return env + + +def register_all_ML1(**kwargs): + for env_id in metaworld.ML1.ENV_NAMES: + _env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=0) + max_episode_steps = _env.max_path_length + + gym_register( + id='metaworld/'+env_id, + entry_point=make_metaworld, + max_episode_steps=max_episode_steps, + kwargs={ + 'underlying_id': env_id + }, + **kwargs + ) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index ec4a0c1..5266a60 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -15,7 +15,6 @@ from gymnasium import make import numpy as np from gymnasium.envs.registration import register, registry from gymnasium.wrappers import TimeLimit -from gymnasium import make as gym_make from fancy_gym.utils.env_compatibility import EnvCompatibility from fancy_gym.utils.wrappers import FlattenObservation @@ -63,7 +62,7 @@ def _make_wrapped_env(env: gym.Env, wrappers: 
Iterable[Type[gym.Wrapper]], seed= def make_bb( env: Union[gym.Env, str], wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping, controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, - time_limit: int, fallback_max_steps: int = None): + time_limit: int = None, fallback_max_steps: int = None, **kwargs): """ This can also be used standalone for manually building a custom DMP environment. Args: @@ -92,7 +91,7 @@ def make_bb( wrappers.insert(0, TimeAwareObservation) if isinstance(env, str): - env = make(env) + env = make(env, **kwargs) env = _make_wrapped_env(env=env, wrappers=wrappers, fallback_max_steps=fallback_max_steps) @@ -154,63 +153,6 @@ def get_env_duration(env: gym.Env): return duration -def make(env_id: str, **kwargs): - """ - Converts an env_id to an environment with the gym API. - This also works for DeepMind Control Suite environments that are wrapped using the DMCWrapper, they can be - specified with "dmc/domain_name-task_name" - Analogously, metaworld tasks can be created as "metaworld/env_id-v2". - Args: - env_id: spec or env_id for gym tasks, external environments require a domain specification - **kwargs: Additional kwargs for the constructor such as pixel observations, etc. - Returns: Gym environment - """ - - if env_id.startswith('metaworld'): - env = make_metaworld(env_id.replace('metaworld', '')[1:], **kwargs) - - env = gym_make(env_id, **kwargs) - - if not env.spec.max_episode_steps == None: - # Hack: Some envs violate the gym spec in that they don't correctly expose the maximum episode steps - # Gymnasium disallows accessing private attributes, so we have to get creative to read the internal values - # TODO: Remove this, when all supported envs correctly implement this themselves - unwrapped = env.unwrapped if hasattr(env, 'unwrapped') else env - if hasattr(unwrapped, '_max_episode_steps'): - env.spec.max_episode_steps = unwrapped.__getattribute__('_max_episode_steps') - - return env - - -def make_metaworld(env_id: str, seed: int, render_mode: Optional[str] = None, **kwargs): - if env_id not in metaworld.ML1.ENV_NAMES: - raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.') - - _env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs) - - # setting this avoids generating the same initialization after each reset - _env._freeze_rand_vec = False - # New argument to use global seeding - _env.seeded_rand_vec = True - - max_episode_steps = _env.max_path_length - - # TODO remove this as soon as there is support for the new API - _env = EnvCompatibility(_env, render_mode) - - gym_id = uuid.uuid4().hex + '-v1' - - register( - id=gym_id, - entry_point=lambda: _env, - max_episode_steps=max_episode_steps, - ) - - # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld - env = gym.make(gym_id, disable_env_checker=True) - return env - - def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]): """ When using DMC check if a manually specified time limit matches the trajectory duration the MP receives. 
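A minimal usage sketch of the workflow this series moves toward (illustrative only, not part of any patch; 'metaworld/reach-v2' is one of the ids registered by register_all_ML1 above):

    import gymnasium as gym
    import fancy_gym  # noqa -- importing fancy_gym registers all provided env ids

    env = gym.make('metaworld/reach-v2')
    obs, info = env.reset(seed=1)
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

With the custom fancy_gym.make removed, vanilla gymnasium.make is the single entry point for step-based and MP environments alike.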
From 6787b02bbd77986f08aebe6b56910409df19c33b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 12:26:41 +0200 Subject: [PATCH 094/198] Fixed missing import in metaworld_adapter --- fancy_gym/meta/metaworld_adapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py index 5404e5e..b0dda4d 100644 --- a/fancy_gym/meta/metaworld_adapter.py +++ b/fancy_gym/meta/metaworld_adapter.py @@ -1,6 +1,7 @@ +from typing import Iterable, Type, Union, Optional + import numpy as np from gymnasium import register as gym_register -from fancy_gym import register import uuid From 0fb0506e495e304f66706e47bd36d46c67315f13 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 12:31:47 +0200 Subject: [PATCH 095/198] Fix: Many tests still tried to seed during make --- test/test_replanning_sequencing.py | 21 ++++++++++++++------- test/utils.py | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index a8eb4b5..001028e 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -79,7 +79,8 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}, seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) + {'basis_generator_type': 'rbf'}, fallback_max_steps=MAX_STEPS_FALLBACK) + env.reset(seed=SEED) assert env.learn_sub_trajectories assert env.spec.max_episode_steps @@ -132,7 +133,8 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': phase_generator_type}, - {'basis_generator_type': basis_generator_type}, seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) + {'basis_generator_type': basis_generator_type}, fallback_max_steps=MAX_STEPS_FALLBACK) + env.reset(seed=SEED) assert env.do_replanning assert env.spec.max_episode_steps @@ -181,7 +183,8 @@ def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_s }, {'basis_generator_type': basis_generator_type, }, - seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) + fallback_max_steps=MAX_STEPS_FALLBACK) + _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -213,7 +216,8 @@ def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_se }, {'basis_generator_type': basis_generator_type, }, - seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) + fallback_max_steps=MAX_STEPS_FALLBACK) + _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -246,7 +250,8 @@ def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_ }, {'basis_generator_type': basis_generator_type, }, - seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) + fallback_max_steps=MAX_STEPS_FALLBACK) + _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -301,7 +306,8 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i }, {'basis_generator_type': basis_generator_type, }, - seed=SEED, fallback_max_steps=MAX_STEPS_FALLBACK) + fallback_max_steps=MAX_STEPS_FALLBACK) + _ = env.reset(seed=SEED) done = False planning_times = 0 @@ -350,7 +356,8 @@ def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_ }, {'basis_generator_type': basis_generator_type, }, - seed=SEED, 
fallback_max_steps=MAX_STEPS_FALLBACK) + fallback_max_steps=MAX_STEPS_FALLBACK) + _ = env.reset(seed=SEED) for i in range(max_planning_times): action = env.action_space.sample() diff --git a/test/utils.py b/test/utils.py index 501d15e..8f92bbd 100644 --- a/test/utils.py +++ b/test/utils.py @@ -22,7 +22,7 @@ def run_env(env_id: str, iterations: int = None, seed: int = 0, wrappers: List[T Returns: observations, rewards, terminations, truncations, actions """ - env: gym.Env = make(env_id, seed=seed) + env: gym.Env = make(env_id) for w in wrappers: env = w(env) rewards = [] From e2ff915a7116abb9cafc139b23ce44f0f94772c0 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 13:11:31 +0200 Subject: [PATCH 096/198] Fix: Cannot use .get for object attributes --- fancy_gym/envs/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index abaa295..ff9775b 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -152,7 +152,7 @@ def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={} raw_underlying_env = gym_make(underlying_id, **kwargs) underlying_env = mp_wrapper(raw_underlying_env) - mp_config = underlying_env.get('mp_config', {}) + mp_config = getattr(underlying_env, 'mp_config') if hasattr(underlying_env, 'mp_config') else {} active_mp_config = copy.deepcopy(mp_config.get(mp_type, {})) global_inherit_defaults = mp_config.get('inherit_defaults', True) inherit_defaults = active_mp_config.pop('inherit_defaults', global_inherit_defaults) From fc4b98ea7d0faa862e4c5d6916ddf66382a5cde6 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 13:12:07 +0200 Subject: [PATCH 097/198] Allow envs to already be wrapped with a RawInterfaceWrapper --- fancy_gym/utils/make_env_helpers.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 5266a60..cebf7aa 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -49,6 +49,12 @@ def _make_wrapped_env(env: gym.Env, wrappers: Iterable[Type[gym.Wrapper]], seed= if fallback_max_steps: env = ensure_finite_time(env, fallback_max_steps) has_black_box_wrapper = False + head = env + while hasattr(head, 'env'): + if isinstance(head, RawInterfaceWrapper): + has_black_box_wrapper = True + break + head = head.env for w in wrappers: # only wrap the environment if not BlackBoxWrapper, e.g. for vision if issubclass(w, RawInterfaceWrapper): From 95b2c255e9c8b8525d3bec5bf569b275f6b5abe3 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 13:13:02 +0200 Subject: [PATCH 098/198] Fixed typo --- test/test_fancy_registry.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py index 0862243..31bd9a6 100644 --- a/test/test_fancy_registry.py +++ b/test/test_fancy_registry.py @@ -47,9 +47,9 @@ def setup(): @pytest.mark.parametrize('env_id', ENV_IDS) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) +@pytest.mark.parametrize('mp_type', ['ProMP', 'DMP', 'ProDMP']) def test_make_mp(env_id: str, mp_type: str): - parts = id.split('-') + parts = env_id.split('-') assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.' 
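    # (illustrative aside, not part of the diff) e.g. env_id 'Reacher5d-v0' with mp_type 'ProMP' yields fancy_id 'Reacher5d-ProMP-v0'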
fancy_id = '-'.join(parts[:-1]+[mp_type, parts[-1]]) @@ -60,10 +60,8 @@ def test_make_raw_toy(): make('toy2-v0') -@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) +@pytest.mark.parametrize('mp_type', ['ProMP', 'DMP', 'ProDMP']) def test_make_mp_toy(mp_type: str): - parts = id.split('-') - assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.' fancy_id = '-'.join(['toy2', mp_type, 'v0']) make(fancy_id) From 127d355ccc01782c1ebc1bd030ce2b803d512399 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 13:15:15 +0200 Subject: [PATCH 099/198] Move into our own namespace ('fancy') --- fancy_gym/envs/__init__.py | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 09e4e9b..da4b98f 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -35,7 +35,7 @@ from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper_Replan as MPWrapp # Classic Control # Simple Reacher register( - id='SimpleReacher-v0', + id='fancy/SimpleReacher-v0', entry_point=SimpleReacherEnv, mp_wrapper=MPWrapper_SimpleReacher, max_episode_steps=200, @@ -45,7 +45,7 @@ register( ) register( - id='LongSimpleReacher-v0', + id='fancy/LongSimpleReacher-v0', entry_point=SimpleReacherEnv, mp_wrapper=MPWrapper_SimpleReacher, max_episode_steps=200, @@ -56,7 +56,7 @@ register( # Viapoint Reacher register( - id='ViaPointReacher-v0', + id='fancy/ViaPointReacher-v0', entry_point=ViaPointReacherEnv, mp_wrapper=MPWrapper_ViaPointReacher, max_episode_steps=200, @@ -69,7 +69,7 @@ register( # Hole Reacher register( - id='HoleReacher-v0', + id='fancy/HoleReacher-v0', entry_point=HoleReacherEnv, mp_wrapper=MPWrapper_HoleReacher, max_episode_steps=200, @@ -90,7 +90,7 @@ register( # Mujoco Reacher for dims in [5, 7]: register( - id=f'Reacher{dims}d-v0', + id=f'fancy/Reacher{dims}d-v0', entry_point=ReacherEnv, mp_wrapper=MPWrapper_Reacher, max_episode_steps=MAX_EPISODE_STEPS_REACHER, @@ -100,7 +100,7 @@ for dims in [5, 7]: ) register( - id=f'Reacher{dims}dSparse-v0', + id=f'fancy/Reacher{dims}dSparse-v0', entry_point=ReacherEnv, mp_wrapper=MPWrapper_Reacher, max_episode_steps=MAX_EPISODE_STEPS_REACHER, @@ -113,7 +113,7 @@ for dims in [5, 7]: register( - id='HopperJumpSparse-v0', + id='fancy/HopperJumpSparse-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', mp_wrapper=mujoco.hopper_jump.MPWrapper, max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, @@ -123,7 +123,7 @@ register( ) register( - id='HopperJump-v0', + id='fancy/HopperJump-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpEnv', mp_wrapper=mujoco.hopper_jump.MPWrapper, max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, @@ -137,49 +137,49 @@ register( # TODO: Add [MPs] later when finished (old TODO I moved here during refactor) register( - id='AntJump-v0', + id='fancy/AntJump-v0', entry_point='fancy_gym.envs.mujoco:AntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, add_mp_types=[], ) register( - id='HalfCheetahJump-v0', + id='fancy/HalfCheetahJump-v0', entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, add_mp_types=[], ) register( - id='HopperJumpOnBox-v0', + id='fancy/HopperJumpOnBox-v0', entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, add_mp_types=[], ) register( - id='HopperThrow-v0', + id='fancy/HopperThrow-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowEnv', 
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, add_mp_types=[], ) register( - id='HopperThrowInBasket-v0', + id='fancy/HopperThrowInBasket-v0', entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, add_mp_types=[], ) register( - id='Walker2DJump-v0', + id='fancy/Walker2DJump-v0', entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, add_mp_types=[], ) register( # [MPDone - id='BeerPong-v0', + id='fancy/BeerPong-v0', entry_point='fancy_gym.envs.mujoco:BeerPongEnv', mp_wrapper=MPWrapper_Beerpong, max_episode_steps=MAX_EPISODE_STEPS_BEERPONG, @@ -189,7 +189,7 @@ register( # [MPDone # Here we use the same reward as in BeerPong-v0, but now consider after the release, # only one time step, i.e. we simulate until the end of th episode register( - id='BeerPongStepBased-v0', + id='fancy/BeerPongStepBased-v0', entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward', mp_wrapper=MPWrapper_Beerpong_FixedRelease, max_episode_steps=FIXED_RELEASE_STEP, @@ -197,7 +197,7 @@ register( ) register( - id='BeerPongFixedRelease-v0', + id='fancy/BeerPongFixedRelease-v0', entry_point='fancy_gym.envs.mujoco:BeerPongEnv', mp_wrapper=MPWrapper_Beerpong_FixedRelease, max_episode_steps=FIXED_RELEASE_STEP, @@ -207,14 +207,14 @@ register( # Box pushing environments with different rewards for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: register( - id='BoxPushing{}-v0'.format(reward_type), + id='fancy/BoxPushing{}-v0'.format(reward_type), entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), mp_wrapper=mujoco.box_pushing.MPWrapper, max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, ) register( - id='BoxPushing{}Replan-v0'.format(reward_type), + id='fancy/BoxPushing{}Replan-v0'.format(reward_type), entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), mp_wrapper=mujoco.box_pushing.ReplanMPWrapper, register_step_based=False, @@ -224,7 +224,7 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: # Table Tennis environments for ctxt_dim in [2, 4]: register( - id='TableTennis{}D-v0'.format(ctxt_dim), + id='fancy/TableTennis{}D-v0'.format(ctxt_dim), entry_point='fancy_gym.envs.mujoco:TableTennisEnv', mp_wrapper=MPWrapper_TableTennis, max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, @@ -236,7 +236,7 @@ for ctxt_dim in [2, 4]: ) register( - id='TableTennis{}DReplan-v0'.format(ctxt_dim), + id='fancy/TableTennis{}DReplan-v0'.format(ctxt_dim), entry_point='fancy_gym.envs.mujoco:TableTennisEnv', mp_wrapper=MPWrapper_TableTennis, max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, @@ -248,7 +248,7 @@ for ctxt_dim in [2, 4]: ) register( - id='TableTennisWind-v0', + id='fancy/TableTennisWind-v0', entry_point='fancy_gym.envs.mujoco:TableTennisWind', mp_wrapper=MPWrapper_TableTennis_VelObs, add_mp_types=['ProMP', 'ProDMP'], @@ -256,7 +256,7 @@ register( ) register( - id='TableTennisWindReplan-v0', + id='fancy/TableTennisWindReplan-v0', entry_point='fancy_gym.envs.mujoco:TableTennisWind', mp_wrapper=MPWrapper_TableTennis_VelObs_Replan, add_mp_types=['ProDMP'], @@ -264,7 +264,7 @@ register( ) register( - id='TableTennisGoalSwitching-v0', + id='fancy/TableTennisGoalSwitching-v0', entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching', mp_wrapper=MPWrapper_TableTennis, add_mp_types=['ProMP', 'ProDMP'], @@ -275,7 +275,7 @@ register( ) register( - id='TableTennisGoalSwitchingReplan-v0', + id='fancy/TableTennisGoalSwitchingReplan-v0', 
entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching', mp_wrapper=MPWrapper_TableTennis_Replan, add_mp_types=['ProDMP'], From 58bb8c0395605b2bc6f0dbc5498c36230e3576b7 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 13:24:16 +0200 Subject: [PATCH 100/198] Fix: Env in new namespace --- test/test_fancy_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py index 31bd9a6..e67f5df 100644 --- a/test/test_fancy_registry.py +++ b/test/test_fancy_registry.py @@ -9,7 +9,7 @@ from gymnasium.core import ActType, ObsType import fancy_gym from fancy_gym import register -ENV_IDS = ['Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2'] +ENV_IDS = ['fancy/Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2'] class Object(object): From 3a9143fd936e6fc1903ba6337e84f68cc705907e Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 15:14:14 +0200 Subject: [PATCH 101/198] removed file deprecated --- fancy_gym/dmc/dmc_wrapper.py | 188 ----------------------------------- 1 file changed, 188 deletions(-) delete mode 100644 fancy_gym/dmc/dmc_wrapper.py diff --git a/fancy_gym/dmc/dmc_wrapper.py b/fancy_gym/dmc/dmc_wrapper.py deleted file mode 100644 index b4f2292..0000000 --- a/fancy_gym/dmc/dmc_wrapper.py +++ /dev/null @@ -1,188 +0,0 @@ -# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py -# License: MIT -# Copyright (c) 2020 Denis Yarats -import collections -from collections.abc import MutableMapping -from typing import Any, Dict, Tuple, Optional, Union, Callable, SupportsFloat - -import gymnasium as gym -import numpy as np -from dm_control import composer -from dm_control.rl import control -from dm_env import specs -from gymnasium import spaces -from gymnasium.core import ObsType, ActType - - -def _spec_to_box(spec): - def extract_min_max(s): - assert s.dtype == np.float64 or s.dtype == np.float32, \ - f"Only float64 and float32 types are allowed, instead {s.dtype} was found" - dim = int(np.prod(s.shape)) - if type(s) == specs.Array: - bound = np.inf * np.ones(dim, dtype=s.dtype) - return -bound, bound - elif type(s) == specs.BoundedArray: - zeros = np.zeros(dim, dtype=s.dtype) - return s.minimum + zeros, s.maximum + zeros - - mins, maxs = [], [] - for s in spec: - mn, mx = extract_min_max(s) - mins.append(mn) - maxs.append(mx) - low = np.concatenate(mins, axis=0) - high = np.concatenate(maxs, axis=0) - assert low.shape == high.shape - return spaces.Box(low, high, dtype=s.dtype) - - -def _flatten_obs(obs: MutableMapping): - """ - Flattens an observation of type MutableMapping, e.g. a dict to a 1D array. - Args: - obs: observation to flatten - - Returns: 1D array of observation - - """ - - if not isinstance(obs, MutableMapping): - raise ValueError(f'Requires dict-like observations structure. 
{type(obs)} found.') - - # Keep key order consistent for non OrderedDicts - keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys()) - - obs_vals = [np.array([obs[key]]) if np.isscalar(obs[key]) else obs[key].ravel() for key in keys] - return np.concatenate(obs_vals) - - -class DMCWrapper(gym.Env): - def __init__(self, - env: Callable[[], Union[composer.Environment, control.Environment]], - ): - - raise Exception('The fancy_gym dmc-wrapper is deprecated; shimmy should be used instead.') - - # TODO: Currently this is required to be a function because dmc does not allow to copy composers environments - self._env = env() - - # action and observation space - self._action_space = _spec_to_box([self._env.action_spec()]) - self._observation_space = _spec_to_box(self._env.observation_spec().values()) - - self._window = None - self.id = 'dmc' - - def __getattr__(self, item): - """Propagate only non-existent properties to wrapped env.""" - if item.startswith('_'): - raise AttributeError("attempted to get missing private attribute '{}'".format(item)) - if item in self.__dict__: - return getattr(self, item) - return getattr(self._env, item) - - def _get_obs(self, time_step): - obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype) - return obs - - @property - def observation_space(self): - return self._observation_space - - @property - def action_space(self): - return self._action_space - - @property - def dt(self): - return self._env.control_timestep() - - def seed(self, seed=None): - self._action_space.seed(seed) - self._observation_space.seed(seed) - - def step(self, action: ActType) -> Tuple[ObsType, SupportsFloat, bool, bool, Dict[str, Any]]: - assert self._action_space.contains(action) - extra = {'internal_state': self._env.physics.get_state().copy()} - time_step = self._env.step(action) - reward = time_step.reward or 0. - terminated = False - truncated = time_step.last() and time_step.discount > 0 - obs = self._get_obs(time_step) - extra['discount'] = time_step.discount - - return obs, reward, terminated, truncated, extra - - def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ - -> Tuple[ObsType, Dict[str, Any]]: - time_step = self._env.reset() - obs = self._get_obs(time_step) - return obs, {} - - def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False, - segmentation=False, scene_option=None, render_flag_overrides=None): - - # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode - if mode == "rgb_array": - return self._env.physics.render(height=height, width=width, camera_id=camera_id, overlays=overlays, - depth=depth, segmentation=segmentation, scene_option=scene_option, - render_flag_overrides=render_flag_overrides) - - # Render max available buffer size. Larger is only possible by altering the XML. - img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight, - width=self._env.physics.model.vis.global_.offwidth, - camera_id=camera_id, overlays=overlays, depth=depth, segmentation=segmentation, - scene_option=scene_option, render_flag_overrides=render_flag_overrides) - - if depth: - img = np.dstack([img.astype(np.uint8)] * 3) - - if mode == 'human': - try: - import cv2 - if self._window is None: - self._window = cv2.namedWindow(self.id, cv2.WINDOW_AUTOSIZE) - cv2.imshow(self.id, img[..., ::-1]) # Image in BGR - cv2.waitKey(1) - except ImportError: - raise gym.error.DependencyNotInstalled("Rendering requires opencv. 
Run `pip install opencv-python`") - # PYGAME seems to destroy some global rendering configs from the physics render - # except ImportError: - # import pygame - # img_copy = img.copy().transpose((1, 0, 2)) - # if self._window is None: - # pygame.init() - # pygame.display.init() - # self._window = pygame.display.set_mode(img_copy.shape[:2]) - # self.clock = pygame.time.Clock() - # - # surf = pygame.surfarray.make_surface(img_copy) - # self._window.blit(surf, (0, 0)) - # pygame.event.pump() - # self.clock.tick(30) - # pygame.display.flip() - - def close(self): - super().close() - if self._window is not None: - try: - import cv2 - cv2.destroyWindow(self.id) - except ImportError: - import pygame - - pygame.display.quit() - pygame.quit() - - @property - def reward_range(self) -> Tuple[float, float]: - reward_spec = self._env.reward_spec() - if isinstance(reward_spec, specs.BoundedArray): - return reward_spec.minimum, reward_spec.maximum - return -float('inf'), float('inf') - - @property - def metadata(self): - return {'render.modes': ['human', 'rgb_array'], - 'video.frames_per_second': round(1.0 / self._env.control_timestep())} From edd8c75ed34d16ab49bd31134dfb64e9688f1139 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 23 Jul 2023 15:26:11 +0200 Subject: [PATCH 102/198] Consistent quotation marks --- fancy_gym/envs/registry.py | 60 +++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index ff9775b..ee52acc 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -35,69 +35,69 @@ class DefaultMPWrapper(RawInterfaceWrapper): _BB_DEFAULTS = { 'ProMP': { - "wrappers": [], - "trajectory_generator_kwargs": { + 'wrappers': [], + 'trajectory_generator_kwargs': { 'trajectory_generator_type': 'promp' }, - "phase_generator_kwargs": { + 'phase_generator_kwargs': { 'phase_generator_type': 'linear' }, - "controller_kwargs": { + 'controller_kwargs': { 'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1, + 'p_gains': 1.0, + 'd_gains': 0.1, }, - "basis_generator_kwargs": { + 'basis_generator_kwargs': { 'basis_generator_type': 'zero_rbf', 'num_basis': 5, 'num_basis_zero_start': 1, 'basis_bandwidth_factor': 3.0, }, - "black_box_kwargs": { + 'black_box_kwargs': { } }, 'DMP': { - "wrappers": [], - "trajectory_generator_kwargs": { + 'wrappers': [], + 'trajectory_generator_kwargs': { 'trajectory_generator_type': 'dmp' }, - "phase_generator_kwargs": { + 'phase_generator_kwargs': { 'phase_generator_type': 'exp' }, - "controller_kwargs": { + 'controller_kwargs': { 'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1, + 'p_gains': 1.0, + 'd_gains': 0.1, }, - "basis_generator_kwargs": { + 'basis_generator_kwargs': { 'basis_generator_type': 'rbf', 'num_basis': 5 }, - "black_box_kwargs": { + 'black_box_kwargs': { } }, 'ProDMP': { - "wrappers": [], - "trajectory_generator_kwargs": { + 'wrappers': [], + 'trajectory_generator_kwargs': { 'trajectory_generator_type': 'prodmp', 'duration': 2.0, 'weights_scale': 1.0, }, - "phase_generator_kwargs": { + 'phase_generator_kwargs': { 'phase_generator_type': 'exp', 'tau': 1.5, }, - "controller_kwargs": { + 'controller_kwargs': { 'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1, + 'p_gains': 1.0, + 'd_gains': 0.1, }, - "basis_generator_kwargs": { + 'basis_generator_kwargs': { 'basis_generator_type': 'prodmp', 'alpha': 10, 'num_basis': 5, }, - "black_box_kwargs": { + 'black_box_kwargs': { } } } @@ -116,7 +116,7 @@ def 
register(
     **kwargs
 ):
     if not callable(mp_wrapper):  # mp_wrapper can be given as a String (same notation as for entry_point)
-        mod_name, attr_name = mp_wrapper.split(":")
+        mod_name, attr_name = mp_wrapper.split(':')
         mod = importlib.import_module(mod_name)
         mp_wrapper = getattr(mod, attr_name)
     if register_step_based:
@@ -162,13 +162,13 @@ def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}
     nested_update(config, _mp_config_override_register)
     nested_update(config, mp_config_override)
 
-    wrappers = config.pop("wrappers")
+    wrappers = config.pop('wrappers')
 
-    traj_gen_kwargs = config.pop("trajectory_generator_kwargs", {})
+    traj_gen_kwargs = config.pop('trajectory_generator_kwargs', {})
     black_box_kwargs = config.pop('black_box_kwargs', {})
-    contr_kwargs = config.pop("controller_kwargs", {})
+    contr_kwargs = config.pop('controller_kwargs', {})
-    phase_kwargs = config.pop("phase_generator_kwargs", {})
+    phase_kwargs = config.pop('phase_generator_kwargs', {})
-    basis_kwargs = config.pop("basis_generator_kwargs", {})
+    basis_kwargs = config.pop('basis_generator_kwargs', {})
 
     return make_bb(underlying_env,
                    wrappers=wrappers,

From bb20bb53ec0c1a69d61fd3d28b2fadad252044f9 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 24 Jul 2023 10:30:34 +0200
Subject: [PATCH 103/198] entry_point does not have to be given when skipping
 step-based registration

---
 fancy_gym/envs/registry.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index ee52acc..7580e47 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -108,13 +108,15 @@ def register(
     id,
-    entry_point,
+    entry_point=None,
     mp_wrapper=DefaultMPWrapper,
     register_step_based=True,  # TODO: Detect
     add_mp_types=KNOWN_MPS,
     mp_config_override={},
     **kwargs
 ):
+    if register_step_based:
+        assert entry_point != None, 'You need to provide an entry-point, when registering step-based.'
     if not callable(mp_wrapper):  # mp_wrapper can be given as a String (same notation as for entry_point)
         mod_name, attr_name = mp_wrapper.split(':')
         mod = importlib.import_module(mod_name)
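A short sketch of the two registration modes after this change (illustrative only; CustomEnv and MyMPWrapper are hypothetical user classes, not part of fancy_gym):

    from fancy_gym import register

    # step-based env plus MP variants -- entry_point is mandatory here
    register(id='custom/MyEnv-v0', entry_point=CustomEnv,
             mp_wrapper=MyMPWrapper, max_episode_steps=200)

    # MP variants only -- no entry_point needed, but the underlying id
    # must already be resolvable by gymnasium.make
    register(id='SomeExternalEnv-v0', register_step_based=False,
             mp_wrapper=MyMPWrapper)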
From 2fc44667c6bfc9948be1e00e22c938fd9b39e90f Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 24 Jul 2023 10:33:00 +0200
Subject: [PATCH 104/198] fancy_gym.upgrade allows adding mp-versions for
 already registered envs

---
 fancy_gym/__init__.py      |  2 +-
 fancy_gym/envs/registry.py | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
index 32308fa..77f245f 100644
--- a/fancy_gym/__init__.py
+++ b/fancy_gym/__init__.py
@@ -1,6 +1,6 @@
 from fancy_gym import dmc, meta, open_ai
 from fancy_gym.utils.make_env_helpers import make_bb
-from .envs.registry import register
+from .envs.registry import register, upgrade
 from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 # Convenience function for all MP environments
 from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 7580e47..3696fed 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -126,6 +126,24 @@ def register(
     register_mps(id, mp_wrapper, add_mp_types, mp_config_override)
 
 
+def upgrade(
+    id,
+    mp_wrapper=DefaultMPWrapper,
+    add_mp_types=KNOWN_MPS,
+    mp_config_override={},
+    **kwargs
+):
+    register(
+        id,
+        entry_point=None,
+        mp_wrapper=mp_wrapper,
+        register_step_based=False,
+        add_mp_types=add_mp_types,
+        mp_config_override={},
+        **kwargs
+    )
+
+
 def register_mps(id, mp_wrapper, add_mp_types=KNOWN_MPS, mp_config_override={}):
     for mp_type in add_mp_types:
         register_mp(id, mp_wrapper, mp_type, mp_config_override.get(mp_type, {}))
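A matching sketch for the new helper (illustrative only; 'Reacher-v2' just stands for any id some other package already registered with gymnasium, and MyMPWrapper is a hypothetical RawInterfaceWrapper subclass):

    import fancy_gym

    # adds e.g. 'Reacher-ProMP-v2' on top of the existing step-based 'Reacher-v2'
    fancy_gym.upgrade(id='Reacher-v2', mp_wrapper=MyMPWrapper)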
From ae1033a18c4697ecc2c54d1f2c156837b11f2a48 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 29 Jul 2023 11:26:48 +0200
Subject: [PATCH 105/198] Remember mp-envs for each ns separately (replicate
 legacy functionality)

---
 fancy_gym/envs/registry.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 3696fed..9cf2135 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -3,14 +3,15 @@ from typing import Tuple, Union
 import copy
 import importlib
 import numpy as np
+from collections import defaultdict
+
 from fancy_gym.utils.make_env_helpers import make_bb
 from fancy_gym.utils.utils import nested_update
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 
 from gymnasium import register as gym_register
 from gymnasium import make as gym_make
 
-from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
 
 class DefaultMPWrapper(RawInterfaceWrapper):
     @property
@@ -104,6 +105,7 @@ _BB_DEFAULTS = {
 
 KNOWN_MPS = list(_BB_DEFAULTS.keys())
 ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {mp_type: [] for mp_type in KNOWN_MPS}
+FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS = defaultdict(lambda: {mp_type: [] for mp_type in KNOWN_MPS})
 
 
 def register(
@@ -152,9 +154,19 @@ def register_mps(id, mp_wrapper, add_mp_types=KNOWN_MPS, mp_config_override={}):
 def register_mp(id, mp_wrapper, mp_type, mp_config_override={}):
     assert mp_type in KNOWN_MPS, 'Unknown mp_type'
     assert id not in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.'
+
+    parts = id.split('/')
+    if len(parts) == 1:
+        ns, name = 'root', parts[0]
+    elif len(parts) == 2:
+        ns, name = parts[0], parts[1]
+    else:
+        raise ValueError('env id can not contain multiple "/".')
+
     parts = id.split('-')
     assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.'
     fancy_id = '-'.join(parts[:-1]+[mp_type, parts[-1]])
+
     gym_register(
         id=fancy_id,
         entry_point=bb_env_constructor,
         kwargs={
             '_mp_config_override_register': mp_config_override
         }
     )
+
     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id)
+    FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns][mp_type].append(fancy_id)
 
 
 def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, _mp_config_override_register={}, **kwargs):

From 6b07705a884c958bd14b589f8439d1833eaf77e0 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 29 Jul 2023 11:27:58 +0200
Subject: [PATCH 106/198] Fix: fancy_gym.upgrade ignored mp_config_override-arg

---
 fancy_gym/envs/registry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 9cf2135..862f24d 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -141,7 +141,7 @@ def upgrade(
         mp_wrapper=mp_wrapper,
         register_step_based=False,
         add_mp_types=add_mp_types,
-        mp_config_override={},
+        mp_config_override=mp_config_override,
         **kwargs
     )

From d48836368c816e3ca94a6ade7ec321100bd2d950 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 29 Jul 2023 11:37:03 +0200
Subject: [PATCH 107/198] Print warning when trying to register already
 registered step-based env

---
 fancy_gym/envs/registry.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 862f24d..41419ee 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -11,6 +11,7 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 
 from gymnasium import register as gym_register
 from gymnasium import make as gym_make
+from gymnasium.envs.registration import registry as gym_registry
 
 
 class DefaultMPWrapper(RawInterfaceWrapper):
@@ -117,6 +118,8 @@ def register(
     mp_config_override={},
     **kwargs
 ):
+    if register_step_based and id in gym_registry:
+        print(f'[Info] Gymnasium env with id "{id}" already exists. You should supply register_step_based=False or use fancy_gym.upgrade if you only want to register mp versions of an existing env.')
     if register_step_based:
         assert entry_point != None, 'You need to provide an entry-point, when registering step-based.'
if not callable(mp_wrapper): # mp_wrapper can be given as a String (same notation as for entry_point) From e7436630180f9eb7d7c83b2d76c944c4f2f8869d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 30 Jul 2023 17:41:44 +0200 Subject: [PATCH 108/198] Ported dmc envs to mp-config --- fancy_gym/dmc/__init__.py | 248 +++--------------- .../dmc/manipulation/reach_site/mp_wrapper.py | 24 ++ fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py | 19 ++ fancy_gym/dmc/suite/cartpole/mp_wrapper.py | 24 ++ fancy_gym/dmc/suite/reacher/mp_wrapper.py | 20 ++ 5 files changed, 118 insertions(+), 217 deletions(-) diff --git a/fancy_gym/dmc/__init__.py b/fancy_gym/dmc/__init__.py index 5d7466c..28e1a0a 100644 --- a/fancy_gym/dmc/__init__.py +++ b/fancy_gym/dmc/__init__.py @@ -1,247 +1,61 @@ from copy import deepcopy from gymnasium.wrappers import FlattenObservation +from gymnasium.envs.registration import register + +from ..envs.registry import register from . import manipulation, suite -ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} - -from gymnasium.envs.registration import register - -DEFAULT_BB_DICT_ProMP = { - "name": 'EnvName', - "wrappers": [FlattenObservation], - "trajectory_generator_kwargs": { - 'trajectory_generator_type': 'promp' - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear' - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": 50., - "d_gains": 1., - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } -} - -DEFAULT_BB_DICT_DMP = { - "name": 'EnvName', - "wrappers": [FlattenObservation], - "trajectory_generator_kwargs": { - 'trajectory_generator_type': 'dmp' - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'exp' - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": 50., - "d_gains": 1., - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'rbf', - 'num_basis': 5 - } -} - # DeepMind Control Suite (DMC) -kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_bic_dmp['name'] = f"dm_control/ball_in_cup-catch-v0" -kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper) -# bandwidth_factor=2 -kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1 register( - id=f'dmc_ball_in_cup-catch_dmp-v0', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_bic_dmp + id=f"dm_control/ball_in_cup-catch-v0", + register_step_based=False, + mp_wrapper=suite.ball_in_cup.MPWrapper, + add_mp_types=['DMP', 'ProMP'], ) -ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0") -kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_bic_promp['name'] = f"dm_control/ball_in_cup-catch-v0" -kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper) register( - id=f'dmc_ball_in_cup-catch_promp-v0', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_bic_promp + id=f"dm_control/reacher-easy-v0", + register_step_based=False, + mp_wrapper=suite.reacher.MPWrapper, + add_mp_types=['DMP', 'ProMP'], ) -ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0") -kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_easy_dmp['name'] = f"dm_control/reacher-easy-v0" 
-kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper) -# bandwidth_factor=2 -kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -# TODO: weight scale 50, but goal scale 0.1 -kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 register( - id=f'dmc_reacher-easy_dmp-v0', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_bic_dmp + id=f"dm_control/reacher-hard-v0", + register_step_based=False, + mp_wrapper=suite.reacher.MPWrapper, + add_mp_types=['DMP', 'ProMP'], ) -ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0") - -kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_easy_promp['name'] = f"dm_control/reacher-easy-v0" -kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper) -kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 -register( - id=f'dmc_reacher-easy_promp-v0', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_reacher_easy_promp -) -ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0") - -kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_hard_dmp['name'] = f"dm_control/reacher-hard-v0" -kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper) -# bandwidth_factor = 2 -kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -# TODO: weight scale 50, but goal scale 0.1 -kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 -register( - id=f'dmc_reacher-hard_dmp-v0', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_reacher_hard_dmp -) -ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0") - -kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_hard_promp['name'] = f"dm_control/reacher-hard-v0" -kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper) -kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 -register( - id=f'dmc_reacher-hard_promp-v0', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_reacher_hard_promp -) -ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0") _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"] - for _task in _dmc_cartpole_tasks: - _env_id = f'dmc_cartpole-{_task}_dmp-v0' - kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_cartpole_dmp['name'] = f"dm_control/cartpole-{_task}-v0" - kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper) - # bandwidth_factor = 2 - kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2 - # TODO: weight scale 50, but goal scale 0.1 - kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 - kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10 - kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10 register( - id=_env_id, - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_cartpole_dmp + id=f'dmc_cartpole-{_task}_dmp-v0', + register_step_based=False, + mp_wrapper=suite.cartpole.MPWrapper, + add_mp_types=['DMP', 'ProMP'], ) - ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - _env_id = f'dmc_cartpole-{_task}_promp-v0' - kwargs_dict_cartpole_promp = 
deepcopy(DEFAULT_BB_DICT_DMP)
-    kwargs_dict_cartpole_promp['name'] = f"dm_control/cartpole-{_task}-v0"
-    kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper)
-    kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10
-    kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10
-    kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
-    register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_cartpole_promp
-    )
-    ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole2poles_dmp['name'] = f"dm_control/cartpole-two_poles-v0"
-kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
-# bandwidth_factor = 2
-kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-# TODO: weight scale 50, but goal scale 0.1
-kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
-kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10
-kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
-_env_id = f'dmc_cartpole-two_poles_dmp-v0'
 register(
-    id=_env_id,
-    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-    kwargs=kwargs_dict_cartpole2poles_dmp
+    id=f"dm_control/cartpole-two_poles-v0",
+    register_step_based=False,
+    mp_wrapper=suite.cartpole.TwoPolesMPWrapper,
+    add_mp_types=['DMP', 'ProMP'],
 )
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole2poles_promp['name'] = f"dm_control/cartpole-two_poles-v0"
-kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
-kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10
-kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10
-kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
-_env_id = f'dmc_cartpole-two_poles_promp-v0'
 register(
-    id=_env_id,
-    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-    kwargs=kwargs_dict_cartpole2poles_promp
+    id=f"dm_control/cartpole-three_poles-v0",
+    register_step_based=False,
+    mp_wrapper=suite.cartpole.ThreePolesMPWrapper,
+    add_mp_types=['DMP', 'ProMP'],
 )
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole3poles_dmp['name'] = f"dm_control/cartpole-three_poles-v0"
-kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
-# bandwidth_factor = 2
-kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-# TODO: weight scale 50, but goal scale 0.1
-kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
-kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10
-kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
-_env_id = f'dmc_cartpole-three_poles_dmp-v0'
-register(
-    id=_env_id,
-    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-    kwargs=kwargs_dict_cartpole3poles_dmp
-)
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
-
-kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole3poles_promp['name'] = f"dm_control/cartpole-three_poles-v0"
-kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
-kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10
-kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10
-kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
-_env_id = f'dmc_cartpole-three_poles_promp-v0'
-register(
-    id=_env_id,
-    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-    kwargs=kwargs_dict_cartpole3poles_promp
-)
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 # DeepMind Manipulation
-kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_mani_reach_site_features_dmp['name'] = f"dm_control/reach_site_features-v0"
-kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper)
-kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-# TODO: weight scale 50, but goal scale 0.1
-kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
-kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
 register(
-    id=f'dmc_manipulation-reach_site_dmp-v0',
-    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-    kwargs=kwargs_dict_mani_reach_site_features_dmp
+    id=f"dm_control/reach_site_features-v0",
+    register_step_based=False,
+    mp_wrapper=manipulation.reach_site.MPWrapper,
+    add_mp_types=['DMP', 'ProMP'],
 )
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
-
-kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_mani_reach_site_features_promp['name'] = f"dm_control/reach_site_features-v0"
-kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper)
-kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
-kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
-register(
-    id=f'dmc_manipulation-reach_site_promp-v0',
-    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-    kwargs=kwargs_dict_mani_reach_site_features_promp
-)
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
diff --git a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py
index 908cee1..bc3445a 100644
--- a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py
+++ b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py
@@ -6,6 +6,30 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 
 
 class MPWrapper(RawInterfaceWrapper):
+    mp_config = {
+        'ProMP': {
+            'controller_kwargs': {
+                'controller_type': 'velocity',
+                'p_gains': 50.0,
+            },
+            'trajectory_generator_kwargs': {
+                'weight_scale': 0.2,
+            },
+        },
+        'DMP': {
+            'controller_kwargs': {
+                'controller_type': 'velocity',
+                'p_gains': 50.0,
+            },
+            'phase_generator': {
+                'alpha_phase': 2,
+            },
+            'trajectory_generator_kwargs': {
+                'weight_scale': 500,
+            },
+        },
+        'ProDMP': {},
+    }
 
     @property
     def context_mask(self) -> np.ndarray:
diff --git a/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py b/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py
index 94f9041..aef9896 100644
--- a/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py
+++ b/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py
@@ -6,6 +6,25 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 
 
 class MPWrapper(RawInterfaceWrapper):
+    mp_config = {
+        'ProMP': {
+            'controller_kwargs': {
+                'p_gains': 50.0,
+            },
+        },
+        'DMP': {
+            'controller_kwargs': {
+                'p_gains': 50.0,
+            },
+            'phase_generator': {
+                'alpha_phase': 2,
+            },
+            'trajectory_generator_kwargs': {
+                'weight_scale': 10
+            },
+        },
+        'ProDMP': {},
+    }
 
     @property
     def context_mask(self) -> np.ndarray:
diff --git a/fancy_gym/dmc/suite/cartpole/mp_wrapper.py b/fancy_gym/dmc/suite/cartpole/mp_wrapper.py
index 85afa83..9373cf2 100644
--- a/fancy_gym/dmc/suite/cartpole/mp_wrapper.py
+++ b/fancy_gym/dmc/suite/cartpole/mp_wrapper.py
@@ -6,6 +6,30 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 
 
 class MPWrapper(RawInterfaceWrapper):
+    mp_config = {
+        'ProMP': {
+            'controller_kwargs': {
+                'p_gains': 10,
+                'd_gains': 10,
+            },
+            'trajectory_generator_kwargs': {
+                'weight_scale': 0.2,
+            },
+        },
+        'DMP': {
+            'controller_kwargs': {
+                'p_gains': 10,
+                'd_gains': 10,
+            },
+            'phase_generator': {
+                'alpha_phase': 2,
+            },
+            'trajectory_generator_kwargs': {
+                'weight_scale': 500,
+            },
+        },
+        'ProDMP': {},
+    }
 
     def __init__(self, env, n_poles: int = 1):
         self.n_poles = n_poles
diff --git a/fancy_gym/dmc/suite/reacher/mp_wrapper.py b/fancy_gym/dmc/suite/reacher/mp_wrapper.py
index 2d0aee5..5fcf5a7 100644
--- a/fancy_gym/dmc/suite/reacher/mp_wrapper.py
+++ b/fancy_gym/dmc/suite/reacher/mp_wrapper.py
@@ -6,6 +6,26 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 
 
 class MPWrapper(RawInterfaceWrapper):
+    mp_config = {
+        'ProMP': {
+            'controller_kwargs': {
+                'p_gains': 50.0,
+                'weight_scale': 0.2,
+            },
+        },
+        'DMP': {
+            'controller_kwargs': {
+                'p_gains': 50.0,
+            },
+            'phase_generator': {
+                'alpha_phase': 2,
+            },
+            'trajectory_generator_kwargs': {
+                'weight_scale': 500,
+            },
+        },
+        'ProDMP': {},
+    }
 
     @property
     def context_mask(self) -> np.ndarray:

From e63a0a50df1748e24310daca669f17bc43303c8d Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 17:42:27 +0200
Subject: [PATCH 109/198] Ported metaworld to mp-config

---
 fancy_gym/meta/__init__.py                  | 172 ++------------------
 fancy_gym/meta/base_metaworld_mp_wrapper.py |  55 ++++++-
 2 files changed, 70 insertions(+), 157 deletions(-)

diff --git a/fancy_gym/meta/__init__.py b/fancy_gym/meta/__init__.py
index ec07a45..78ec73c 100644
--- a/fancy_gym/meta/__init__.py
+++ b/fancy_gym/meta/__init__.py
@@ -2,7 +2,7 @@ from typing import Iterable, Type, Union, Optional
 
 from copy import deepcopy
 
-from gymnasium import register
+from ..envs.registry import register
 
 from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \
     object_change_mp_wrapper
@@ -14,118 +14,24 @@ metaworld_adapter.register_all_ML1()
 
 ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []}
 
 # MetaWorld
-
-DEFAULT_BB_DICT_ProMP = {
-    "name": 'EnvName',
-    "wrappers": [],
-    "trajectory_generator_kwargs": {
-        'trajectory_generator_type': 'promp',
-        'weights_scale': 10,
-    },
-    "phase_generator_kwargs": {
-        'phase_generator_type': 'linear'
-    },
-    "controller_kwargs": {
-        'controller_type': 'metaworld',
-    },
-    "basis_generator_kwargs": {
-        'basis_generator_type': 'zero_rbf',
-        'num_basis': 5,
-        'num_basis_zero_start': 1
-    },
-    'black_box_kwargs': {
-        'condition_on_desired': False,
-    }
-}
-
-DEFAULT_BB_DICT_ProDMP = {
-    "name": 'EnvName',
-    "wrappers": [],
-    "trajectory_generator_kwargs": {
-        'trajectory_generator_type': 'prodmp',
-        'auto_scale_basis': True,
-        'weights_scale': 10,
-        # 'goal_scale': 0.,
-        'disable_goal': True,
-    },
-    "phase_generator_kwargs": {
-        'phase_generator_type': 'exp',
-        # 'alpha_phase' : 3,
-    },
-    "controller_kwargs": {
-        'controller_type': 'metaworld',
-    },
-    "basis_generator_kwargs": {
-        'basis_generator_type': 'prodmp',
-        'num_basis': 5,
-        'alpha': 10
-    },
-    'black_box_kwargs': {
-        'condition_on_desired': False,
-    }
-
-}
-
 _goal_change_envs = ["assembly-v2", "pick-out-of-hole-v2", "plate-slide-v2", "plate-slide-back-v2",
                      "plate-slide-side-v2", "plate-slide-back-side-v2"]
 for _task in _goal_change_envs:
-    task_id_split = _task.split("-")
-    name = "".join([s.capitalize() for s in task_id_split[:-1]])
-
-    # ProMP
-    _env_id = f'{name}ProMP-{task_id_split[-1]}'
-    kwargs_dict_goal_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
-    kwargs_dict_goal_change_promp['wrappers'].append(goal_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_change_promp['name'] = f'metaworld:{_task}'
-
     register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_goal_change_promp
+        id=f'metaworld/{_task}',
+        register_step_based=False,
+        mp_wrapper=goal_change_mp_wrapper.MPWrapper,
+        add_mp_types=['ProMP', 'ProDMP'],
     )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-    # ProDMP
-    _env_id = f'{name}ProDMP-{task_id_split[-1]}'
-    kwargs_dict_goal_change_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
-    kwargs_dict_goal_change_prodmp['wrappers'].append(goal_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_change_prodmp['name'] = f'metaworld:{_task}'
-
-    register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_goal_change_prodmp
-    )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
 
 _object_change_envs = ["bin-picking-v2", "hammer-v2", "sweep-into-v2"]
 for _task in _object_change_envs:
-    task_id_split = _task.split("-")
-    name = "".join([s.capitalize() for s in task_id_split[:-1]])
-
-    # ProMP
-    _env_id = f'{name}ProMP-{task_id_split[-1]}'
-    kwargs_dict_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
-    kwargs_dict_object_change_promp['wrappers'].append(object_change_mp_wrapper.MPWrapper)
-    kwargs_dict_object_change_promp['name'] = f'metaworld:{_task}'
     register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_object_change_promp
+        id=f'metaworld/{_task}',
+        register_step_based=False,
+        mp_wrapper=object_change_mp_wrapper.MPWrapper,
+        add_mp_types=['ProMP', 'ProDMP'],
    )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-    # ProDMP
-    _env_id = f'{name}ProDMP-{task_id_split[-1]}'
-    kwargs_dict_object_change_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
-    kwargs_dict_object_change_prodmp['wrappers'].append(object_change_mp_wrapper.MPWrapper)
-    kwargs_dict_object_change_prodmp['name'] = f'metaworld:{_task}'
-    register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_object_change_prodmp
-    )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
 
 _goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press-wall-v2", "button-press-topdown-v2",
                                 "button-press-topdown-wall-v2", "coffee-button-v2", "coffee-pull-v2",
@@ -139,62 +45,18 @@ _goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press
                                 "shelf-place-v2", "sweep-v2", "window-open-v2", "window-close-v2"
                                 ]
 for _task in _goal_and_object_change_envs:
-    task_id_split = _task.split("-")
-    name = "".join([s.capitalize() for s in task_id_split[:-1]])
-
-    # ProMP
-    _env_id = f'{name}ProMP-{task_id_split[-1]}'
-    kwargs_dict_goal_and_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
-    kwargs_dict_goal_and_object_change_promp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_and_object_change_promp['name'] = f'metaworld:{_task}'
     register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_goal_and_object_change_promp
+        id=f'metaworld/{_task}',
+        register_step_based=False,
+        mp_wrapper=goal_object_change_mp_wrapper.MPWrapper,
+        add_mp_types=['ProMP', 'ProDMP'],
     )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-    # ProDMP
-    _env_id = f'{name}ProDMP-{task_id_split[-1]}'
-    kwargs_dict_goal_and_object_change_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
-    kwargs_dict_goal_and_object_change_prodmp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_and_object_change_prodmp['name'] = f'metaworld:{_task}'
-
-    register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_goal_and_object_change_prodmp
-    )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
 
 _goal_and_endeffector_change_envs = ["basketball-v2"]
 for _task in _goal_and_endeffector_change_envs:
-    task_id_split = _task.split("-")
-    name = "".join([s.capitalize() for s in task_id_split[:-1]])
-
-    # ProMP
-    _env_id = f'{name}ProMP-{task_id_split[-1]}'
-    kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
-    kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_and_endeffector_change_promp['name'] = f'metaworld:{_task}'
     register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_goal_and_endeffector_change_promp
+        id=f'metaworld/{_task}',
+        register_step_based=False,
+        mp_wrapper=goal_endeffector_change_mp_wrapper.MPWrapper,
+        add_mp_types=['ProMP', 'ProDMP'],
     )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
-
-    # ProDMP
-    _env_id = f'{name}ProDMP-{task_id_split[-1]}'
-    kwargs_dict_goal_and_endeffector_change_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
-    kwargs_dict_goal_and_endeffector_change_prodmp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_and_endeffector_change_prodmp['name'] = f'metaworld:{_task}'
-
-    register(
-        id=_env_id,
-        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_goal_and_endeffector_change_prodmp
-    )
-    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
diff --git a/fancy_gym/meta/base_metaworld_mp_wrapper.py b/fancy_gym/meta/base_metaworld_mp_wrapper.py
index 0f1a9a9..12338fd 100644
--- a/fancy_gym/meta/base_metaworld_mp_wrapper.py
+++ b/fancy_gym/meta/base_metaworld_mp_wrapper.py
@@ -6,12 +6,63 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 
 
 class BaseMetaworldMPWrapper(RawInterfaceWrapper):
+    mp_config = {
+        'inherit_defaults': False,
+        'ProMP': {
+            'wrappers': [],
+            'trajectory_generator_kwargs': {
+                'trajectory_generator_type': 'promp',
+                'weights_scale': 10,
+            },
+            'phase_generator_kwargs': {
+                'phase_generator_type': 'linear'
+            },
+            'controller_kwargs': {
+                'controller_type': 'metaworld',
+            },
+            'basis_generator_kwargs': {
+                'basis_generator_type': 'zero_rbf',
+                'num_basis': 5,
+                'num_basis_zero_start': 1
+            },
+            'black_box_kwargs': {
+                'condition_on_desired': False,
+            },
+        },
+        'DMP': {},
+        'ProDMP': {
+            'wrappers': [],
+            'trajectory_generator_kwargs': {
+                'trajectory_generator_type': 'prodmp',
+                'auto_scale_basis': True,
+                'weights_scale': 10,
+                # 'goal_scale': 0.,
+                'disable_goal': True,
+            },
+            'phase_generator_kwargs': {
+                'phase_generator_type': 'exp',
+                # 'alpha_phase' : 3,
+            },
+            'controller_kwargs': {
+                'controller_type': 'metaworld',
+            },
+            'basis_generator_kwargs': {
+                'basis_generator_type': 'prodmp',
+                'num_basis': 5,
+                'alpha': 10
+            },
+            'black_box_kwargs': {
+                'condition_on_desired': False,
+            },
+        },
+    }
+
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        r_close = self.env.data.get_joint_qpos("r_close")
+        r_close = self.env.data.get_joint_qpos('r_close')
         return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close])
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
         return np.zeros(4, )
-        # raise NotImplementedError("Velocity cannot be retrieved.")
+        # raise NotImplementedError('Velocity cannot be retrieved.')

From bc3f540daf0d8b1c24c0026a76645cc25db1b652 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 17:51:37 +0200
Subject: [PATCH 110/198] weight_scale -> weights_scale

---
 fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py          | 4 ++--
 fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py                | 2 +-
 fancy_gym/dmc/suite/cartpole/mp_wrapper.py                   | 4 ++--
 fancy_gym/dmc/suite/reacher/mp_wrapper.py                    | 4 ++--
 fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py    | 4 ++--
 fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py  | 2 +-
 .../envs/classic_control/viapoint_reacher/mp_wrapper.py      | 2 +-
 fancy_gym/examples/example_replanning_envs.py                | 6 ++++--
 fancy_gym/examples/examples_movement_primitives.py           | 4 ++--
 9 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py
index bc3445a..fbfd592 100644
--- a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py
+++ b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py
@@ -13,7 +13,7 @@ class MPWrapper(RawInterfaceWrapper):
                 'p_gains': 50.0,
             },
             'trajectory_generator_kwargs': {
-                'weight_scale': 0.2,
+                'weights_scale': 0.2,
             },
         },
         'DMP': {
@@ -25,7 +25,7 @@ class MPWrapper(RawInterfaceWrapper):
                 'alpha_phase': 2,
             },
             'trajectory_generator_kwargs': {
-                'weight_scale': 500,
+                'weights_scale': 500,
             },
         },
         'ProDMP': {},
diff --git a/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py b/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py
index aef9896..4441fb0 100644
--- a/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py
+++ b/fancy_gym/dmc/suite/ball_in_cup/mp_wrapper.py
@@ -20,7 +20,7 @@ class MPWrapper(RawInterfaceWrapper):
                 'alpha_phase': 2,
             },
             'trajectory_generator_kwargs': {
-                'weight_scale': 10
+                'weights_scale': 10
             },
         },
         'ProDMP': {},
diff --git a/fancy_gym/dmc/suite/cartpole/mp_wrapper.py b/fancy_gym/dmc/suite/cartpole/mp_wrapper.py
index 9373cf2..d4c8dcc 100644
--- a/fancy_gym/dmc/suite/cartpole/mp_wrapper.py
+++ b/fancy_gym/dmc/suite/cartpole/mp_wrapper.py
@@ -13,7 +13,7 @@ class MPWrapper(RawInterfaceWrapper):
                 'd_gains': 10,
             },
             'trajectory_generator_kwargs': {
-                'weight_scale': 0.2,
+                'weights_scale': 0.2,
             },
         },
         'DMP': {
@@ -25,7 +25,7 @@ class MPWrapper(RawInterfaceWrapper):
                 'alpha_phase': 2,
             },
             'trajectory_generator_kwargs': {
-                'weight_scale': 500,
+                'weights_scale': 500,
             },
         },
         'ProDMP': {},
diff --git a/fancy_gym/dmc/suite/reacher/mp_wrapper.py b/fancy_gym/dmc/suite/reacher/mp_wrapper.py
index 5fcf5a7..8741d91 100644
--- a/fancy_gym/dmc/suite/reacher/mp_wrapper.py
+++ b/fancy_gym/dmc/suite/reacher/mp_wrapper.py
@@ -10,7 +10,7 @@ class MPWrapper(RawInterfaceWrapper):
         'ProMP': {
             'controller_kwargs': {
                 'p_gains': 50.0,
-                'weight_scale': 0.2,
+                'weights_scale': 0.2,
             },
         },
         'DMP': {
@@ -21,7 +21,7 @@ class MPWrapper(RawInterfaceWrapper):
                 'alpha_phase': 2,
             },
             'trajectory_generator_kwargs': {
-                'weight_scale': 500,
+                'weights_scale': 500,
             },
         },
         'ProDMP': {},
diff --git a/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py b/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py
index c8e6dcc..4c56f87 100644
--- a/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py
+++ b/fancy_gym/envs/classic_control/hole_reacher/mp_wrapper.py
@@ -13,7 +13,7 @@ class MPWrapper(RawInterfaceWrapper):
             'controller_type': 'velocity',
         },
         'trajectory_generator_kwargs': {
-            'weight_scale': 2,
+            'weights_scale': 2,
         },
     },
     'DMP': {
@@ -22,7 +22,7 @@ class MPWrapper(RawInterfaceWrapper):
         },
         'trajectory_generator_kwargs': {
             # TODO: Before it was weight scale 50 and goal scale 0.1. We now only have weight scale and thus set it to 500. Check
-            'weight_scale': 500,
+            'weights_scale': 500,
         },
         'phase_generator_kwargs': {
             'alpha_phase': 2.5,
diff --git a/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py b/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py
index 2ee3cd1..d2f90d5 100644
--- a/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py
+++ b/fancy_gym/envs/classic_control/simple_reacher/mp_wrapper.py
@@ -20,7 +20,7 @@ class MPWrapper(RawInterfaceWrapper):
             'd_gains': 0.075,
         },
         'trajectory_generator_kwargs': {
-            'weight_scale': 50,
+            'weights_scale': 50,
         },
         'phase_generator_kwargs': {
             'alpha_phase': 2,
diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py b/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py
index c07b651..b915ec0 100644
--- a/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py
+++ b/fancy_gym/envs/classic_control/viapoint_reacher/mp_wrapper.py
@@ -18,7 +18,7 @@ class MPWrapper(RawInterfaceWrapper):
             'controller_type': 'velocity',
         },
         'trajectory_generator_kwargs': {
-            'weight_scale': 50,
+            'weights_scale': 50,
         },
         'phase_generator_kwargs': {
             'alpha_phase': 2,
diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py
index 977ce9e..05be6ad 100644
--- a/fancy_gym/examples/example_replanning_envs.py
+++ b/fancy_gym/examples/example_replanning_envs.py
@@ -1,5 +1,6 @@
 import fancy_gym
 
+
 def example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False):
     env = fancy_gym.make(env_name, seed=seed)
     env.reset()
@@ -15,6 +16,7 @@ def example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1
     env.close()
     del env
 
+
 def example_custom_replanning_envs(seed=0, iteration=100, render=True):
     # id for a step-based environment
     base_env_id = "BoxPushingDense-v0"
@@ -22,7 +24,7 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
     wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper]
 
     trajectory_generator_kwargs = {'trajectory_generator_type': 'prodmp',
-                                   'weight_scale': 1}
+                                   'weights_scale': 1}
     phase_generator_kwargs = {'phase_generator_type': 'exp'}
     controller_kwargs = {'controller_type': 'velocity'}
     basis_generator_kwargs = {'basis_generator_type': 'prodmp',
@@ -59,4 +61,4 @@ if __name__ == "__main__":
     example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False)
 
     # run a custom replanning environment
-    example_custom_replanning_envs(seed=0, iteration=8, render=True)
\ No newline at end of file
+    example_custom_replanning_envs(seed=0, iteration=8, render=True)
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 7388b4b..5913774 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -114,7 +114,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 
     # For a ProMP
     trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
-                                   'weight_scale': 2}
+                                   'weights_scale': 2}
     phase_generator_kwargs = {'phase_generator_type': 'linear'}
     controller_kwargs = {'controller_type': 'velocity'}
     basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
@@ -124,7 +124,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 
     # # For a DMP
     # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
-    #                                'weight_scale': 500}
+    #                                'weights_scale': 500}
     # phase_generator_kwargs = {'phase_generator_type': 'exp',
    #                           'alpha_phase': 2.5}
     # controller_kwargs = {'controller_type': 'velocity'}

From 20d0be3c8d42de4aa8220813ed8d6ff7cb6f4aa5 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 17:56:28 +0200
Subject: [PATCH 111/198] Replicate legacy behavior in exporting lists of all
 mp envs

---
 fancy_gym/__init__.py      | 14 ++++++--------
 fancy_gym/envs/registry.py | 12 ++++++------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
index 77f245f..4e62ecf 100644
--- a/fancy_gym/__init__.py
+++ b/fancy_gym/__init__.py
@@ -1,12 +1,10 @@
 from fancy_gym import dmc, meta, open_ai
+from fancy_gym import envs as fancy
 from fancy_gym.utils.make_env_helpers import make_bb
 from .envs.registry import register, upgrade
-from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-# Convenience function for all MP environments
-from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
+from .envs.registry import ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS, MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS
 
-ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
-    key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
-    for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['dmc']
+ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['fancy']
+ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['metaworld']
+ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['gym']
diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 41419ee..8016dc2 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -105,8 +105,8 @@ _BB_DEFAULTS = {
 }
 
 KNOWN_MPS = list(_BB_DEFAULTS.keys())
-ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {mp_type: [] for mp_type in KNOWN_MPS}
-FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS = defaultdict(lambda: {mp_type: [] for mp_type in KNOWN_MPS})
+ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {mp_type: [] for mp_type in KNOWN_MPS}
+MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS = defaultdict(lambda: {mp_type: [] for mp_type in KNOWN_MPS})
 
 
 def register(
@@ -156,11 +156,11 @@ def register_mps(id, mp_wrapper, add_mp_types=KNOWN_MPS, mp_config_override={}):
 
 def register_mp(id, mp_wrapper, mp_type, mp_config_override={}):
     assert mp_type in KNOWN_MPS, 'Unknown mp_type'
-    assert id not in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.'
+    assert id not in ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.'
 
     parts = id.split('/')
     if len(parts) == 1:
-        ns, name = 'root', parts[0]
+        ns, name = 'gym', parts[0]
     elif len(parts) == 2:
         ns, name = parts[0], parts[1]
     else:
@@ -181,8 +181,8 @@ def register_mp(id, mp_wrapper, mp_type, mp_config_override={}):
         }
     )
 
-    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id)
-    FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns][mp_type].append(fancy_id)
+    ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id)
+    MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns][mp_type].append(fancy_id)
 
 
 def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, _mp_config_override_register={}, **kwargs):

From b4793c89bc2634428139027b09de44c77a4e5475 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 17:59:30 +0200
Subject: [PATCH 112/198] Fix: ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS no
 longer exists in fancy registry

---
 fancy_gym/envs/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index da4b98f..cabee26 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -2,7 +2,7 @@ from copy import deepcopy
 
 import numpy as np
 from gymnasium import register as gym_register
-from .registry import register, ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
+from .registry import register
 
 from . import classic_control, mujoco
 from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv

From 5519fd5a344a73ccbc6310e2a15b0cead42405ab Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 18:14:50 +0200
Subject: [PATCH 113/198] Fix: Wrong name for ns: dmc -> dm_controll

---
 fancy_gym/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
index 4e62ecf..b3a6b29 100644
--- a/fancy_gym/__init__.py
+++ b/fancy_gym/__init__.py
@@ -4,7 +4,7 @@ from fancy_gym.utils.make_env_helpers import make_bb
 from .envs.registry import register, upgrade
 from .envs.registry import ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS, MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS
 
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['dmc']
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['dm_controll']
 ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['fancy']
 ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['metaworld']
 ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['gym']

From e30b6c0e0476f8049f7f78ed9e09688aed5ba599 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 18:15:51 +0200
Subject: [PATCH 114/198] Removed old code samples from meta mp docs

---
 fancy_gym/meta/goal_change_mp_wrapper.py        | 13 -------------
 .../meta/goal_endeffector_change_mp_wrapper.py  | 13 -------------
 fancy_gym/meta/goal_object_change_mp_wrapper.py | 13 -------------
 3 files changed, 39 deletions(-)

diff --git a/fancy_gym/meta/goal_change_mp_wrapper.py b/fancy_gym/meta/goal_change_mp_wrapper.py
index a8eabb5..41cd9be 100644
--- a/fancy_gym/meta/goal_change_mp_wrapper.py
+++ b/fancy_gym/meta/goal_change_mp_wrapper.py
@@ -9,19 +9,6 @@ class MPWrapper(BaseMetaworldMPWrapper):
     and no secondary objects or end effectors are altered at the start of an episode.
     You can verify this by executing the code below for your environment id and check if the output is non-zero
     at the same indices.
-    ```python
-    import fancy_gym
-    env = fancy_gym.make(env_id, 1)
-    print(env.reset() - env.reset())
-    array([ 0. , 0. , 0. , 0. , 0,
-            0 , 0 , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0 , 0 , 0 ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , !=0 , !=0 , !=0])
-    ```
     """
 
     @property
diff --git a/fancy_gym/meta/goal_endeffector_change_mp_wrapper.py b/fancy_gym/meta/goal_endeffector_change_mp_wrapper.py
index c299597..ec89702 100644
--- a/fancy_gym/meta/goal_endeffector_change_mp_wrapper.py
+++ b/fancy_gym/meta/goal_endeffector_change_mp_wrapper.py
@@ -9,19 +9,6 @@ class MPWrapper(BaseMetaworldMPWrapper):
     and no secondary objects or end effectors are altered at the start of an episode.
     You can verify this by executing the code below for your environment id and check if the output is non-zero
     at the same indices.
-    ```python
-    import fancy_gym
-    env = fancy_gym.make(env_id, 1)
-    print(env.reset() - env.reset())
-    array([ !=0 , !=0 , !=0 , 0. , 0.,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , !=0 , !=0 ,
-            !=0 , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , !=0 , !=0 , !=0])
-    ```
     """
 
     @property
diff --git a/fancy_gym/meta/goal_object_change_mp_wrapper.py b/fancy_gym/meta/goal_object_change_mp_wrapper.py
index ae667a6..b42f142 100644
--- a/fancy_gym/meta/goal_object_change_mp_wrapper.py
+++ b/fancy_gym/meta/goal_object_change_mp_wrapper.py
@@ -9,19 +9,6 @@ class MPWrapper(BaseMetaworldMPWrapper):
     and no secondary objects or end effectors are altered at the start of an episode.
     You can verify this by executing the code below for your environment id and check if the output is non-zero
     at the same indices.
-    ```python
-    import fancy_gym
-    env = fancy_gym.make(env_id, 1)
-    print(env.reset() - env.reset())
-    array([ 0. , 0. , 0. , 0. , !=0,
-            !=0 , !=0 , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , !=0 , !=0 , !=0 ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , 0. , 0. , 0. , 0. ,
-            0. , !=0 , !=0 , !=0])
-    ```
     """
 
     @property

From 9c6d7956ce5116400ec33af2b9e2baf9a84afab0 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 18:16:19 +0200
Subject: [PATCH 115/198] Extended Tests: Ensure no known namespace is empty

---
 test/test_fancy_registry.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py
index e67f5df..961d3b6 100644
--- a/test/test_fancy_registry.py
+++ b/test/test_fancy_registry.py
@@ -10,6 +10,7 @@ import fancy_gym
 from fancy_gym import register
 
 ENV_IDS = ['fancy/Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2']
+KNOWN_NS = ['dm_controll', 'fancy', 'metaworld', 'gym']
 
 
 class Object(object):
@@ -65,3 +66,8 @@ def test_make_mp_toy(mp_type: str):
     fancy_id = '-'.join(['toy2', mp_type, 'v0'])
 
     make(fancy_id)
+
+
+@pytest.mark.parametrize('ns', KNOWN_NS)
+def test_ns_nonempty(ns):
+    assert len(fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns]), f'The namespace {ns} is empty even though, it should not be...'
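
Aside, for readers tracking the registry refactor: since [PATCH 111] the per-backend lists are just views into two shared tables. A minimal usage sketch follows (assuming a `fancy_gym` built from the patches above; note that the `'dm_control'` key here is the namespace parsed from the env id itself, not the misspelled `'dm_controll'` alias that [PATCH 118] later corrects). This block is editorial illustration, not part of any patch:

```python
# Sketch only: querying the consolidated MP registries exported by
# fancy_gym.__init__ after [PATCH 111].
import fancy_gym

# All environment ids that received a ProMP variant, across all namespaces.
promp_ids = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']

# The same information grouped by namespace, e.g. only dm_control-backed ids.
dmc_promp_ids = fancy_gym.MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['dm_control']['ProMP']

assert set(dmc_promp_ids) <= set(promp_ids)
```
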
From 9020eb92ea78a56b8174febd7da08f30c5f00df3 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 18:16:47 +0200
Subject: [PATCH 116/198] Fix: test/test_replanning_sequencing.py still
 referenced old fancy_gym.make (No longer exists as we now just use gym.make)

---
 test/test_replanning_sequencing.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index 001028e..24f7a12 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -5,7 +5,7 @@ from typing import Tuple, Type, Union, Optional
 import gymnasium as gym
 import numpy as np
 import pytest
-from gymnasium import register
+from gymnasium import register, make
 from gymnasium.core import ActType, ObsType
 
 from gymnasium import spaces
@@ -68,7 +68,7 @@ def setup():
 def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]],
                                 add_time_aware_wrapper_before: bool):
     env_id, wrapper_class = env_wrap
-    env_step = TimeAwareObservation(ensure_finite_time(fancy_gym.make(env_id, SEED), MAX_STEPS_FALLBACK))
+    env_step = TimeAwareObservation(ensure_finite_time(make(env_id, SEED), MAX_STEPS_FALLBACK))
     wrappers = [wrapper_class]
 
     # has time aware wrapper
@@ -117,7 +117,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter
 def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]],
                          add_time_aware_wrapper_before: bool, replanning_time: int):
     env_id, wrapper_class = env_wrap
-    env_step = TimeAwareObservation(ensure_finite_time(fancy_gym.make(env_id, SEED), MAX_STEPS_FALLBACK))
+    env_step = TimeAwareObservation(ensure_finite_time(make(env_id, SEED), MAX_STEPS_FALLBACK))
     wrappers = [wrapper_class]
 
     # has time aware wrapper

From fb8f81afeaceaa5653a675931592b69879ede976 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 18:26:45 +0200
Subject: [PATCH 117/198] Don't use defaultdicts for
 MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS (is ugly when exporting)

---
 fancy_gym/envs/registry.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 8016dc2..83ca1ac 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -105,8 +105,9 @@ _BB_DEFAULTS = {
 }
 
 KNOWN_MPS = list(_BB_DEFAULTS.keys())
-ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {mp_type: [] for mp_type in KNOWN_MPS}
-MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS = defaultdict(lambda: {mp_type: [] for mp_type in KNOWN_MPS})
+_KNOWN_MPS_PLUS_ALL = KNOWN_MPS + ['all']
+ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {mp_type: [] for mp_type in _KNOWN_MPS_PLUS_ALL}
+MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS = {}
 
 
 def register(
@@ -182,7 +183,11 @@ def register_mp(id, mp_wrapper, mp_type, mp_config_override={}):
     )
 
     ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type].append(fancy_id)
+    ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'].append(fancy_id)
+    if ns not in MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS:
+        MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns] = {mp_type: [] for mp_type in _KNOWN_MPS_PLUS_ALL}
     MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns][mp_type].append(fancy_id)
+    MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns]['all'].append(fancy_id)
 
 
 def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, _mp_config_override_register={}, **kwargs):

From caf483a23b7907b8371dd6bb65702381d5fdc96c Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 18:31:50 +0200
Subject: [PATCH 118/198] Fixed Typo dm_controll -> dm_control

---
 fancy_gym/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
index b3a6b29..c1155fb 100644
--- a/fancy_gym/__init__.py
+++ b/fancy_gym/__init__.py
@@ -4,7 +4,7 @@ from fancy_gym.utils.make_env_helpers import make_bb
 from .envs.registry import register, upgrade
 from .envs.registry import ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS, MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS
 
-ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['dm_controll']
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['dm_control']
 ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['fancy']
 ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['metaworld']
 ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['gym']

From 221e3fee5ac63f4bf614a3091ab14e502544eafb Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 18:34:27 +0200
Subject: [PATCH 119/198] Fix: Some tests omitted new ns 'fancy' when trying to
 access our envs

---
 test/test_black_box.py             | 2 +-
 test/test_replanning_sequencing.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index 76bd73e..fd9da4f 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -13,7 +13,7 @@ from fancy_gym.utils.wrappers import TimeAwareObservation
 from test.utils import ugly_hack_to_mitigate_metaworld_bug
 
 SEED = 1
-ENV_IDS = ['Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2']
+ENV_IDS = ['fancy/Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2']
 WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper,
             fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper]
 ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())
diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index 24f7a12..a6c3008 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -16,7 +16,7 @@ from fancy_gym.utils.make_env_helpers import ensure_finite_time
 from test.utils import ugly_hack_to_mitigate_metaworld_bug
 
 SEED = 1
-ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2']
+ENV_IDS = ['fancy/Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2']
 WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper,
             fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper]
 ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())

From b4793c89bc2634428139027b09de44c77a4e5475 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 30 Jul 2023 19:32:32 +0200
Subject: [PATCH 120/198] Fix: Controllers not getting kwargs

---
 fancy_gym/black_box/factory/controller_factory.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fancy_gym/black_box/factory/controller_factory.py b/fancy_gym/black_box/factory/controller_factory.py
index 8b2d865..7a4bc34 100644
--- a/fancy_gym/black_box/factory/controller_factory.py
+++ b/fancy_gym/black_box/factory/controller_factory.py
@@ -11,11 +11,11 @@ def get_controller(controller_type: str, **kwargs):
     if controller_type == "motor":
         return PDController(**kwargs)
     elif controller_type == "velocity":
-        return VelController()
+        return VelController(**kwargs)
     elif controller_type == "position":
-        return PosController()
+        return PosController(**kwargs)
     elif controller_type == "metaworld":
-        return MetaWorldController()
+        return MetaWorldController(**kwargs)
     else:
         raise ValueError(f"Specified controller type {controller_type} not supported, "
                          f"please choose one of {ALL_TYPES}.")

From fb1282fe1e1ff20f008bdcbe8ca02db62fad19f9 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 14 Aug 2023 10:51:43 +0200
Subject: [PATCH 121/198] Use new namespace convention for mp-versions of envs

---
 fancy_gym/envs/registry.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 83ca1ac..0f7d65d 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -167,9 +167,11 @@ def register_mp(id, mp_wrapper, mp_type, mp_config_override={}):
     else:
         raise ValueError('env id can not contain multiple "/".')
 
-    parts = id.split('-')
+    parts = name.split('-')
     assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.'
-    fancy_id = '-'.join(parts[:-1]+[mp_type, parts[-1]])
+    fancy_name = '-'.join(parts[:-1]+[mp_type, parts[-1]])
+
+    fancy_id = f'{ns}_{mp_type}/{fancy_name}'
 
     gym_register(
         id=fancy_id,

From eefcbcb0f0f68e3df47543ed18e49c3889fbcfa4 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 14 Aug 2023 11:57:06 +0200
Subject: [PATCH 122/198] tiny fix to env references in test

---
 test/test_replanning_sequencing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index a6c3008..0563a76 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -16,7 +16,7 @@ from fancy_gym.utils.make_env_helpers import ensure_finite_time
 from test.utils import ugly_hack_to_mitigate_metaworld_bug
 
 SEED = 1
-ENV_IDS = ['fancy/Reacher5d-v0', 'dmc:ball_in_cup-catch-v0', 'metaworld:reach-v2', 'Reacher-v2']
+ENV_IDS = ['fancy/Reacher5d-v0', 'dmc/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2']
 WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper,
             fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper]
 ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())

From 14d545acee4cd180ee30d274ce52d3955d126b1e Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 14 Aug 2023 16:08:13 +0200
Subject: [PATCH 123/198] Fixed: Name generation wrong for mp envs

---
 fancy_gym/envs/registry.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index 0f7d65d..58a637f 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -169,9 +169,8 @@ def register_mp(id, mp_wrapper, mp_type, mp_config_override={}):
 
     parts = name.split('-')
     assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.'
-    fancy_name = '-'.join(parts[:-1]+[mp_type, parts[-1]])
 
-    fancy_id = f'{ns}_{mp_type}/{fancy_name}'
+    fancy_id = f'{ns}_{mp_type}/{name}'
 
     gym_register(
         id=fancy_id,

From 1fb5368cc24ee617ab99f0df6d03ea4cc958a201 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 14 Aug 2023 16:08:32 +0200
Subject: [PATCH 124/198] Fix: Multiple issues in test/test_fancy_registry.py

---
 test/test_fancy_registry.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py
index 961d3b6..aad076b 100644
--- a/test/test_fancy_registry.py
+++ b/test/test_fancy_registry.py
@@ -9,8 +9,7 @@ from gymnasium.core import ActType, ObsType
 import fancy_gym
 from fancy_gym import register
 
-ENV_IDS = ['fancy/Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2']
-KNOWN_NS = ['dm_controll', 'fancy', 'metaworld', 'gym']
+KNOWN_NS = ['dm_control', 'fancy', 'metaworld', 'gym']
 
 
 class Object(object):
@@ -41,33 +40,39 @@ class ToyEnv(gym.Env):
 @pytest.fixture(scope="session", autouse=True)
 def setup():
     register(
-        id=f'toy2-v0',
+        id=f'dummy/toy2-v0',
         entry_point='test.test_black_box:ToyEnv',
         max_episode_steps=50,
     )
 
 
-@pytest.mark.parametrize('env_id', ENV_IDS)
+@pytest.mark.parametrize('env_id', ['dummy/toy2-v0'])
 @pytest.mark.parametrize('mp_type', ['ProMP', 'DMP', 'ProDMP'])
 def test_make_mp(env_id: str, mp_type: str):
-    parts = env_id.split('-')
-    assert len(parts) >= 2 and parts[-1].startswith('v'), 'Malformed env id, must end in -v{int}.'
-    fancy_id = '-'.join(parts[:-1]+[mp_type, parts[-1]])
+    parts = env_id.split('/')
+    if len(parts) == 1:
+        ns, name = 'gym', parts[0]
+    elif len(parts) == 2:
+        ns, name = parts[0], parts[1]
+    else:
+        raise ValueError('env id can not contain multiple "/".')
+
+    fancy_id = f'{ns}_{mp_type}/{name}'
 
     make(fancy_id)
 
 
 def test_make_raw_toy():
-    make('toy2-v0')
+    make('dummy/toy2-v0')
 
 
 @pytest.mark.parametrize('mp_type', ['ProMP', 'DMP', 'ProDMP'])
 def test_make_mp_toy(mp_type: str):
-    fancy_id = '-'.join(['toy2', mp_type, 'v0'])
+    fancy_id = f'dummy_{mp_type}/toy2-v0'
 
     make(fancy_id)
 
 
 @pytest.mark.parametrize('ns', KNOWN_NS)
 def test_ns_nonempty(ns):
-    assert len(fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns]), f'The namespace {ns} is empty even though, it should not be...'
+    assert len(fancy_gym.MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns]), f'The namespace {ns} is empty even though, it should not be...'
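
To make the id convention that [PATCH 121] and [PATCH 123] converge on concrete: a step-based id `<ns>/<name>-v<int>` receives its MP variants under the derived namespace `<ns>_<mp_type>`. The helper below is purely illustrative (no such `mp_id` function exists in the codebase); it mirrors the derivation in `register_mp`:

```python
# Illustrative sketch of the id scheme after [PATCH 123]; not repo code.
def mp_id(step_based_id: str, mp_type: str) -> str:
    ns, _, name = step_based_id.rpartition('/')
    ns = ns or 'gym'  # ids without an explicit namespace fall back to 'gym'
    assert name.split('-')[-1].startswith('v'), 'Malformed env id, must end in -v{int}.'
    return f'{ns}_{mp_type}/{name}'


assert mp_id('fancy/Reacher5d-v0', 'ProMP') == 'fancy_ProMP/Reacher5d-v0'
assert mp_id('metaworld/reach-v2', 'ProDMP') == 'metaworld_ProDMP/reach-v2'
assert mp_id('Reacher-v2', 'DMP') == 'gym_DMP/Reacher-v2'
```
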
From f3ffa714cbd0a86221951367eae7108b1f9d708a Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 14 Aug 2023 16:09:28 +0200 Subject: [PATCH 125/198] Cleaning up test/test_dmc_envs.py --- test/test_dmc_envs.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py index 3888f59..7a96d94 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc_envs.py @@ -7,47 +7,31 @@ import pytest import fancy_gym from test.utils import run_env, run_env_determinism -# SUITE_IDS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] -# MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] -DM_CONTROL_IDS = [spec.id for spec in gym.envs.registry.values() if +DMC_IDS = [spec.id for spec in gym.envs.registry.values() if spec.id.startswith('dm_control/') and 'compatibility-env-v0' not in spec.id and 'lqr-lqr' not in spec.id] -DM_control_MP_IDS = list(chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) +DMC_MP_IDS = list(chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) SEED = 1 -@pytest.mark.parametrize('env_id', DM_CONTROL_IDS) +@pytest.mark.parametrize('env_id', DMC_IDS) def test_step_dm_control_functionality(env_id: str): """Tests that suite step environments run without errors using random actions.""" run_env(env_id, 5000, wrappers=[gym.wrappers.FlattenObservation]) -@pytest.mark.parametrize('env_id', DM_CONTROL_IDS) +@pytest.mark.parametrize('env_id', DMC_IDS) def test_step_dm_control_determinism(env_id: str): """Tests that for step environments identical seeds produce identical trajectories.""" run_env_determinism(env_id, SEED, 5000, wrappers=[gym.wrappers.FlattenObservation]) - -# @pytest.mark.parametrize('env_id', MANIPULATION_IDS) -# def test_step_manipulation_functionality(env_id: str): -# """Tests that manipulation step environments run without errors using random actions.""" -# run_env(env_id) -# -# -# @pytest.mark.parametrize('env_id', MANIPULATION_IDS) -# def test_step_manipulation_determinism(env_id: str): -# """Tests that for step environments identical seeds produce identical trajectories.""" -# run_env_determinism(env_id, SEED) - - -@pytest.mark.parametrize('env_id', DM_control_MP_IDS) +@pytest.mark.parametrize('env_id', DMC_MP_IDS) def test_bb_dmc_functionality(env_id: str): """Tests that black box environments run without errors using random actions.""" run_env(env_id) - -@pytest.mark.parametrize('env_id', DM_control_MP_IDS) +@pytest.mark.parametrize('env_id', DMC_MP_IDS) def test_bb_dmc_determinism(env_id: str): """Tests that for black box environment identical seeds produce identical trajectories.""" run_env_determinism(env_id, SEED) From 5b99227fac7aa780b246cc4ff4457729237bc49b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 14 Aug 2023 16:45:33 +0200 Subject: [PATCH 126/198] Moving the ugly_mitigation_for_metaworld_bug into the metaworld env wrapper --- fancy_gym/meta/metaworld_adapter.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py index b0dda4d..6f5859f 100644 --- a/fancy_gym/meta/metaworld_adapter.py +++ b/fancy_gym/meta/metaworld_adapter.py @@ -24,7 +24,7 @@ class MujocoMapSpacesWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): gym.Wrapper.__init__(self, env) eos = env.observation_space - eas = env.observation_space + eas = env.action_space Obs_Space_Class = 
getattr(gym.spaces, str(eos.__class__).split("'")[1].split('.')[-1]) Act_Space_Class = getattr(gym.spaces, str(eas.__class__).split("'")[1].split('.')[-1]) @@ -33,6 +33,23 @@ class MujocoMapSpacesWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): self.action_space = Act_Space_Class(low=eas.low, high=eas.high, dtype=eas.dtype) +class MitigateMetaworldBug(gym.Wrapper, gym.utils.RecordConstructorArgs): + def __init__(self, env: gym.Env): + gym.utils.RecordConstructorArgs.__init__(self) + gym.Wrapper.__init__(self, env) + + def reset(self, **kwargs): + ret = self.env.reset(**kwargs) + head = self.env + try: + for i in range(16): + head.curr_path_length = 0 + head = head.env + except: + pass + return ret + + def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] = None, **kwargs): if underlying_id not in metaworld.ML1.ENV_NAMES: raise ValueError(f'Specified environment "{underlying_id}" not present in metaworld ML1.') @@ -60,6 +77,8 @@ def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld env = gym.make(gym_id, disable_env_checker=True) env = MujocoMapSpacesWrapper(env) + # TODO remove, when this has been fixed upstream + env = MitigateMetaworldBug(env) return env From 78823d95b69110cea8c08d29e3e160ba8328cbac Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 14 Aug 2023 16:47:05 +0200 Subject: [PATCH 127/198] Various fixed to tests --- test/test_black_box.py | 6 ++---- test/test_dmc_envs.py | 10 ++++++---- test/test_fancy_envs.py | 2 +- test/test_gym_envs.py | 2 +- test/test_metaworld_envs.py | 2 +- test/test_replanning_sequencing.py | 8 +++----- test/utils.py | 10 ---------- 7 files changed, 14 insertions(+), 26 deletions(-) diff --git a/test/test_black_box.py b/test/test_black_box.py index fd9da4f..8cdc543 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -10,13 +10,12 @@ from gymnasium.core import ActType, ObsType import fancy_gym from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from fancy_gym.utils.wrappers import TimeAwareObservation -from test.utils import ugly_hack_to_mitigate_metaworld_bug SEED = 1 ENV_IDS = ['fancy/Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2'] WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper, fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] -ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) +ALL_MP_ENVS = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'] MAX_STEPS_FALLBACK = 100 @@ -129,7 +128,7 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): for i in range(5): env.reset(seed=SEED) - ugly_hack_to_mitigate_metaworld_bug(env) # TODO: Remove, when metaworld fixed it upstream + _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample()) length = info['trajectory_length'] @@ -336,7 +335,6 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): for i in range(5): if done: env.reset(seed=SEED) - ugly_hack_to_mitigate_metaworld_bug(env) action = env.action_space.sample() action[0] = tau action[1] = delay diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py index 7a96d94..3602da6 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc_envs.py @@ -8,10 +8,10 @@ import fancy_gym from test.utils import run_env, run_env_determinism 
DMC_IDS = [spec.id for spec in gym.envs.registry.values() if - spec.id.startswith('dm_control/') - and 'compatibility-env-v0' not in spec.id - and 'lqr-lqr' not in spec.id] -DMC_MP_IDS = list(chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) + spec.id.startswith('dm_control/') + and 'compatibility-env-v0' not in spec.id + and 'lqr-lqr' not in spec.id] +DMC_MP_IDS = fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'] SEED = 1 @@ -26,11 +26,13 @@ def test_step_dm_control_determinism(env_id: str): """Tests that for step environments identical seeds produce identical trajectories.""" run_env_determinism(env_id, SEED, 5000, wrappers=[gym.wrappers.FlattenObservation]) + @pytest.mark.parametrize('env_id', DMC_MP_IDS) def test_bb_dmc_functionality(env_id: str): """Tests that black box environments run without errors using random actions.""" run_env(env_id) + @pytest.mark.parametrize('env_id', DMC_MP_IDS) def test_bb_dmc_determinism(env_id: str): """Tests that for black box environment identical seeds produce identical trajectories.""" diff --git a/test/test_fancy_envs.py b/test/test_fancy_envs.py index 898cc08..a15c837 100644 --- a/test/test_fancy_envs.py +++ b/test/test_fancy_envs.py @@ -10,7 +10,7 @@ from test.utils import run_env, run_env_determinism CUSTOM_IDS = [id for id, spec in gym.envs.registry.items() if not isinstance(spec.entry_point, Callable) and "fancy_gym" in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] -CUSTOM_MP_IDS = list(chain(*fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) +CUSTOM_MP_IDS = fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'] SEED = 1 diff --git a/test/test_gym_envs.py b/test/test_gym_envs.py index 76b5c85..5e50d53 100644 --- a/test/test_gym_envs.py +++ b/test/test_gym_envs.py @@ -14,7 +14,7 @@ GYM_IDS = [spec.id for spec in gym.envs.registry.values() if and 'jax' not in spec.id.lower() and not re.match(r'GymV2.Environment', spec.id) ] -GYM_MP_IDS = list(chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) +GYM_MP_IDS = fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'] SEED = 1 diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py index 55de621..18b922f 100644 --- a/test/test_metaworld_envs.py +++ b/test/test_metaworld_envs.py @@ -8,7 +8,7 @@ from test.utils import run_env, run_env_determinism METAWORLD_IDS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] -METAWORLD_MP_IDS = list(chain(*fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())) +METAWORLD_MP_IDS = fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'] SEED = 1 diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index 0563a76..c2edf42 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -13,13 +13,12 @@ import fancy_gym from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from fancy_gym.utils.wrappers import TimeAwareObservation from fancy_gym.utils.make_env_helpers import ensure_finite_time -from test.utils import ugly_hack_to_mitigate_metaworld_bug SEED = 1 -ENV_IDS = ['fancy/Reacher5d-v0', 'dmc/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2'] +ENV_IDS = ['fancy/Reacher5d-v0', 'dm_control/ball_in_cup-catch-v0', 'metaworld/reach-v2', 'Reacher-v2'] WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in_cup.MPWrapper, fancy_gym.meta.goal_object_change_mp_wrapper.MPWrapper, 
fancy_gym.open_ai.mujoco.reacher_v2.MPWrapper] -ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()) +ALL_MP_ENVS = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'] MAX_STEPS_FALLBACK = 50 @@ -94,7 +93,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter for i in range(25): if done: env.reset(seed=SEED) - ugly_hack_to_mitigate_metaworld_bug(env) # TODO: Remove, when metaworld fixed it upstream + action = env.action_space.sample() _obs, _reward, terminated, truncated, info = env.step(action) done = terminated or truncated @@ -159,7 +158,6 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra print(done, (i + 1), episode_steps) assert (i + 1) % episode_steps == 0 env.reset(seed=SEED) - ugly_hack_to_mitigate_metaworld_bug(env) # TODO: Remove, when metaworld fixed it upstream assert replanning_schedule(None, None, None, None, length) diff --git a/test/utils.py b/test/utils.py index 8f92bbd..01e33fe 100644 --- a/test/utils.py +++ b/test/utils.py @@ -100,13 +100,3 @@ def verify_reward(reward): def verify_done(done): assert isinstance( done, bool), f"Returned {done} as done flag, expected bool." - - -def ugly_hack_to_mitigate_metaworld_bug(env): - head = env - try: - for i in range(16): - head.curr_path_length = 0 - head = head.env - except: - pass From 94c4397fac335156280477430efa036723d2a483 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 21 Aug 2023 11:57:58 +0200 Subject: [PATCH 128/198] Fixed incorrect ns seperator for metaworld tests --- test/test_metaworld_envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py index 18b922f..a4fea03 100644 --- a/test/test_metaworld_envs.py +++ b/test/test_metaworld_envs.py @@ -6,7 +6,7 @@ from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE import fancy_gym from test.utils import run_env, run_env_determinism -METAWORLD_IDS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in +METAWORLD_IDS = [f'metaworld/{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] METAWORLD_MP_IDS = fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all'] SEED = 1 From 6d80201a0335d9e9bfdd196d197894b30d4d0900 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 21 Aug 2023 11:58:29 +0200 Subject: [PATCH 129/198] Simplified metaworld adapter --- fancy_gym/meta/metaworld_adapter.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py index 6f5859f..a898a93 100644 --- a/fancy_gym/meta/metaworld_adapter.py +++ b/fancy_gym/meta/metaworld_adapter.py @@ -65,17 +65,17 @@ def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] # TODO remove this as soon as there is support for the new API _env = EnvCompatibility(_env, render_mode) + env = _env - gym_id = '_metaworld_compat_' + uuid.uuid4().hex + '-v0' - - gym_register( - id=gym_id, - entry_point=lambda: _env, - max_episode_steps=max_episode_steps, - ) + # gym_id = '_metaworld_compat_' + uuid.uuid4().hex + '-v0' + # gym_register( + # id=gym_id, + # entry_point=lambda: _env, + # max_episode_steps=max_episode_steps, + # ) # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld - env = gym.make(gym_id, disable_env_checker=True) + # env = gym.make(gym_id, disable_env_checker=True) env = MujocoMapSpacesWrapper(env) # TODO remove, when 
this has been fixed upstream env = MitigateMetaworldBug(env) From 15e1bdc218cae39fe6272a421ce207c70cd4e2ed Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 16:18:18 +0200 Subject: [PATCH 130/198] Mitigation: Allow seeding Metaworld on reset --- fancy_gym/meta/metaworld_adapter.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py index a898a93..f7f3251 100644 --- a/fancy_gym/meta/metaworld_adapter.py +++ b/fancy_gym/meta/metaworld_adapter.py @@ -50,6 +50,18 @@ class MitigateMetaworldBug(gym.Wrapper, gym.utils.RecordConstructorArgs): return ret +class MetaworldResetFix(gym.Wrapper, gym.utils.RecordConstructorArgs): + def __init__(self, env: gym.Env): + gym.utils.RecordConstructorArgs.__init__(self) + gym.Wrapper.__init__(self, env) + + def reset(self, **kwargs): + ret = self.env.reset(**kwargs) + if 'seed' in kwargs: + self.env.seed(kwargs['seed']) + return ret + + def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] = None, **kwargs): if underlying_id not in metaworld.ML1.ENV_NAMES: raise ValueError(f'Specified environment "{underlying_id}" not present in metaworld ML1.') @@ -79,6 +91,7 @@ def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] env = MujocoMapSpacesWrapper(env) # TODO remove, when this has been fixed upstream env = MitigateMetaworldBug(env) + env = MetaworldResetFix(env) return env From a4e28837bb24b44c1724599d144d29303af7b180 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 17:34:46 +0200 Subject: [PATCH 131/198] Fix: Incorrect Controller Type in mp_config for dmc reach_site --- fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py index fbfd592..0eaf8b9 100644 --- a/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py +++ b/fancy_gym/dmc/manipulation/reach_site/mp_wrapper.py @@ -9,7 +9,6 @@ class MPWrapper(RawInterfaceWrapper): mp_config = { 'ProMP': { 'controller_kwargs': { - 'controller_type': 'velocity', 'p_gains': 50.0, }, 'trajectory_generator_kwargs': { @@ -18,7 +17,6 @@ class MPWrapper(RawInterfaceWrapper): }, 'DMP': { 'controller_kwargs': { - 'controller_type': 'velocity', 'p_gains': 50.0, }, 'phase_generator': { From 8b3d05aaafaebf4db588294bf489638dc2c547f0 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 17:35:30 +0200 Subject: [PATCH 132/198] Fix Typo: weights_scale should apply to traj_gen, not controller --- fancy_gym/dmc/suite/reacher/mp_wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fancy_gym/dmc/suite/reacher/mp_wrapper.py b/fancy_gym/dmc/suite/reacher/mp_wrapper.py index 8741d91..fe40d26 100644 --- a/fancy_gym/dmc/suite/reacher/mp_wrapper.py +++ b/fancy_gym/dmc/suite/reacher/mp_wrapper.py @@ -10,6 +10,8 @@ class MPWrapper(RawInterfaceWrapper): 'ProMP': { 'controller_kwargs': { 'p_gains': 50.0, + }, + 'trajectory_generator_kwargs': { 'weights_scale': 0.2, }, }, From 20b1b0ccac1a6f72c06d17da2cf14135721e6172 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 17:36:17 +0200 Subject: [PATCH 133/198] Quickfix for mp_config merging and allow defining different base_id for upgrades --- fancy_gym/envs/__init__.py | 8 +++---- fancy_gym/envs/registry.py | 46 +++++++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/fancy_gym/envs/__init__.py 
b/fancy_gym/envs/__init__.py index cabee26..9a3ccdc 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -2,7 +2,7 @@ from copy import deepcopy import numpy as np from gymnasium import register as gym_register -from .registry import register +from .registry import register, upgrade from . import classic_control, mujoco from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv @@ -213,12 +213,10 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, ) - register( + upgrade( id='fancy/BoxPushing{}Replan-v0'.format(reward_type), - entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), + base_id='fancy/BoxPushing{}-v0'.format(reward_type), mp_wrapper=mujoco.box_pushing.ReplanMPWrapper, - register_step_based=False, - max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, ) # Table Tennis environments diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index 58a637f..5176699 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -5,8 +5,10 @@ import importlib import numpy as np from collections import defaultdict +from collections.abc import Mapping, MutableMapping + from fancy_gym.utils.make_env_helpers import make_bb -from fancy_gym.utils.utils import nested_update +# from fancy_gym.utils.utils import nested_update from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from gymnasium import register as gym_register @@ -129,33 +131,27 @@ def register( mp_wrapper = getattr(mod, attr_name) if register_step_based: gym_register(id=id, entry_point=entry_point, **kwargs) - register_mps(id, mp_wrapper, add_mp_types, mp_config_override) + upgrade(id, mp_wrapper, add_mp_types, mp_config_override) def upgrade( id, mp_wrapper=DefaultMPWrapper, add_mp_types=KNOWN_MPS, + base_id=None, mp_config_override={}, - **kwargs ): - register( - id, - entry_point=None, - mp_wrapper=mp_wrapper, - register_step_based=False, - add_mp_types=add_mp_types, - mp_config_override=mp_config_override, - **kwargs - ) + if not base_id: + base_id = id + register_mps(id, base_id, mp_wrapper, add_mp_types, mp_config_override) -def register_mps(id, mp_wrapper, add_mp_types=KNOWN_MPS, mp_config_override={}): +def register_mps(id, base_id, mp_wrapper, add_mp_types=KNOWN_MPS, mp_config_override={}): for mp_type in add_mp_types: - register_mp(id, mp_wrapper, mp_type, mp_config_override.get(mp_type, {})) + register_mp(id, base_id, mp_wrapper, mp_type, mp_config_override.get(mp_type, {})) -def register_mp(id, mp_wrapper, mp_type, mp_config_override={}): +def register_mp(id, base_id, mp_wrapper, mp_type, mp_config_override={}): assert mp_type in KNOWN_MPS, 'Unknown mp_type' assert id not in ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.' @@ -176,7 +172,7 @@ def register_mp(id, mp_wrapper, mp_type, mp_config_override={}): id=fancy_id, entry_point=bb_env_constructor, kwargs={ - 'underlying_id': id, + 'underlying_id': base_id, 'mp_wrapper': mp_wrapper, 'mp_type': mp_type, '_mp_config_override_register': mp_config_override @@ -190,6 +186,24 @@ def register_mp(id, mp_wrapper, mp_type, mp_config_override={}): MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns][mp_type].append(fancy_id) MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns]['all'].append(fancy_id) +# TODO: Apply inherit_defaults: False to appropiate places and remove this... 
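The merge quirk this TODO refers to is easiest to see with a concrete call. Below is a minimal sketch of how the `nested_update` helper (added just after this note) behaves; the config dicts are made up for illustration, with key names borrowed from the wrappers in this series:

```python
from collections.abc import Mapping

def nested_update(base, update):
    # Same rule as the helper added below: if the update touches any '*_type'
    # key, the whole sub-config is replaced instead of merged, so stale
    # sibling kwargs from the default config do not leak through.
    if any(item.endswith('_type') for item in update):
        return update
    for k, v in update.items():
        base[k] = nested_update(base.get(k, {}), v) if isinstance(v, Mapping) else v
    return base

base = {'phase_generator_kwargs': {'phase_generator_type': 'linear', 'alpha_phase': 3}}
override = {'phase_generator_kwargs': {'phase_generator_type': 'exp'}}
print(nested_update(base, override))
# {'phase_generator_kwargs': {'phase_generator_type': 'exp'}} -- alpha_phase was dropped
```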
+ + +def nested_update(base: MutableMapping, update): + """ + Updated method for nested Mappings + Args: + base: main Mapping to be updated + update: updated values for base Mapping + + """ + if any([item.endswith('_type') for item in update]): + base = update + return base + for k, v in update.items(): + base[k] = nested_update(base.get(k, {}), v) if isinstance(v, Mapping) else v + return base + def bb_env_constructor(underlying_id, mp_wrapper, mp_type, mp_config_override={}, _mp_config_override_register={}, **kwargs): raw_underlying_env = gym_make(underlying_id, **kwargs) From 2c0c4e5508650b2a89a94f58e375085e6bc2d0de Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 17:37:03 +0200 Subject: [PATCH 134/198] Fix Typo: alpha_phase belongs to phase_gen not basis_gen --- fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py index 03121f9..c2c5637 100644 --- a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py @@ -61,6 +61,8 @@ class ReplanMPWrapper(MPWrapper): 'basis_generator_kwargs': { 'num_basis': 5, 'basis_bandwidth_factor': 3, + }, + 'phase_generator_kwargs': { 'alpha_phase': 3, }, 'black_box_kwargs': { From 22a72fed2ff5459c4b4ea101b6126919b2b30495 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 17:37:34 +0200 Subject: [PATCH 135/198] Fix: Seeding of act-space is not guaranteed, seed it manually since we depend on it... --- test/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/utils.py b/test/utils.py index 01e33fe..427622d 100644 --- a/test/utils.py +++ b/test/utils.py @@ -31,11 +31,12 @@ def run_env(env_id: str, iterations: int = None, seed: int = 0, wrappers: List[T terminations = [] truncations = [] obs, _ = env.reset(seed=seed) + env.action_space.seed(seed) verify_observations(obs, env.observation_space, "reset()") iterations = iterations or (env.spec.max_episode_steps or 1) - # number of samples(multiple environment steps) + # number of samples (multiple environment steps) for i in range(iterations): observations.append(obs) From 5921e0008b7e91f86a8c7a3456dc77f7b891daa8 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 17:57:24 +0200 Subject: [PATCH 136/198] Mitigation: Metaworld .reset ignores seeds; we must manually seed on reset. 
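The invariant that this and the neighboring seeding fixes (patches 135 to 137) protect can be stated as code: two rollouts started from the same seed must produce identical observations. A minimal probe in that spirit; the env id is taken from the examples in this series, and the `rollout` helper is purely illustrative:

```python
import gymnasium as gym
import numpy as np
import fancy_gym  # noqa: F401  (importing registers the fancy/* ids)

def rollout(env_id, seed, steps=10):
    env = gym.make(env_id)
    obs, _ = env.reset(seed=seed)
    env.action_space.seed(seed)  # action sampling must be seeded as well (cf. patch 135)
    trace = [obs]
    for _ in range(steps):
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        trace.append(obs)
        if terminated or truncated:
            obs, _ = env.reset(seed=seed)
    env.close()
    return trace

first = rollout('fancy/Reacher5d-v0', seed=1)
second = rollout('fancy/Reacher5d-v0', seed=1)
assert all(np.allclose(a, b) for a, b in zip(first, second))
```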
--- fancy_gym/meta/metaworld_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py index f7f3251..ed2b5b6 100644 --- a/fancy_gym/meta/metaworld_adapter.py +++ b/fancy_gym/meta/metaworld_adapter.py @@ -56,10 +56,10 @@ class MetaworldResetFix(gym.Wrapper, gym.utils.RecordConstructorArgs): gym.Wrapper.__init__(self, env) def reset(self, **kwargs): - ret = self.env.reset(**kwargs) + self.env.reset(**kwargs) if 'seed' in kwargs: self.env.seed(kwargs['seed']) - return ret + return self.env.reset(**kwargs) def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] = None, **kwargs): From 07aeb779a70e6e3e49a5e2dbb6cbfae7b32f1c50 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 18:16:16 +0200 Subject: [PATCH 137/198] Fix: Some fancy envs failed determinism test because they sampled during reset before the random gen was seeded --- fancy_gym/envs/mujoco/ant_jump/ant_jump.py | 3 ++- fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py | 3 ++- fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py | 3 ++- fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py | 3 ++- fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py | 3 ++- fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py index 14ab625..ed6bea5 100644 --- a/fancy_gym/envs/mujoco/ant_jump/ant_jump.py +++ b/fancy_gym/envs/mujoco/ant_jump/ant_jump.py @@ -162,8 +162,9 @@ class AntJumpEnv(AntEnvCustomXML): self.current_step = 0 self.max_height = 0 # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE + ret = super().reset(seed=seed, options=options) self.goal = self.np_random.uniform(1.0, 2.5, 1) - return super().reset(seed=seed, options=options) + return ret # reset_model had to be implemented in every env to make it deterministic def reset_model(self): diff --git a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py index 4ef2757..f15a9f4 100644 --- a/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py +++ b/fancy_gym/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py @@ -127,8 +127,9 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML): -> Tuple[ObsType, Dict[str, Any]]: self.max_height = 0 self.current_step = 0 + ret = super().reset(seed=seed, options=options) self.goal = self.np_random.uniform(1.1, 1.6, 1) # 1.1 1.6 - return super().reset(seed=seed, options=options) + return ret # overwrite reset_model to make it deterministic def reset_model(self): diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py index c8c15c3..506344b 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump_on_box.py @@ -150,10 +150,11 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML): self.min_distance = 5000 self.current_step = 0 self.hopper_on_box = False + ret = super().reset(seed=seed, options=options) if self.context: self.box_x = self.np_random.uniform(1, 3, 1) self.model.body("box").pos = [self.box_x[0], 0, 0] - return super().reset(seed=seed, options=options) + return ret # overwrite reset_model to make it deterministic def reset_model(self): diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py 
b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py index 2dd82b2..b5afc8b 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw.py @@ -100,8 +100,9 @@ class HopperThrowEnv(HopperEnvCustomXML): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: self.current_step = 0 + ret = super().reset(seed=seed, options=options) self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1) # 0.5 8.0 - return super().reset(seed=seed, options=options) + return ret # overwrite reset_model to make it deterministic def reset_model(self): diff --git a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index be6b81a..00d1bdb 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/fancy_gym/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -130,10 +130,11 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML): self.current_step = 0 self.ball_in_basket = False + ret = super().reset(seed=seed, options=options) if self.context: self.basket_x = self.np_random.uniform(low=3, high=7, size=1) self.model.body("basket_ground").pos[:] = [self.basket_x[0], 0, 0] - return super().reset(seed=seed, options=options) + return ret # overwrite reset_model to make it deterministic def reset_model(self): diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py index 127719c..6ad2be0 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -152,8 +152,9 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML): -> Tuple[ObsType, Dict[str, Any]]: self.current_step = 0 self.max_height = 0 + ret = super().reset(seed=seed, options=options) self.goal = self.np_random.uniform(1.5, 2.5, 1) # 1.5 3.0 - return super().reset(seed=seed, options=options) + return ret # overwrite reset_model to make it deterministic def reset_model(self): From 820e781a0c8ad467cfb47f2ba8c7bd22a589c9d0 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 18:32:11 +0200 Subject: [PATCH 138/198] Fix: Some simple_reacher did not seed correctly --- .../envs/classic_control/simple_reacher/simple_reacher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index 5c63cf8..db7274c 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -45,8 +45,9 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: + ret = super().reset(seed=seed, options=options) self._generate_goal() - return super().reset(seed=seed, options=options) + return ret def _get_reward(self, action: np.ndarray): diff = self.end_effector - self._goal From 155807207fb6398e4beca17e9434f89195ccf926 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 18:38:33 +0200 Subject: [PATCH 139/198] Fix: SimpleReacher and ViaPointReacher did not seed correctly --- .../envs/classic_control/simple_reacher/simple_reacher.py | 8 ++++++-- .../classic_control/viapoint_reacher/viapoint_reacher.py | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git 
a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index db7274c..3afd021 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -45,9 +45,13 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: - ret = super().reset(seed=seed, options=options) + # Reset twice to ensure we return obs after generating goal and generating goal after executing seeded reset. + # (Env will not behave deterministic otherwise) + # Yes, there is probably a more elegant solution to this problem... self._generate_goal() - return ret + super().reset(seed=seed, options=options) + self._generate_goal() + return super().reset(seed=seed, options=options) def _get_reward(self, action: np.ndarray): diff = self.end_effector - self._goal diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py index febccc7..e4d9091 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -44,6 +44,11 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: + # Reset twice to ensure we return obs after generating goal and generating goal after executing seeded reset. + # (Env will not behave deterministic otherwise) + # Yes, there is probably a more elegant solution to this problem... + self._generate_goal() + super().reset(seed=seed, options=options) self._generate_goal() return super().reset(seed=seed, options=options) From 315e135ff06711b3ec0420ef03fda59e6dc8806d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 10 Sep 2023 09:12:15 +0200 Subject: [PATCH 140/198] Added message about discontinuation of fancy_gym.make --- fancy_gym/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py index c1155fb..c406c5b 100644 --- a/fancy_gym/__init__.py +++ b/fancy_gym/__init__.py @@ -8,3 +8,6 @@ ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['fancy'] ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['metaworld'] ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['gym'] + +def make(*args, **kwargs): + raise Exception('As part of the refactor of Fancy Gym and upgrade to gymnasium the use of fancy_gym.make has been discontinued. Regular gym.make should be used instead. For more details check out the github README. 
If your codebase was built for older versions of Fancy Gym and relies on the old behavior and dependency versions, please check out the legacy branch.')

From 7f95923cf4661a4c8f7f6a25d17e0d3104b7bcc4 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 17:17:20 +0200 Subject: [PATCH 141/198] Fixed typo in env registration

--- fancy_gym/dmc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fancy_gym/dmc/__init__.py b/fancy_gym/dmc/__init__.py index 28e1a0a..6e9f38c 100644 --- a/fancy_gym/dmc/__init__.py +++ b/fancy_gym/dmc/__init__.py @@ -32,7 +32,7 @@ register( _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"] for _task in _dmc_cartpole_tasks: register( - id=f'dmc_cartpole-{_task}_dmp-v0', + id=f'dm_control/cartpole-{_task}_dmp-v0', register_step_based=False, mp_wrapper=suite.cartpole.MPWrapper, add_mp_types=['DMP', 'ProMP'],

From 0629d1260c6d611b2917f02776fd00f6208a2578 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 17:29:26 +0200 Subject: [PATCH 142/198] Ensure mp_config defined for all envs, even if just using defaults.

--- fancy_gym/dmc/suite/reacher/mp_wrapper.py | 2 ++ fancy_gym/envs/mujoco/half_cheetah_jump/mp_wrapper.py | 6 ++++++ fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py | 5 +++++ fancy_gym/envs/mujoco/hopper_throw/mp_wrapper.py | 5 +++++ fancy_gym/envs/mujoco/walker_2d_jump/mp_wrapper.py | 5 +++++ 5 files changed, 23 insertions(+)

diff --git a/fancy_gym/dmc/suite/reacher/mp_wrapper.py b/fancy_gym/dmc/suite/reacher/mp_wrapper.py index fe40d26..d713fb6 100644 --- a/fancy_gym/dmc/suite/reacher/mp_wrapper.py +++ b/fancy_gym/dmc/suite/reacher/mp_wrapper.py @@ -10,6 +10,7 @@ class MPWrapper(RawInterfaceWrapper): 'ProMP': { 'controller_kwargs': { 'p_gains': 50.0, + 'd_gains': 1.0, }, 'trajectory_generator_kwargs': { 'weights_scale': 0.2, }, }, @@ -18,6 +19,7 @@ class MPWrapper(RawInterfaceWrapper): 'DMP': { 'controller_kwargs': { 'p_gains': 50.0, + 'd_gains': 1.0, }, 'phase_generator': { 'alpha_phase': 2, diff --git a/fancy_gym/envs/mujoco/half_cheetah_jump/mp_wrapper.py b/fancy_gym/envs/mujoco/half_cheetah_jump/mp_wrapper.py index 11b169b..f5f7634 100644 --- a/fancy_gym/envs/mujoco/half_cheetah_jump/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/half_cheetah_jump/mp_wrapper.py @@ -6,6 +6,12 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': {}, + 'DMP': {}, + 'ProDMP': {}, + } + @property def context_mask(self) -> np.ndarray: return np.hstack([ diff --git a/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py b/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py index ed95b3d..4faeaad 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/hopper_jump/mp_wrapper.py @@ -6,6 +6,11 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': {}, + 'DMP': {}, + 'ProDMP': {}, + } # Random x goal + random init pos @property diff --git a/fancy_gym/envs/mujoco/hopper_throw/mp_wrapper.py b/fancy_gym/envs/mujoco/hopper_throw/mp_wrapper.py index cad680a..03588a2 100644 --- a/fancy_gym/envs/mujoco/hopper_throw/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/hopper_throw/mp_wrapper.py @@ -6,6 +6,11 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': {}, + 'DMP': {}, + 'ProDMP': {}, + } @property def context_mask(self): diff --git 
a/fancy_gym/envs/mujoco/walker_2d_jump/mp_wrapper.py b/fancy_gym/envs/mujoco/walker_2d_jump/mp_wrapper.py index d55e9d2..3dd8c55 100644 --- a/fancy_gym/envs/mujoco/walker_2d_jump/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/walker_2d_jump/mp_wrapper.py @@ -6,6 +6,11 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': {}, + 'DMP': {}, + 'ProDMP': {}, + } @property def context_mask(self): From da34db22c82649bfb056ea485786d4e0bbfc7325 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 17:31:41 +0200 Subject: [PATCH 143/198] Fix: mp_config missing for BoxPushing ProDMP --- fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py index c2c5637..9d775b8 100644 --- a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py @@ -17,7 +17,15 @@ class MPWrapper(RawInterfaceWrapper): } }, 'DMP': {}, - 'ProDMP': {}, + 'ProDMP': { + 'controller_kwargs': { + 'p_gains': 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]), + 'd_gains': 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]), + }, + 'basis_generator_kwargs': { + 'basis_bandwidth_factor': 2 # 3.5, 4 to try + } + }, } # Random x goal + random init pos From 8749fc52cbe256f4337f8b6dde642baefb8a7d0b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 18:37:40 +0200 Subject: [PATCH 144/198] Better README --- README.md | 124 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 72 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 24108f2..9d73ec4 100644 --- a/README.md +++ b/README.md @@ -7,25 +7,26 @@
-`fancy_gym` offers a large variety of reinforcement learning environments under the unifying interface of [Gymnasium](https://gymnasium.farama.org/). +| :exclamation: Fancy Gym has recently received a major refactor, which also updated many of its dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](TODO). | +| ------------------------------------------------------------ |

-We provide support (under the Gymnasium interface) for the benchmark suites [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) (DMC) and [Metaworld](https://meta-world.github.io/). If those are not sufficient and you want to create your own custom gym environments, use [this guide](https://www.gymlibrary.dev/content/environment_creation/). We highly appreciate it, if you would then submit a PR for this environment to become part of `fancy_gym`. +Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments.

-In comparison to existing libraries, we additionally support to control agents with movement primitives, such as Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP). +**Key Features**: + +- **New Challenging Environments**: We've introduced several new environments that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. +- **Advanced Movement Primitives**: `fancy_gym` supports sophisticated movement primitives, including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP). +- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/) and makes it easy to use them with movement primitives. +- **Upgrade to Movement Primitives**: With our framework, it's straightforward to transform standard Gymnasium environments into environments that support movement primitives. +- **Contribute Your Own Environments**: If you're inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://www.gymlibrary.dev/content/environment_creation/) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`. ## Movement Primitive Environments (Episode-Based/Black-Box Environments) -Unlike step-based environments, movement primitive (MP) environments are closer related to stochastic search, black-box -optimization, and methods that are often used in traditional robotics and control. MP environments are typically -episode-based and execute a full trajectory, which is generated by a trajectory generator, such as a Dynamic Movement -Primitive (DMP) or a Probabilistic Movement Primitive (ProMP). The generated trajectory is translated into individual -step-wise actions by a trajectory tracking controller. The exact choice of controller is, however, dependent on the type -of environment. 
We currently support position, velocity, and PD-Controllers for position, velocity, and torque control, -respectively as well as a special controller for the MetaWorld control suite. -The goal of all MP environments is still to learn an optimal policy. Yet, an action represents the parametrization of -the motion primitives to generate a suitable trajectory. Additionally, in this framework we support all of this also for -the contextual setting, i.e. we expose the context space - a subset of the observation space - in the beginning of the -episode. This requires to predict a new action/MP parametrization for each context. +Movement primitive (MP) environments differ from traditional step-based environments. They align more with concepts from stochastic search, black-box optimization, and methods commonly found in classical robotics and control. Instead of individual steps, MP environments operate on an episode basis, executing complete trajectories. These trajectories are produced by trajectory generators like Dynamic Movement Primitives (DMP), Probabilistic Movement Primitives (ProMP) or Probabilistic Dynamic Movement Primitives (ProDMP). + +Once generated, these trajectories are converted into step-by-step actions using a trajectory tracking controller. The specific controller chosen depends on the environment's requirements. Currently, we support position, velocity, and PD-Controllers tailored for position, velocity, and torque control. Additionally, we have a specialized controller designed for the MetaWorld control suite. + +While the overarching objective of MP environments remains the learning of an optimal policy, the actions here represent the parametrization of motion primitives to craft the right trajectory. Our framework further enhances this by accommodating a contextual setting. At the episode's onset, we present the context space—a subset of the observation space. This demands the prediction of a new action or MP parametrization for every unique context. ## Installation @@ -47,47 +48,43 @@ cd fancy_gym pip install -e . ``` -In case you want to use dm_control oder metaworld, you can install them by specifying extras +We have a few optional dependencies. Check them out in the setup.py or just install all of them via ```bash -pip install -e .[dmc,metaworld] +pip install -e '.[all]' ``` -> **Note:** -> While our library already fully supports the new mujoco bindings, metaworld still relies on -> [mujoco_py](https://github.com/openai/mujoco-py), hence make sure to have mujoco 2.1 installed beforehand. ## How to use Fancy Gym We will only show the basics here and prepared [multiple examples](fancy_gym/examples/) for a more detailed look. -### Step-wise Environments +### Step-Based Environments +Regular step based environments added by Fancy Gym are added into the ```fancy/``` namespace. + +| :exclamation: Legacy versions of Fancy Gym used ```fancy_gym.make(...)```. This is no longer supported and will raise an Exception on new versions. 
| +| ------------------------------------------------------------ | ```python import fancy_gym +import gym -env = fancy_gym.make('Reacher5d-v0', seed=1) -obs = env.reset() +env = gym.make('fancy/Reacher5d-v0') +observation = env.reset(seed=1) for i in range(1000): action = env.action_space.sample() - obs, reward, done, info = env.step(action) + observation, reward, terminated, truncated, info = env.step(action) if i % 5 == 0: env.render() - if done: - obs = env.reset() + if terminated or truncated: + observation = env.reset() ``` -When using `dm_control` tasks we expect the `env_id` to be specified as `dmc:domain_name-task_name` or for manipulation -tasks as `dmc:manipulation-environment_name`. For `metaworld` tasks, we require the structure `metaworld:env_id-v2`, our -custom tasks and standard gym environments can be created without prefixes. - ### Black-box Environments -All environments provide by default the cumulative episode reward, this can however be changed if necessary. Optionally, -each environment returns all collected information from each step as part of the infos. This information is, however, -mainly meant for debugging as well as logging and not for training. +All environments provide by default the cumulative episode reward, this can however be changed if necessary. Optionally, each environment returns all collected information from each step as part of the infos. This information is, however, mainly meant for debugging as well as logging and not for training. |Key| Description|Type |---|---|---| @@ -99,7 +96,8 @@ mainly meant for debugging as well as logging and not for training. `trajectory_length`| Total number of environment interactions | Always `other`| All other information from the underlying environment are returned as a list with length `trajectory_length` maintaining the original key. In case some information are not provided every time step, the missing values are filled with `None`. | Always -Existing MP tasks can be created the same way as above. Just keep in mind, calling `step()` executes a full trajectory. +Existing MP tasks can be created the same way as above. The namespace of a MP-variant of an environment is given by ```_/```. +Just keep in mind, calling `step()` executes a full trajectory. > **Note:** > Currently, we are also in the process of enabling replanning as well as learning of sub-trajectories. @@ -111,20 +109,23 @@ Existing MP tasks can be created the same way as above. Just keep in mind, calli ```python import fancy_gym -env = fancy_gym.make('Reacher5dProMP-v0', seed=1) +env = fancy_gym.make('fancy_ProMP/Reacher5d-v0') +# or env = fancy_gym.make('metaworld_ProDMP/reach-v2') +# or env = fancy_gym.make('dm_control_DMP/ball_in_cup-catch-v0') + # render() can be called once in the beginning with all necessary arguments. # To turn it of again just call render() without any arguments. env.render(mode='human') # This returns the context information, not the full state observation -obs = env.reset() +observation = env.reset(seed=1) for i in range(5): action = env.action_space.sample() - obs, reward, done, info = env.step(action) + observation, reward, terminated, truncated, info = env.step(action) # Done is always True as we are working on the episode level, hence we always reset() - obs = env.reset() + observation = env.reset() ``` To show all available environments, we provide some additional convenience variables. All of them return a dictionary @@ -133,6 +134,9 @@ with two keys `DMP` and `ProMP` that store a list of available environment ids. 
```python import fancy_gym +print("All Black-box tasks:") +print(fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS) + print("Fancy Black-box tasks:") print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS) @@ -155,12 +159,17 @@ hand, the following [interface](fancy_gym/black_box/raw_interface_wrapper.py) ne from abc import abstractmethod from typing import Union, Tuple -import gym +import gymnasium as gym import numpy as np class RawInterfaceWrapper(gym.Wrapper): - + mp_config = { # Default configurations for MPs can be overwritten by defining them here. + 'ProMP': {}, + 'DMP': {}, + 'ProDMP': {}, + } + @property def context_mask(self) -> np.ndarray: """ @@ -205,32 +214,43 @@ If you created a new task wrapper, feel free to open a PR, so we can integrate i integration the task can still be used. A rough outline can be shown here, for more details we recommend having a look at the [examples](fancy_gym/examples/). +If the step-based environment is already registered with gym, you can simply do the following: + ```python -import fancy_gym +fancy_gym.upgrade( + id='custom/cool_new_env-v0', + mp_wrapper=my_custom_MPWrapper +) +``` -# Base environment name, according to structure of above example -base_env_id = "dmc:ball_in_cup-catch" +If the step-based environment is not yet registered with gym, we can add both the step-based and MP-versions via -# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInferfaceWrapper. -# You can also add other gym.Wrappers in case they are needed, -# e.g. gym.wrappers.FlattenObservation for dict observations -wrappers = [fancy_gym.dmc.suite.ball_in_cup.MPWrapper] -kwargs = {...} -env = fancy_gym.make_bb(base_env_id, wrappers=wrappers, seed=0, **kwargs) +```python +fancy_gym.register( + id='custom/cool_new_env-v0', + entry_point=my_custom_env, + mp_wrapper=my_custom_MPWrapper +) +``` + +From this point on, you can access the MP-versions of your environments via + +```python +env = gym.make('custom_ProDMP/cool_new_env-v0') rewards = 0 -obs = env.reset() +observation = env.reset() # number of samples/full trajectories (multiple environment steps) for i in range(5): ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) + observation, reward, terminated, truncated, info = env.step(ac) rewards += reward - if done: + if terminated or truncated: print(rewards) rewards = 0 - obs = env.reset() + observation = env.reset() ``` ## Icon Attribution From 7f58093c5ec0df4d70663f2e87f1c3a02d47e6b1 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 18:50:21 +0200 Subject: [PATCH 145/198] Fixed all examples --- fancy_gym/examples/example_replanning_envs.py | 17 +++++----- fancy_gym/examples/examples_dmc.py | 19 ++++++----- fancy_gym/examples/examples_general.py | 13 ++++--- fancy_gym/examples/examples_metaworld.py | 17 +++++----- .../examples/examples_movement_primitives.py | 34 +++++++++---------- fancy_gym/examples/examples_open_ai.py | 9 +++-- fancy_gym/examples/mp_params_tuning.py | 10 ++++-- fancy_gym/examples/pd_control_gain_tuning.py | 7 ++-- 8 files changed, 63 insertions(+), 59 deletions(-) diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py index 05be6ad..2c3c3f4 100644 --- a/fancy_gym/examples/example_replanning_envs.py +++ b/fancy_gym/examples/example_replanning_envs.py @@ -1,17 +1,18 @@ +import gymnasium as gym import fancy_gym -def example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False): - env = 
fancy_gym.make(env_name, seed=seed) - env.reset() +def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False): + env = gym.make(env_name) + env.reset(seed=seed) for i in range(iterations): done = False while done is False: ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) + obs, reward, terminated, truncated, info = env.step(ac) if render: env.render(mode="human") - if done: + if terminated or truncated: env.reset() env.close() del env @@ -48,8 +49,8 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True): for i in range(iteration): ac = env.action_space.sample() - obs, reward, done, info = env.step(ac) - if done: + obs, reward, terminated, truncated, info = env.step(ac) + if terminated or truncated: env.reset() env.close() @@ -58,7 +59,7 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True): if __name__ == "__main__": # run a registered replanning environment - example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False) + example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False) # run a custom replanning environment example_custom_replanning_envs(seed=0, iteration=8, render=True) diff --git a/fancy_gym/examples/examples_dmc.py b/fancy_gym/examples/examples_dmc.py index 243bd70..fbb1473 100644 --- a/fancy_gym/examples/examples_dmc.py +++ b/fancy_gym/examples/examples_dmc.py @@ -1,7 +1,8 @@ +import gymnasium as gym import fancy_gym -def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True): +def example_dmc(env_id="dm_control/fish-swim", seed=1, iterations=1000, render=True): """ Example for running a DMC based env in the step based setting. The env_id has to be specified as `domain_name:task_name` or @@ -16,9 +17,9 @@ def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True): Returns: """ - env = fancy_gym.make(env_id, seed) + env = gym.make(env_id) rewards = 0 - obs = env.reset() + obs = env.reset(seed=seed) print("observation shape:", env.observation_space.shape) print("action shape:", env.action_space.shape) @@ -56,7 +57,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): """ # Base DMC name, according to structure of above example - base_env_id = "dmc:ball_in_cup-catch" + base_env_id = "dm_control/ball_in_cup-catch" # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. # You can also add other gym.Wrappers in case they are needed. 
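One assumption these example updates rely on throughout: for MP variants a single `step()` call executes the complete trajectory and returns aggregated results. A small sketch, assuming the info keys documented in the README table (the env id is taken from the examples):

```python
import gymnasium as gym
import fancy_gym  # noqa: F401  (importing registers the fancy_ProMP/* ids)

env = gym.make('fancy_ProMP/Reacher5d-v0')
obs, info = env.reset(seed=1)       # returns the context observation
action = env.action_space.sample()  # MP parameters, not a raw torque command
obs, reward, terminated, truncated, info = env.step(action)  # full rollout

# reward is the cumulative episode reward; per-step data is aggregated in info,
# e.g. info['trajectory_length'] according to the README table.
print(reward, info.get('trajectory_length'))
```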
@@ -65,8 +66,8 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'} phase_generator_kwargs = {'phase_generator_type': 'linear'} controller_kwargs = {'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1,} + "p_gains": 1.0, + "d_gains": 0.1, } basis_generator_kwargs = {'basis_generator_type': 'zero_rbf', 'num_basis': 5, 'num_basis_zero_start': 1 @@ -123,14 +124,14 @@ if __name__ == '__main__': render = True # # Standard DMC Suite tasks - example_dmc("dmc:fish-swim", seed=10, iterations=1000, render=render) + example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) # # # Manipulation tasks # # Disclaimer: The vision versions are currently not integrated and yield an error - example_dmc("dmc:manipulation-reach_site_features", seed=10, iterations=250, render=render) + example_dmc("dm_control/manipulation-reach_site_features", seed=10, iterations=250, render=render) # # # Gym + DMC hybrid task provided in the MP framework - example_dmc("dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render) + example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render) # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is # already registered above diff --git a/fancy_gym/examples/examples_general.py b/fancy_gym/examples/examples_general.py index 383c4cf..e341bfe 100644 --- a/fancy_gym/examples/examples_general.py +++ b/fancy_gym/examples/examples_general.py @@ -21,9 +21,9 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True): """ - env = fancy_gym.make(env_id, seed) + env = gym.make(env_id) rewards = 0 - obs = env.reset() + obs = env.reset(seed=seed) print("Observation shape: ", env.observation_space.shape) print("Action shape: ", env.action_space.shape) @@ -41,7 +41,7 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True): obs = env.reset() -def example_async(env_id="HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800): +def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800): """ Example for running any env in a vectorized multiprocessing setting to generate more samples faster. This also includes DMC and DMP environments when leveraging our custom make_env function. @@ -93,11 +93,10 @@ if __name__ == '__main__': example_general("Pendulum-v1", seed=10, iterations=200, render=render) # Mujoco task from framework - example_general("Reacher5d-v0", seed=10, iterations=200, render=render) + example_general("fancy/Reacher5d-v0", seed=10, iterations=200, render=render) # # OpenAI Mujoco task example_general("HalfCheetah-v2", seed=10, render=render) # Vectorized multiprocessing environments # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) - diff --git a/fancy_gym/examples/examples_metaworld.py b/fancy_gym/examples/examples_metaworld.py index 0c38bff..7919b71 100644 --- a/fancy_gym/examples/examples_metaworld.py +++ b/fancy_gym/examples/examples_metaworld.py @@ -1,7 +1,8 @@ +import gymnasium as gym import fancy_gym -def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): +def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True): """ Example for running a MetaWorld based env in the step based setting. The env_id has to be specified as `task_name-v2`. 
V1 versions are not supported and we always @@ -17,9 +18,9 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): Returns: """ - env = fancy_gym.make(env_id, seed) + env = gym.make(env_id) rewards = 0 - obs = env.reset() + obs = env.reset(seed=seed) print("observation shape:", env.observation_space.shape) print("action shape:", env.action_space.shape) @@ -40,7 +41,7 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): del env -def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): +def example_custom_meta_and_mp(seed=1, iterations=1, render=True): """ Example for running a custom movement primitive based environments. Our already registered environments follow the same structure. @@ -58,7 +59,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): """ # Base MetaWorld name, according to structure of above example - base_env_id = "metaworld:button-press-v2" + base_env_id = "metaworld/button-press-v2" # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. # You can also add other gym.Wrappers in case they are needed. @@ -124,10 +125,10 @@ if __name__ == '__main__': render = False # # Standard Meta world tasks - example_dmc("metaworld:button-press-v2", seed=10, iterations=500, render=render) + example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render) # # MP + MetaWorld hybrid task provided in the our framework - example_dmc("ButtonPressProMP-v2", seed=10, iterations=1, render=render) + example_meta("metaworld_ProMP/ButtonPress-v2", seed=10, iterations=1, render=render) # # # Custom MetaWorld task - example_custom_dmc_and_mp(seed=10, iterations=1, render=render) + example_custom_meta_and_mp(seed=10, iterations=1, render=render) diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 5913774..317a103 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -1,7 +1,8 @@ +import gymnasium as gym import fancy_gym -def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True): +def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, render=True): """ Example for running a black box based environment, which is already registered Args: @@ -15,11 +16,11 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True """ # Equivalent to gym, we have a make function which can be used to create environments. # It takes care of seeding and enables the use of a variety of external environments using the gym interface. 
- env = fancy_gym.make(env_name, seed) + env = gym.make(env_name) returns = 0 # env.render(mode=None) - obs = env.reset() + obs = env.reset(seed=seed) # number of samples/full trajectories (multiple environment steps) for i in range(iterations): @@ -50,7 +51,7 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, render=True): obs = env.reset() -def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render=True): +def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): """ Example for running a movement primitive based environment, which is already registered Args: @@ -62,12 +63,9 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render Returns: """ - # Changing the arguments of the black box env is possible by providing them to gym as with all kwargs. + # Changing the arguments of the black box env is possible by providing them to gym through mp_config_override. # E.g. here for way too many basis functions - env = fancy_gym.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000}) - # env = fancy_gym.make(env_name, seed) - # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}}) - # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}}) + env = gym.make(env_name, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}) returns = 0 obs = env.reset() @@ -106,7 +104,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): """ - base_env_id = "Reacher5d-v0" + base_env_id = "fancy/Reacher5d-v0" # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. # You can also add other gym.Wrappers in case they are needed. 
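Since `mp_config_override` replaces the old kwargs forwarding removed above, here is a standalone sketch of the pattern; the env id comes from the examples, the override value is arbitrary:

```python
import gymnasium as gym
import fancy_gym  # noqa: F401  (importing registers the fancy_ProMP/* ids)

# Only the overridden leaf changes; everything else falls back to the
# wrapper's mp_config defaults, merged via the registry's nested update.
env = gym.make(
    'fancy_ProMP/Reacher5d-v0',
    mp_config_override={'basis_generator_kwargs': {'num_basis': 10}},
)
print(env.action_space.shape)  # more basis functions mean a larger MP weight vector
```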
@@ -157,20 +155,20 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': render = False # DMP - example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) + example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render) # ProMP - example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) - example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) - example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render) + example_mp("fancy_ProMP/HoleReacher-v0", seed=10, iterations=5, render=render) + example_mp("fancy_ProMP/BoxPushingTemporalSparse-v0", seed=10, iterations=1, render=render) + example_mp("fancy_ProMP/TableTennis4D-v0", seed=10, iterations=20, render=render) # ProDMP with Replanning - example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) - example_mp("TableTennis4DReplanProDMP-v0", seed=10, iterations=20, render=render) - example_mp("TableTennisWindReplanProDMP-v0", seed=10, iterations=20, render=render) + example_mp("fancy_ProDMP/BoxPushingDenseReplan-v0", seed=10, iterations=4, render=render) + example_mp("fancy_ProDMP/TableTennis4DReplan-v0", seed=10, iterations=20, render=render) + example_mp("fancy_ProDMP/TableTennisWindReplan-v0", seed=10, iterations=20, render=render) # Altered basis functions - obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) + obs1 = example_custom_mp("fancy_ProMP/Reacher5d-v0", seed=10, iterations=1, render=render) # Custom MP example_fully_custom_mp(seed=10, iterations=1, render=render) diff --git a/fancy_gym/examples/examples_open_ai.py b/fancy_gym/examples/examples_open_ai.py index a79a44b..07f1719 100644 --- a/fancy_gym/examples/examples_open_ai.py +++ b/fancy_gym/examples/examples_open_ai.py @@ -1,3 +1,4 @@ +import gymnasium as gym import fancy_gym @@ -12,11 +13,10 @@ def example_mp(env_name, seed=1, render=True): Returns: """ - # While in this case gym.make() is possible to use as well, we recommend our custom make env function. 
- env = fancy_gym.make(env_name, seed) + env = gym.make(env_name) returns = 0 - obs = env.reset() + obs = env.reset(seed=seed) # number of samples/full trajectories (multiple environment steps) for i in range(10): if render and i % 2 == 0: @@ -33,5 +33,4 @@ def example_mp(env_name, seed=1, render=True): if __name__ == '__main__': - example_mp("ReacherProMP-v2") - + example_mp("gym_ProMP/Reacher-v2") diff --git a/fancy_gym/examples/mp_params_tuning.py b/fancy_gym/examples/mp_params_tuning.py index 644d86b..71a579a 100644 --- a/fancy_gym/examples/mp_params_tuning.py +++ b/fancy_gym/examples/mp_params_tuning.py @@ -1,10 +1,14 @@ +import gymnasium as gym import fancy_gym + def compare_bases_shape(env1_id, env2_id): - env1 = fancy_gym.make(env1_id, seed=0) + env1 = gym.make(env1_id) env1.traj_gen.show_scaled_basis(plot=True) - env2 = fancy_gym.make(env2_id, seed=0) + env2 = gym.make(env2_id) env2.traj_gen.show_scaled_basis(plot=True) return + + if __name__ == '__main__': - compare_bases_shape("TableTennis4DProDMP-v0", "TableTennis4DProMP-v0") \ No newline at end of file + compare_bases_shape("fancy_ProDMP/TableTennis4D-v0", "fancy_ProMP/TableTennis4D-v0") diff --git a/fancy_gym/examples/pd_control_gain_tuning.py b/fancy_gym/examples/pd_control_gain_tuning.py index 4cfae39..3f8634c 100644 --- a/fancy_gym/examples/pd_control_gain_tuning.py +++ b/fancy_gym/examples/pd_control_gain_tuning.py @@ -3,19 +3,20 @@ from collections import OrderedDict import numpy as np from matplotlib import pyplot as plt +import gymnasium as gym import fancy_gym # This might work for some environments, however, please verify either way the correct trajectory information # for your environment are extracted below SEED = 1 -env_id = "Reacher5dProMP-v0" +env_id = "fancy_ProMP/Reacher5d-v0" -env = fancy_gym.make(env_id, seed=SEED, controller_kwargs={'p_gains': 0.05, 'd_gains': 0.05}).env +env = fancy_gym.make(env_id, mp_config_override={'controller_kwargs': {'p_gains': 0.05, 'd_gains': 0.05}}).env env.action_space.seed(SEED) # Plot difference between real trajectory and target MP trajectory -env.reset() +env.reset(seed=SEED) w = env.action_space.sample() pos, vel = env.get_trajectory(w) From a76967a49872e4425fc2afc221b2304a33356fe4 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 19:05:08 +0200 Subject: [PATCH 146/198] Forgot to port gym_Reacher over --- fancy_gym/open_ai/__init__.py | 41 +++++------------------------------ 1 file changed, 6 insertions(+), 35 deletions(-) diff --git a/fancy_gym/open_ai/__init__.py b/fancy_gym/open_ai/__init__.py index e4e80ee..c8422d2 100644 --- a/fancy_gym/open_ai/__init__.py +++ b/fancy_gym/open_ai/__init__.py @@ -1,45 +1,16 @@ from copy import deepcopy -from gymnasium import register +from ..envs.registry import register, upgrade from . 
import mujoco from .deprecated_needs_gym_robotics import robotics -ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} - -DEFAULT_BB_DICT_ProMP = { - "name": 'EnvName', - "wrappers": [], - "trajectory_generator_kwargs": { - 'trajectory_generator_type': 'promp' - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear' - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": 1.0, - "d_gains": 0.1, - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } -} - -kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) -kwargs_dict_reacher_promp['controller_kwargs']['p_gains'] = 0.6 -kwargs_dict_reacher_promp['controller_kwargs']['d_gains'] = 0.075 -kwargs_dict_reacher_promp['basis_generator_kwargs']['num_basis'] = 6 -kwargs_dict_reacher_promp['name'] = "Reacher-v2" -kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher_v2.MPWrapper) -register( - id='ReacherProMP-v2', - entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', - kwargs=kwargs_dict_reacher_promp +upgrade( + id='Reacher-v2', + mp_wrapper=mujoco.reacher_v2.MPWrapper, + add_mp_types=['ProMP'], ) -ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ReacherProMP-v2") + """ The Fetch environments are not supported by gym anymore. A new repository (gym_robotics) is supporting the environments. However, the usage and so on needs to be checked From 89bd6781c78a1793f54d4a32acc9cc14eaa0c87f Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 19:05:25 +0200 Subject: [PATCH 147/198] mp_config for vanilla gym Reacher --- .../open_ai/mujoco/reacher_v2/mp_wrapper.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fancy_gym/open_ai/mujoco/reacher_v2/mp_wrapper.py b/fancy_gym/open_ai/mujoco/reacher_v2/mp_wrapper.py index b2fa04c..3000353 100644 --- a/fancy_gym/open_ai/mujoco/reacher_v2/mp_wrapper.py +++ b/fancy_gym/open_ai/mujoco/reacher_v2/mp_wrapper.py @@ -6,6 +6,28 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper class MPWrapper(RawInterfaceWrapper): + mp_config = { + 'ProMP': { + "trajectory_generator_kwargs": { + 'trajectory_generator_type': 'promp' + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear' + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": 0.6, + "d_gains": 0.075, + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 6, + 'num_basis_zero_start': 1 + } + }, + 'DMP': {}, + 'ProDMP': {}, + } @property def current_vel(self) -> Union[float, int, np.ndarray]: From 38358c183ea0e0de7e50488f036fbd6cf931be95 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 17 Sep 2023 19:05:42 +0200 Subject: [PATCH 148/198] Upgrading metaworld --- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 0f8fd5d..af4ff5d 100644 --- a/setup.py +++ b/setup.py @@ -7,9 +7,7 @@ from setuptools import setup, find_packages # Environment-specific dependencies for dmc and metaworld extras = { 'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'], - 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@43abf981b97c01669af898833a740fb63605b8ac#egg=metaworld', - 'mujoco-py<2.2,>=2.1', 'gym>=0.15.4' - ], + 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'], 'box2d': ['gymnasium[box2d]>=0.26.0'], 'mujoco': ['mujoco==2.3.3', 
'gymnasium[mujoco]>0.26.0'], } From c933a7588003a05df9104b4f0fd672081da47097 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 17:41:10 +0200 Subject: [PATCH 149/198] More README improvements

--- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md index 9d73ec4..7a19bab 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,10 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maint **Key Features**: -- **New Challenging Environments**: We've introduced several new environments that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. -- **Advanced Movement Primitives**: `fancy_gym` supports sophisticated movement primitives, including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP). -- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/) and makes it easy to use them with movement primitives. +- **New Challenging Environments**: We've introduced several new environments (Panda Box Pushing, Table Tennis, etc.) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. +- **Support for Movement Primitives**: `fancy_gym` supports a range of movement primitives (MPs), including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP). - **Upgrade to Movement Primitives**: With our framework, it's straightforward to transform standard Gymnasium environments into environments that support movement primitives. +- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/), whether you want to use them in the normal step-based or an MP-based setting. - **Contribute Your Own Environments**: If you're inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://www.gymlibrary.dev/content/environment_creation/) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`. ## Movement Primitive Environments (Episode-Based/Black-Box Environments) From 4c8dda3b600405018b563bcd72bc4812d19dc012 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 17:43:15 +0200 Subject: [PATCH 150/198] Fix: README code imported gym instead of gymnasium

--- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md index 7a19bab..ca4f102 100644 --- a/README.md +++ b/README.md @@ -66,8 +66,8 @@ Regular step based environments added by Fancy Gym are added into the ```fancy/` | ------------------------------------------------------------ | ```python +import gymnasium as gym import fancy_gym -import gym env = gym.make('fancy/Reacher5d-v0') observation = env.reset(seed=1) @@ -107,9 +107,10 @@ Just keep in mind, calling `step()` executes a full trajectory. > Feel free to try it and open an issue with any problems that occur. 
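The "step-based or MP-based" choice mentioned in the key-features list above comes down to a naming convention. A hedged sketch with ids as used in this series' examples (exact casing and version suffixes follow the respective registration):

```python
import gymnasium as gym
import fancy_gym  # noqa: F401  (importing registers the wrapped suites)

# Step-based access to an external benchmark suite ...
step_env = gym.make('metaworld/button-press-v2')
# ... and the episodic ProMP variant of the same task
mp_env = gym.make('metaworld_ProMP/ButtonPress-v2')
```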
```python +import gymnasium as gym import fancy_gym -env = fancy_gym.make('fancy_ProMP/Reacher5d-v0') +env = gym.make('fancy_ProMP/Reacher5d-v0') # or env = fancy_gym.make('metaworld_ProDMP/reach-v2') # or env = fancy_gym.make('dm_control_DMP/ball_in_cup-catch-v0') From 513bf2361efaf5bcb0968dafba3a3ee094ff8751 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 18:37:19 +0200 Subject: [PATCH 151/198] Fix: Deprecated beerpong env still referenced old mujoco binding --- .../mujoco/beerpong/deprecated/beerpong.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py b/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py index 2fc98ba..93bba06 100644 --- a/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py +++ b/fancy_gym/envs/mujoco/beerpong/deprecated/beerpong.py @@ -1,6 +1,5 @@ import os -import mujoco_py.builder import numpy as np from gymnasium import utils from gymnasium.envs.mujoco import MujocoEnv @@ -74,16 +73,13 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle): crash = False for _ in range(self.repeat_action): applied_action = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0] - try: - self.do_simulation(applied_action, self.frame_skip) - self.reward_function.initialize(self) - # self.reward_function.check_contacts(self.sim) # I assume this is not important? - if self._steps < self.release_step: - self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.site_id("init_ball_pos"), :].copy() - self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.site_id("init_ball_pos"), :].copy() - crash = False - except mujoco_py.builder.MujocoException: - crash = True + self.do_simulation(applied_action, self.frame_skip) + self.reward_function.initialize(self) + # self.reward_function.check_contacts(self.sim) # I assume this is not important? 
+        if self._steps < self.release_step: +            self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.site_id("init_ball_pos"), :].copy() +            self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.site_id("init_ball_pos"), :].copy() +        crash = False ob = self._get_obs() From 592f09789d52f3857d4b58503b9a0868e45fc598 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 18:37:46 +0200 Subject: [PATCH 152/198] Future proofing: Accessing attributes of other wrappers needs to be explicit in future gymnasium versions --- fancy_gym/black_box/black_box_wrapper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index a097b09..131c229 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -99,8 +99,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): init_time = np.array( 0 if not self.do_replanning else self.current_traj_steps * self.dt) - condition_pos = self.condition_pos if self.condition_pos is not None else self.current_pos - condition_vel = self.condition_vel if self.condition_vel is not None else self.current_vel + condition_pos = self.condition_pos if self.condition_pos is not None else self.env.get_wrapper_attr('current_pos') + condition_vel = self.condition_vel if self.condition_vel is not None else self.env.get_wrapper_attr('current_vel') self.traj_gen.set_initial_conditions( init_time, condition_pos, condition_vel) @@ -165,7 +165,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): step_action = self.tracking_controller.get_action( - pos, vel, self.current_pos, self.current_vel) + pos, vel, self.env.get_wrapper_attr('current_pos'), self.env.get_wrapper_attr('current_vel')) c_action = np.clip( step_action, self.env.action_space.low, self.env.action_space.high) obs, c_reward, terminated, truncated, info = self.env.step( @@ -184,7 +184,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if terminated or truncated or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, t + 1 + self.current_traj_steps) and self.plan_steps < self.max_planning_times): + if terminated or truncated or (self.replanning_schedule(self.env.get_wrapper_attr('current_pos'), self.env.get_wrapper_attr('current_vel'), obs, c_action, t + 1 + self.current_traj_steps) and self.plan_steps < self.max_planning_times): if self.condition_on_desired: self.condition_pos = pos From 20510d8f68a1c5d3d263ce40c5f01432734feff8 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 18:38:32 +0200 Subject: [PATCH 153/198] Ported MetaWorld wrapper to new mujoco binding --- fancy_gym/meta/base_metaworld_mp_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/meta/base_metaworld_mp_wrapper.py b/fancy_gym/meta/base_metaworld_mp_wrapper.py index 12338fd..03b78dc 100644 --- a/fancy_gym/meta/base_metaworld_mp_wrapper.py +++ b/fancy_gym/meta/base_metaworld_mp_wrapper.py @@ -59,7 +59,7 @@ class BaseMetaworldMPWrapper(RawInterfaceWrapper): @property def current_pos(self) -> Union[float, int, np.ndarray]: - r_close = self.env.data.get_joint_qpos('r_close') + r_close = self.env.data.joint('r_close').qpos return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close]) @property From 9ce040d1101f2e3c3bcfb5aa5fe5a8c518f51ae9 Mon Sep 17 00:00:00 2001 From: Dominik
Roth Date: Mon, 18 Sep 2023 18:40:10 +0200 Subject: [PATCH 154/198] Porting Metaworld Bug Mitigations --- fancy_gym/meta/metaworld_adapter.py | 43 ++++++++++------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py index ed2b5b6..71e8ef0 100644 --- a/fancy_gym/meta/metaworld_adapter.py +++ b/fancy_gym/meta/metaworld_adapter.py @@ -1,3 +1,4 @@ +import random from typing import Iterable, Type, Union, Optional import numpy as np @@ -13,12 +14,10 @@ from fancy_gym.utils.env_compatibility import EnvCompatibility try: import metaworld except Exception: - # catch Exception as Import error does not catch missing mujoco-py - # TODO: Print info? - pass + print('[FANCY GYM] Metaworld not available') -class MujocoMapSpacesWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): +class FixMetaworldHasIncorrectObsSpaceWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): def __init__(self, env: gym.Env): gym.utils.RecordConstructorArgs.__init__(self) gym.Wrapper.__init__(self, env) @@ -29,11 +28,11 @@ class MujocoMapSpacesWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): Obs_Space_Class = getattr(gym.spaces, str(eos.__class__).split("'")[1].split('.')[-1]) Act_Space_Class = getattr(gym.spaces, str(eas.__class__).split("'")[1].split('.')[-1]) - self.observation_space = Obs_Space_Class(low=eos.low, high=eos.high, dtype=eos.dtype) + self.observation_space = Obs_Space_Class(low=eos.low-np.inf, high=eos.high+np.inf, dtype=eos.dtype) self.action_space = Act_Space_Class(low=eas.low, high=eas.high, dtype=eas.dtype) -class MitigateMetaworldBug(gym.Wrapper, gym.utils.RecordConstructorArgs): +class FixMetaworldIncorrectResetPathLengthWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): def __init__(self, env: gym.Env): gym.utils.RecordConstructorArgs.__init__(self) gym.Wrapper.__init__(self, env) @@ -50,13 +49,12 @@ class MitigateMetaworldBug(gym.Wrapper, gym.utils.RecordConstructorArgs): return ret -class MetaworldResetFix(gym.Wrapper, gym.utils.RecordConstructorArgs): +class FixMetaworldIgnoresSeedOnResetWrapper(gym.Wrapper, gym.utils.RecordConstructorArgs): def __init__(self, env: gym.Env): gym.utils.RecordConstructorArgs.__init__(self) gym.Wrapper.__init__(self, env) def reset(self, **kwargs): - self.env.reset(**kwargs) if 'seed' in kwargs: self.env.seed(kwargs['seed']) return self.env.reset(**kwargs) @@ -66,32 +64,19 @@ def make_metaworld(underlying_id: str, seed: int = 1, render_mode: Optional[str] if underlying_id not in metaworld.ML1.ENV_NAMES: raise ValueError(f'Specified environment "{underlying_id}" not present in metaworld ML1.') - _env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[underlying_id + "-goal-observable"](seed=seed, **kwargs) + env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[underlying_id + "-goal-observable"](seed=seed, **kwargs) # setting this avoids generating the same initialization after each reset - _env._freeze_rand_vec = False + env._freeze_rand_vec = False # New argument to use global seeding - _env.seeded_rand_vec = True + env.seeded_rand_vec = True - max_episode_steps = _env.max_path_length - - # TODO remove this as soon as there is support for the new API - _env = EnvCompatibility(_env, render_mode) - env = _env - - # gym_id = '_metaworld_compat_' + uuid.uuid4().hex + '-v0' - # gym_register( - # id=gym_id, - # entry_point=lambda: _env, - # max_episode_steps=max_episode_steps, - # ) - - # TODO enable checker when the incorrect dtype of obs and observation space
are fixed by metaworld - # env = gym.make(gym_id, disable_env_checker=True) - env = MujocoMapSpacesWrapper(env) # TODO remove, when this has been fixed upstream - env = MitigateMetaworldBug(env) - env = MetaworldResetFix(env) + env = FixMetaworldHasIncorrectObsSpaceWrapper(env) + # TODO remove, when this has been fixed upstream + # env = FixMetaworldIncorrectResetPathLengthWrapper(env) + # TODO remove, when this has been fixed upstream + env = FixMetaworldIgnoresSeedOnResetWrapper(env) return env From ddd11c178325f91983fea491f54cce47f25af51d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 18:40:47 +0200 Subject: [PATCH 155/198] Skip Tests: Metaworld does currently not correctly implement seeding --- test/test_metaworld_envs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py index a4fea03..90d98a3 100644 --- a/test/test_metaworld_envs.py +++ b/test/test_metaworld_envs.py @@ -18,6 +18,7 @@ def test_step_metaworld_functionality(env_id: str): run_env(env_id) +@pytest.mark.skip(reason="Seeding does not correctly work on current Metaworld.") @pytest.mark.parametrize('env_id', METAWORLD_IDS) def test_step_metaworld_determinism(env_id: str): """Tests that for step environments identical seeds produce identical trajectories.""" @@ -30,6 +31,7 @@ def test_bb_metaworld_functionality(env_id: str): run_env(env_id) +@pytest.mark.skip(reason="Seeding does not correctly work on current Metaworld.") @pytest.mark.parametrize('env_id', METAWORLD_MP_IDS) def test_bb_metaworld_determinism(env_id: str): """Tests that for black box environment identical seeds produce identical trajectories.""" From 54e8ec40155416880a1d143295b5ea0a649392a9 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 18:42:34 +0200 Subject: [PATCH 156/198] Print warning on seeded metaworld reset --- fancy_gym/meta/metaworld_adapter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fancy_gym/meta/metaworld_adapter.py b/fancy_gym/meta/metaworld_adapter.py index 71e8ef0..21dfed7 100644 --- a/fancy_gym/meta/metaworld_adapter.py +++ b/fancy_gym/meta/metaworld_adapter.py @@ -55,6 +55,7 @@ class FixMetaworldIgnoresSeedOnResetWrapper(gym.Wrapper, gym.utils.RecordConstru gym.Wrapper.__init__(self, env) def reset(self, **kwargs): + print('[!] You just called .reset on a Metaworld env and supplied a seed. Metaworld currently does not correctly implement seeding.
Do not rely on deterministic behavior.') if 'seed' in kwargs: self.env.seed(kwargs['seed']) return self.env.reset(**kwargs) From 5488ca6dda4ff2ffba503d2af83033de1f3377d6 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 19:17:43 +0200 Subject: [PATCH 157/198] Fixed merge error --- fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 2a90cba..932e3df 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -36,7 +36,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): "render_fps": 50 } - def __init__(self, frame_skip: int = 10, random_init): + def __init__(self, frame_skip: int = 10, random_init: bool = False): utils.EzPickle.__init__(**locals()) self._steps = 0 self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.]) From 725e6c8fe17a1b9a5dcec42a7abebf25c587f82e Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 19:18:26 +0200 Subject: [PATCH 158/198] Fixed wrong env name (still contained mp info) --- fancy_gym/dmc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/dmc/__init__.py b/fancy_gym/dmc/__init__.py index 6e9f38c..7fcebba 100644 --- a/fancy_gym/dmc/__init__.py +++ b/fancy_gym/dmc/__init__.py @@ -32,7 +32,7 @@ register( _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"] for _task in _dmc_cartpole_tasks: register( - id=f'dm_control/cartpole-{_task}_dmp-v0', + id=f'dm_control/cartpole-{_task}-v0', register_step_based=False, mp_wrapper=suite.cartpole.MPWrapper, add_mp_types=['DMP', 'ProMP'], From 659a717fbd8f38856110fde11dbf88038682c358 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 19:31:17 +0200 Subject: [PATCH 159/198] Removed old files --- .../hopper_jump_on_box.before_convert.xml | 51 ------- .../assets/hopper_throw.before_convert.xml | 56 -------- .../hopper_throw_in_basket.before_convert.xml | 132 ------------------ .../assets/walker2d.before_convert.xml | 64 --------- 4 files changed, 303 deletions(-) delete mode 100644 fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml delete mode 100644 fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml delete mode 100644 fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml delete mode 100644 fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml diff --git a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml b/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml deleted file mode 100644 index 69d78ff..0000000 --- a/fancy_gym/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.before_convert.xml +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml deleted file mode 100644 index 1c39602..0000000 --- a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw.before_convert.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - diff --git a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml 
b/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml deleted file mode 100644 index b4f0342..0000000 --- a/fancy_gym/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.before_convert.xml +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml b/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml deleted file mode 100644 index f3bcbd1..0000000 --- a/fancy_gym/envs/mujoco/walker_2d_jump/assets/walker2d.before_convert.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - - - - - From cf886b2b878bc224178c8a03f8b2ac3dd4048cc9 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 19:42:58 +0200 Subject: [PATCH 160/198] New optional dependency: jax (Required, so that we can make all gymnasium envs during our tests) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index af4ff5d..524f2d1 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ extras = { 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'], 'box2d': ['gymnasium[box2d]>=0.26.0'], 'mujoco': ['mujoco==2.3.3', 'gymnasium[mujoco]>0.26.0'], + 'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"], } # All dependencies From c5b051da20e424f66d8e14b1909f77ea1a6ced58 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 19:47:43 +0200 Subject: [PATCH 161/198] Fixed typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca4f102..55fe81a 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maint - **New Challenging Environments**: We've introduced several new environments (Panda Box Pushing, Table Tennis, etc.) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. - **Support for Movement Primitives**: `fancy_gym` supports a range of movement primitives (MPs), including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP). - **Upgrade to Movement Primitives**: With our framework, it's straightforward to transform standard Gymnasium environments into environments that support movement primitives. -- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/), wether you want to use them in the normal step-based or a MP-based setting. +- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/), whether you want to use them in the normal step-based or a MP-based setting. - **Contribute Your Own Environments**: If you're inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://www.gymlibrary.dev/content/environment_creation/) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`. 
## Movement Primitive Environments (Episode-Based/Black-Box Environments) From 57c4a940b8a23fb23760463656639b652df7faec Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 19:50:23 +0200 Subject: [PATCH 162/198] Another typo gone from the README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 55fe81a..7527ce8 100644 --- a/README.md +++ b/README.md @@ -165,12 +165,12 @@ import numpy as np class RawInterfaceWrapper(gym.Wrapper): - mp_config = { # Default configurations for MPs can be ovveritten by defining them here. + mp_config = { # Default configurations for MPs can be overwritten by defining attributes here. 'ProMP': {}, 'DMP': {}, 'ProDMP': {}, - } - + } + @property def context_mask(self) -> np.ndarray: """ From 59ee559f8debd3d52cd05d081f2bfd2dd54ca9c0 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 18 Sep 2023 19:52:08 +0200 Subject: [PATCH 163/198] README: reset will return (obs, info) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7527ce8..76d4394 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ env = gym.make('fancy_ProMP/Reacher5d-v0') env.render(mode='human') # This returns the context information, not the full state observation -observation = env.reset(seed=1) +observation, info = env.reset(seed=1) for i in range(5): action = env.action_space.sample() @@ -240,7 +240,7 @@ From this point on, you can access MP-version of your environments via env = gym.make('custom_ProDMP/cool_new_env-v0') rewards = 0 -observation = env.reset() +observation, info = env.reset() # number of samples/full trajectories (multiple environment steps) for i in range(5): From 842ab3fbc96886b45b8e5e5d0ca1eec67bef4956 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 13:42:03 +0200 Subject: [PATCH 164/198] Added mujoco-legacy (mujoco-py) as an optional dependency required to pass all tests, since some OpenAI envs still use old mujoco --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 524f2d1..a0ee43c 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ extras = { 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'], 'box2d': ['gymnasium[box2d]>=0.26.0'], 'mujoco': ['mujoco==2.3.3', 'gymnasium[mujoco]>0.26.0'], + 'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'], 'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"], } From e077a8f13d85a1d7fac2c108d9f8d1550b42d3bc Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 16:43:07 +0200 Subject: [PATCH 165/198] Updated README --- README.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 76d4394..c7c1012 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maint - **New Challenging Environments**: We've introduced several new environments (Panda Box Pushing, Table Tennis, etc.) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. - **Support for Movement Primitives**: `fancy_gym` supports a range of movement primitives (MPs), including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP).
- **Upgrade to Movement Primitives**: With our framework, it's straightforward to transform standard Gymnasium environments into environments that support movement primitives. -- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/), whether you want to use them in the normal step-based or a MP-based setting. -- **Contribute Your Own Environments**: If you're inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://www.gymlibrary.dev/content/environment_creation/) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`. +- **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/), whether you want to use them in the regular step-based setting or using MPs. +- **Contribute Your Own Environments**: If you're inspired to create custom gym environments, both step-based and with movement primitives, this [guide](https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/) will assist you. We encourage and highly appreciate submissions via PRs to integrate these environments into `fancy_gym`. ## Movement Primitive Environments (Episode-Based/Black-Box Environments) @@ -48,10 +48,11 @@ cd fancy_gym pip install -e . ``` -We have a few optional dependencies. CHeck them out in the setup.py or just install all of them via +We have a few optional dependencies. If you also want to install those use ```bash -pip install -e '.[all]' +pip install -e '.[all]' # to install all optional dependencies +pip install -e '.[dmc,metaworld,box2d,mujoco,mujoco-legacy,jax,testing]' # or choose only those you want ``` @@ -60,9 +61,9 @@ pip install -e '.[all]' We will only show the basics here and prepared [multiple examples](fancy_gym/examples/) for a more detailed look. ### Step-Based Environments -Regular step based environments added by Fancy Gym are added into the ```fancy/``` namespace. +Regular step based environments added by Fancy Gym are added into the `fancy/` namespace. -| :exclamation: Legacy versions of Fancy Gym used ```fancy_gym.make(...)```. This is no longer supported and will raise an Exception on new versions. | +| :exclamation: Legacy versions of Fancy Gym used `fancy_gym.make(...)`. This is no longer supported and will raise an Exception on new versions. | | ------------------------------------------------------------ | ```python @@ -70,6 +71,9 @@ import gymnasium as gym import fancy_gym env = gym.make('fancy/Reacher5d-v0') +# or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS +# or env = gym.make('dm_control/ball_in_cup-catch-v0') +# or env = gym.make('Hopper-v4') observation = env.reset(seed=1) for i in range(1000): @@ -96,7 +100,7 @@ All environments provide by default the cumulative episode reward, this can howe `trajectory_length`| Total number of environment interactions | Always `other`| All other information from the underlying environment are returned as a list with length `trajectory_length` maintaining the original key. 
In case some information are not provided every time step, the missing values are filled with `None`. | Always -Existing MP tasks can be created the same way as above. The namespace of a MP-variant of an environment is given by ```_/```. +Existing MP tasks can be created the same way as above. The namespace of a MP-variant of an environment is given by `_/`. Just keep in mind, calling `step()` executes a full trajectory. > **Note:** @@ -111,8 +115,9 @@ import gymnasium as gym import fancy_gym env = gym.make('fancy_ProMP/Reacher5d-v0') -# or env = fancy_gym.make('metaworld_ProDMP/reach-v2') -# or env = fancy_gym.make('dm_control_DMP/ball_in_cup-catch-v0') +# or env = gym.make('metaworld_ProDMP/reach-v2') +# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0') +# or env = gym.make('gym_ProDMP/Hopper-v4') # mp versions of envs added directly by gymnasium are in the gym_ NS # render() can be called once in the beginning with all necessary arguments. # To turn it of again just call render() without any arguments. From d8315c8d97c10f3719e5638803590057bafa5a66 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 17:00:49 +0200 Subject: [PATCH 166/198] Fixed typo in NS --- fancy_gym/__init__.py | 1 + fancy_gym/envs/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py index c406c5b..c646aef 100644 --- a/fancy_gym/__init__.py +++ b/fancy_gym/__init__.py @@ -9,5 +9,6 @@ ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_ ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['metaworld'] ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['gym'] + def make(*args, **kwargs): raise Exception('As part of the refactor of Fancy Gym and upgrade to gymnasium the use of fancy_gym.make has been discontinued. Regular gym.make should be used instead. For more details check out the github README. 
If your codebase was build for older versions of Fancy Gym and relies on the old behavior and dependency versions, please check out the legacy branch.') diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 10ed0b0..d416dbb 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -213,7 +213,7 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, ) register( - id='BoxPushingRandomInit{}-v0'.format(reward_type), + id='fancy/BoxPushingRandomInit{}-v0'.format(reward_type), entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, kwargs={"random_init": True} From 49658515b0d28b5ad3c418032fa42b825b32ddb4 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 17:01:00 +0200 Subject: [PATCH 167/198] Extended README --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c7c1012..ed2c093 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ import fancy_gym env = gym.make('fancy/Reacher5d-v0') # or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS # or env = gym.make('dm_control/ball_in_cup-catch-v0') -# or env = gym.make('Hopper-v4') +# or env = gym.make('Reacher-v2') observation = env.reset(seed=1) for i in range(1000): @@ -117,7 +117,7 @@ import fancy_gym env = gym.make('fancy_ProMP/Reacher5d-v0') # or env = gym.make('metaworld_ProDMP/reach-v2') # or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0') -# or env = gym.make('gym_ProDMP/Hopper-v4') # mp versions of envs added directly by gymnasium are in the gym_ NS +# or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_ NS # render() can be called once in the beginning with all necessary arguments. # To turn it of again just call render() without any arguments. @@ -130,12 +130,12 @@ for i in range(5): action = env.action_space.sample() observation, reward, terminated, truncated, info = env.step(action) - # Done is always True as we are working on the episode level, hence we always reset() + # terminated or truncated is always True as we are working on the episode level, hence we always reset() observation = env.reset() ``` To show all available environments, we provide some additional convenience variables. All of them return a dictionary -with two keys `DMP` and `ProMP` that store a list of available environment ids. +with the keys `DMP`, `ProMP`, `ProDMP` and `all` that store a list of available environment ids. 
```python import fancy_gym @@ -154,6 +154,9 @@ print(fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS) print("MetaWorld Black-box tasks:") print(fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS) + +print("If you add custom envs, their mp versions will be found in:") +print(fancy_gym.MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['']) ``` ### How to create a new MP task From 79c2eda4fedf61d63a4dfb889424021d084fe9d7 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 17:07:12 +0200 Subject: [PATCH 168/198] Fixed minor issues --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ed2c093..0ac39e5 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ for i in range(1000): env.render() if terminated or truncated: - observation = env.reset() + observation, info = env.reset() ``` ### Black-box Environments @@ -131,7 +131,7 @@ for i in range(5): observation, reward, terminated, truncated, info = env.step(action) # terminated or truncated is always True as we are working on the episode level, hence we always reset() - observation = env.reset() + observation, info = env.reset() ``` To show all available environments, we provide some additional convenience variables. All of them return a dictionary @@ -257,9 +257,9 @@ for i in range(5): rewards += reward if terminated or truncated: - print(base_env_id, rewards) + print(rewards) rewards = 0 - observation = env.reset() + observatio, infon = env.reset() ``` ## Icon Attribution From 310719e471581c70a3b47d99176d2d972775789d Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 17:32:29 +0200 Subject: [PATCH 169/198] Fix: Typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ac39e5..44c8d8c 100644 --- a/README.md +++ b/README.md @@ -259,7 +259,7 @@ for i in range(5): if terminated or truncated: print(rewards) rewards = 0 - observatio, infon = env.reset() + observation, info = env.reset() ``` ## Icon Attribution From 740d1d7896ba8837e179d24e8aa149a98a47d31b Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 17:54:27 +0200 Subject: [PATCH 170/198] README: Added link to changelog of refactor --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 44c8d8c..5940652 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@
-| :exclamation: Fancy Gym has recently received a mayor refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](TODO). | +| :exclamation: Fancy Gym has recently received a mayor refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | | ------------------------------------------------------------ | Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments. From 6d0e96c6f2c3b9f472c2902a56c976c2a6beea7f Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 19 Sep 2023 18:04:02 +0200 Subject: [PATCH 171/198] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5940652..952a71d 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@
-| :exclamation: Fancy Gym has recently received a mayor refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | +| :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | | ------------------------------------------------------------ | Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments. From d6ecc0dc67b8a9170902c16f3b7c9fa9ea02cde9 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 20 Sep 2023 12:49:31 +0200 Subject: [PATCH 172/198] minor changes to README --- README.md | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 952a71d..85c60ce 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,14 @@
-| :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | -| ------------------------------------------------------------ | +| :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments. **Key Features**: -- **New Challenging Environments**: We've introduced several new environments (Panda Box Pushing, Table Tennis, etc.) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. +- **New Challenging Environments**: `fancy_gym` includes several new environments (Panda Box Pushing, Table Tennis, etc.) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. - **Support for Movement Primitives**: `fancy_gym` supports a range of movement primitives (MPs), including Dynamic Movement Primitives (DMPs), Probabilistic Movement Primitives (ProMP), and Probabilistic Dynamic Movement Primitives (ProDMP). - **Upgrade to Movement Primitives**: With our framework, it's straightforward to transform standard Gymnasium environments into environments that support movement primitives. - **Benchmark Suite Compatibility**: `fancy_gym` makes it easy to access renowned benchmark suites such as [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) and [Metaworld](https://meta-world.github.io/), whether you want to use them in the regular step-based setting or using MPs. @@ -32,39 +32,39 @@ While the overarching objective of MP environments remains the learning of an op 1. Clone the repository -```bash +```bash git clone git@github.com:ALRhub/fancy_gym.git ``` 2. Go to the folder -```bash +```bash cd fancy_gym ``` 3. Install with -```bash +```bash pip install -e . ``` We have a few optional dependencies. If you also want to install those use -```bash +```bash pip install -e '.[all]' # to install all optional dependencies pip install -e '.[dmc,metaworld,box2d,mujoco,mujoco-legacy,jax,testing]' # or choose only those you want ``` - ## How to use Fancy Gym We will only show the basics here and prepared [multiple examples](fancy_gym/examples/) for a more detailed look. ### Step-Based Environments + Regular step based environments added by Fancy Gym are added into the `fancy/` namespace. -| :exclamation: Legacy versions of Fancy Gym used `fancy_gym.make(...)`. This is no longer supported and will raise an Exception on new versions. 
| -| ------------------------------------------------------------ | +| :exclamation: Legacy versions of Fancy Gym used `fancy_gym.make(...)`. This is no longer supported and will raise an Exception on new versions. | +| ----------------------------------------------------------------------------------------------------------------------------------------------- | ```python import gymnasium as gym @@ -90,20 +90,20 @@ for i in range(1000): All environments provide by default the cumulative episode reward, this can however be changed if necessary. Optionally, each environment returns all collected information from each step as part of the infos. This information is, however, mainly meant for debugging as well as logging and not for training. -|Key| Description|Type -|---|---|---| -`positions`| Generated trajectory from MP | Optional -`velocities`| Generated trajectory from MP | Optional -`step_actions`| Step-wise executed action based on controller output | Optional -`step_observations`| Step-wise intermediate observations | Optional -`step_rewards`| Step-wise rewards | Optional -`trajectory_length`| Total number of environment interactions | Always -`other`| All other information from the underlying environment are returned as a list with length `trajectory_length` maintaining the original key. In case some information are not provided every time step, the missing values are filled with `None`. | Always +| Key | Description | Type | +| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------- | +| `positions` | Generated trajectory from MP | Optional | +| `velocities` | Generated trajectory from MP | Optional | +| `step_actions` | Step-wise executed action based on controller output | Optional | +| `step_observations` | Step-wise intermediate observations | Optional | +| `step_rewards` | Step-wise rewards | Optional | +| `trajectory_length` | Total number of environment interactions | Always | +| `other` | All other information from the underlying environment are returned as a list with length `trajectory_length` maintaining the original key. In case some information are not provided every time step, the missing values are filled with `None`. | Always | Existing MP tasks can be created the same way as above. The namespace of a MP-variant of an environment is given by `_/`. Just keep in mind, calling `step()` executes a full trajectory. -> **Note:** +> **Note:** > Currently, we are also in the process of enabling replanning as well as learning of sub-trajectories. > This allows to split the episode into multiple trajectories and is a hybrid setting between step-based and > black-box leaning. @@ -120,7 +120,7 @@ env = gym.make('fancy_ProMP/Reacher5d-v0') # or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_ NS # render() can be called once in the beginning with all necessary arguments. -# To turn it of again just call render() without any arguments. +# To turn it of again just call render() without any arguments. env.render(mode='human') # This returns the context information, not the full state observation @@ -185,7 +185,7 @@ class RawInterfaceWrapper(gym.Wrapper): Returns boolean mask of the same shape as the observation space. It determines whether the observation is returned for the contextual case or not. 
This effectively allows to filter unwanted or unnecessary observations from the full step-based case. - E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the + E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the context/part of the first observation, the velocities are not necessary in the observation for the task. Returns: bool array representing the indices of the observations From 79c2eda4fedf61d63a4dfb889424021d084fe9d7 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 29 Sep 2023 19:19:57 +0200 Subject: [PATCH 173/198] Better documentation of fancy registry functions (register & upgrade) --- fancy_gym/envs/registry.py | 80 +++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index 5176699..c3eb896 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -1,4 +1,4 @@ -from typing import Tuple, Union +from typing import Tuple, Union, Callable, List, Dict, Any, Optional import copy import importlib @@ -113,14 +113,41 @@ MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS = {} def register( - id, - entry_point=None, - mp_wrapper=DefaultMPWrapper, - register_step_based=True, # TODO: Detect - add_mp_types=KNOWN_MPS, - mp_config_override={}, + id: str, + entry_point: Optional[Union[Callable, str]] = None, + mp_wrapper: RawInterfaceWrapper = DefaultMPWrapper, + register_step_based: bool = True, # TODO: Detect + add_mp_types: List[str] = KNOWN_MPS, + mp_config_override: Dict[str, Any] = {}, **kwargs ): + """ + Registers a Gymnasium environment, including Movement Primitives (MP) versions. + If you only want to register MP versions for an already registered environment, use fancy_gym.upgrade instead. + + Args: + id (str): The unique identifier for the environment. + entry_point (Optional[Union[Callable, str]]): The entry point for creating the environment. + mp_wrapper (RawInterfaceWrapper): The MP wrapper for the environment. + register_step_based (bool): Whether to also register the raw step-based version of the environment (default True). + add_mp_types (List[str]): List of additional MP types to register. + mp_config_override (Dict[str, Any]): Dictionary for overriding MP configuration. + **kwargs: Additional keyword arguments which are passed to the environment constructor. + + Notes: + - When `register_step_based` is True, the raw environment will also be registered to gymnasium, otherwise only mp-versions will be registered. + - `entry_point` can be given as a string, allowing the same notation as gymnasium. + - If `id` already exists in the Gymnasium registry and `register_step_based` is True, + a warning message will be printed, suggesting to set `register_step_based=False` or use `fancy_gym.upgrade`. + + Example: + To register a step-based environment with Movement Primitive versions (will use default mp_wrapper): + >>> register("MyEnv-v0", MyEnvClass) + + The entry point can also be provided as a string: + >>> register("MyEnv-v0", "my_module:MyEnvClass") + + """ if register_step_based and id in gym_registry: print(f'[Info] Gymnasium env with id "{id}" already exists.
You should supply register_step_based=False or use fancy_gym.upgrade if you only want to register mp versions of an existing env.') if register_step_based: @@ -135,23 +162,48 @@ def register( def upgrade( - id, - mp_wrapper=DefaultMPWrapper, - add_mp_types=KNOWN_MPS, - base_id=None, - mp_config_override={}, + id: str, + mp_wrapper: RawInterfaceWrapper = DefaultMPWrapper, + add_mp_types: List[str] = KNOWN_MPS, + base_id: Optional[str] = None, + mp_config_override: Dict[str, Any] = {}, ): + """ + Upgrades an existing Gymnasium environment to include Movement Primitives (MP) versions. + We expect the raw step-based env to be already registered with gymnasium. Otherwise please use fancy_gym.register instead. + + Args: + id (str): The unique identifier for the environment. + mp_wrapper (RawInterfaceWrapper): The MP wrapper for the environment (default is DefaultMPWrapper). + add_mp_types (List[str]): List of additional MP types to register (default is KNOWN_MPS). + base_id (Optional[str]): The unique identifier for the environment to upgrade. Will use id if none is provided. Can be defined to allow multiple registrations of different versions for the same step-based environment. + mp_config_override (Dict[str, Any]): Dictionary for overriding MP configuration. + + Notes: + - The `id` parameter should match the ID of the existing Gymnasium environment you wish to upgrade. You can also pick a new one, but then `base_id` needs to be provided. + - The `mp_wrapper` parameter specifies the MP wrapper to use, allowing for customization. + - `add_mp_types` can be used to specify additional MP types to register alongside the base environment. + - The `base_id` parameter should match the ID of the existing Gymnasium environment you wish to upgrade. + - `mp_config_override` allows for customizing MP configuration if needed. + + Example: + To upgrade an existing environment with MP versions: + >>> upgrade("MyEnv-v0", mp_wrapper=CustomMPWrapper) + + To upgrade an existing environment with custom MP types and configuration: + >>> upgrade("MyEnv-v0", mp_wrapper=CustomMPWrapper, add_mp_types=["ProDMP", "DMP"], mp_config_override={"param": 42}) + """ if not base_id: base_id = id register_mps(id, base_id, mp_wrapper, add_mp_types, mp_config_override) -def register_mps(id, base_id, mp_wrapper, add_mp_types=KNOWN_MPS, mp_config_override={}): +def register_mps(id: str, base_id: str, mp_wrapper: RawInterfaceWrapper, add_mp_types: List[str] = KNOWN_MPS, mp_config_override: Dict[str, Any] = {}): for mp_type in add_mp_types: register_mp(id, base_id, mp_wrapper, mp_type, mp_config_override.get(mp_type, {})) -def register_mp(id, base_id, mp_wrapper, mp_type, mp_config_override={}): +def register_mp(id: str, base_id: str, mp_wrapper: RawInterfaceWrapper, mp_type: str, mp_config_override: Dict[str, Any] = {}): assert mp_type in KNOWN_MPS, 'Unknown mp_type' assert id not in ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS[mp_type], f'The environment {id} is already registered for {mp_type}.'
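Taken together, the two helpers documented above cover both registration paths. A minimal usage sketch follows; the `CoolNewEnv` class, the `my_package.envs` module path, and the chosen IDs are illustrative placeholders, not names from this codebase:

```python
import gymnasium as gym
import fancy_gym  # importing fancy_gym exposes register/upgrade and the MP namespaces

# Case 1: a brand-new environment -- registers the step-based version
# with gymnasium AND adds the requested MP variants.
fancy_gym.register(
    id='custom/CoolNewEnv-v0',
    entry_point='my_package.envs:CoolNewEnv',  # hypothetical entry point
    add_mp_types=['ProMP', 'ProDMP'],
)

# Case 2: an environment gymnasium already knows -- only adds MP variants.
# Without an explicit mp_wrapper, the DefaultMPWrapper is used.
fancy_gym.upgrade(
    id='Pendulum-v1',
    add_mp_types=['ProMP'],
)

# MP variants live in the '<namespace>_<MP type>/' namespaces,
# e.g. 'custom_ProMP/...' here and 'gym_ProMP/...' for plain gymnasium envs.
env = gym.make('custom_ProMP/CoolNewEnv-v0')
observation, info = env.reset(seed=1)
```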
From 8bba78b813939c78ec38d1df11b2fc1339acd165 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 29 Sep 2023 20:36:36 +0200 Subject: [PATCH 174/198] Updated dmc README --- fancy_gym/dmc/README.MD | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fancy_gym/dmc/README.MD b/fancy_gym/dmc/README.MD index 040a9a0..a360e44 100644 --- a/fancy_gym/dmc/README.MD +++ b/fancy_gym/dmc/README.MD @@ -1,7 +1,7 @@ # DeepMind Control (DMC) Wrappers -These are the Environment Wrappers for selected -[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) +These are the Environment Wrappers for selected +[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) environments in order to use our Motion Primitive gym interface with them. ## MP Environments [//]: <> (These environments are wrapped-versions of their Deep Mind Control Suite (DMC) counterparts. Given most task can be) [//]: <> (solved in shorter horizon lengths than the original 1000 steps, we often shorten the episodes for those task.) -|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension -|---|---|---|---|---| -|`dmc_ball_in_cup-catch_promp-v0`| A ProMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2 -|`dmc_ball_in_cup-catch_dmp-v0`| A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000| 10 | 2 -|`dmc_reacher-easy_promp-v0`| A ProMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4 -|`dmc_reacher-easy_dmp-v0`| A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000| 10 | 4 -|`dmc_reacher-hard_promp-v0`| A ProMP wrapped version of the "hard" task for the "reacher" environment.| 1000 | 10 | 4 -|`dmc_reacher-hard_dmp-v0`| A DMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4 +| Name | Description | Trajectory Horizon | Action Dimension | Context Dimension | +| ---------------------------------------- | ------------------------------------------------------------------------------ | ------------------ | ---------------- | ----------------- | +| `dm_control_ProMP/ball_in_cup-catch-v0` | A ProMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2 | +| `dm_control_DMP/ball_in_cup-catch-v0` | A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2 | +| `dm_control_ProMP/reacher-easy-v0` | A ProMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4 | +| `dm_control_DMP/reacher-easy-v0` | A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4 | +| `dm_control_ProMP/reacher-hard-v0` | A ProMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4 | +| `dm_control_DMP/reacher-hard-v0` | A DMP wrapped version of the "hard" task for the "reacher" environment.
| 1000 | 10 | 4 | From e55905a8246d325ae463bcb48082116197d13827 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 29 Sep 2023 20:39:11 +0200 Subject: [PATCH 175/198] Improved and Updated README for custom mujoco envs --- fancy_gym/envs/mujoco/README.MD | 57 ++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/fancy_gym/envs/mujoco/README.MD b/fancy_gym/envs/mujoco/README.MD index 0ea5a1f..ff74085 100644 --- a/fancy_gym/envs/mujoco/README.MD +++ b/fancy_gym/envs/mujoco/README.MD @@ -1,15 +1,48 @@ # Custom Mujoco tasks ## Step-based Environments -|Name| Description|Horizon|Action Dimension|Observation Dimension -|---|---|---|---|---| -|`ALRReacher-v0`|Modified (5 links) Mujoco gym's `Reacher-v2` (2 links)| 200 | 5 | 21 -|`ALRReacherSparse-v0`|Same as `ALRReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 5 | 21 -|`ALRReacherSparseBalanced-v0`|Same as `ALRReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 5 | 21 -|`ALRLongReacher-v0`|Modified (7 links) Mujoco gym's `Reacher-v2` (2 links)| 200 | 7 | 27 -|`ALRLongReacherSparse-v0`|Same as `ALRLongReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 7 | 27 -|`ALRLongReacherSparseBalanced-v0`|Same as `ALRLongReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 7 | 27 -|`ALRBallInACupSimple-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector. | 4000 | 3 | wip -|`ALRBallInACup-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector | 4000 | 7 | wip -|`ALRBallInACupGoal-v0`| Similar to `ALRBallInACupSimple-v0` but the ball needs to be caught at a specified goal position | 4000 | 7 | wip - \ No newline at end of file + +| Name | Description | Horizon | Action Dimension | Observation Dimension | | ------------------------------------------ | -------------------------------------------------------------------------------------------------- | ------- | ---------------- | --------------------- | +| `fancy/Reacher-v0` | Modified (5 links) gymnasium's mujoco `Reacher-v2` (2 links) | 200 | 5 | 21 | +| `fancy/ReacherSparse-v0` | Same as `fancy/Reacher-v0`, but the distance penalty is only provided in the last time step. | 200 | 5 | 21 | +| `fancy/ReacherSparseBalanced-v0` | Same as `fancy/ReacherSparse-v0`, but the end-effector has to remain upright. | 200 | 5 | 21 | +| `fancy/LongReacher-v0` | Modified (7 links) gymnasium's mujoco `Reacher-v2` (2 links) | 200 | 7 | 27 | +| `fancy/LongReacherSparse-v0` | Same as `fancy/LongReacher-v0`, but the distance penalty is only provided in the last time step. | 200 | 7 | 27 | +| `fancy/LongReacherSparseBalanced-v0` | Same as `fancy/LongReacherSparse-v0`, but the end-effector has to remain upright.
| 200 | 7 | 27 | +| `fancy/Reacher5d-v0` | Reacher task with 5 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 5 | 20 | +| `fancy/Reacher5dSparse-v0` | Sparse Reacher task with 5 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 5 | 20 | +| `fancy/Reacher7d-v0` | Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 | +| `fancy/Reacher7dSparse-v0` | Sparse Reacher task with 7 links, based on Gymnasium's `gym.envs.mujoco.ReacherEnv` | 200 | 7 | 22 | +| `fancy/HopperJumpSparse-v0` | Hopper Jump task with sparse rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* | +| `fancy/HopperJump-v0` | Hopper Jump task with continuous rewards, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 15 / 16\* | +| `fancy/AntJump-v0` | Ant Jump task, based on Gymnasium's `gym.envs.mujoco.Ant` | 200 | 8 | 119 | +| `fancy/HalfCheetahJump-v0` | HalfCheetah Jump task, based on Gymnasium's `gym.envs.mujoco.HalfCheetah` | 100 | 6 | 112 | +| `fancy/HopperJumpOnBox-v0` | Hopper Jump on Box task, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 4 | 16 / 100\* | +| `fancy/HopperThrow-v0` | Hopper Throw task, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 18 / 100\* | +| `fancy/HopperThrowInBasket-v0` | Hopper Throw in Basket task, based on Gymnasium's `gym.envs.mujoco.Hopper` | 250 | 3 | 18 / 100\* | +| `fancy/Walker2DJump-v0` | Walker 2D Jump task, based on Gymnasium's `gym.envs.mujoco.Walker2d` | 300 | 6 | 18 / 19\* | +| `fancy/BeerPong-v0` | Beer Pong task, based on a custom environment with multiple task variations | 300 | 3 | 29 | +| `fancy/BeerPongStepBased-v0` | Step-based Beer Pong task, based on a custom environment with episodic rewards | 300 | 3 | 29 | +| `fancy/BeerPongFixedRelease-v0` | Beer Pong with fixed release, based on a custom environment with episodic rewards | 300 | 3 | 29 | +| `fancy/BoxPushingDense-v0` | Custom Box-pushing task with dense rewards | 100 | 3 | 13 | +| `fancy/BoxPushingTemporalSparse-v0` | Custom Box-pushing task with temporally sparse rewards | 100 | 3 | 13 | +| `fancy/BoxPushingTemporalSpatialSparse-v0` | Custom Box-pushing task with temporally and spatially sparse rewards | 100 | 3 | 13 | +| `fancy/TableTennis2D-v0` | Table Tennis task with 2D context, based on a custom environment for table tennis | 350 | 7 | 19 | +| `fancy/TableTennis2DReplan-v0` | Table Tennis task with 2D context and replanning, based on a custom environment for table tennis | 350 | 7 | 19 | +| `fancy/TableTennis4D-v0` | Table Tennis task with 4D context, based on a custom environment for table tennis | 350 | 7 | 22 | +| `fancy/TableTennis4DReplan-v0` | Table Tennis task with 4D context and replanning, based on a custom environment for table tennis | 350 | 7 | 22 | +| `fancy/TableTennisWind-v0` | Table Tennis task with wind effects, based on a custom environment for table tennis | 350 | 7 | 19 | +| `fancy/TableTennisGoalSwitching-v0` | Table Tennis task with goal switching, based on a custom environment for table tennis | 350 | 7 | 19 | +| `fancy/TableTennisWindReplan-v0` | Table Tennis task with wind effects and replanning, based on a custom environment for table tennis | 350 | 7 | 19 | + +\*Observation dimensions depend on configuration. 
+
+

From e3055568e859637b1296889b5387bb9df6f7ba58 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Fri, 29 Sep 2023 20:39:42 +0200
Subject: [PATCH 176/198] Improved README for classical control envs

---
 fancy_gym/envs/classic_control/README.MD | 26 +++++++++++++-----------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/fancy_gym/envs/classic_control/README.MD b/fancy_gym/envs/classic_control/README.MD
index bd1b68b..b714554 100644
--- a/fancy_gym/envs/classic_control/README.MD
+++ b/fancy_gym/envs/classic_control/README.MD
@@ -1,18 +1,20 @@
 ### Classic Control

 ## Step-based Environments
-|Name| Description|Horizon|Action Dimension|Observation Dimension
-|---|---|---|---|---|
-|`SimpleReacher-v0`| Simple reaching task (2 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 2 | 9
-|`LongSimpleReacher-v0`| Simple reaching task (5 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 5 | 18
-|`ViaPointReacher-v0`| Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively.| 200 | 5 | 18
-|`HoleReacher-v0`| 5 link reaching task where the end-effector needs to reach into a narrow hole without collding with itself or walls | 200 | 5 | 18
+
+| Name | Description | Horizon | Action Dimension | Observation Dimension |
+| ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | ---------------- | --------------------- |
+| `fancy/SimpleReacher-v0` | Simple reaching task (2 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory. | 200 | 2 | 9 |
+| `fancy/LongSimpleReacher-v0` | Simple reaching task (5 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory. | 200 | 5 | 18 |
+| `fancy/ViaPointReacher-v0` | Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively. | 200 | 5 | 18 |
+| `fancy/HoleReacher-v0` | 5-link reaching task where the end-effector needs to reach into a narrow hole without colliding with itself or walls | 200 | 5 | 18 |

 ## MP Environments
-|Name| Description|Horizon|Action Dimension|Context Dimension
-|---|---|---|---|---|
-|`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25
-|`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25
-|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. 
| 200 | 30
-[//]: |`HoleReacherProMPP-v0`|
\ No newline at end of file
+| Name | Description | Horizon | Action Dimension | Context Dimension |
+| ----------------------------------- | -------------------------------------------------------------------------------------------------------- | ------- | ---------------- | ----------------- |
+| `fancy_DMP/ViaPointReacher-v0` | A DMP provides a trajectory for the `fancy/ViaPointReacher-v0` task. | 200 | 25 |
+| `fancy_DMP/HoleReacherFixedGoal-v0` | A DMP provides a trajectory for the `fancy/HoleReacher-v0` task with a fixed goal attractor. | 200 | 25 |
+| `fancy_DMP/HoleReacher-v0` | A DMP provides a trajectory for the `fancy/HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30 |
+
+[//]: |`fancy/HoleReacherProMPP-v0`|

From a803d92dd2ffa325647a3d9e58e1716e35413d28 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 30 Sep 2023 16:31:23 +0200
Subject: [PATCH 177/198] Slightly better docs for OpenAI envs

---
 fancy_gym/open_ai/README.MD | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fancy_gym/open_ai/README.MD b/fancy_gym/open_ai/README.MD
index 62d1f20..21d297f 100644
--- a/fancy_gym/open_ai/README.MD
+++ b/fancy_gym/open_ai/README.MD
@@ -4,11 +4,12 @@ These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.
 the Motion Primitive gym interface for them.

 ## MP Environments
+
 These environments are wrapped versions of their OpenAI-gym counterparts.

-|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension
-|---|---|---|---|---|
-|`ContinuousMountainCarProMP-v0`| A ProMP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1
-|`ReacherProMP-v2`| A ProMP wrapped version of the Reacher-v2 environment. | 50 | 2
-|`FetchSlideDenseProMP-v1`| A ProMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
-|`FetchReachDenseProMP-v1`| A ProMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4
+| Name | Description | Trajectory Horizon | Action Dimension | Context Dimension |
+| ------------------------------------ | -------------------------------------------------------------------- | ------------------ | ---------------- | ----------------- |
+| `gym_ProMP/ContinuousMountainCar-v0` | A ProMP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 |
+| `gym_ProMP/Reacher-v2` | A ProMP wrapped version of the Reacher-v2 environment. | 50 | 2 |
+| `gym_ProMP/FetchSlideDense-v1` | A ProMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 |
+| `gym_ProMP/FetchReachDense-v1` | A ProMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4 |

From e633b1eeeb31f58bfc42672ee4dd87dde87366ce Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 30 Sep 2023 16:55:22 +0200
Subject: [PATCH 178/198] Fixed open_ai README

---
 fancy_gym/open_ai/README.MD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/open_ai/README.MD b/fancy_gym/open_ai/README.MD
index 21d297f..1db09ff 100644
--- a/fancy_gym/open_ai/README.MD
+++ b/fancy_gym/open_ai/README.MD
@@ -7,8 +7,8 @@ the Motion Primitive gym interface for them.

 These environments are wrapped versions of their OpenAI-gym counterparts. 
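
A rough usage sketch for these wrapped versions, assuming `fancy_gym` is installed; a single `step()` call rolls out the entire ProMP trajectory, so the returned reward is the aggregate over that trajectory:

```python
import gymnasium as gym
import fancy_gym  # noqa: F401  # importing fancy_gym registers the gym_ProMP/* variants

env = gym.make('gym_ProMP/ContinuousMountainCar-v0')
obs, info = env.reset(seed=1)
weights = env.action_space.sample()  # a ProMP parametrization, not a per-step action
obs, trajectory_return, terminated, truncated, info = env.step(weights)
env.close()
```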
-| Name | Description | Trajectory Horizon | Action Dimension | Context Dimension |
-| ------------------------------------ | -------------------------------------------------------------------- | ------------------ | ---------------- | ----------------- |
+| Name | Description | Trajectory Horizon | Action Dimension |
+| ------------------------------------ | -------------------------------------------------------------------- | ------------------ | ---------------- |
 | `gym_ProMP/ContinuousMountainCar-v0` | A ProMP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 |
 | `gym_ProMP/Reacher-v2` | A ProMP wrapped version of the Reacher-v2 environment. | 50 | 2 |
 | `gym_ProMP/FetchSlideDense-v1` | A ProMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 |

From b3de71e0d6734f115c8d16efe0b5229a2d0bc9fe Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 30 Sep 2023 16:55:50 +0200
Subject: [PATCH 179/198] Expanded README for Metaworld (Thanks to ChatGPT for helping)

---
 fancy_gym/meta/README.MD | 88 ++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 25 deletions(-)

diff --git a/fancy_gym/meta/README.MD b/fancy_gym/meta/README.MD
index 1664cb0..197ad6e 100644
--- a/fancy_gym/meta/README.MD
+++ b/fancy_gym/meta/README.MD
@@ -1,26 +1,64 @@
-# MetaWorld Wrappers
-
-These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments in order to use our Movement Primitive gym interface with them.
-All Metaworld environments have a 39 dimensional observation space with the same structure. The tasks differ only in the objective and the initial observations that are randomized.
-Unused observations are zeroed out. E.g. for `Button-Press-v2` the observation mask looks the following:
-```python
-    return np.hstack([
-        # Current observation
-        [False] * 3,  # end-effector position
-        [False] * 1,  # normalized gripper open distance
-        [True] * 3,  # main object position
-        [False] * 4,  # main object quaternion
-        [False] * 3,  # secondary object position
-        [False] * 4,  # secondary object quaternion
-        # Previous observation
-        [False] * 3,  # previous end-effector position
-        [False] * 1,  # previous normalized gripper open distance
-        [False] * 3,  # previous main object position
-        [False] * 4,  # previous main object quaternion
-        [False] * 3,  # previous second object position
-        [False] * 4,  # previous second object quaternion
-        # Goal
-        [True] * 3,  # goal position
-    ])
-```
-For other tasks only the boolean values have to be adjusted accordingly.
\ No newline at end of file
+# Metaworld
+
+[Metaworld](https://meta-world.github.io/) is an open-source simulated benchmark designed to advance meta-reinforcement learning and multi-task learning, comprising 50 diverse robotic manipulation tasks. The benchmark features a universal tabletop environment equipped with a simulated Sawyer arm and a variety of everyday objects. This shared environment is pivotal for reusing structured learning and efficiently acquiring related tasks.
+
+## Step-Based Envs
+
+`fancy_gym` makes all Metaworld ML1 tasks available via the standard gym interface.
+
+| Name | Description | Horizon | Action Dimension | Observation Dimension | Context Dimension |
+| ---------------------------------------- | ------------------------------------------------------------------------------------- | ------- | ---------------- | --------------------- | ----------------- |
+| `metaworld/assembly-v2` | A task where the robot must assemble components. 
| 150 | 4 | 39 | 6 | +| `metaworld/basketball-v2` | A task where the robot must play a game of basketball. | 150 | 4 | 39 | 6 | +| `metaworld/bin-picking-v2` | A task involving the robot picking objects from a bin. | 150 | 4 | 39 | 6 | +| `metaworld/box-close-v2` | A task requiring the robot to close a box. | 150 | 4 | 39 | 6 | +| `metaworld/button-press-topdown-v2` | A task where the robot must press a button from a top-down perspective. | 150 | 4 | 39 | 6 | +| `metaworld/button-press-topdown-wall-v2` | A task involving the robot pressing a button with a wall from a top-down perspective. | 150 | 4 | 39 | 6 | +| `metaworld/button-press-v2` | A task where the robot must press a button. | 150 | 4 | 39 | 6 | +| `metaworld/button-press-wall-v2` | A task involving the robot pressing a button with a wall. | 150 | 4 | 39 | 6 | +| `metaworld/coffee-button-v2` | A task where the robot must press a button on a coffee machine. | 150 | 4 | 39 | 6 | +| `metaworld/coffee-pull-v2` | A task involving the robot pulling a lever on a coffee machine. | 150 | 4 | 39 | 6 | +| `metaworld/coffee-push-v2` | A task involving the robot pushing a component on a coffee machine. | 150 | 4 | 39 | 6 | +| `metaworld/dial-turn-v2` | A task where the robot must turn a dial. | 150 | 4 | 39 | 6 | +| `metaworld/disassemble-v2` | A task requiring the robot to disassemble an object. | 150 | 4 | 39 | 6 | +| `metaworld/door-close-v2` | A task where the robot must close a door. | 150 | 4 | 39 | 6 | +| `metaworld/door-lock-v2` | A task involving the robot locking a door. | 150 | 4 | 39 | 6 | +| `metaworld/door-open-v2` | A task where the robot must open a door. | 150 | 4 | 39 | 6 | +| `metaworld/door-unlock-v2` | A task involving the robot unlocking a door. | 150 | 4 | 39 | 6 | +| `metaworld/hand-insert-v2` | A task requiring the robot to insert a hand into an object. | 150 | 4 | 39 | 6 | +| `metaworld/drawer-close-v2` | A task where the robot must close a drawer. | 150 | 4 | 39 | 6 | +| `metaworld/drawer-open-v2` | A task involving the robot opening a drawer. | 150 | 4 | 39 | 6 | +| `metaworld/faucet-open-v2` | A task requiring the robot to open a faucet. | 150 | 4 | 39 | 6 | +| `metaworld/faucet-close-v2` | A task where the robot must close a faucet. | 150 | 4 | 39 | 6 | +| `metaworld/hammer-v2` | A task where the robot must use a hammer. | 150 | 4 | 39 | 6 | +| `metaworld/handle-press-side-v2` | A task involving the robot pressing a handle from the side. | 150 | 4 | 39 | 6 | +| `metaworld/handle-press-v2` | A task where the robot must press a handle. | 150 | 4 | 39 | 6 | +| `metaworld/handle-pull-side-v2` | A task requiring the robot to pull a handle from the side. | 150 | 4 | 39 | 6 | +| `metaworld/handle-pull-v2` | A task where the robot must pull a handle. | 150 | 4 | 39 | 6 | +| `metaworld/lever-pull-v2` | A task involving the robot pulling a lever. | 150 | 4 | 39 | 6 | +| `metaworld/peg-insert-side-v2` | A task requiring the robot to insert a peg from the side. | 150 | 4 | 39 | 6 | +| `metaworld/pick-place-wall-v2` | A task involving the robot picking and placing an object with a wall. | 150 | 4 | 39 | 6 | +| `metaworld/pick-out-of-hole-v2` | A task where the robot must pick an object out of a hole. | 150 | 4 | 39 | 6 | +| `metaworld/reach-v2` | A task where the robot must reach an object. | 150 | 4 | 39 | 6 | +| `metaworld/push-back-v2` | A task involving the robot pushing an object backward. | 150 | 4 | 39 | 6 | +| `metaworld/push-v2` | A task where the robot must push an object. 
| 150 | 4 | 39 | 6 |
+| `metaworld/pick-place-v2` | A task involving the robot picking up and placing an object. | 150 | 4 | 39 | 6 |
+| `metaworld/plate-slide-v2` | A task requiring the robot to slide a plate. | 150 | 4 | 39 | 6 |
+| `metaworld/plate-slide-side-v2` | A task involving the robot sliding a plate from the side. | 150 | 4 | 39 | 6 |
+| `metaworld/plate-slide-back-v2` | A task where the robot must slide a plate backward. | 150 | 4 | 39 | 6 |
+| `metaworld/plate-slide-back-side-v2` | A task involving the robot sliding a plate backward from the side. | 150 | 4 | 39 | 6 |
+| `metaworld/peg-unplug-side-v2` | A task where the robot must unplug a peg from the side. | 150 | 4 | 39 | 6 |
+| `metaworld/soccer-v2` | A task where the robot must play soccer. | 150 | 4 | 39 | 6 |
+| `metaworld/stick-push-v2` | A task involving the robot pushing a stick. | 150 | 4 | 39 | 6 |
+| `metaworld/stick-pull-v2` | A task where the robot must pull a stick. | 150 | 4 | 39 | 6 |
+| `metaworld/push-wall-v2` | A task involving the robot pushing against a wall. | 150 | 4 | 39 | 6 |
+| `metaworld/reach-wall-v2` | A task where the robot must reach an object with a wall. | 150 | 4 | 39 | 6 |
+| `metaworld/shelf-place-v2` | A task involving the robot placing an object on a shelf. | 150 | 4 | 39 | 6 |
+| `metaworld/sweep-into-v2` | A task where the robot must sweep objects into a container. | 150 | 4 | 39 | 6 |
+| `metaworld/sweep-v2` | A task requiring the robot to sweep. | 150 | 4 | 39 | 6 |
+| `metaworld/window-open-v2` | A task where the robot must open a window. | 150 | 4 | 39 | 6 |
+| `metaworld/window-close-v2` | A task involving the robot closing a window. | 150 | 4 | 39 | 6 |
+
+## MP-Based Envs
+
+All envs are also available as MP variants. Refer to them using `metaworld_ProMP/` or `metaworld_ProDMP/` (DMP is currently not supported).

From 27f3a824c6c429f7c4149aa3027ec923f354ccd4 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 30 Sep 2023 17:27:11 +0200
Subject: [PATCH 180/198] Tiny update to Metaworld README

---
 fancy_gym/meta/README.MD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fancy_gym/meta/README.MD b/fancy_gym/meta/README.MD
index 197ad6e..c9700ba 100644
--- a/fancy_gym/meta/README.MD
+++ b/fancy_gym/meta/README.MD
@@ -4,7 +4,7 @@

 ## Step-Based Envs

-`fancy_gym` makes all Metaworld ML1 tasks available via the standard gym interface.
+`fancy_gym` makes all Metaworld ML1 tasks available via the standard gym interface. To access Metaworld environments using a different mode of operation (MT1 / ML100 / etc.), please use the functionality provided by Metaworld directly.

From 1eeccff35f7de85928dfa685df4f663724e96135 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 30 Sep 2023 18:46:50 +0200
Subject: [PATCH 181/198] README: Justify text about MP envs (looks way better)

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 85c60ce..7400206 100644
--- a/README.md
+++ b/README.md
@@ -22,11 +22,13 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maint

 ## Movement Primitive Environments (Episode-Based/Black-Box Environments)

+

Movement primitive (MP) environments differ from traditional step-based environments. They align more with concepts from stochastic search, black-box optimization, and methods commonly found in classical robotics and control. Instead of individual steps, MP environments operate on an episode basis, executing complete trajectories. These trajectories are produced by trajectory generators like Dynamic Movement Primitives (DMP), Probabilistic Movement Primitives (ProMP) or Probabilistic Dynamic Movement Primitives (ProDMP). Once generated, these trajectories are converted into step-by-step actions using a trajectory tracking controller. The specific controller chosen depends on the environment's requirements. Currently, we support position, velocity, and PD-Controllers tailored for position, velocity, and torque control. Additionally, we have a specialized controller designed for the MetaWorld control suite. While the overarching objective of MP environments remains the learning of an optimal policy, the actions here represent the parametrization of motion primitives to craft the right trajectory. Our framework further enhances this by accommodating a contextual setting. At the episode's onset, we present the context space—a subset of the observation space. This demands the prediction of a new action or MP parametrization for every unique context. +

## Installation From 0ae87088d9d3fdf9b1c12c5dd36254867d8f01eb Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 30 Sep 2023 18:48:10 +0200 Subject: [PATCH 182/198] README: Better Justifycation of text --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7400206..ff0d3c2 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,11 @@ Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maint

Movement primitive (MP) environments differ from traditional step-based environments. They align more with concepts from stochastic search, black-box optimization, and methods commonly found in classical robotics and control. Instead of individual steps, MP environments operate on an episode basis, executing complete trajectories. These trajectories are produced by trajectory generators like Dynamic Movement Primitives (DMP), Probabilistic Movement Primitives (ProMP) or Probabilistic Dynamic Movement Primitives (ProDMP). - +

+

Once generated, these trajectories are converted into step-by-step actions using a trajectory tracking controller. The specific controller chosen depends on the environment's requirements. Currently, we support position, velocity, and PD-Controllers tailored for position, velocity, and torque control. Additionally, we have a specialized controller designed for the MetaWorld control suite. - +

+

While the overarching objective of MP environments remains the learning of an optimal policy, the actions here represent the parametrization of motion primitives to craft the right trajectory. Our framework further enhances this by accommodating a contextual setting. At the episode's onset, we present the context space—a subset of the observation space. This demands the prediction of a new action or MP parametrization for every unique context.
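
A minimal sketch of this episode-based interaction, assuming the MP variant of `fancy/Reacher5d-v0` is exposed as `fancy_ProMP/Reacher5d-v0` (an assumed id, mirroring the `fancy_DMP/`, `gym_ProMP/`, and `metaworld_ProMP/` naming used elsewhere in this series):

```python
import gymnasium as gym
import fancy_gym  # noqa: F401

env = gym.make('fancy_ProMP/Reacher5d-v0')  # assumed id for the ProMP variant
context, info = env.reset(seed=1)           # the context is presented at the episode's onset
params = env.action_space.sample()          # one action = one MP parametrization
# The trajectory generator and tracking controller run internally for the whole episode:
obs, episode_reward, terminated, truncated, info = env.step(params)
env.close()
```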


From 9fee527dbeafbc94276cbfe46c888eacbee90c91 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 11:36:36 +0200
Subject: [PATCH 183/198] Fix: Typo in setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a0ee43c..28c1a28 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ extras = {
     'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'],
     'box2d': ['gymnasium[box2d]>=0.26.0'],
     'mujoco': ['mujoco==2.3.3', 'gymnasium[mujoco]>0.26.0'],
-    'mujoco-legacy' = ['mujoco-py >=2.1,<2.2', 'cython<3'],
+    'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'],
     'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"],
 }

From 3180154fa6389420b92d18181294f1ae25e58fae Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 11:37:46 +0200
Subject: [PATCH 184/198] Added docstring to deprecated 'fancy_gym.make'

---
 fancy_gym/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
index c646aef..c3adaad 100644
--- a/fancy_gym/__init__.py
+++ b/fancy_gym/__init__.py
@@ -11,4 +11,7 @@ ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS


 def make(*args, **kwargs):
+    """
+    As part of the refactor of Fancy Gym and upgrade to gymnasium the use of fancy_gym.make has been discontinued. Regular gym.make should be used instead. For more details check out the github README. If your codebase was built for older versions of Fancy Gym and relies on the old behavior and dependency versions, please check out the legacy branch.
+    """
     raise Exception('As part of the refactor of Fancy Gym and upgrade to gymnasium the use of fancy_gym.make has been discontinued. Regular gym.make should be used instead. For more details check out the github README. 
If your codebase was built for older versions of Fancy Gym and relies on the old behavior and dependency versions, please check out the legacy branch.')

From fcc79cdde0d286badc4f3f564b066946ddea9fb9 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 11:38:42 +0200
Subject: [PATCH 185/198] Added Dominik & Hongyi as authors to setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 28c1a28..682bc7a 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]:


 setup(
-    author='Fabian Otto, Onur Celik',
+    author='Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou',
     name='fancy_gym',
     version='0.4',
     classifiers=[

From 5f3c28e8f0a589f38d1bdaa7872c5e92bc70d151 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 11:56:51 +0200
Subject: [PATCH 186/198] Added bibtex for citations to README

---
 README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/README.md b/README.md
index ff0d3c2..eb702c1 100644
--- a/README.md
+++ b/README.md
@@ -266,6 +266,20 @@ for i in range(5):
         observation, info = env.reset()
 ```

+## Citing the Project
+
+To cite this repository in publications:
+
+```bibtex
+@software{fancy_gym,
+  title = {Fancy {Gym}},
+  author = {Otto, Fabian and Celik, Onur and Roth, Dominik and Zhou, Hongyi},
+  abstract = {Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches.},
+  url = {https://github.com/ALRhub/fancy_gym},
+  organization = {Autonomous Learning Robots Lab (ALR) @ KIT},
+}
+```
+
 ## Icon Attribution

 The icon is based on the [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) icon as can be found [here](https://gymnasium.farama.org/_static/img/gymnasium_black.svg).

From bc150f5a4d77e67f4612d7b3e194db2f8ca300c2 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 11:59:02 +0200
Subject: [PATCH 187/198] Fix: Bibtex in README mixed between tabs & spaces

---
 README.md | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index eb702c1..8b82975 100644
--- a/README.md
+++ b/README.md
@@ -186,14 +186,13 @@ class RawInterfaceWrapper(gym.Wrapper):
     @property
     def context_mask(self) -> np.ndarray:
         """
-        Returns boolean mask of the same shape as the observation space.
-        It determines whether the observation is returned for the contextual case or not.
-        This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
-        E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the
-        context/part of the first observation, the velocities are not necessary in the observation for the task.
-        Returns:
-            bool array representing the indices of the observations
-
+        Returns boolean mask of the same shape as the observation space.
+        It determines whether the observation is returned for the contextual case or not.
+        This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
+        E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the
+        context/part of the first observation, the velocities are not necessary in the observation for the task. 
+        Returns:
+            bool array representing the indices of the observations
         """
         return np.ones(self.env.observation_space.shape[0], dtype=bool)

@@ -276,7 +275,7 @@ To cite this repository in publications:
   author = {Otto, Fabian and Celik, Onur and Roth, Dominik and Zhou, Hongyi},
   abstract = {Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches.},
   url = {https://github.com/ALRhub/fancy_gym},
-  organization = {Autonomous Learning Robots Lab (ALR) @ KIT},
+  organization = {Autonomous Learning Robots Lab (ALR) @ KIT},
 }
 ```

From 757072c7625fdd7e992b51adefc47780cadbf01a Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 12:35:23 +0200
Subject: [PATCH 188/198] Documenting merging behavior for mp_configs (and remove TODOs)

---
 README.md                  | 30 +++++++++++++++++++++++++++++-
 fancy_gym/envs/registry.py |  3 ---
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 8b82975..3f2ea95 100644
--- a/README.md
+++ b/README.md
@@ -177,7 +177,7 @@ import numpy as np

 class RawInterfaceWrapper(gym.Wrapper):
-    mp_config = { # Default configurations for MPs can be overitten by defining attributes here.
+    mp_config = {
         'ProMP': {},
         'DMP': {},
         'ProDMP': {},
@@ -222,6 +222,34 @@ class RawInterfaceWrapper(gym.Wrapper):

 ```

+Default configurations for MPs can be overwritten by defining attributes in mp_config.
+Available parameters are documented in the [MP_PyTorch Userguide](https://github.com/ALRhub/MP_PyTorch/blob/main/doc/README.md).
+
+```python
+class RawInterfaceWrapper(gym.Wrapper):
+    mp_config = {
+        'ProMP': {
+            'phase_generator_kwargs': {
+                'phase_generator_type': 'linear'
+                # When selecting another generator type, the default configuration will not be merged for the attribute.
+            },
+            'controller_kwargs': {
+                'p_gains': 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+                'd_gains': 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]),
+            },
+            'basis_generator_kwargs': {
+                'num_basis': 3,
+                'num_basis_zero_start': 1,
+                'num_basis_zero_goal': 1,
+            },
+        },
+        'DMP': {},
+        'ProDMP': {},
+    }
+
+    [...]
+```
+
 If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well. Without the integration the task can still be used. A rough outline can be shown here, for more details we recommend having a look at the [examples](fancy_gym/examples/).

diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py
index c3eb896..4736cf4 100644
--- a/fancy_gym/envs/registry.py
+++ b/fancy_gym/envs/registry.py
@@ -8,7 +8,6 @@ from collections import defaultdict
 from collections.abc import Mapping, MutableMapping

 from fancy_gym.utils.make_env_helpers import make_bb
-# from fancy_gym.utils.utils import nested_update
 from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper

 from gymnasium import register as gym_register
@@ -238,8 +237,6 @@ def register_mp(id: str, base_id: str, mp_wrapper: RawInterfaceWrapper, mp_type:
     MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns][mp_type].append(fancy_id)
     MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS[ns]['all'].append(fancy_id)

-# TODO: Apply inherit_defaults: False to appropiate places and remove this... 
- def nested_update(base: MutableMapping, update): """ From 3317ffb06d9f90f7e15f9e8a84c7204afd52d193 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 11 Oct 2023 12:39:11 +0200 Subject: [PATCH 189/198] README: @ is not a legal symbol in BibTex attribs --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3f2ea95..dc64694 100644 --- a/README.md +++ b/README.md @@ -299,11 +299,11 @@ To cite this repository in publications: ```bibtex @software{fancy_gym, - title = {Fancy {Gym}}, + title = {Fancy Gym}, author = {Otto, Fabian and Celik, Onur and Roth, Dominik and Zhou, Hongyi}, abstract = {Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches.}, url = {https://github.com/ALRhub/fancy_gym}, - organization = {Autonomous Learning Robots Lab (ALR) @ KIT}, + organization = {Autonomous Learning Robots Lab (ALR) at KIT}, } ``` From db5e0329876a5a42be253613bf1151b909c05a79 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 11 Oct 2023 12:41:38 +0200 Subject: [PATCH 190/198] Development status alpha -> beta; version to 1.0 (for pub release) --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 682bc7a..1daa568 100644 --- a/setup.py +++ b/setup.py @@ -35,9 +35,9 @@ def find_package_data(extensions_to_include: List[str]) -> List[str]: setup( author='Fabian Otto, Onur Celik, Dominik Roth, Hongyi Zhou', name='fancy_gym', - version='0.4', + version='1.0', classifiers=[ - 'Development Status :: 3 - Alpha', + 'Development Status :: 4 - Beta', 'Intended Audience :: Science/Research', 'License :: OSI Approved :: MIT License', 'Natural Language :: English', From 980bb0de6b7886c8225477ae0ff44f1832fee3a0 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 11 Oct 2023 12:46:34 +0200 Subject: [PATCH 191/198] Add docstring to DefaultMPWrapper --- fancy_gym/envs/registry.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fancy_gym/envs/registry.py b/fancy_gym/envs/registry.py index 4736cf4..321996f 100644 --- a/fancy_gym/envs/registry.py +++ b/fancy_gym/envs/registry.py @@ -18,6 +18,15 @@ from gymnasium.envs.registration import registry as gym_registry class DefaultMPWrapper(RawInterfaceWrapper): @property def context_mask(self): + """ + Returns boolean mask of the same shape as the observation space. + It determines whether the observation is returned for the contextual case or not. + This effectively allows to filter unwanted or unnecessary observations from the full step-based case. + E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the + context/part of the first observation, the velocities are not necessary in the observation for the task. + Returns: + bool array representing the indices of the observations + """ # If the env already defines a context_mask, we will use that if hasattr(self.env, 'context_mask'): return self.env.context_mask @@ -27,11 +36,25 @@ class DefaultMPWrapper(RawInterfaceWrapper): @property def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + """ + Returns the current position of the action/control dimension. + The dimensionality has to match the action/control dimension. + This is not required when exclusively using velocity control, + it should, however, be implemented regardless. + E.g. The joint positions that are directly or indirectly controlled by the action. 
+ """ assert hasattr(self.env, 'current_pos'), 'DefaultMPWrapper was unable to access env.current_pos. Please write a custom MPWrapper (recommended) or expose this attribute directly.' return self.env.current_pos @property def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + """ + Returns the current velocity of the action/control dimension. + The dimensionality has to match the action/control dimension. + This is not required when exclusively using position control, + it should, however, be implemented regardless. + E.g. The joint velocities that are directly or indirectly controlled by the action. + """ assert hasattr(self.env, 'current_vel'), 'DefaultMPWrapper was unable to access env.current_vel. Please write a custom MPWrapper (recommended) or expose this attribute directly.' return self.env.current_vel From 877a7ea6ba321d91973820d1ae405a82d03c20b8 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 11 Oct 2023 12:50:55 +0200 Subject: [PATCH 192/198] Fix: Wrong horizon for Metaworld tasks --- fancy_gym/meta/README.MD | 100 +++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/fancy_gym/meta/README.MD b/fancy_gym/meta/README.MD index c9700ba..9ec5594 100644 --- a/fancy_gym/meta/README.MD +++ b/fancy_gym/meta/README.MD @@ -8,56 +8,56 @@ | Name | Description | Horizon | Action Dimension | Observation Dimension | Context Dimension | | ---------------------------------------- | ------------------------------------------------------------------------------------- | ------- | ---------------- | --------------------- | ----------------- | -| `metaworld/assembly-v2` | A task where the robot must assemble components. | 150 | 4 | 39 | 6 | -| `metaworld/basketball-v2` | A task where the robot must play a game of basketball. | 150 | 4 | 39 | 6 | -| `metaworld/bin-picking-v2` | A task involving the robot picking objects from a bin. | 150 | 4 | 39 | 6 | -| `metaworld/box-close-v2` | A task requiring the robot to close a box. | 150 | 4 | 39 | 6 | -| `metaworld/button-press-topdown-v2` | A task where the robot must press a button from a top-down perspective. | 150 | 4 | 39 | 6 | -| `metaworld/button-press-topdown-wall-v2` | A task involving the robot pressing a button with a wall from a top-down perspective. | 150 | 4 | 39 | 6 | -| `metaworld/button-press-v2` | A task where the robot must press a button. | 150 | 4 | 39 | 6 | -| `metaworld/button-press-wall-v2` | A task involving the robot pressing a button with a wall. | 150 | 4 | 39 | 6 | -| `metaworld/coffee-button-v2` | A task where the robot must press a button on a coffee machine. | 150 | 4 | 39 | 6 | -| `metaworld/coffee-pull-v2` | A task involving the robot pulling a lever on a coffee machine. | 150 | 4 | 39 | 6 | -| `metaworld/coffee-push-v2` | A task involving the robot pushing a component on a coffee machine. | 150 | 4 | 39 | 6 | -| `metaworld/dial-turn-v2` | A task where the robot must turn a dial. | 150 | 4 | 39 | 6 | -| `metaworld/disassemble-v2` | A task requiring the robot to disassemble an object. | 150 | 4 | 39 | 6 | -| `metaworld/door-close-v2` | A task where the robot must close a door. | 150 | 4 | 39 | 6 | -| `metaworld/door-lock-v2` | A task involving the robot locking a door. | 150 | 4 | 39 | 6 | -| `metaworld/door-open-v2` | A task where the robot must open a door. | 150 | 4 | 39 | 6 | -| `metaworld/door-unlock-v2` | A task involving the robot unlocking a door. 
| 150 | 4 | 39 | 6 | -| `metaworld/hand-insert-v2` | A task requiring the robot to insert a hand into an object. | 150 | 4 | 39 | 6 | -| `metaworld/drawer-close-v2` | A task where the robot must close a drawer. | 150 | 4 | 39 | 6 | -| `metaworld/drawer-open-v2` | A task involving the robot opening a drawer. | 150 | 4 | 39 | 6 | -| `metaworld/faucet-open-v2` | A task requiring the robot to open a faucet. | 150 | 4 | 39 | 6 | -| `metaworld/faucet-close-v2` | A task where the robot must close a faucet. | 150 | 4 | 39 | 6 | -| `metaworld/hammer-v2` | A task where the robot must use a hammer. | 150 | 4 | 39 | 6 | -| `metaworld/handle-press-side-v2` | A task involving the robot pressing a handle from the side. | 150 | 4 | 39 | 6 | -| `metaworld/handle-press-v2` | A task where the robot must press a handle. | 150 | 4 | 39 | 6 | -| `metaworld/handle-pull-side-v2` | A task requiring the robot to pull a handle from the side. | 150 | 4 | 39 | 6 | -| `metaworld/handle-pull-v2` | A task where the robot must pull a handle. | 150 | 4 | 39 | 6 | -| `metaworld/lever-pull-v2` | A task involving the robot pulling a lever. | 150 | 4 | 39 | 6 | -| `metaworld/peg-insert-side-v2` | A task requiring the robot to insert a peg from the side. | 150 | 4 | 39 | 6 | -| `metaworld/pick-place-wall-v2` | A task involving the robot picking and placing an object with a wall. | 150 | 4 | 39 | 6 | -| `metaworld/pick-out-of-hole-v2` | A task where the robot must pick an object out of a hole. | 150 | 4 | 39 | 6 | -| `metaworld/reach-v2` | A task where the robot must reach an object. | 150 | 4 | 39 | 6 | -| `metaworld/push-back-v2` | A task involving the robot pushing an object backward. | 150 | 4 | 39 | 6 | -| `metaworld/push-v2` | A task where the robot must push an object. | 150 | 4 | 39 | 6 | -| `metaworld/pick-place-v2` | A task involving the robot picking up and placing an object. | 150 | 4 | 39 | 6 | -| `metaworld/plate-slide-v2` | A task requiring the robot to slide a plate. | 150 | 4 | 39 | 6 | -| `metaworld/plate-slide-side-v2` | A task involving the robot sliding a plate from the side. | 150 | 4 | 39 | 6 | -| `metaworld/plate-slide-back-v2` | A task where the robot must slide a plate backward. | 150 | 4 | 39 | 6 | -| `metaworld/plate-slide-back-side-v2` | A task involving the robot sliding a plate backward from the side. | 150 | 4 | 39 | 6 | -| `metaworld/peg-unplug-side-v2` | A task where the robot must unplug a peg from the side. | 150 | 4 | 39 | 6 | -| `metaworld/soccer-v2` | A task where the robot must play soccer. | 150 | 4 | 39 | 6 | -| `metaworld/stick-push-v2` | A task involving the robot pushing a stick. | 150 | 4 | 39 | 6 | -| `metaworld/stick-pull-v2` | A task where the robot must pull a stick. | 150 | 4 | 39 | 6 | -| `metaworld/push-wall-v2` | A task involving the robot pushing against a wall. | 150 | 4 | 39 | 6 | -| `metaworld/reach-wall-v2` | A task where the robot must reach an object with a wall. | 150 | 4 | 39 | 6 | -| `metaworld/shelf-place-v2` | A task involving the robot placing an object on a shelf. | 150 | 4 | 39 | 6 | -| `metaworld/sweep-into-v2` | A task where the robot must sweep objects into a container. | 150 | 4 | 39 | 6 | -| `metaworld/sweep-v2` | A task requiring the robot to sweep. | 150 | 4 | 39 | 6 | -| `metaworld/window-open-v2` | A task where the robot must open a window. | 150 | 4 | 39 | 6 | -| `metaworld/window-close-v2` | A task involving the robot closing a window. | 150 | 4 | 39 | 6 | +| `metaworld/assembly-v2` | A task where the robot must assemble components. 
| 500 | 4 | 39 | 6 | +| `metaworld/basketball-v2` | A task where the robot must play a game of basketball. | 500 | 4 | 39 | 6 | +| `metaworld/bin-picking-v2` | A task involving the robot picking objects from a bin. | 500 | 4 | 39 | 6 | +| `metaworld/box-close-v2` | A task requiring the robot to close a box. | 500 | 4 | 39 | 6 | +| `metaworld/button-press-topdown-v2` | A task where the robot must press a button from a top-down perspective. | 500 | 4 | 39 | 6 | +| `metaworld/button-press-topdown-wall-v2` | A task involving the robot pressing a button with a wall from a top-down perspective. | 500 | 4 | 39 | 6 | +| `metaworld/button-press-v2` | A task where the robot must press a button. | 500 | 4 | 39 | 6 | +| `metaworld/button-press-wall-v2` | A task involving the robot pressing a button with a wall. | 500 | 4 | 39 | 6 | +| `metaworld/coffee-button-v2` | A task where the robot must press a button on a coffee machine. | 500 | 4 | 39 | 6 | +| `metaworld/coffee-pull-v2` | A task involving the robot pulling a lever on a coffee machine. | 500 | 4 | 39 | 6 | +| `metaworld/coffee-push-v2` | A task involving the robot pushing a component on a coffee machine. | 500 | 4 | 39 | 6 | +| `metaworld/dial-turn-v2` | A task where the robot must turn a dial. | 500 | 4 | 39 | 6 | +| `metaworld/disassemble-v2` | A task requiring the robot to disassemble an object. | 500 | 4 | 39 | 6 | +| `metaworld/door-close-v2` | A task where the robot must close a door. | 500 | 4 | 39 | 6 | +| `metaworld/door-lock-v2` | A task involving the robot locking a door. | 500 | 4 | 39 | 6 | +| `metaworld/door-open-v2` | A task where the robot must open a door. | 500 | 4 | 39 | 6 | +| `metaworld/door-unlock-v2` | A task involving the robot unlocking a door. | 500 | 4 | 39 | 6 | +| `metaworld/hand-insert-v2` | A task requiring the robot to insert a hand into an object. | 500 | 4 | 39 | 6 | +| `metaworld/drawer-close-v2` | A task where the robot must close a drawer. | 500 | 4 | 39 | 6 | +| `metaworld/drawer-open-v2` | A task involving the robot opening a drawer. | 500 | 4 | 39 | 6 | +| `metaworld/faucet-open-v2` | A task requiring the robot to open a faucet. | 500 | 4 | 39 | 6 | +| `metaworld/faucet-close-v2` | A task where the robot must close a faucet. | 500 | 4 | 39 | 6 | +| `metaworld/hammer-v2` | A task where the robot must use a hammer. | 500 | 4 | 39 | 6 | +| `metaworld/handle-press-side-v2` | A task involving the robot pressing a handle from the side. | 500 | 4 | 39 | 6 | +| `metaworld/handle-press-v2` | A task where the robot must press a handle. | 500 | 4 | 39 | 6 | +| `metaworld/handle-pull-side-v2` | A task requiring the robot to pull a handle from the side. | 500 | 4 | 39 | 6 | +| `metaworld/handle-pull-v2` | A task where the robot must pull a handle. | 500 | 4 | 39 | 6 | +| `metaworld/lever-pull-v2` | A task involving the robot pulling a lever. | 500 | 4 | 39 | 6 | +| `metaworld/peg-insert-side-v2` | A task requiring the robot to insert a peg from the side. | 500 | 4 | 39 | 6 | +| `metaworld/pick-place-wall-v2` | A task involving the robot picking and placing an object with a wall. | 500 | 4 | 39 | 6 | +| `metaworld/pick-out-of-hole-v2` | A task where the robot must pick an object out of a hole. | 500 | 4 | 39 | 6 | +| `metaworld/reach-v2` | A task where the robot must reach an object. | 500 | 4 | 39 | 6 | +| `metaworld/push-back-v2` | A task involving the robot pushing an object backward. | 500 | 4 | 39 | 6 | +| `metaworld/push-v2` | A task where the robot must push an object. 
| 500 | 4 | 39 | 6 |
+| `metaworld/pick-place-v2` | A task involving the robot picking up and placing an object. | 500 | 4 | 39 | 6 |
+| `metaworld/plate-slide-v2` | A task requiring the robot to slide a plate. | 500 | 4 | 39 | 6 |
+| `metaworld/plate-slide-side-v2` | A task involving the robot sliding a plate from the side. | 500 | 4 | 39 | 6 |
+| `metaworld/plate-slide-back-v2` | A task where the robot must slide a plate backward. | 500 | 4 | 39 | 6 |
+| `metaworld/plate-slide-back-side-v2` | A task involving the robot sliding a plate backward from the side. | 500 | 4 | 39 | 6 |
+| `metaworld/peg-unplug-side-v2` | A task where the robot must unplug a peg from the side. | 500 | 4 | 39 | 6 |
+| `metaworld/soccer-v2` | A task where the robot must play soccer. | 500 | 4 | 39 | 6 |
+| `metaworld/stick-push-v2` | A task involving the robot pushing a stick. | 500 | 4 | 39 | 6 |
+| `metaworld/stick-pull-v2` | A task where the robot must pull a stick. | 500 | 4 | 39 | 6 |
+| `metaworld/push-wall-v2` | A task involving the robot pushing against a wall. | 500 | 4 | 39 | 6 |
+| `metaworld/reach-wall-v2` | A task where the robot must reach an object with a wall. | 500 | 4 | 39 | 6 |
+| `metaworld/shelf-place-v2` | A task involving the robot placing an object on a shelf. | 500 | 4 | 39 | 6 |
+| `metaworld/sweep-into-v2` | A task where the robot must sweep objects into a container. | 500 | 4 | 39 | 6 |
+| `metaworld/sweep-v2` | A task requiring the robot to sweep. | 500 | 4 | 39 | 6 |
+| `metaworld/window-open-v2` | A task where the robot must open a window. | 500 | 4 | 39 | 6 |
+| `metaworld/window-close-v2` | A task involving the robot closing a window. | 500 | 4 | 39 | 6 |

From 600575cbacda12bdc2c8fbafa0abc7dbfe83479f Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 13:08:06 +0200
Subject: [PATCH 193/198] Don't run determinism tests on arbitrary external
 envs added by other libs (like Atari envs added by Shimmy), only those
 included in gym and those added by us.

---
 test/{test_gym_envs.py => test_all_gym_builtin_envs.py} | 1 +
 1 file changed, 1 insertion(+)
 rename test/{test_gym_envs.py => test_all_gym_builtin_envs.py} (96%)

diff --git a/test/test_gym_envs.py b/test/test_all_gym_builtin_envs.py
similarity index 96%
rename from test/test_gym_envs.py
rename to test/test_all_gym_builtin_envs.py
index 5e50d53..f2eeac6 100644
--- a/test/test_gym_envs.py
+++ b/test/test_all_gym_builtin_envs.py
@@ -12,6 +12,7 @@
 GYM_IDS = [spec.id for spec in gym.envs.registry.values() if
            not isinstance(spec.entry_point, Callable) and
            "fancy_gym" not in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point
            and 'jax' not in spec.id.lower()
+           and 'jax' not in spec.id.lower()
            and not re.match(r'GymV2.Environment', spec.id)
            ]
 GYM_MP_IDS = fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all']

From fc7382443df1d7c5a5e72b1a0effebb8203fab08 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 13:23:33 +0200
Subject: [PATCH 194/198] README: Minor cosmetic changes

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index dc64694..02346d5 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,8 @@

Fancy Gym -
-
+

 | :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). |
 | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |

From 4f534779ff04bd8a11e3a46a41c3388d85ecda96 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 13:25:14 +0200
Subject: [PATCH 195/198] README: Minor cosmetic changes pt.2

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 02346d5..016bafa 100644
--- a/README.md
+++ b/README.md
@@ -4,13 +4,12 @@

Fancy Gym -

+
+Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments.

 | :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). |
 | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |

-Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments.
-
 **Key Features**:

 - **New Challenging Environments**: `fancy_gym` includes several new environments (Panda Box Pushing, Table Tennis, etc.) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research.

From 614d2a42db044e1d883d83bd46280198b4bea10a Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 11 Oct 2023 13:28:03 +0200
Subject: [PATCH 196/198] README: Minor cosmetic changes pt.3

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 016bafa..dfd5e20 100644
--- a/README.md
+++ b/README.md
@@ -3,13 +3,14 @@

Fancy Gym +

-Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments. - | :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments. + **Key Features**: - **New Challenging Environments**: `fancy_gym` includes several new environments (Panda Box Pushing, Table Tennis, etc.) that present a higher degree of difficulty, pushing the boundaries of reinforcement learning research. From f8dcf5b2df86952ca1549ebd258855c22cea28d1 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 11 Oct 2023 13:29:08 +0200 Subject: [PATCH 197/198] Fix: Did not define MP_Wrapper for new RandomInit version of BoxPushing --- fancy_gym/envs/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index d416dbb..a40c81f 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -215,6 +215,7 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: register( id='fancy/BoxPushingRandomInit{}-v0'.format(reward_type), entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), + mp_wrapper=mujoco.box_pushing.MPWrapper, max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, kwargs={"random_init": True} ) From a04a4b6e6770806979186bc58447823193fc2406 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 11 Oct 2023 13:36:46 +0200 Subject: [PATCH 198/198] Link legacy branch in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dfd5e20..c457932 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@

-| :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the legacy branch. Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | +| :exclamation: Fancy Gym has recently received a major refactor, which also updated many of the used dependencies to current versions. The update has brought some breaking changes. If you want to access the old version, check out the [legacy branch](https://github.com/ALRhub/fancy_gym/tree/legacy). Find out more about what changed [here](https://github.com/ALRhub/fancy_gym/pull/75). | | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | Built upon the foundation of [Gymnasium](https://gymnasium.farama.org/) (a maintained fork of OpenAI’s renowned Gym library) `fancy_gym` offers a comprehensive collection of reinforcement learning environments.
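
The `register` call patched above in `fancy_gym/envs/__init__.py` (PATCH 197) is also the hook for exposing new tasks. A hypothetical sketch, assuming `register` is importable from the `fancy_gym` package; all names below are placeholders, not part of the library:

```python
from fancy_gym import register  # assumed public export of the helper used in PATCH 197

register(
    id='fancy/MyTask-v0',                     # hypothetical id
    entry_point='my_package.envs:MyTaskEnv',  # hypothetical entry point
    max_episode_steps=200,
    kwargs={'random_init': True},             # forwarded to the env constructor, as in PATCH 197
    # mp_wrapper=MyTaskMPWrapper,             # optional custom wrapper, as added in PATCH 197
)
```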