From deaca46d87ff305a6fc157efc7f4cb18c213f928 Mon Sep 17 00:00:00 2001
From: Kayen
Date: Sat, 25 Nov 2023 23:34:24 +0100
Subject: [PATCH 01/13] added rgb_array render mode for off-screen rendering

---
 .../air_hockey/air_hockey_env_wrapper.py | 29 +++++++++++++------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
index 0e8e31e..5f7bb38 100644
--- a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@@ -8,7 +8,7 @@ from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
 from mushroom_rl.core import Environment

 class AirHockeyEnv(Environment):
-    metadata = {"render_modes": ["human"], "render_fps": 50}
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}

     def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
         """
@@ -39,6 +39,18 @@ class AirHockeyEnv(Environment):
         if env_mode == "tournament" and type(interpolation_order) != tuple:
             interpolation_order = (interpolation_order, interpolation_order)

+        self.render_mode = render_mode
+
+        # Determine headless mode based on render_mode
+        headless = self.render_mode == 'rgb_array'
+        width = kwargs.pop('width', 1920)
+        height = kwargs.pop('height', 1080)
+
+        # Prepare viewer_params
+        viewer_params = kwargs.get('viewer_params', {})
+        viewer_params.update({'headless': headless, 'width': width, 'height': height})
+        kwargs['viewer_params'] = viewer_params
+
         self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
         self.env_name = env_mode
         self.env_info = self.base_env.env_info
@@ -81,9 +93,6 @@ class AirHockeyEnv(Environment):
         self.env_info['constraints'] = constraint_list
         self.env_info['env_name'] = self.env_name

-        self.render_mode = render_mode
-        self.render_human_active = False
-
         super().__init__(self.base_env.info)

     def step(self, action):
@@ -111,15 +120,17 @@
         if self.env_info['env_name'] == "tournament":
             obs = np.array(np.split(obs, 2))
-
-        if self.render_human_active:
-            self.base_env.render()

         return obs, reward, done, False, info

     def render(self):
-        self.render_human_active = True
-
+        if self.render_mode == 'rgb_array':
+            return self.base_env.render(record = True)
+        elif self.render_mode == 'human':
+            self.base_env.render()
+        else:
+            raise ValueError(f"Unsupported render mode: '{self.render_mode}'")
+
     def reset(self, seed=None, options={}):
         self.base_env.seed(seed)
         obs = self.base_env.reset()
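The new mode plugs into the regular Gymnasium API. A minimal sketch of how it might be used (the concrete env id, the width/height keyword arguments and the pixel-array return value of render() are assumptions based on this patch and the fancy_gym registration, not part of the diff):

```python
import gymnasium as gym
import fancy_gym  # registers the fancy/ namespace

# 'rgb_array' switches the underlying MuJoCo viewer to headless mode (see viewer_params above)
env = gym.make('fancy/AirHockey-3dof-hit-v0', render_mode='rgb_array', width=640, height=480)
observation = env.reset(seed=1)
observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
frame = env.render()  # expected to return the recorded off-screen frame(s) instead of opening a window
```
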
From 14b7d1d6c344aaa97c9e3bae0e44612c25a9f552 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 26 Nov 2023 17:20:33 +0100
Subject: [PATCH 02/13] Fix small bug in test_black_box.py:test_verbosity

---
 test/test_black_box.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index c1a760a..fc4f0bf 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -103,7 +103,7 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]
     env_step = make(env_id)
     env_step.reset()
-    _obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample())
+    _obs, _reward, _terminated, _truncated, info = env_step.step(env_step.action_space.sample())
     info_keys_step = info.keys()

     assert all(e in info_keys for e in info_keys_step)

From e5bb6f1a88d38e2106bf2889883baeaf324ce0ff Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 26 Nov 2023 17:51:09 +0100
Subject: [PATCH 03/13] Restructured examples and fixed bugs

---
 fancy_gym/examples/example_replanning.py      |  1 +
 fancy_gym/examples/example_replanning_envs.py | 19 ++++++-----
 fancy_gym/examples/examples_dmc.py            | 32 ++++++++++++-------
 fancy_gym/examples/examples_general.py        |  8 ++---
 fancy_gym/examples/examples_metaworld.py      | 27 +++++++---------
 .../examples/examples_movement_primitives.py  | 16 ++++------
 fancy_gym/examples/examples_open_ai.py        |  4 ++-
 7 files changed, 57 insertions(+), 50 deletions(-)

diff --git a/fancy_gym/examples/example_replanning.py b/fancy_gym/examples/example_replanning.py
index e69de29..f87f5c1 100644
--- a/fancy_gym/examples/example_replanning.py
+++ b/fancy_gym/examples/example_replanning.py
@@ -0,0 +1 @@
+# TODO
\ No newline at end of file

diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py
index 2c3c3f4..b06c970 100644
--- a/fancy_gym/examples/example_replanning_envs.py
+++ b/fancy_gym/examples/example_replanning_envs.py
@@ -6,21 +6,21 @@ def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0",
     env = gym.make(env_name)
     env.reset(seed=seed)
     for i in range(iterations):
-        done = False
-        while done is False:
+        while True:
             ac = env.action_space.sample()
             obs, reward, terminated, truncated, info = env.step(ac)
             if render:
                 env.render(mode="human")
             if terminated or truncated:
                 env.reset()
+                break
     env.close()
     del env

 def example_custom_replanning_envs(seed=0, iteration=100, render=True):
     # id for a step-based environment
-    base_env_id = "BoxPushingDense-v0"
+    base_env_id = "fancy/BoxPushingDense-v0"

     wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper]

@@ -38,7 +38,8 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
                         'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
                         'condition_on_desired': True}

-    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
+    base_env = gym.make(base_env_id)
+    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                             seed=seed)
@@ -56,10 +57,12 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True):
     env.close()
     del env
-
-if __name__ == "__main__":
+def main(render=False):
     # run a registered replanning environment
-    example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False)
+    example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=render)

     # run a custom replanning environment
-    example_custom_replanning_envs(seed=0, iteration=8, render=True)
+    example_custom_replanning_envs(seed=0, iteration=8, render=render)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

diff --git a/fancy_gym/examples/examples_dmc.py b/fancy_gym/examples/examples_dmc.py
index fbb1473..2703e01 100644
--- a/fancy_gym/examples/examples_dmc.py
+++ b/fancy_gym/examples/examples_dmc.py
@@ -84,7 +84,8 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     # basis_generator_kwargs = {'basis_generator_type': 'rbf',
     #                           'num_basis': 5
     #                           }
-    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+    base_env = gym.make(base_env_id)
+    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                             seed=seed)
@@ -114,21 +115,13 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     env.close()
     del env
-
-if __name__ == '__main__':
-    # Disclaimer: DMC environments require the seed to be specified in the beginning.
-    # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.
-
-    # For rendering DMC
-    # export MUJOCO_GL="osmesa"
-    render = True
-
+def main(render = True):
     # # Standard DMC Suite tasks
     example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
     #
     # # Manipulation tasks
     # # Disclaimer: The vision versions are currently not integrated and yield an error
-    example_dmc("dm_control/manipulation-reach_site_features", seed=10, iterations=250, render=render)
+    example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render)
     #
     # # Gym + DMC hybrid task provided in the MP framework
     example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)
@@ -136,3 +129,20 @@ if __name__ == '__main__':
     # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
     # already registered above
     example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
+
+    # # Standard DMC Suite tasks
+    example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render)
+    #
+    # # Manipulation tasks
+    # # Disclaimer: The vision versions are currently not integrated and yield an error
+    example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render)
+    #
+    # # Gym + DMC hybrid task provided in the MP framework
+    example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render)
+
+    # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
+    # already registered above
+    example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

diff --git a/fancy_gym/examples/examples_general.py b/fancy_gym/examples/examples_general.py
index e341bfe..9def5b6 100644
--- a/fancy_gym/examples/examples_general.py
+++ b/fancy_gym/examples/examples_general.py
@@ -85,10 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16),
     # do not return values above threshold
     return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
-
-if __name__ == '__main__':
-    render = True
-
+def main(render = True):
     # Basic gym task
     example_general("Pendulum-v1", seed=10, iterations=200, render=render)

@@ -100,3 +97,6 @@ if __name__ == '__main__':
     # Vectorized multiprocessing environments
     # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

diff --git a/fancy_gym/examples/examples_metaworld.py b/fancy_gym/examples/examples_metaworld.py
index 7919b71..bd87c2b 100644
--- a/fancy_gym/examples/examples_metaworld.py
+++ b/fancy_gym/examples/examples_metaworld.py
@@ -35,7 +35,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True):
         if terminated or truncated:
             print(env_id, rewards)
             rewards = 0
-            obs = env.reset()
+            obs = env.reset(seed=seed+i+1)

     env.close()
     del env
@@ -81,7 +81,8 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
     basis_generator_kwargs = {'basis_generator_type': 'rbf',
                               'num_basis': 5
                               }
-    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+    base_env = gym.make(base_env_id)
+    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                             seed=seed)
@@ -92,14 +93,10 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
     # It is also possible to change them mode multiple times when
     # e.g. only every nth trajectory should be displayed.
     if render:
-        raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. "
-                         "A temporary workaround is to alter their code in MujocoEnv render() from "
-                         "`if not offscreen` to `if not offscreen or offscreen == 'human'`.")
-        # TODO: Remove this, when Metaworld fixes its interface.
-        # env.render(mode="human")
+        env.render(mode="human")

     rewards = 0
-    obs = env.reset()
+    obs = env.reset(seed=seed)

     # number of samples/full trajectories (multiple environment steps)
     for i in range(iterations):
@@ -110,25 +107,23 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True):
         if terminated or truncated:
             print(base_env_id, rewards)
             rewards = 0
-            obs = env.reset()
+            obs = env.reset(seed=seed+i+1)

     env.close()
     del env
-
-if __name__ == '__main__':
-    # Disclaimer: MetaWorld environments require the seed to be specified in the beginning.
-    # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.
-
+def main(render = False):
     # For rendering it might be necessary to specify your OpenGL installation
     # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
-    render = False

     # # Standard Meta world tasks
     example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render)

     # # MP + MetaWorld hybrid task provided in the our framework
-    example_meta("metaworld_ProMP/ButtonPress-v2", seed=10, iterations=1, render=render)
+    example_meta("metaworld_ProMP/button-press-v2", seed=10, iterations=1, render=render)
     #
     # # Custom MetaWorld task
     example_custom_meta_and_mp(seed=10, iterations=1, render=render)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 317a103..fc43c63 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -28,13 +28,7 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
         if render and i % 1 == 0:
             # This renders the full MP trajectory
             # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
-            # Resetting to no rendering, can be achieved by render(mode=None).
-            # It is also possible to change the mode multiple times when
-            # e.g. only every second trajectory should be displayed, such as here
-            # Just make sure the correct mode is set before executing the step.
             env.render(mode="human")
-        else:
-            env.render()

         # Now the action space is not the raw action but the parametrization of the trajectory generator,
         # such as a ProMP
@@ -129,7 +123,8 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
     # basis_generator_kwargs = {'basis_generator_type': 'rbf',
     #                           'num_basis': 5
     #                           }
-    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+    base_env = gym.make(base_env_id)
+    env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={},
                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                             seed=seed)
@@ -151,9 +146,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
             rewards = 0
             obs = env.reset()
-
-if __name__ == '__main__':
-    render = False
+def main(render = False):
     # DMP
     example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)

@@ -172,3 +165,6 @@ if __name__ == '__main__':
     # Custom MP
     example_fully_custom_mp(seed=10, iterations=1, render=render)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

diff --git a/fancy_gym/examples/examples_open_ai.py b/fancy_gym/examples/examples_open_ai.py
index 07f1719..f1688ef 100644
--- a/fancy_gym/examples/examples_open_ai.py
+++ b/fancy_gym/examples/examples_open_ai.py
@@ -31,6 +31,8 @@ def example_mp(env_name, seed=1, render=True):
             print(returns)
             obs = env.reset()

+def main(render=True):
+    example_mp("gym_ProMP/Reacher-v2", render=render)

 if __name__ == '__main__':
-    example_mp("gym_ProMP/Reacher-v2")
+    main()
\ No newline at end of file

From d367ac0c4b2b79c764ff36404ecebec41f953a11 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 26 Nov 2023 17:51:26 +0100
Subject: [PATCH 04/13] Added new test to run all examples

---
 test/test_examples.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 test/test_examples.py

diff --git a/test/test_examples.py b/test/test_examples.py
new file mode 100644
index 0000000..1c1963e
--- /dev/null
+++ b/test/test_examples.py
@@ -0,0 +1,13 @@
+import pytest
+
+from fancy_gym.examples.example_replanning_envs import main as replanning_envs_main
+from fancy_gym.examples.examples_dmc import main as dmc_main
+from fancy_gym.examples.examples_general import main as general_main
+from fancy_gym.examples.examples_metaworld import main as metaworld_main
+from fancy_gym.examples.examples_movement_primitives import main as mp_main
+from fancy_gym.examples.examples_open_ai import main as open_ai_main
+
+@pytest.mark.parametrize('entry', [replanning_envs_main, dmc_main, general_main, metaworld_main, mp_main, open_ai_main])
+@pytest.mark.parametrize('render', [False])
+def test_run_example(entry, render):
+    entry(render=render)

From db842db2b75a8e2b351a0c809ed301ea90a1d015 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 26 Nov 2023 17:59:08 +0100
Subject: [PATCH 05/13] Exclude non gym native envs from test for gym builtin envs.
---
 test/test_all_gym_builtin_envs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/test_all_gym_builtin_envs.py b/test/test_all_gym_builtin_envs.py
index f2eeac6..9264b98 100644
--- a/test/test_all_gym_builtin_envs.py
+++ b/test/test_all_gym_builtin_envs.py
@@ -12,7 +12,9 @@
 GYM_IDS = [spec.id for spec in gym.envs.registry.values()
            if not isinstance(spec.entry_point, Callable) and
            "fancy_gym" not in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point
            and 'jax' not in spec.id.lower()
-           and 'jax' not in spec.id.lower()
+           and 'shimmy' not in spec.id.lower()
+           and 'ale_py' not in spec.id.lower()
+           and 'tabular' not in spec.id.lower()
            and not re.match(r'GymV2.Environment', spec.id)
            ]
 GYM_MP_IDS = fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['all']

From 374d72dddeceef19a0db67df972cfe72b099b548 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sun, 26 Nov 2023 18:52:08 +0100
Subject: [PATCH 06/13] Fix wrong signature for main in examples/examples_movement_primitives.py

---
 fancy_gym/examples/examples_movement_primitives.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index c84cb22..3581aa1 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -250,8 +250,7 @@ def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True):
     pass

-def main():
-    render = False
+def main(render=False):
     # DMP
     example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)

From 0b4e729a49691230c443531ffa767aaebc373733 Mon Sep 17 00:00:00 2001
From: Kayen
Date: Sun, 26 Nov 2023 21:49:52 +0100
Subject: [PATCH 07/13] incorporated human render_mode gym spec + optimized keyword arguments

---
 .../envs/mujoco/air_hockey/air_hockey_env_wrapper.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
index 5f7bb38..b5ef8c3 100644
--- a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@@ -10,7 +10,7 @@ from mushroom_rl.core import Environment
 class AirHockeyEnv(Environment):
     metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}

-    def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
+    def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, width=1920, height=1080, **kwargs):
         """
         Environment Constructor
@@ -40,11 +40,10 @@ class AirHockeyEnv(Environment):
             interpolation_order = (interpolation_order, interpolation_order)

         self.render_mode = render_mode
+        self.render_human_active = False

         # Determine headless mode based on render_mode
         headless = self.render_mode == 'rgb_array'
-        width = kwargs.pop('width', 1920)
-        height = kwargs.pop('height', 1080)

         # Prepare viewer_params
         viewer_params = kwargs.get('viewer_params', {})
@@ -121,13 +120,16 @@ class AirHockeyEnv(Environment):
         if self.env_info['env_name'] == "tournament":
             obs = np.array(np.split(obs, 2))

+        if self.render_human_active:
+            self.base_env.render()
+
         return obs, reward, done, False, info

     def render(self):
         if self.render_mode == 'rgb_array':
             return self.base_env.render(record = True)
         elif self.render_mode == 'human':
-            self.base_env.render()
+            self.render_human_active = True
         else:
             raise ValueError(f"Unsupported render mode: '{self.render_mode}'")

From 6afb5880db86dc811fe60e7e5f4f2403dbb54c77 Mon Sep 17 00:00:00 2001
From: Mustafa Enes Batur
Date: Sun, 26 Nov 2023 22:35:39 +0100
Subject: [PATCH 08/13] Implement AirHocKIT 2023 environments

---
 fancy_gym/envs/__init__.py                    |   2 +-
 .../air_hockey/air_hockey_env_wrapper.py      |  62 ++++++-----
 .../air_hockey/position_control_wrapper.py    |   4 +
 .../mujoco/air_hockey/seven_dof/__init__.py   |   4 +-
 .../seven_dof/airhockit_base_env.py           | 105 ++++++++++++++++++
 .../mujoco/air_hockey/seven_dof/defend.py     |  97 +++++++++++++++-
 .../envs/mujoco/air_hockey/seven_dof/hit.py   |  91 ++++++++++++++-
 7 files changed, 329 insertions(+), 36 deletions(-)
 create mode 100644 fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index 8c0fa23..42d718a 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -291,7 +291,7 @@ register(
 )

 # Air Hockey environments
-for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]:
+for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]:
     register(
         id=f'fancy/AirHockey-{env_mode}-v0',
         entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',

diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
index 0e8e31e..e943c53 100644
--- a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@@ -30,7 +30,10 @@ class AirHockeyEnv(Environment):
             "7dof-defend": position.IiwaPositionDefend,

             "3dof-hit": position.PlanarPositionHit,
-            "3dof-defend": position.PlanarPositionDefend
+            "3dof-defend": position.PlanarPositionDefend,
+
+            "7dof-hit-airhockit2023": position.IiwaPositionHitAirhocKIT2023,
+            "7dof-defend-airhockit2023": position.IiwaPositionDefendAirhocKIT2023,
         }

         if env_mode not in env_dict:
@@ -42,34 +45,39 @@ class AirHockeyEnv(Environment):
         self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
         self.env_name = env_mode
         self.env_info = self.base_env.env_info
-        single_robot_obs_size = len(self.base_env.info.observation_space.low)
-        if env_mode == "tournament":
-            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
-        else:
-            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
-        robot_info = self.env_info["robot"]
-        if env_mode != "tournament":
-            if interpolation_order in [1, 2]:
-                self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
-            if interpolation_order in [3, 4, -1]:
-                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
-                                               high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
-            if interpolation_order in [5]:
-                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
-                                               high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+        if hasattr(self.base_env, "wrapper_obs_space") and hasattr(self.base_env, "wrapper_act_space"):
+            self.observation_space = self.base_env.wrapper_obs_space
+            self.action_space = self.base_env.wrapper_act_space
         else:
-            acts = [None, None]
-            for i in range(2):
-                if interpolation_order[i] in [1, 2]:
-                    acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
-                if interpolation_order[i] in [3, 4, -1]:
-                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
-                                         high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
-                if interpolation_order[i] in [5]:
-                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
-                                         high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
-            self.action_space = spaces.Tuple((acts[0], acts[1]))
+            single_robot_obs_size = len(self.base_env.info.observation_space.low)
+            if env_mode == "tournament":
+                self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
+            else:
+                self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
+            robot_info = self.env_info["robot"]
+
+            if env_mode != "tournament":
+                if interpolation_order in [1, 2]:
+                    self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+                if interpolation_order in [3, 4, -1]:
+                    self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
+                                                   high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+                if interpolation_order in [5]:
+                    self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
+                                                   high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+            else:
+                acts = [None, None]
+                for i in range(2):
+                    if interpolation_order[i] in [1, 2]:
+                        acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+                    if interpolation_order[i] in [3, 4, -1]:
+                        acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
+                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+                    if interpolation_order[i] in [5]:
+                        acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
+                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+                self.action_space = spaces.Tuple((acts[0], acts[1]))

         constraint_list = constraints.ConstraintList()
         constraint_list.add(constraints.JointPositionConstraint(self.env_info))

diff --git a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
index 91e38e1..4e069b6 100644
--- a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
@@ -261,10 +261,14 @@ class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
 class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
     pass

+class IiwaPositionHitAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyHitAirhocKIT2023):
+    pass

 class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
     pass

+class IiwaPositionDefendAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyDefendAirhocKIT2023):
+    pass

 class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
     pass

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
index ef48d24..1bcca25 100644
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
@@ -1,4 +1,4 @@
 from .env_base import AirHockeyBase
 from .tournament import AirHockeyTournament
-from .hit import AirHockeyHit
-from .defend import AirHockeyDefend
\ No newline at end of file
+from .hit import AirHockeyHit, AirHockeyHitAirhocKIT2023
+from .defend import AirHockeyDefend, AirHockeyDefendAirhocKIT2023
\ No newline at end of file

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
new file mode 100644
index 0000000..1a1ac03
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
@@ -0,0 +1,105 @@
+import numpy as np
+from gymnasium import spaces
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian
+
+class AirhocKIT2023BaseEnv(AirHockeySingle):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        obs_low = np.hstack([[-np.inf] * 37])
+        obs_high = np.hstack([[np.inf] * 37])
+        self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
+        self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6))
+
+    # We don't need puck yaw observations
+    def filter_obs(self, obs):
+        obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]])
+        return obs
+
+    def reset(self):
+        self.last_acceleration = np.repeat(0., 6)
+        obs = super().reset()
+        self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1]
+        self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1]
+
+        self.last_planned_world_pos = self._fk(self.interp_pos)
+        obs = np.hstack([
+            obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
+        ])
+        return self.filter_obs(obs)
+
+    def step(self, action):
+        action /= 10
+
+        new_vel = self.interp_vel + action
+
+        jerk = 2 * (new_vel - self.interp_vel - self.last_acceleration * 0.02) / (0.02 ** 2)
+        new_pos = self.interp_pos + self.interp_vel * 0.02 + (1/2) * self.last_acceleration * (0.02 ** 2) + (1/6) * jerk * (0.02 ** 3)
+        abs_action = np.vstack([np.hstack([new_pos, 0]), np.hstack([new_vel, 0])])
+
+        self.interp_pos = new_pos
+        self.interp_vel = new_vel
+        self.last_acceleration += jerk * 0.02
+
+        obs, rew, done, info = super().step(abs_action)
+        self.last_planned_world_pos = self._fk(self.interp_pos)
+        obs = np.hstack([
+            obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
+        ])
+
+        fatal_rew = self.check_fatal(obs)
+        if fatal_rew != 0:
+            return self.filter_obs(obs), fatal_rew, True, info
+
+        return self.filter_obs(obs), rew, done, info
+
+    def check_constraints(self, constraint_values):
+        fatal_rew = 0
+
+        j_pos_constr = constraint_values["joint_pos_constr"]
+        if j_pos_constr.max() > 0:
+            fatal_rew += j_pos_constr.max()
+
+        j_vel_constr = constraint_values["joint_vel_constr"]
+        if j_vel_constr.max() > 0:
+            fatal_rew += j_vel_constr.max()
+
+        ee_constr = constraint_values["ee_constr"]
+        if ee_constr.max() > 0:
+            fatal_rew += ee_constr.max()
+
+        link_constr = constraint_values["link_constr"]
+        if link_constr.max() > 0:
+            fatal_rew += link_constr.max()
+
+        return -fatal_rew
+
+    def check_fatal(self, obs):
+        fatal_rew = 0
+
+        q = obs[self.env_info["joint_pos_ids"]]
+        qd = obs[self.env_info["joint_vel_ids"]]
+        constraint_values_obs = self.env_info["constraints"].fun(q, qd)
+        fatal_rew += self.check_constraints(constraint_values_obs)
+
+        return -fatal_rew
+
+    def _fk(self, pos):
+        res, _ = forward_kinematics(self.env_info["robot"]["robot_model"],
+                                    self.env_info["robot"]["robot_data"], pos)
+        return res.astype(np.float32)
+
+    def _ik(self, world_pos, init_q=None):
+        success, pos = inverse_kinematics(self.env_info["robot"]["robot_model"],
+                                          self.env_info["robot"]["robot_data"],
+                                          world_pos,
+                                          initial_q=init_q)
+        pos = pos.astype(np.float32)
+        assert success
+        return pos
+
+    def _jacobian(self, pos):
+        return jacobian(self.env_info["robot"]["robot_model"],
+                        self.env_info["robot"]["robot_data"],
+                        pos).astype(np.float32)

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
index 0ffdf66..d957311 100644
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
@@ -1,6 +1,7 @@
 import numpy as np

 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv

 class AirHockeyDefend(AirHockeySingle):
@@ -10,9 +11,7 @@ class AirHockeyDefend(AirHockeySingle):
     """
     def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
         self.init_velocity_range = (1, 3)
-        self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
-        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)

     def setup(self, obs):
@@ -32,7 +31,7 @@ class AirHockeyDefend(AirHockeySingle):
         self._write_data("puck_y_vel", puck_vel[1])
         self._write_data("puck_yaw_vel", puck_vel[2])

-        super(AirHockeyDefend, self).setup(obs)
+        super().setup(obs)

     def reward(self, state, action, next_state, absorbing):
         return 0
@@ -46,6 +45,98 @@ class AirHockeyDefend(AirHockeySingle):
             return True
         return super().is_absorbing(state)

+class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv):
+    def __init__(self, gamma=0.99, horizon=200, viewer_params={}):
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+        self.init_velocity_range = (1, 3)
+        self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]])  # Table Frame
+        self._setup_metrics()
+
+    def setup(self, obs):
+        self._setup_metrics()
+        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+        angle = np.random.uniform(-0.5, 0.5)
+
+        puck_vel = np.zeros(3)
+        puck_vel[0] = -np.cos(angle) * lin_vel
+        puck_vel[1] = np.sin(angle) * lin_vel
+        puck_vel[2] = np.random.uniform(-10, 10)
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+        self._write_data("puck_x_vel", puck_vel[0])
+        self._write_data("puck_y_vel", puck_vel[1])
+        self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super().setup(obs)
+
+    def reset(self, *args):
+        obs = super().reset()
+        self.hit_step_flag = False
+        self.hit_step = False
+        self.received_hit_reward = False
+        self.give_reward_next = False
+        return obs
+
+    def _setup_metrics(self):
+        self.episode_steps = 0
+        self.has_hit = False
+
+    def _simulation_post_step(self):
+        if not self.has_hit:
+            self.has_hit = self._check_collision("puck", "robot_1/ee")
+
+        super()._simulation_post_step()
+
+    def _step_finalize(self):
+        self.episode_steps += 1
+        return super()._step_finalize()
+
+    def reward(self, state, action, next_state, absorbing):
+        puck_pos, puck_vel = self.get_puck(next_state)
+        ee_pos, _ = self.get_ee()
+        rew = 0.01
+        if -0.7 < puck_pos[0] <= -0.2 and np.linalg.norm(puck_vel[:2]) < 0.1:
+            assert absorbing
+            rew += 70
+
+        if self.has_hit and not self.hit_step_flag:
+            self.hit_step_flag = True
+            self.hit_step = True
+        else:
+            self.hit_step = False
+
+        f = lambda puck_vel: 30 + 100 * (100 ** (-0.25 * np.linalg.norm(puck_vel[:2])))
+        if not self.give_reward_next and not self.received_hit_reward and self.hit_step and ee_pos[0] < puck_pos[0]:
+            self.hit_this_step = True
+            if np.linalg.norm(puck_vel[:2]) < 0.1:
+                return rew + f(puck_vel)
+            self.give_reward_next = True
+            return rew
+
+        if not self.received_hit_reward and self.give_reward_next:
+            self.received_hit_reward = True
+            if puck_vel[0] >= -0.2:
+                return rew + f(puck_vel)
+            return rew
+        else:
+            return rew
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # If puck is over the middle line and moving towards opponent
+        if puck_pos[0] > 0 and puck_vel[0] > 0:
+            return True
+
+        if self.episode_steps == self._mdp_info.horizon:
+            return True
+
+        if np.linalg.norm(puck_vel[:2]) < 0.1:
+            return True
+        return super().is_absorbing(obs)
+
 if __name__ == '__main__':
     env = AirHockeyDefend()

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
index 59386d5..41eb08f 100644
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
@@ -1,6 +1,7 @@
 import numpy as np

 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv

 class AirHockeyHit(AirHockeySingle):
@@ -14,9 +15,6 @@ class AirHockeyHit(AirHockeySingle):
             opponent_agent(Agent, None): Agent which controls the opponent
             moving_init(bool, False): If true, initialize the puck with inital velocity.
         """
-        self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]])  # Table Frame
-        self.init_velocity_range = (0, 0.5)  # Table Frame
-
         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)

         self.moving_init = moving_init
@@ -58,6 +56,93 @@
             return True
         return super(AirHockeyHit, self).is_absorbing(obs)

+class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv):
+    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+        self.moving_init = moving_init
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+        self._setup_metrics()
+
+    def reset(self, *args):
+        obs = super().reset()
+        self.last_ee_pos = self.last_planned_world_pos.copy()
+        self.last_ee_pos[0] -= 1.51
+        return obs
+
+    def setup(self, obs):
+        self._setup_metrics()
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        if self.moving_init:
+            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+            puck_vel = np.zeros(3)
+            puck_vel[0] = -np.cos(angle) * lin_vel
+            puck_vel[1] = np.sin(angle) * lin_vel
+            puck_vel[2] = np.random.uniform(-2, 2)
+
+            self._write_data("puck_x_vel", puck_vel[0])
+            self._write_data("puck_y_vel", puck_vel[1])
+            self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super().setup(obs)
+
+    def _setup_metrics(self):
+        self.episode_steps = 0
+        self.has_scored = False
+
+    def _step_finalize(self):
+        cur_obs = self._create_observation(self.obs_helper._build_obs(self._data))
+        puck_pos, _ = self.get_puck(cur_obs)  # world frame [x, y, z] and [x', y', z']
+
+        if not self.has_scored:
+            boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+            self.has_scored = np.any(np.abs(puck_pos[:2]) > boundary) and puck_pos[0] > 0
+
+        self.episode_steps += 1
+        return super()._step_finalize()
+
+    def reward(self, state, action, next_state, absorbing):
+        rew = 0
+        puck_pos, puck_vel = self.get_puck(next_state)
+        ee_pos, _ = self.get_ee()
+        ee_vel = (ee_pos - self.last_ee_pos) / 0.02
+        self.last_ee_pos = ee_pos
+
+        if puck_vel[0] < 0.25 and puck_pos[0] < 0:
+            ee_puck_dir = (puck_pos - ee_pos)[:2]
+            ee_puck_dir = ee_puck_dir / np.linalg.norm(ee_puck_dir)
+            rew += 1 * max(0, np.dot(ee_puck_dir, ee_vel[:2]))
+        else:
+            rew += 10 * np.linalg.norm(puck_vel[:2])
+
+        if self.has_scored:
+            rew += 2000 + 5000 * np.linalg.norm(puck_vel[:2])
+
+        return rew
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # Stop if the puck bounces back on the opponents wall
+        if puck_pos[0] > 0 and puck_vel[0] < 0:
+            return True
+
+        if self.has_scored:
+            return True
+
+        if self.episode_steps == self._mdp_info.horizon:
+            return True
+
+        return super().is_absorbing(obs)
+
 if __name__ == '__main__':
     env = AirHockeyHit(moving_init=True)
From aeecf65f023e162ab9458d21968988e4017e998f Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 27 Nov 2023 13:18:41 +0100
Subject: [PATCH 09/13] Ensure MujocoViewer opens on .render() for render_mode human

---
 fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
index b5ef8c3..31a1a8e 100644
--- a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@@ -130,6 +130,7 @@ class AirHockeyEnv(Environment):
             return self.base_env.render(record = True)
         elif self.render_mode == 'human':
             self.render_human_active = True
+            self.base_env.render()
         else:
             raise ValueError(f"Unsupported render mode: '{self.render_mode}'")

@@ -190,4 +191,4 @@ if __name__ == "__main__":
     J = 0.
     gamma = 1.
     steps = 0
-    env.reset()
\ No newline at end of file
+    env.reset()

From fde5e33be67a3a224bfecc164be6e224ab26387c Mon Sep 17 00:00:00 2001
From: Mustafa Enes Batur
Date: Sun, 3 Dec 2023 22:37:08 +0100
Subject: [PATCH 10/13] Add noise

---
 .../mujoco/air_hockey/seven_dof/airhockit_base_env.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
index 1a1ac03..8535088 100644
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
@@ -11,15 +11,24 @@ class AirhocKIT2023BaseEnv(AirHockeySingle):
         obs_high = np.hstack([[np.inf] * 37])
         self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
         self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6))
+        self.noise = False

     # We don't need puck yaw observations
     def filter_obs(self, obs):
         obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]])
         return obs
+
+    # These are roughly the noise levels for a noisy environment, turned-off by default, enable in the constructor
+    def add_noise(self, obs):
+        if not self.noise:
+            return
+        obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3)
+        obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3)

     def reset(self):
         self.last_acceleration = np.repeat(0., 6)
         obs = super().reset()
+        self.add_noise(obs)
         self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1]
         self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1]
@@ -43,6 +52,7 @@ class AirhocKIT2023BaseEnv(AirHockeySingle):
         self.last_acceleration += jerk * 0.02

         obs, rew, done, info = super().step(abs_action)
+        self.add_noise(obs)
         self.last_planned_world_pos = self._fk(self.interp_pos)
         obs = np.hstack([
             obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos

From 86abbe0d979a9b52b166358f1432d0908171fbf8 Mon Sep 17 00:00:00 2001
From: Mustafa Enes Batur
Date: Wed, 6 Dec 2023 19:47:00 +0100
Subject: [PATCH 11/13] Pass noise as parameter

---
 .../envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py | 5 ++---
 fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py       | 4 ++--
 fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py          | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
index 8535088..18f7f41 100644
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
@@ -5,20 +5,19 @@ from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
 from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian

 class AirhocKIT2023BaseEnv(AirHockeySingle):
-    def __init__(self, **kwargs):
+    def __init__(self, noise=False, **kwargs):
         super().__init__(**kwargs)
         obs_low = np.hstack([[-np.inf] * 37])
         obs_high = np.hstack([[np.inf] * 37])
         self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
         self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6))
-        self.noise = False
+        self.noise = noise

     # We don't need puck yaw observations
     def filter_obs(self, obs):
         obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]])
         return obs

-    # These are roughly the noise levels for a noisy environment, turned-off by default, enable in the constructor
     def add_noise(self, obs):
         if not self.noise:
             return
         obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3)
         obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3)

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
index d957311..4df9f60 100644
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
@@ -46,8 +46,8 @@ class AirHockeyDefend(AirHockeySingle):
         return super().is_absorbing(state)

 class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv):
-    def __init__(self, gamma=0.99, horizon=200, viewer_params={}):
-        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+    def __init__(self, gamma=0.99, horizon=200, viewer_params={}, **kwargs):
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
         self.init_velocity_range = (1, 3)
         self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]])  # Table Frame

diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
index 41eb08f..87fc826 100644
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
@@ -57,8 +57,8 @@ class AirHockeyHit(AirHockeySingle):
         return super(AirHockeyHit, self).is_absorbing(obs)

 class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv):
-    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
-        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}, **kwargs):
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)

         self.moving_init = moving_init
         hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
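Because AirHockeyEnv forwards unconsumed keyword arguments to the underlying task, the noise flag introduced in PATCH 10/11 can presumably be enabled at construction time (a hypothetical call, assuming gym.make passes the kwarg through to the base environment):

```python
import gymnasium as gym
import fancy_gym

# noise=True activates the puck position/velocity observation noise from add_noise()
env = gym.make('fancy/AirHockey-7dof-defend-airhockit2023-v0', noise=True)
```
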
From 5201937ac85ae215d9fdca92055596f29d31f3ba Mon Sep 17 00:00:00 2001
From: JuliusJacobitz
Date: Thu, 7 Dec 2023 15:53:55 +0100
Subject: [PATCH 12/13] add render_mode to examples in README to make them work again

---
 README.md | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 1c76595..494185c 100644
--- a/README.md
+++ b/README.md
@@ -105,10 +105,10 @@ Regular step based environments added by Fancy Gym are added into the `fancy/` namespace.
 import gymnasium as gym
 import fancy_gym

-env = gym.make('fancy/Reacher5d-v0')
-# or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
-# or env = gym.make('dm_control/ball_in_cup-catch-v0')
-# or env = gym.make('Reacher-v2')
+env = gym.make('fancy/Reacher5d-v0', render_mode='human')
+# or env = gym.make('metaworld/reach-v2', render_mode='human') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
+# or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human')
+# or env = gym.make('Reacher-v2', render_mode='human')
 observation = env.reset(seed=1)

 for i in range(1000):
@@ -149,14 +149,10 @@ Just keep in mind, calling `step()` executes a full trajectory.
 import gymnasium as gym
 import fancy_gym

-env = gym.make('fancy_ProMP/Reacher5d-v0')
-# or env = gym.make('metaworld_ProDMP/reach-v2')
-# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0')
-# or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_ NS
-
-# render() can be called once in the beginning with all necessary arguments.
-# To turn it of again just call render() without any arguments.
-env.render(mode='human')
+env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human")
+# or env = gym.make('metaworld_ProDMP/reach-v2', render_mode="human")
+# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0', render_mode="human")
+# or env = gym.make('gym_ProMP/Reacher-v2', render_mode="human") # mp versions of envs added directly by gymnasium are in the gym_ NS

 # This returns the context information, not the full state observation
 observation, info = env.reset(seed=1)

From 18c17ff130feede594ab05acd10a0c2786940458 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 13 Dec 2023 16:08:49 +0100
Subject: [PATCH 13/13] Add calls to env.render() to README

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 494185c..085f501 100644
--- a/README.md
+++ b/README.md
@@ -110,12 +110,11 @@ env = gym.make('fancy/Reacher5d-v0', render_mode='human')
 # or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human')
 # or env = gym.make('Reacher-v2', render_mode='human')
 observation = env.reset(seed=1)
+env.render()

 for i in range(1000):
     action = env.action_space.sample()
     observation, reward, terminated, truncated, info = env.step(action)
-    if i % 5 == 0:
-        env.render()

     if terminated or truncated:
         observation, info = env.reset()
@@ -156,6 +155,7 @@ env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human")

 # This returns the context information, not the full state observation
 observation, info = env.reset(seed=1)
+env.render()

 for i in range(5):
     action = env.action_space.sample()