From e5a9aaff0b53b74caf853e49af58977393c77cc8 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 29 Oct 2023 12:57:29 +0100 Subject: [PATCH 1/3] Fix: Did not correctly pass render_mode to gym.make (as per new spec) --- .../examples/examples_movement_primitives.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 317a103..cf7b3e2 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -16,7 +16,7 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend """ # Equivalent to gym, we have a make function which can be used to create environments. # It takes care of seeding and enables the use of a variety of external environments using the gym interface. - env = gym.make(env_name) + env = gym.make(env_name, render_mode='human' if render else None) returns = 0 # env.render(mode=None) @@ -26,14 +26,6 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend for i in range(iterations): if render and i % 1 == 0: - # This renders the full MP trajectory - # It is only required to call render() once in the beginning, which renders every consecutive trajectory. - # Resetting to no rendering, can be achieved by render(mode=None). - # It is also possible to change the mode multiple times when - # e.g. only every second trajectory should be displayed, such as here - # Just make sure the correct mode is set before executing the step. - env.render(mode="human") - else: env.render() # Now the action space is not the raw action but the parametrization of the trajectory generator, @@ -65,14 +57,14 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, """ # Changing the arguments of the black box env is possible by providing them to gym through mp_config_override. # E.g. 
here for way to many basis functions - env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}) + env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}, render_mode='human' if render else None) returns = 0 obs = env.reset() # This time rendering every trajectory if render: - env.render(mode="human") + env.render() # number of samples/full trajectories (multiple environment steps) for i in range(iterations): @@ -129,13 +121,14 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): # basis_generator_kwargs = {'basis_generator_type': 'rbf', # 'num_basis': 5 # } - env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={}, + raw_env = gym.make(base_env_id, render_mode='human' if render else None) + env = fancy_gym.make_bb(env=raw_env, wrappers=wrappers, black_box_kwargs={}, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, seed=seed) if render: - env.render(mode="human") + env.render() rewards = 0 obs = env.reset() @@ -152,7 +145,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): obs = env.reset() -if __name__ == '__main__': +def main(): render = False # DMP example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render) @@ -172,3 +165,6 @@ if __name__ == '__main__': # Custom MP example_fully_custom_mp(seed=10, iterations=1, render=render) + +if __name__=='__main__': + main() From f1d3b5333fb822e804cc9782e63269ec093ff2f8 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 29 Oct 2023 13:25:35 +0100 Subject: [PATCH 2/3] Docs: Show usage using new api functionality --- .../examples/examples_movement_primitives.py | 150 ++++++++++++++---- 1 file changed, 120 insertions(+), 30 deletions(-) diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 
cf7b3e2..95b4d7b 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -45,10 +45,14 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): """ - Example for running a movement primitive based environment, which is already registered + Example for running a custom movement primitive based environments. + Our already registered environments follow the same structure. + Hence, this also allows to adjust hyperparameters of the movement primitives. + Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks. + We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/fancy_gym/ Args: - env_name: DMP env_id - seed: seed for deterministic behaviour + seed: seed iterations: Number of rollout steps to run render: Render the episode @@ -78,6 +82,44 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, return obs +class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper): + mp_config = { + 'ProMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'promp', + 'weights_scale': 2 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'linear' + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + }, + 'DMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'dmp', + 'weights_scale': 500 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'exp', + 'alpha_phase': 2.5 + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'rbf', + 'num_basis': 5 + } + } + } + def 
example_fully_custom_mp(seed=1, iterations=1, render=True): """ @@ -97,35 +139,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): """ base_env_id = "fancy/Reacher5d-v0" + custom_env_id = "fancy/Reacher5d-Custom-v0" + custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0" + custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" - # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. - # You can also add other gym.Wrappers in case they are needed. - wrappers = [fancy_gym.envs.mujoco.reacher.MPWrapper] + fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id) - # For a ProMP - trajectory_generator_kwargs = {'trajectory_generator_type': 'promp', - 'weights_scale': 2} - phase_generator_kwargs = {'phase_generator_type': 'linear'} - controller_kwargs = {'controller_type': 'velocity'} - basis_generator_kwargs = {'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - - # # For a DMP - # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp', - # 'weights_scale': 500} - # phase_generator_kwargs = {'phase_generator_type': 'exp', - # 'alpha_phase': 2.5} - # controller_kwargs = {'controller_type': 'velocity'} - # basis_generator_kwargs = {'basis_generator_type': 'rbf', - # 'num_basis': 5 - # } - raw_env = gym.make(base_env_id, render_mode='human' if render else None) - env = fancy_gym.make_bb(env=raw_env, wrappers=wrappers, black_box_kwargs={}, - traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, - phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, - seed=seed) + env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) if render: env.render() @@ -144,6 +164,75 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): rewards = 0 obs = env.reset() +def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): 
+ """ + Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration. + Args: + seed: seed + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + + base_env_id = "fancy/Reacher5d-v0" + custom_env_id = "fancy/Reacher5d-Custom-v0" + custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" + + fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override= {'ProMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'promp', + 'weights_scale': 2 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'linear' + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + }}) + + env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) + + if render: + env.render() + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(ac) + rewards += reward + + if terminated or truncated: + print(rewards) + rewards = 0 + obs = env.reset() + + if render: + env.render() + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(ac) + rewards += reward + + if terminated or truncated: + print(rewards) + rewards = 0 + obs = env.reset() def main(): render = False @@ -165,6 +254,7 @@ def main(): # Custom MP example_fully_custom_mp(seed=10, iterations=1, render=render) + example_fully_custom_mp_alternative(seed=10, iterations=1, render=render) if __name__=='__main__': main() From da042eebd9ba6c47f56621ef184495415d5fe2e1 Mon Sep 17 
00:00:00 2001 From: Dominik Roth Date: Sun, 29 Oct 2023 13:41:06 +0100 Subject: [PATCH 3/3] Ensure envs are closed --- .../examples/examples_movement_primitives.py | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 95b4d7b..4042f77 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -41,6 +41,7 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend if terminated or truncated: print(reward) obs = env.reset() + env.close() def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): @@ -80,6 +81,7 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, print(i, reward) obs = env.reset() + env.close() return obs class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper): @@ -147,12 +149,12 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) - if render: - env.render() - rewards = 0 obs = env.reset() + if render: + env.render() + # number of samples/full trajectories (multiple environment steps) for i in range(iterations): ac = env.action_space.sample() @@ -164,6 +166,12 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): rewards = 0 obs = env.reset() + try: # Some mujoco-based envs don't correctly implement .close + env.close() + except: + pass + + def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): """ Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration. 
@@ -200,12 +208,12 @@ def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) - if render: - env.render() - rewards = 0 obs = env.reset() + if render: + env.render() + # number of samples/full trajectories (multiple environment steps) for i in range(iterations): ac = env.action_space.sample() @@ -234,6 +242,12 @@ def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): rewards = 0 obs = env.reset() + try: # Some mujoco-based envs don't correctly implement .close + env.close() + except: + pass + + def main(): render = False # DMP