diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 317a103..4042f77 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -16,7 +16,7 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend """ # Equivalent to gym, we have a make function which can be used to create environments. # It takes care of seeding and enables the use of a variety of external environments using the gym interface. - env = gym.make(env_name) + env = gym.make(env_name, render_mode='human' if render else None) returns = 0 # env.render(mode=None) @@ -26,14 +26,6 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend for i in range(iterations): if render and i % 1 == 0: - # This renders the full MP trajectory - # It is only required to call render() once in the beginning, which renders every consecutive trajectory. - # Resetting to no rendering, can be achieved by render(mode=None). - # It is also possible to change the mode multiple times when - # e.g. only every second trajectory should be displayed, such as here - # Just make sure the correct mode is set before executing the step. - env.render(mode="human") - else: env.render() # Now the action space is not the raw action but the parametrization of the trajectory generator, @@ -49,14 +41,19 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend if terminated or truncated: print(reward) obs = env.reset() + env.close() def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): """ - Example for running a movement primitive based environment, which is already registered + Example for running a custom movement primitive based environments. + Our already registered environments follow the same structure. + Hence, this also allows to adjust hyperparameters of the movement primitives. + Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks. + We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/fancy_gym/ Args: - env_name: DMP env_id - seed: seed for deterministic behaviour + seed: seed iterations: Number of rollout steps to run render: Render the episode @@ -65,14 +62,14 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, """ # Changing the arguments of the black box env is possible by providing them to gym through mp_config_override. # E.g. here for way to many basis functions - env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}) + env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}, render_mode='human' if render else None) returns = 0 obs = env.reset() # This time rendering every trajectory if render: - env.render(mode="human") + env.render() # number of samples/full trajectories (multiple environment steps) for i in range(iterations): @@ -84,8 +81,47 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, print(i, reward) obs = env.reset() + env.close() return obs +class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper): + mp_config = { + 'ProMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'promp', + 'weights_scale': 2 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'linear' + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + }, + 'DMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'dmp', + 'weights_scale': 500 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'exp', + 'alpha_phase': 2.5 + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'rbf', + 'num_basis': 5 + } + } + } + def example_fully_custom_mp(seed=1, iterations=1, render=True): """ @@ -105,37 +141,92 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): """ base_env_id = "fancy/Reacher5d-v0" + custom_env_id = "fancy/Reacher5d-Custom-v0" + custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0" + custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" - # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. - # You can also add other gym.Wrappers in case they are needed. - wrappers = [fancy_gym.envs.mujoco.reacher.MPWrapper] + fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id) - # For a ProMP - trajectory_generator_kwargs = {'trajectory_generator_type': 'promp', - 'weights_scale': 2} - phase_generator_kwargs = {'phase_generator_type': 'linear'} - controller_kwargs = {'controller_type': 'velocity'} - basis_generator_kwargs = {'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } + env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) - # # For a DMP - # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp', - # 'weights_scale': 500} - # phase_generator_kwargs = {'phase_generator_type': 'exp', - # 'alpha_phase': 2.5} - # controller_kwargs = {'controller_type': 'velocity'} - # basis_generator_kwargs = {'basis_generator_type': 'rbf', - # 'num_basis': 5 - # } - env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={}, - traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, - phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, - seed=seed) + rewards = 0 + obs = env.reset() if render: - env.render(mode="human") + env.render() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(ac) + rewards += reward + + if terminated or truncated: + print(rewards) + rewards = 0 + obs = env.reset() + + try: # Some mujoco-based envs don't correlcty implement .close + env.close() + except: + pass + + +def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): + """ + Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration. + Args: + seed: seed + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + + base_env_id = "fancy/Reacher5d-v0" + custom_env_id = "fancy/Reacher5d-Custom-v0" + custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" + + fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override= {'ProMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'promp', + 'weights_scale': 2 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'linear' + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + }}) + + env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) + + rewards = 0 + obs = env.reset() + + if render: + env.render() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(ac) + rewards += reward + + if terminated or truncated: + print(rewards) + rewards = 0 + obs = env.reset() + + if render: + env.render() rewards = 0 obs = env.reset() @@ -151,8 +242,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): rewards = 0 obs = env.reset() + try: # Some mujoco-based envs don't correlcty implement .close + env.close() + except: + pass -if __name__ == '__main__': + +def main(): render = False # DMP example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render) @@ -172,3 +268,7 @@ if __name__ == '__main__': # Custom MP example_fully_custom_mp(seed=10, iterations=1, render=render) + example_fully_custom_mp_alternative(seed=10, iterations=1, render=render) + +if __name__=='__main__': + main()