From f1d3b5333fb822e804cc9782e63269ec093ff2f8 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sun, 29 Oct 2023 13:25:35 +0100 Subject: [PATCH] Docs: Show usage using new api functionality --- .../examples/examples_movement_primitives.py | 150 ++++++++++++++---- 1 file changed, 120 insertions(+), 30 deletions(-) diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index cf7b3e2..95b4d7b 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -45,10 +45,14 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): """ - Example for running a movement primitive based environment, which is already registered + Example for running a custom movement primitive based environment. + Our already registered environments follow the same structure. + Hence, this also allows adjusting the hyperparameters of the movement primitives. + Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks. 
+ We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/fancy_gym/ Args: - env_name: DMP env_id - seed: seed for deterministic behaviour + seed: seed iterations: Number of rollout steps to run render: Render the episode @@ -78,6 +82,44 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, return obs +class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper): + mp_config = { + 'ProMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'promp', + 'weights_scale': 2 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'linear' + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + }, + 'DMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'dmp', + 'weights_scale': 500 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'exp', + 'alpha_phase': 2.5 + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'rbf', + 'num_basis': 5 + } + } + } + def example_fully_custom_mp(seed=1, iterations=1, render=True): """ @@ -97,35 +139,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): """ base_env_id = "fancy/Reacher5d-v0" + custom_env_id = "fancy/Reacher5d-Custom-v0" + custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0" + custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" - # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. - # You can also add other gym.Wrappers in case they are needed. 
- wrappers = [fancy_gym.envs.mujoco.reacher.MPWrapper] + fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id) - # For a ProMP - trajectory_generator_kwargs = {'trajectory_generator_type': 'promp', - 'weights_scale': 2} - phase_generator_kwargs = {'phase_generator_type': 'linear'} - controller_kwargs = {'controller_type': 'velocity'} - basis_generator_kwargs = {'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - - # # For a DMP - # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp', - # 'weights_scale': 500} - # phase_generator_kwargs = {'phase_generator_type': 'exp', - # 'alpha_phase': 2.5} - # controller_kwargs = {'controller_type': 'velocity'} - # basis_generator_kwargs = {'basis_generator_type': 'rbf', - # 'num_basis': 5 - # } - raw_env = gym.make(base_env_id, render_mode='human' if render else None) - env = fancy_gym.make_bb(env=raw_env, wrappers=wrappers, black_box_kwargs={}, - traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, - phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, - seed=seed) + env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) if render: env.render() @@ -144,6 +164,75 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): rewards = 0 obs = env.reset() +def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): + """ + Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration. 
+ Args: + seed: seed + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + + base_env_id = "fancy/Reacher5d-v0" + custom_env_id = "fancy/Reacher5d-Custom-v0" + custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0" + + fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override= {'ProMP': { + 'trajectory_generator_kwargs': { + 'trajectory_generator_type': 'promp', + 'weights_scale': 2 + }, + 'phase_generator_kwargs': { + 'phase_generator_type': 'linear' + }, + 'controller_kwargs': { + 'controller_type': 'velocity' + }, + 'basis_generator_kwargs': { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + }}) + + env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None) + + if render: + env.render() + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(ac) + rewards += reward + + if terminated or truncated: + print(rewards) + rewards = 0 + obs = env.reset() + + if render: + env.render() + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(ac) + rewards += reward + + if terminated or truncated: + print(rewards) + rewards = 0 + obs = env.reset() def main(): render = False @@ -165,6 +254,7 @@ def main(): # Custom MP example_fully_custom_mp(seed=10, iterations=1, render=render) + example_fully_custom_mp_alternative(seed=10, iterations=1, render=render) if __name__=='__main__': main()