From 14ee58047319df26e96668a5c80c057940b79457 Mon Sep 17 00:00:00 2001
From: Onur
Date: Tue, 12 Jul 2022 17:24:12 +0200
Subject: [PATCH] clean up example files

---
 alr_envs/examples/examples_dmc.py            | 84 +++++++++---------
 alr_envs/examples/examples_general.py        | 18 ++--
 alr_envs/examples/examples_metaworld.py      | 56 ++++++------
 .../examples/examples_movement_primitives.py | 86 ++++++++++---------
 alr_envs/examples/examples_open_ai.py        | 24 +++---
 alr_envs/utils/make_env_helpers.py           |  2 +-
 6 files changed, 140 insertions(+), 130 deletions(-)

diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py
index 41d2231..2d2c8fe 100644
--- a/alr_envs/examples/examples_dmc.py
+++ b/alr_envs/examples/examples_dmc.py
@@ -1,11 +1,11 @@
 import alr_envs
 
 
-def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
+def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True):
     """
     Example for running a DMC based env in the step based setting.
-    The env_id has to be specified as `domain_name-task_name` or
-    for manipulation tasks as `manipulation-environment_name`
+    The env_id has to be specified as `domain_name:task_name` or
+    for manipulation tasks as `domain_name:manipulation-environment_name`
 
     Args:
         env_id: Either `domain_name-task_name` or `manipulation-environment_name`
@@ -24,12 +24,11 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
 
     for i in range(iterations):
         ac = env.action_space.sample()
+        if render:
+            env.render(mode="human")
         obs, reward, done, info = env.step(ac)
         rewards += reward
 
-        if render:
-            env.render("human")
-
         if done:
             print(env_id, rewards)
             rewards = 0
@@ -57,34 +56,37 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     """
 
     # Base DMC name, according to structure of above example
-    base_env = "ball_in_cup-catch"
+    base_env_id = "dmc:ball_in_cup-catch"
 
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
     wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
-    mp_kwargs = {
-        "num_dof": 2,  # degrees of fredom a.k.a. the old action space dimensionality
-        "num_basis": 5,  # number of basis functions, the new action space has size num_dof x num_basis
-        "duration": 20,  # length of trajectory in s, number of steps = duration / dt
-        "learn_goal": True,  # learn the goal position (recommended)
-        "alpha_phase": 2,
-        "bandwidth_factor": 2,
-        "policy_type": "motor",  # tracking_controller type, 'velocity', 'position', and 'motor' (torque control)
-        "weights_scale": 1,  # scaling of MP weights
-        "goal_scale": 1,  # scaling of learned goal position
-        "policy_kwargs": {  # only required for torque control/PD-Controller
-            "p_gains": 0.2,
-            "d_gains": 0.05
-        }
-    }
-    kwargs = {
-        "time_limit": 20,  # same as duration value but as max horizon for underlying DMC environment
-        "episode_length": 1000,  # corresponding number of episode steps
-        # "frame_skip": 1
-    }
-    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)
-    # OR for a deterministic ProMP (other traj_gen_kwargs are required, see metaworld_examples):
-    # env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
+    # # For a ProMP
+    trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
+    phase_generator_kwargs = {'phase_generator_type': 'linear'}
+    controller_kwargs = {'controller_type': 'motor',
+                         "p_gains": 1.0,
+                         "d_gains": 0.1,}
+    basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
+                              'num_basis': 5,
+                              'num_basis_zero_start': 1
+                              }
+
+    # For a DMP
+    # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
+    # phase_generator_kwargs = {'phase_generator_type': 'exp',
+    #                           'alpha_phase': 2}
+    # controller_kwargs = {'controller_type': 'motor',
+    #                      "p_gains": 1.0,
+    #                      "d_gains": 0.1,
+    #                      }
+    # basis_generator_kwargs = {'basis_generator_type': 'rbf',
+    #                           'num_basis': 5
+    #                           }
+    env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+                           traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
+                           phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
+                           seed=seed)
 
     # This renders the full MP trajectory
     # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
@@ -104,7 +106,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
         rewards += reward
 
         if done:
-            print(base_env, rewards)
+            print(base_env_id, rewards)
             rewards = 0
             obs = env.reset()
 
@@ -118,18 +120,18 @@ if __name__ == '__main__':
 
     # For rendering DMC
     # export MUJOCO_GL="osmesa"
-    render = False
+    render = True
 
     # # Standard DMC Suite tasks
-    example_dmc("fish-swim", seed=10, iterations=1000, render=render)
-
-    # Manipulation tasks
-    # Disclaimer: The vision versions are currently not integrated and yield an error
-    example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=render)
-
-    # Gym + DMC hybrid task provided in the MP framework
+    example_dmc("dmc:fish-swim", seed=10, iterations=1000, render=render)
+    #
+    # # Manipulation tasks
+    # # Disclaimer: The vision versions are currently not integrated and yield an error
+    example_dmc("dmc:manipulation-reach_site_features", seed=10, iterations=250, render=render)
+    #
+    # # Gym + DMC hybrid task provided in the MP framework
     example_dmc("dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render)
 
-    # Custom DMC task
-    # Different seed, because the episode is longer for this example and the name+seed combo is already registered above
+    # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is
+    # already registered above
     example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
diff --git a/alr_envs/examples/examples_general.py b/alr_envs/examples/examples_general.py
index 4f184b8..33c2740 100644
--- a/alr_envs/examples/examples_general.py
+++ b/alr_envs/examples/examples_general.py
@@ -6,7 +6,7 @@ import numpy as np
 import alr_envs
 
 
-def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
+def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
     """
     Example for running any env in the step based setting.
     This also includes DMC environments when leveraging our custom make_env function.
@@ -41,7 +41,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
             obs = env.reset()
 
 
-def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
+def example_async(env_id="HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
     """
     Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
     This also includes DMC and DMP environments when leveraging our custom make_env function.
@@ -80,23 +80,21 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
         rewards[done] = 0
 
     # do not return values above threshold
-    return (*map(lambda v: np.stack(v)[:n_samples], buffer.values()),)
+    return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
 
 
 if __name__ == '__main__':
     render = True
 
     # Basic gym task
-    example_general("Reacher5d-v0", seed=10, iterations=200, render=render)
+    example_general("Pendulum-v1", seed=10, iterations=200, render=render)
 
-    # # Basis task from framework
-    example_general("Reacher-v0", seed=10, iterations=200, render=render)
+    # Mujoco task from framework
+    example_general("Reacher5d-v0", seed=10, iterations=200, render=render)
 
     # # OpenAI Mujoco task
     example_general("HalfCheetah-v2", seed=10, render=render)
 
-    # # Mujoco task from framework
-    example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=render)
-
     # Vectorized multiprocessing environments
-    example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
+    # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
+
diff --git a/alr_envs/examples/examples_metaworld.py b/alr_envs/examples/examples_metaworld.py
index f179149..aa6820e 100644
--- a/alr_envs/examples/examples_metaworld.py
+++ b/alr_envs/examples/examples_metaworld.py
@@ -25,14 +25,12 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
 
     for i in range(iterations):
         ac = env.action_space.sample()
-        obs, reward, done, info = env.step(ac)
-        rewards += reward
-
         if render:
             # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
             # TODO: Remove this, when Metaworld fixes its interface.
             env.render(False)
-
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
         if done:
             print(env_id, rewards)
             rewards = 0
@@ -60,25 +58,32 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     """
 
     # Base MetaWorld name, according to structure of above example
-    base_env = "button-press-v2"
+    base_env_id = "metaworld:button-press-v2"
 
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
-    wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]
-    mp_kwargs = {
-        "num_dof": 4,  # degrees of fredom a.k.a. the old action space dimensionality
-        "num_basis": 5,  # number of basis functions, the new action space has size num_dof x num_basis
-        "duration": 6.25,  # length of trajectory in s, number of steps = duration / dt
-        "post_traj_time": 0,  # pad trajectory with additional zeros at the end (recommended: 0)
-        "width": 0.025,  # width of the basis functions
-        "zero_start": True,  # start from current environment position if True
-        "weights_scale": 1,  # scaling of MP weights
-        "policy_type": "metaworld",  # custom tracking_controller type for metaworld environments
-    }
+    wrappers = [alr_envs.meta.goal_object_change_mp_wrapper.MPWrapper]
+    # # For a ProMP
+    # trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
+    # phase_generator_kwargs = {'phase_generator_type': 'linear'}
+    # controller_kwargs = {'controller_type': 'metaworld'}
+    # basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
+    #                           'num_basis': 5,
+    #                           'num_basis_zero_start': 1
+    #                           }
 
-    env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
-    # OR for a DMP (other traj_gen_kwargs are required, see dmc_examples):
-    # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs, **kwargs)
+    # For a DMP
+    trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
+    phase_generator_kwargs = {'phase_generator_type': 'exp',
+                              'alpha_phase': 2}
+    controller_kwargs = {'controller_type': 'metaworld'}
+    basis_generator_kwargs = {'basis_generator_type': 'rbf',
+                              'num_basis': 5
+                              }
+    env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+                           traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
+                           phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
+                           seed=seed)
 
     # This renders the full MP trajectory
     # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
@@ -102,7 +107,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
         rewards += reward
 
         if done:
-            print(base_env, rewards)
+            print(base_env_id, rewards)
             rewards = 0
             obs = env.reset()
 
@@ -118,11 +123,12 @@ if __name__ == '__main__':
     # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
     render = False
 
-    # # Standard DMC Suite tasks
-    example_dmc("button-press-v2", seed=10, iterations=500, render=render)
+    # # Standard Meta world tasks
+    example_dmc("metaworld:button-press-v2", seed=10, iterations=500, render=render)
 
-    # MP + MetaWorld hybrid task provided in the our framework
+    # # MP + MetaWorld hybrid task provided in our framework
     example_dmc("ButtonPressProMP-v2", seed=10, iterations=1, render=render)
-
-    # Custom MetaWorld task
+    #
+    # # Custom MetaWorld task
     example_custom_dmc_and_mp(seed=10, iterations=1, render=render)
+
diff --git a/alr_envs/examples/examples_movement_primitives.py b/alr_envs/examples/examples_movement_primitives.py
index 755d912..62ab91c 100644
--- a/alr_envs/examples/examples_movement_primitives.py
+++ b/alr_envs/examples/examples_movement_primitives.py
@@ -13,11 +13,11 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
     Returns:
 
     """
-    # Equivalent to gym, we have make function which can be used to create environments.
+    # Equivalent to gym, we have a make function which can be used to create environments.
     # It takes care of seeding and enables the use of a variety of external environments using the gym interface.
     env = alr_envs.make(env_name, seed)
 
-    rewards = 0
+    returns = 0
     # env.render(mode=None)
     obs = env.reset()
 
@@ -36,16 +36,17 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
             env.render(mode=None)
 
         # Now the action space is not the raw action but the parametrization of the trajectory generator,
-        # such as a ProMP. You can still use it the same, though.
+        # such as a ProMP
         ac = env.action_space.sample()
 
-        # This executes a full trajectory
+        # This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the
+        # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
+        # to the return of a trajectory. Default is the sum over the step-wise rewards.
         obs, reward, done, info = env.step(ac)
 
-        # Aggregated reward of trajectory
-        rewards += reward
+        # Aggregated returns
+        returns += reward
 
         if done:
-            print(rewards)
-            rewards = 0
+            print(reward)
             obs = env.reset()
@@ -62,12 +63,13 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
     """
 
     # Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
-    # E.g. here for adding a lot of basis functions
+    # E.g. here for way too many basis functions
    env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
+    # env = alr_envs.make(env_name, seed)
 
     # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
     # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
-    rewards = 0
+    returns = 0
     obs = env.reset()
 
     # This time rendering every trajectory
@@ -78,11 +80,10 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
     for i in range(iterations):
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
-        rewards += reward
+        returns += reward
 
         if done:
-            print(i, rewards)
-            rewards = 0
+            print(i, reward)
             obs = env.reset()
 
     return obs
@@ -93,7 +94,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
     Example for running a custom motion primitive based environments.
     Our already registered environments follow the same structure. Hence, this also allows to adjust hyperparameters of
     the motion primitives.
-    Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.
+    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
     We appreciate PRs for custom environments (especially MP wrappers of existing tasks) for our repo:
     https://github.com/ALRhub/alr_envs/
     Args:
@@ -105,25 +106,35 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 
     """
 
-    base_env = "alr_envs:HoleReacher-v1"
+    base_env_id = "HoleReacher-v0"
 
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
-    wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
-    mp_kwargs = {
-        "num_dof": 5,
-        "num_basis": 5,
-        "duration": 2,
-        "learn_goal": True,
-        "alpha_phase": 2,
-        "bandwidth_factor": 2,
-        "policy_type": "velocity",
-        "weights_scale": 50,
-        "goal_scale": 0.1
-    }
-    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
-    # OR for a deterministic ProMP:
-    # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs)
+    wrappers = [alr_envs.envs.classic_control.hole_reacher.MPWrapper]
+
+    # # For a ProMP
+    # trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
+    #                                'weight_scale': 2}
+    # phase_generator_kwargs = {'phase_generator_type': 'linear'}
+    # controller_kwargs = {'controller_type': 'velocity'}
+    # basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
+    #                           'num_basis': 5,
+    #                           'num_basis_zero_start': 1
+    #                           }
+
+    # For a DMP
+    trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
+                                   'weight_scale': 500}
+    phase_generator_kwargs = {'phase_generator_type': 'exp',
+                              'alpha_phase': 2.5}
+    controller_kwargs = {'controller_type': 'velocity'}
+    basis_generator_kwargs = {'basis_generator_type': 'rbf',
+                              'num_basis': 5
+                              }
+    env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+                           traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
+                           phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
+                           seed=seed)
 
     if render:
         env.render(mode="human")
@@ -144,18 +155,15 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 
 
 if __name__ == '__main__':
-    render = False
-    # # DMP
-    # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
+    render = True
+    # DMP
+    example_mp("alr_envs:HoleReacherDMP-v0", seed=10, iterations=5, render=render)
     #
     # # ProMP
-    # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
-    #
-    # # DetProMP
-    # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
+    example_mp("alr_envs:HoleReacherProMP-v0", seed=10, iterations=5, render=render)
 
     # Altered basis functions
-    obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)
+    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=5, render=render)
 
     # Custom MP
-    # example_fully_custom_mp(seed=10, iterations=1, render=render)
+    example_fully_custom_mp(seed=10, iterations=1, render=render)
diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py
index 46dcf60..fad3847 100644
--- a/alr_envs/examples/examples_open_ai.py
+++ b/alr_envs/examples/examples_open_ai.py
@@ -1,41 +1,37 @@
 import alr_envs
 
 
-def example_mp(env_name, seed=1):
+def example_mp(env_name, seed=1, render=True):
     """
     Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered.
     For more information on motion primitive specific stuff, look at the traj_gen examples.
     Args:
         env_name: ProMP env_id
         seed: seed
-
+        render: boolean
     Returns:
 
     """
     # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
     env = alr_envs.make(env_name, seed)
 
-    rewards = 0
+    returns = 0
     obs = env.reset()
 
-    # number of samples/full trajectories (multiple environment steps)
     for i in range(10):
+        if render and i % 2 == 0:
+            env.render(mode="human")
+        else:
+            env.render(mode=None)
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
-        rewards += reward
+        returns += reward
 
         if done:
-            print(rewards)
-            rewards = 0
+            print(returns)
             obs = env.reset()
 
 
 if __name__ == '__main__':
-    # DMP - not supported yet
-    # example_mp("ReacherDMP-v2")
-
-    # DetProMP
-    example_mp("ContinuousMountainCarProMP-v0")
     example_mp("ReacherProMP-v2")
-    example_mp("FetchReachDenseProMP-v1")
-    example_mp("FetchSlideDenseProMP-v1")
+
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 8cdf8c3..f88cfcc 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -118,7 +118,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
             has_black_box_wrapper = True
         _env = w(_env)
     if not has_black_box_wrapper:
-        raise ValueError("An RawInterfaceWrapper is required in order to leverage movement primitive environments.")
+        raise ValueError("A RawInterfaceWrapper is required in order to leverage movement primitive environments.")
     return _env
 
 