clean up example files

Onur 2022-07-12 17:24:12 +02:00
parent d64cb614fa
commit 14ee580473
6 changed files with 140 additions and 130 deletions

View File

@@ -1,11 +1,11 @@
 import alr_envs
-def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
+def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True):
     """
     Example for running a DMC based env in the step based setting.
-    The env_id has to be specified as `domain_name-task_name` or
-    for manipulation tasks as `manipulation-environment_name`
+    The env_id has to be specified as `domain_name:task_name` or
+    for manipulation tasks as `domain_name:manipulation-environment_name`
     Args:
         env_id: Either `domain_name-task_name` or `manipulation-environment_name`
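A minimal usage sketch of the renamed ids (not part of the commit; it assumes `alr_envs.make` resolves the `dmc:` prefix exactly as the calls further down in this file do):

    import alr_envs

    # suite task, specified as dmc:domain_name-task_name
    env = alr_envs.make("dmc:fish-swim", seed=1)
    # manipulation task, specified as dmc:manipulation-environment_name
    env_manip = alr_envs.make("dmc:manipulation-reach_site_features", seed=1)
    obs = env.reset()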
@@ -24,12 +24,11 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
     for i in range(iterations):
         ac = env.action_space.sample()
-        if render:
-            env.render(mode="human")
         obs, reward, done, info = env.step(ac)
         rewards += reward
+        if render:
+            env.render("human")
         if done:
             print(env_id, rewards)
             rewards = 0
@@ -57,34 +56,37 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     """
     # Base DMC name, according to structure of above example
-    base_env = "ball_in_cup-catch"
+    base_env_id = "dmc:ball_in_cup-catch"
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
     wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
-    mp_kwargs = {
-        "num_dof": 2,  # degrees of fredom a.k.a. the old action space dimensionality
-        "num_basis": 5,  # number of basis functions, the new action space has size num_dof x num_basis
-        "duration": 20,  # length of trajectory in s, number of steps = duration / dt
-        "learn_goal": True,  # learn the goal position (recommended)
-        "alpha_phase": 2,
-        "bandwidth_factor": 2,
-        "policy_type": "motor",  # tracking_controller type, 'velocity', 'position', and 'motor' (torque control)
-        "weights_scale": 1,  # scaling of MP weights
-        "goal_scale": 1,  # scaling of learned goal position
-        "policy_kwargs": {  # only required for torque control/PD-Controller
-            "p_gains": 0.2,
-            "d_gains": 0.05
-        }
-    }
-    kwargs = {
-        "time_limit": 20,  # same as duration value but as max horizon for underlying DMC environment
-        "episode_length": 1000,  # corresponding number of episode steps
-        # "frame_skip": 1
-    }
-    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)
-    # OR for a deterministic ProMP (other traj_gen_kwargs are required, see metaworld_examples):
-    # env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
+    # # For a ProMP
+    trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
+    phase_generator_kwargs = {'phase_generator_type': 'linear'}
+    controller_kwargs = {'controller_type': 'motor',
+                         "p_gains": 1.0,
+                         "d_gains": 0.1,}
+    basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
+                              'num_basis': 5,
+                              'num_basis_zero_start': 1
+                              }
+    # For a DMP
+    # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
+    # phase_generator_kwargs = {'phase_generator_type': 'exp',
+    #                           'alpha_phase': 2}
+    # controller_kwargs = {'controller_type': 'motor',
+    #                      "p_gains": 1.0,
+    #                      "d_gains": 0.1,
+    #                      }
+    # basis_generator_kwargs = {'basis_generator_type': 'rbf',
+    #                           'num_basis': 5
+    #                           }
+    env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+                           traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
+                           phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
+                           seed=seed)
     # This renders the full MP trajectory
     # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
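As a quick orientation aid (a hedged sketch, not part of the commit): the removed `num_dof x num_basis` comment has no direct counterpart in the new configuration, but the flat parameter space of the wrapped env can simply be inspected:

    # assumes `env` was created with alr_envs.make_bb as above
    print(env.action_space.shape)  # flat MP parameter vector; roughly num_dofs * num_basis weights,
                                   # plus extra goal/scale parameters depending on the trajectory generator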
@@ -104,7 +106,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
         rewards += reward
         if done:
-            print(base_env, rewards)
+            print(base_env_id, rewards)
             rewards = 0
             obs = env.reset()
@@ -118,18 +120,18 @@ if __name__ == '__main__':
     # For rendering DMC
     # export MUJOCO_GL="osmesa"
-    render = False
+    render = True
     # # Standard DMC Suite tasks
-    example_dmc("fish-swim", seed=10, iterations=1000, render=render)
-    # Manipulation tasks
-    # Disclaimer: The vision versions are currently not integrated and yield an error
-    example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=render)
-    # Gym + DMC hybrid task provided in the MP framework
+    example_dmc("dmc:fish-swim", seed=10, iterations=1000, render=render)
+    #
+    # # Manipulation tasks
+    # # Disclaimer: The vision versions are currently not integrated and yield an error
+    example_dmc("dmc:manipulation-reach_site_features", seed=10, iterations=250, render=render)
+    #
+    # # Gym + DMC hybrid task provided in the MP framework
     example_dmc("dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render)
     # Custom DMC task
-    # Different seed, because the episode is longer for this example and the name+seed combo is already registered above
+    # Different seed, because the episode is longer for this example and the name+seed combo is
+    # already registered above
     example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
View File

@@ -6,7 +6,7 @@ import numpy as np
 import alr_envs
-def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
+def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
     """
     Example for running any env in the step based setting.
     This also includes DMC environments when leveraging our custom make_env function.
@@ -41,7 +41,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
             obs = env.reset()
-def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
+def example_async(env_id="HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
     """
     Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
     This also includes DMC and DMP environments when leveraging our custom make_env function.
@@ -80,23 +80,21 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
             rewards[done] = 0
     # do not return values above threshold
-    return (*map(lambda v: np.stack(v)[:n_samples], buffer.values()),)
+    return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
 if __name__ == '__main__':
     render = True
     # Basic gym task
-    example_general("Reacher5d-v0", seed=10, iterations=200, render=render)
-    # # Basis task from framework
-    example_general("Reacher-v0", seed=10, iterations=200, render=render)
+    example_general("Pendulum-v1", seed=10, iterations=200, render=render)
+    # Mujoco task from framework
+    example_general("Reacher5d-v0", seed=10, iterations=200, render=render)
     # # OpenAI Mujoco task
     example_general("HalfCheetah-v2", seed=10, render=render)
-    # # Mujoco task from framework
-    example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=render)
     # Vectorized multiprocessing environments
-    example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
+    # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
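A tiny self-contained illustration (not part of the commit) that the unparenthesized `*iterable,` form used in the new return line still builds a tuple, just like the previously parenthesized version:

    import numpy as np

    buffer = {'obs': [np.zeros(2)] * 3, 'reward': [0.0] * 3}
    n_samples = 2
    out = *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
    print(type(out), [o.shape for o in out])  # <class 'tuple'> [(2, 2), (2,)]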

View File

@@ -25,14 +25,12 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
     for i in range(iterations):
         ac = env.action_space.sample()
-        obs, reward, done, info = env.step(ac)
-        rewards += reward
         if render:
             # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
             # TODO: Remove this, when Metaworld fixes its interface.
             env.render(False)
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
         if done:
             print(env_id, rewards)
             rewards = 0
@@ -60,25 +58,32 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
     """
     # Base MetaWorld name, according to structure of above example
-    base_env = "button-press-v2"
+    base_env_id = "metaworld:button-press-v2"
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
-    wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]
-    mp_kwargs = {
-        "num_dof": 4,  # degrees of fredom a.k.a. the old action space dimensionality
-        "num_basis": 5,  # number of basis functions, the new action space has size num_dof x num_basis
-        "duration": 6.25,  # length of trajectory in s, number of steps = duration / dt
-        "post_traj_time": 0,  # pad trajectory with additional zeros at the end (recommended: 0)
-        "width": 0.025,  # width of the basis functions
-        "zero_start": True,  # start from current environment position if True
-        "weights_scale": 1,  # scaling of MP weights
-        "policy_type": "metaworld",  # custom tracking_controller type for metaworld environments
-    }
-    env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
-    # OR for a DMP (other traj_gen_kwargs are required, see dmc_examples):
-    # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs, **kwargs)
+    wrappers = [alr_envs.meta.goal_object_change_mp_wrapper.MPWrapper]
+    # # For a ProMP
+    # trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
+    # phase_generator_kwargs = {'phase_generator_type': 'linear'}
+    # controller_kwargs = {'controller_type': 'metaworld'}
+    # basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
+    #                           'num_basis': 5,
+    #                           'num_basis_zero_start': 1
+    #                           }
+    # For a DMP
+    trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
+    phase_generator_kwargs = {'phase_generator_type': 'exp',
+                              'alpha_phase': 2}
+    controller_kwargs = {'controller_type': 'metaworld'}
+    basis_generator_kwargs = {'basis_generator_type': 'rbf',
+                              'num_basis': 5
+                              }
+    env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+                           traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
+                           phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
+                           seed=seed)
     # This renders the full MP trajectory
     # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
@@ -102,7 +107,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
         rewards += reward
         if done:
-            print(base_env, rewards)
+            print(base_env_id, rewards)
             rewards = 0
             obs = env.reset()
@@ -118,11 +123,12 @@ if __name__ == '__main__':
     # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
     render = False
-    # # Standard DMC Suite tasks
-    example_dmc("button-press-v2", seed=10, iterations=500, render=render)
-    # MP + MetaWorld hybrid task provided in the our framework
+    # # Standard Meta world tasks
+    example_dmc("metaworld:button-press-v2", seed=10, iterations=500, render=render)
+    # # MP + MetaWorld hybrid task provided in the our framework
     example_dmc("ButtonPressProMP-v2", seed=10, iterations=1, render=render)
-    # Custom MetaWorld task
+    #
+    # # Custom MetaWorld task
     example_custom_dmc_and_mp(seed=10, iterations=1, render=render)
View File

@@ -13,11 +13,11 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
     Returns:
     """
-    # Equivalent to gym, we have make function which can be used to create environments.
+    # Equivalent to gym, we have a make function which can be used to create environments.
     # It takes care of seeding and enables the use of a variety of external environments using the gym interface.
     env = alr_envs.make(env_name, seed)
-    rewards = 0
+    returns = 0
     # env.render(mode=None)
     obs = env.reset()
@@ -36,16 +36,17 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
             env.render(mode=None)
         # Now the action space is not the raw action but the parametrization of the trajectory generator,
-        # such as a ProMP. You can still use it the same, though.
+        # such as a ProMP
         ac = env.action_space.sample()
-        # This executes a full trajectory
+        # This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the
+        # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
+        # to the return of a trajectory. Default is the sum over the step-wise rewards.
         obs, reward, done, info = env.step(ac)
-        # Aggregated reward of trajectory
-        rewards += reward
+        # Aggregated returns
+        returns += reward
         if done:
-            print(rewards)
-            rewards = 0
+            print(reward)
             obs = env.reset()
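For readers new to the black-box setting, a minimal sketch (not part of the commit) of what a single `step()` call means here, using the already registered `HoleReacherProMP-v0` id from the function default above:

    import alr_envs

    env = alr_envs.make("HoleReacherProMP-v0", seed=1)
    obs = env.reset()
    params = env.action_space.sample()  # one "action" = full trajectory parametrization
    obs, trajectory_return, done, info = env.step(params)  # rolls out the whole trajectory
    print(done, trajectory_return)  # done is typically True after a single MP step in the episodic setting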
@@ -62,12 +63,13 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
     """
     # Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
-    # E.g. here for adding a lot of basis functions
+    # E.g. here for way to many basis functions
     env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
+    # env = alr_envs.make(env_name, seed)
     # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
     # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
-    rewards = 0
+    returns = 0
     obs = env.reset()
     # This time rendering every trajectory
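A hedged sketch (not part of the commit) of the override pattern the comments above hint at; whether `black_box_kwargs` can be overridden directly through `make` in the same way as `basis_generator_kwargs` is an assumption here, the `mp_dict` lines only show the registration-time equivalent:

    import alr_envs

    # fewer basis functions instead of 1000, overridden at creation time
    env = alr_envs.make("Reacher5dProMP-v0", seed=1, basis_generator_kwargs={'num_basis': 10})
    # assumed equivalent for the black-box options mentioned in the mp_dict comments
    # env = alr_envs.make("Reacher5dProMP-v0", seed=1, black_box_kwargs={'learn_sub_trajectories': True})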
@@ -78,11 +80,10 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
     for i in range(iterations):
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
-        rewards += reward
+        returns += reward
         if done:
-            print(i, rewards)
-            rewards = 0
+            print(i, reward)
             obs = env.reset()
     return obs
@@ -93,7 +94,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
     Example for running a custom motion primitive based environments.
     Our already registered environments follow the same structure.
     Hence, this also allows to adjust hyperparameters of the motion primitives.
-    Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.
+    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
     We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
     for our repo: https://github.com/ALRhub/alr_envs/
     Args:
@@ -105,25 +106,35 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
     """
-    base_env = "alr_envs:HoleReacher-v1"
+    base_env_id = "HoleReacher-v0"
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
     # You can also add other gym.Wrappers in case they are needed.
-    wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
-    mp_kwargs = {
-        "num_dof": 5,
-        "num_basis": 5,
-        "duration": 2,
-        "learn_goal": True,
-        "alpha_phase": 2,
-        "bandwidth_factor": 2,
-        "policy_type": "velocity",
-        "weights_scale": 50,
-        "goal_scale": 0.1
-    }
-    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
-    # OR for a deterministic ProMP:
-    # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs)
+    wrappers = [alr_envs.envs.classic_control.hole_reacher.MPWrapper]
+    # # For a ProMP
+    # trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
+    #                                'weight_scale': 2}
+    # phase_generator_kwargs = {'phase_generator_type': 'linear'}
+    # controller_kwargs = {'controller_type': 'velocity'}
+    # basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
+    #                           'num_basis': 5,
+    #                           'num_basis_zero_start': 1
+    #                           }
+    # For a DMP
+    trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
+                                   'weight_scale': 500}
+    phase_generator_kwargs = {'phase_generator_type': 'exp',
+                              'alpha_phase': 2.5}
+    controller_kwargs = {'controller_type': 'velocity'}
+    basis_generator_kwargs = {'basis_generator_type': 'rbf',
+                              'num_basis': 5
+                              }
+    env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
+                           traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
+                           phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
+                           seed=seed)
     if render:
         env.render(mode="human")
@@ -144,18 +155,15 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 if __name__ == '__main__':
-    render = False
-    # # DMP
-    # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
+    render = True
+    # DMP
+    example_mp("alr_envs:HoleReacherDMP-v0", seed=10, iterations=5, render=render)
     #
     # # ProMP
-    # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
-    #
-    # # DetProMP
-    # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
+    example_mp("alr_envs:HoleReacherProMP-v0", seed=10, iterations=5, render=render)
     # Altered basis functions
-    obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)
+    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=5, render=render)
     # Custom MP
-    # example_fully_custom_mp(seed=10, iterations=1, render=render)
+    example_fully_custom_mp(seed=10, iterations=1, render=render)
View File

@@ -1,41 +1,37 @@
 import alr_envs
-def example_mp(env_name, seed=1):
+def example_mp(env_name, seed=1, render=True):
     """
     Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered.
     For more information on motion primitive specific stuff, look at the traj_gen examples.
     Args:
         env_name: ProMP env_id
         seed: seed
+        render: boolean
     Returns:
     """
     # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
     env = alr_envs.make(env_name, seed)
-    rewards = 0
+    returns = 0
     obs = env.reset()
     # number of samples/full trajectories (multiple environment steps)
     for i in range(10):
+        if render and i % 2 == 0:
+            env.render(mode="human")
+        else:
+            env.render(mode=None)
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
-        rewards += reward
+        returns += reward
         if done:
-            print(rewards)
-            rewards = 0
+            print(returns)
             obs = env.reset()
 if __name__ == '__main__':
-    # DMP - not supported yet
-    # example_mp("ReacherDMP-v2")
-    # DetProMP
-    example_mp("ContinuousMountainCarProMP-v0")
     example_mp("ReacherProMP-v2")
-    example_mp("FetchReachDenseProMP-v1")
-    example_mp("FetchSlideDenseProMP-v1")
View File

@@ -118,7 +118,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
             has_black_box_wrapper = True
         _env = w(_env)
     if not has_black_box_wrapper:
-        raise ValueError("An RawInterfaceWrapper is required in order to leverage movement primitive environments.")
+        raise ValueError("A RawInterfaceWrapper is required in order to leverage movement primitive environments.")
     return _env
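For context, a hedged sketch of the kind of wrapper this check expects; the import path and the exact abstract properties (a context mask plus the current position/velocity of the controlled joints) are assumptions based on the bundled MPWrapper examples, not something this commit defines:

    import numpy as np
    from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper  # assumed module path

    class MyMPWrapper(RawInterfaceWrapper):
        @property
        def context_mask(self) -> np.ndarray:
            # which observation entries are kept as the context for the trajectory generator
            return np.ones(self.env.observation_space.shape[0], dtype=bool)

        @property
        def current_pos(self) -> np.ndarray:
            return self.env.current_pos  # hypothetical accessor of the underlying env

        @property
        def current_vel(self) -> np.ndarray:
            return self.env.current_vel  # hypothetical accessor of the underlying env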