clean up example files

Onur 2022-07-12 17:24:12 +02:00
parent d64cb614fa
commit 14ee580473
6 changed files with 140 additions and 130 deletions


@@ -1,11 +1,11 @@
import alr_envs
def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True):
"""
Example for running a DMC based env in the step based setting.
The env_id has to be specified as `domain_name-task_name` or
for manipulation tasks as `manipulation-environment_name`
The env_id has to be specified as `domain_name:task_name` or
for manipulation tasks as `domain_name:manipulation-environment_name`
Args:
env_id: Either `domain_name-task_name` or `manipulation-environment_name`
@@ -24,12 +24,11 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
for i in range(iterations):
ac = env.action_space.sample()
if render:
env.render(mode="human")
obs, reward, done, info = env.step(ac)
rewards += reward
if render:
env.render("human")
if done:
print(env_id, rewards)
rewards = 0
@@ -57,34 +56,37 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
"""
# Base DMC name, according to structure of above example
base_env = "ball_in_cup-catch"
base_env_id = "dmc:ball_in_cup-catch"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
mp_kwargs = {
"num_dof": 2, # degrees of fredom a.k.a. the old action space dimensionality
"num_basis": 5, # number of basis functions, the new action space has size num_dof x num_basis
"duration": 20, # length of trajectory in s, number of steps = duration / dt
"learn_goal": True, # learn the goal position (recommended)
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor", # tracking_controller type, 'velocity', 'position', and 'motor' (torque control)
"weights_scale": 1, # scaling of MP weights
"goal_scale": 1, # scaling of learned goal position
"policy_kwargs": { # only required for torque control/PD-Controller
"p_gains": 0.2,
"d_gains": 0.05
# # For a ProMP
trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
phase_generator_kwargs = {'phase_generator_type': 'linear'}
controller_kwargs = {'controller_type': 'motor',
"p_gains": 1.0,
"d_gains": 0.1,}
basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 1
}
}
kwargs = {
"time_limit": 20, # same as duration value but as max horizon for underlying DMC environment
"episode_length": 1000, # corresponding number of episode steps
# "frame_skip": 1
}
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)
# OR for a deterministic ProMP (other traj_gen_kwargs are required, see metaworld_examples):
# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
# For a DMP
# trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
# phase_generator_kwargs = {'phase_generator_type': 'exp',
# 'alpha_phase': 2}
# controller_kwargs = {'controller_type': 'motor',
# "p_gains": 1.0,
# "d_gains": 0.1,
# }
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
# 'num_basis': 5
# }
env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
seed=seed)
# This renders the full MP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
@@ -104,7 +106,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
rewards += reward
if done:
print(base_env, rewards)
print(base_env_id, rewards)
rewards = 0
obs = env.reset()
@@ -118,18 +120,18 @@ if __name__ == '__main__':
# For rendering DMC
# export MUJOCO_GL="osmesa"
render = False
render = True
# # Standard DMC Suite tasks
example_dmc("fish-swim", seed=10, iterations=1000, render=render)
# Manipulation tasks
# Disclaimer: The vision versions are currently not integrated and yield an error
example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=render)
# Gym + DMC hybrid task provided in the MP framework
example_dmc("dmc:fish-swim", seed=10, iterations=1000, render=render)
#
# # Manipulation tasks
# # Disclaimer: The vision versions are currently not integrated and yield an error
example_dmc("dmc:manipulation-reach_site_features", seed=10, iterations=250, render=render)
#
# # Gym + DMC hybrid task provided in the MP framework
example_dmc("dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render)
# Custom DMC task
# Different seed, because the episode is longer for this example and the name+seed combo is already registered above
# Custom DMC task
# Different seed, because the episode is longer for this example and the name+seed combo is already registered above
example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
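
For quick reference, the pattern the refactored examples converge on is summarized below. This is a minimal sketch assembled only from the DMC example above: the helper name is illustrative, and the kwargs values are just the ones shown in this diff, so they may need tuning for other tasks.

import alr_envs

def make_ball_in_cup_promp(seed=1):
    # Wrapper exposing the raw interface of the underlying DMC task (same as in the example above)
    wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
    # ProMP configuration taken verbatim from the example
    trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
    phase_generator_kwargs = {'phase_generator_type': 'linear'}
    controller_kwargs = {'controller_type': 'motor', 'p_gains': 1.0, 'd_gains': 0.1}
    basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
                              'num_basis': 5,
                              'num_basis_zero_start': 1}
    # Single entry point that replaces the old make_dmp_env/make_promp_env helpers
    return alr_envs.make_bb(env_id="dmc:ball_in_cup-catch", wrappers=wrappers, black_box_kwargs={},
                            traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                            phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                            seed=seed)

Sampling from the returned environment's action_space yields MP parameters, and a single env.step(ac) then executes the full trajectory, exactly as in the rollout loop above.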


@@ -6,7 +6,7 @@ import numpy as np
import alr_envs
def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
"""
Example for running any env in the step based setting.
This also includes DMC environments when leveraging our custom make_env function.
@@ -41,7 +41,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
obs = env.reset()
def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
def example_async(env_id="HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
"""
Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
This also includes DMC and DMP environments when leveraging our custom make_env function.
@@ -80,23 +80,21 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
rewards[done] = 0
# do not return values above threshold
return (*map(lambda v: np.stack(v)[:n_samples], buffer.values()),)
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
if __name__ == '__main__':
render = True
# Basic gym task
example_general("Reacher5d-v0", seed=10, iterations=200, render=render)
example_general("Pendulum-v1", seed=10, iterations=200, render=render)
# # Basis task from framework
example_general("Reacher-v0", seed=10, iterations=200, render=render)
# Mujoco task from framework
example_general("Reacher5d-v0", seed=10, iterations=200, render=render)
# # OpenAI Mujoco task
example_general("HalfCheetah-v2", seed=10, render=render)
# # Mujoco task from framework
example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=render)
# Vectorized multiprocessing environments
example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
# example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)


@@ -25,14 +25,12 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
for i in range(iterations):
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
rewards += reward
if render:
# THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
# TODO: Remove this, when Metaworld fixes its interface.
env.render(False)
obs, reward, done, info = env.step(ac)
rewards += reward
if done:
print(env_id, rewards)
rewards = 0
@@ -60,25 +58,32 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
"""
# Base MetaWorld name, according to structure of above example
base_env = "button-press-v2"
base_env_id = "metaworld:button-press-v2"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]
mp_kwargs = {
"num_dof": 4, # degrees of fredom a.k.a. the old action space dimensionality
"num_basis": 5, # number of basis functions, the new action space has size num_dof x num_basis
"duration": 6.25, # length of trajectory in s, number of steps = duration / dt
"post_traj_time": 0, # pad trajectory with additional zeros at the end (recommended: 0)
"width": 0.025, # width of the basis functions
"zero_start": True, # start from current environment position if True
"weights_scale": 1, # scaling of MP weights
"policy_type": "metaworld", # custom tracking_controller type for metaworld environments
}
wrappers = [alr_envs.meta.goal_object_change_mp_wrapper.MPWrapper]
# # For a ProMP
# trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
# phase_generator_kwargs = {'phase_generator_type': 'linear'}
# controller_kwargs = {'controller_type': 'metaworld'}
# basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
# 'num_basis': 5,
# 'num_basis_zero_start': 1
# }
env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a DMP (other traj_gen_kwargs are required, see dmc_examples):
# env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs, **kwargs)
# For a DMP
trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
phase_generator_kwargs = {'phase_generator_type': 'exp',
'alpha_phase': 2}
controller_kwargs = {'controller_type': 'metaworld'}
basis_generator_kwargs = {'basis_generator_type': 'rbf',
'num_basis': 5
}
env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
seed=seed)
# This renders the full MP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
@@ -102,7 +107,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
rewards += reward
if done:
print(base_env, rewards)
print(base_env_id, rewards)
rewards = 0
obs = env.reset()
@@ -118,11 +123,12 @@ if __name__ == '__main__':
# export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
render = False
# # Standard DMC Suite tasks
example_dmc("button-press-v2", seed=10, iterations=500, render=render)
# # Standard MetaWorld tasks
example_dmc("metaworld:button-press-v2", seed=10, iterations=500, render=render)
# MP + MetaWorld hybrid task provided in our framework
# # MP + MetaWorld hybrid task provided in our framework
example_dmc("ButtonPressProMP-v2", seed=10, iterations=1, render=render)
# Custom MetaWorld task
#
# # Custom MetaWorld task
example_custom_dmc_and_mp(seed=10, iterations=1, render=render)


@@ -13,11 +13,11 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
Returns:
"""
# Equivalent to gym, we have make function which can be used to create environments.
# Equivalent to gym, we have a make function which can be used to create environments.
# It takes care of seeding and enables the use of a variety of external environments using the gym interface.
env = alr_envs.make(env_name, seed)
rewards = 0
returns = 0
# env.render(mode=None)
obs = env.reset()
@@ -36,16 +36,17 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
env.render(mode=None)
# Now the action space is not the raw action but the parametrization of the trajectory generator,
# such as a ProMP. You can still use it the same, though.
# such as a ProMP
ac = env.action_space.sample()
# This executes a full trajectory
# This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the
# full observation of the last step if replanning/sub-trajectory learning is used. The 'reward' is equal
# to the return of the trajectory; by default, this is the sum over the step-wise rewards.
obs, reward, done, info = env.step(ac)
# Aggregated reward of trajectory
rewards += reward
# Aggregated returns
returns += reward
if done:
print(rewards)
rewards = 0
print(reward)
obs = env.reset()
@@ -62,12 +63,13 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
"""
# Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
# E.g. here for adding a lot of basis functions
# E.g. here for way too many basis functions
env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
# env = alr_envs.make(env_name, seed)
# mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
# mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
rewards = 0
returns = 0
obs = env.reset()
# This time rendering every trajectory
@@ -78,11 +80,10 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
for i in range(iterations):
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
rewards += reward
returns += reward
if done:
print(i, rewards)
rewards = 0
print(i, reward)
obs = env.reset()
return obs
@@ -93,7 +94,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
Example for running a custom motion primitive based environment.
Our already registered environments follow the same structure.
Hence, this also allows adjusting the hyperparameters of the motion primitives.
Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.
Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
for our repo: https://github.com/ALRhub/alr_envs/
Args:
@@ -105,25 +106,35 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
"""
base_env = "alr_envs:HoleReacher-v1"
base_env_id = "HoleReacher-v0"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
mp_kwargs = {
"num_dof": 5,
"num_basis": 5,
"duration": 2,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "velocity",
"weights_scale": 50,
"goal_scale": 0.1
wrappers = [alr_envs.envs.classic_control.hole_reacher.MPWrapper]
# # For a ProMP
# trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
# 'weight_scale': 2}
# phase_generator_kwargs = {'phase_generator_type': 'linear'}
# controller_kwargs = {'controller_type': 'velocity'}
# basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
# 'num_basis': 5,
# 'num_basis_zero_start': 1
# }
# For a DMP
trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
'weight_scale': 500}
phase_generator_kwargs = {'phase_generator_type': 'exp',
'alpha_phase': 2.5}
controller_kwargs = {'controller_type': 'velocity'}
basis_generator_kwargs = {'basis_generator_type': 'rbf',
'num_basis': 5
}
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a deterministic ProMP:
# env = make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs)
env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
seed=seed)
if render:
env.render(mode="human")
@@ -144,18 +155,15 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
if __name__ == '__main__':
render = False
# # DMP
# example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
render = True
# DMP
example_mp("alr_envs:HoleReacherDMP-v0", seed=10, iterations=5, render=render)
#
# # ProMP
# example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
#
# # DetProMP
# example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
example_mp("alr_envs:HoleReacherProMP-v0", seed=10, iterations=5, render=render)
# Altered basis functions
obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)
obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=5, render=render)
# Custom MP
# example_fully_custom_mp(seed=10, iterations=1, render=render)
example_fully_custom_mp(seed=10, iterations=1, render=render)


@@ -1,41 +1,37 @@
import alr_envs
def example_mp(env_name, seed=1):
def example_mp(env_name, seed=1, render=True):
"""
Example for running a motion primitive based version of an OpenAI-gym environment, which is already registered.
For more information on motion primitive specific stuff, look at the traj_gen examples.
Args:
env_name: ProMP env_id
seed: seed
render: boolean
Returns:
"""
# While gym.make() can also be used in this case, we recommend our custom make env function.
env = alr_envs.make(env_name, seed)
rewards = 0
returns = 0
obs = env.reset()
# number of samples/full trajectories (multiple environment steps)
for i in range(10):
if render and i % 2 == 0:
env.render(mode="human")
else:
env.render(mode=None)
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
rewards += reward
returns += reward
if done:
print(rewards)
rewards = 0
print(returns)
obs = env.reset()
if __name__ == '__main__':
# DMP - not supported yet
# example_mp("ReacherDMP-v2")
# DetProMP
example_mp("ContinuousMountainCarProMP-v0")
example_mp("ReacherProMP-v2")
example_mp("FetchReachDenseProMP-v1")
example_mp("FetchSlideDenseProMP-v1")


@@ -118,7 +118,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
has_black_box_wrapper = True
_env = w(_env)
if not has_black_box_wrapper:
raise ValueError("An RawInterfaceWrapper is required in order to leverage movement primitive environments.")
raise ValueError("A RawInterfaceWrapper is required in order to leverage movement primitive environments.")
return _env
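
For completeness, a hypothetical skeleton of a custom wrapper that satisfies this check is sketched below. The diff does not show the RawInterfaceWrapper definition, so the import path and the assumed properties (context_mask, current_pos, current_vel) are assumptions inferred from the MPWrapper classes used in the examples and should be verified against the repo.

import numpy as np
# NOTE: import path is an assumption; adjust to wherever RawInterfaceWrapper is defined in alr_envs.
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper

class MyTaskMPWrapper(RawInterfaceWrapper):
    @property
    def context_mask(self) -> np.ndarray:
        # Assumed interface: boolean mask selecting which observation entries form the context.
        return np.ones(self.env.observation_space.shape[0], dtype=bool)

    @property
    def current_pos(self) -> np.ndarray:
        # Assumed interface: joint positions tracked by the controller (placeholder access, task specific).
        return self.env.unwrapped.data.qpos.copy()

    @property
    def current_vel(self) -> np.ndarray:
        # Assumed interface: joint velocities tracked by the controller (placeholder access, task specific).
        return self.env.unwrapped.data.qvel.copy()

Passing such a wrapper class in the wrappers list given to alr_envs.make_bb would then satisfy the has_black_box_wrapper check above.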