Merge pull request #7 from ALRhub/mp_wrappers_and_stuff

Mp wrappers and stuff
ottofabian 2021-05-27 17:40:35 +02:00 committed by GitHub
commit 746d408a76
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 187 additions and 85 deletions

View File

@@ -272,12 +272,20 @@ for v in versions:
}
)
# register(
# id='HoleReacherDetPMP-v0',
# entry_point='alr_envs.classic_control.hole_reacher:holereacher_detpmp',
# # max_episode_steps=1,
# # TODO: add mp kwargs
# )
register(
id=f'HoleReacherDetPMP-{v}',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
kwargs={
"name": f"alr_envs:HoleReacher-{v}",
"num_dof": 5,
"num_basis": 5,
"duration": 2,
"width": 0.025,
"policy_type": "velocity",
"weights_scale": 0.2,
"zero_start": True
}
)
# TODO: properly add final_pos
register(
@@ -335,6 +343,41 @@ register(
}
)
register(
id='ALRBallInACupSimpleDetPMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
kwargs={
"name": "alr_envs:ALRBallInACupSimple-v0",
"num_dof": 3,
"num_basis": 5,
"duration": 3.5,
"post_traj_time": 4.5,
"width": 0.0035,
# "off": -0.05,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"zero_goal": True
}
)
register(
id='ALRBallInACupDetPMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
kwargs={
"name": "alr_envs:ALRBallInACupSimple-v0",
"num_dof": 7,
"num_basis": 5,
"duration": 3.5,
"post_traj_time": 4.5,
"width": 0.0035,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"zero_goal": True
}
)
register(
id='ALRBallInACupGoalDMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_contextual_env',

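Note (not part of the diff): a minimal usage sketch for one of the ids registered above, assuming alr_envs is installed and the registration code has run. One step of a DetPMP environment consumes a full weight vector (num_dof * num_basis = 5 * 5 = 25 for HoleReacherDetPMP-v1) and rolls out the resulting trajectory in the underlying step-based environment.

import gym
import numpy as np

env = gym.make("alr_envs:HoleReacherDetPMP-v1")
obs = env.reset()
# small random ProMP weights; weights_scale=0.2 is applied inside the wrapper
weights = 0.1 * np.random.randn(env.action_space.shape[0])
obs, reward, done, info = env.step(weights)  # executes the whole trajectory
print(reward, done)
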
View File

@@ -7,10 +7,10 @@ from gym.utils import seeding
from matplotlib import patches
from alr_envs.classic_control.utils import check_self_collision
from alr_envs.utils.mps.mp_environments import MPEnv
from alr_envs.utils.mps.mp_environments import AlrEnv
class HoleReacherEnv(MPEnv):
class HoleReacherEnv(AlrEnv):
def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
@@ -71,14 +71,15 @@ class HoleReacherEnv(MPEnv):
A single step with an action in joint velocity space
"""
acc = (action - self._angle_velocity) / self.dt
self._angle_velocity = action
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5)
self._update_joints()
acc = (action - self._angle_velocity) / self.dt
reward, info = self._get_reward(acc)
info.update({"is_collided": self._is_collided})
self.end_effector_traj.append(np.copy(self.end_effector))
self._steps += 1
done = self._is_collided
@@ -101,6 +102,7 @@ class HoleReacherEnv(MPEnv):
self._joints = np.zeros((self.n_links + 1, 2))
self._update_joints()
self._steps = 0
self.end_effector_traj = []
return self._get_obs().copy()

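Note (not part of the diff): the finite-difference acceleration only carries information if it is computed from the previous velocity, before that velocity is overwritten by the commanded action. A standalone sketch of that ordering, using a hypothetical helper:

import numpy as np

def velocity_control_step(joint_angles, angle_velocity, action, dt):
    acc = (action - angle_velocity) / dt                # difference w.r.t. the *old* velocity
    angle_velocity = action                             # velocity control: command becomes the new velocity
    joint_angles = joint_angles + dt * angle_velocity   # explicit Euler integration
    return joint_angles, angle_velocity, acc
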
View File

@@ -5,10 +5,10 @@ import numpy as np
from gym import spaces
from gym.utils import seeding
from alr_envs.utils.mps.mp_environments import MPEnv
from alr_envs.utils.mps.mp_environments import AlrEnv
class SimpleReacherEnv(MPEnv):
class SimpleReacherEnv(AlrEnv):
"""
Simple Reaching Task without any physics simulation.
Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions

View File

@@ -6,10 +6,10 @@ import numpy as np
from gym.utils import seeding
from alr_envs.classic_control.utils import check_self_collision
from alr_envs.utils.mps.mp_environments import MPEnv
from alr_envs.utils.mps.mp_environments import AlrEnv
class ViaPointReacher(MPEnv):
class ViaPointReacher(AlrEnv):
def __init__(self, n_links, random_start: bool = True, via_target: Union[None, Iterable] = None,
target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):

View File

@@ -1,5 +1,6 @@
from collections import OrderedDict
import os
from abc import abstractmethod
from gym import error, spaces
@@ -142,18 +143,20 @@ class AlrMujocoEnv(gym.Env):
# methods to override:
# ----------------------------
@property
@abstractmethod
def active_obs(self):
"""Returns boolean mask for each observation entry
indicating whether it is returned in the contextual case.
This effectively allows filtering out unwanted or unnecessary observations from the full step-based case.
"""
return np.ones(self.observation_space.shape, dtype=bool)
def _get_obs(self):
"""Returns the observation.
"""
raise NotImplementedError()
def configure(self, *args, **kwargs):
"""
Helper method to set certain environment properties, such as the context of contextual environments, since
reset() does not take arguments. Should be called before reset().
"""
pass
def reset_model(self):
"""
Reset the robot degrees of freedom (qpos and qvel).

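Note (not part of the diff): active_obs is a boolean mask of the same length as the full observation; MPWrapper.reset() further below uses it as an index, self.env.reset()[self.env.active_obs]. A self-contained sketch with made-up numbers:

import numpy as np

# hypothetical full step-based observation: 7 cos, 7 sin, 2 target coordinates, 1 step counter
full_obs = np.arange(17, dtype=float)

# mask in the spirit of the active_obs properties in this PR: expose only the target coordinates
active_obs = np.hstack([[False] * 7, [False] * 7, [True] * 2, [False]])

contextual_obs = full_obs[active_obs]  # array([14., 15.])
print(contextual_obs)
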
View File

@@ -126,6 +126,7 @@
<geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0045" solref="-10000 -100"/>
<!-- <geom name="cup_base_contact" pos="0 -0.025 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>-->
<geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.02 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
<geom name="cup_base_contact_below" pos="0 -0.04 0.1165" euler="-1.57 0 0" type="cylinder" size="0.035 0.001" solref="-10000 -100" rgba="255 0 255 1"/>
<!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
<!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
<!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->

View File

@@ -22,7 +22,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
self.context = None
self.context = context
utils.EzPickle.__init__(self)
alr_mujoco_env.AlrMujocoEnv.__init__(self,
@@ -45,7 +45,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
else:
raise ValueError("Unknown reward type")
self.reward_function = reward_function(self.sim_steps)
self.configure(context)
@property
def start_pos(self):
@@ -69,10 +68,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def current_vel(self):
return self.sim.data.qvel[0:7].copy()
def configure(self, context):
self.context = context
self.reward_function.reset(context)
def reset_model(self):
init_pos_all = self.init_qpos.copy()
init_pos_robot = self._start_pos
@@ -95,26 +90,27 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
reward_ctrl = - np.square(a).sum()
crash = self.do_simulation(a)
joint_cons_viol = self.check_traj_in_joint_limits()
# joint_cons_viol = self.check_traj_in_joint_limits()
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
ob = self._get_obs()
if not crash and not joint_cons_viol:
reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
done = success or self._steps == self.sim_steps - 1 or stop_sim
if not crash:
reward, success, is_collided = self.reward_function.compute_reward(a, self)
done = success or self._steps == self.sim_steps - 1 or is_collided
self._steps += 1
else:
reward = -1000
reward = -2
success = False
is_collided = False
done = True
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl,
velocity=angular_vel,
traj=self._q_pos, is_success=success,
is_collided=crash or joint_cons_viol)
is_collided=is_collided, sim_crash=crash)
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
@@ -129,6 +125,16 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
[self._steps],
])
# TODO
@property
def active_obs(self):
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
# [True] * 2, # x-y coordinates of target distance
[False] # env steps
])
# These functions are for the task with 3 joint actuations
def extend_des_pos(self, des_pos):
des_pos_full = self._start_pos.copy()

View File

@@ -6,7 +6,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
def __init__(self, sim_time):
self.sim_time = sim_time
self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below",
"wrist_palm_link_convex_geom",
"wrist_pitch_link_convex_decomposition_p1_geom",
"wrist_pitch_link_convex_decomposition_p2_geom",
"wrist_pitch_link_convex_decomposition_p3_geom",
@@ -20,6 +21,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
self.goal_id = None
self.goal_final_id = None
self.collision_ids = None
self._is_collided = False
self.collision_penalty = 1
self.ball_traj = None
self.dists = None
@@ -36,49 +39,52 @@ class BallInACupReward(alr_reward_fct.AlrReward):
self.action_costs = []
self.cup_angles = []
def compute_reward(self, action, sim, step, context=None):
self.ball_id = sim.model._body_name2id["ball"]
self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
self.goal_id = sim.model._site_name2id["cup_goal"]
self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
def compute_reward(self, action, env):
self.ball_id = env.sim.model._body_name2id["ball"]
self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
self.goal_id = env.sim.model._site_name2id["cup_goal"]
self.goal_final_id = env.sim.model._site_name2id["cup_goal_final"]
self.collision_ids = [env.sim.model._geom_name2id[name] for name in self.collision_objects]
ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
ball_in_cup = self.check_ball_in_cup(env.sim, self.ball_collision_id)
# Compute the current distance from the ball to the inner part of the cup
goal_pos = sim.data.site_xpos[self.goal_id]
ball_pos = sim.data.body_xpos[self.ball_id]
goal_final_pos = sim.data.site_xpos[self.goal_final_id]
goal_pos = env.sim.data.site_xpos[self.goal_id]
ball_pos = env.sim.data.body_xpos[self.ball_id]
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
self.ball_traj[step, :] = ball_pos
cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]])
self.ball_traj[env._steps, :] = ball_pos
cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]])
self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
action_cost = np.sum(np.square(action))
self.action_costs.append(action_cost)
if self.check_collision(sim):
reward = - 1000
return reward, False, True
self._is_collided = self.check_collision(env.sim) or env.check_traj_in_joint_limits()
if step == self.sim_time - 1:
if env._steps == env.sim_steps - 1 or self._is_collided:
t_min_dist = np.argmin(self.dists)
angle_min_dist = self.cup_angles[t_min_dist]
cost_angle = (angle_min_dist - np.pi / 2)**2
min_dist = self.dists[t_min_dist]
dist_final = self.dists_final[-1]
min_dist_final = np.min(self.dists_final)
cost = 0.5 * min_dist + 0.5 * dist_final + 0.01 * cost_angle
reward = np.exp(-2 * cost) - 1e-3 * action_cost
success = dist_final < 0.05 and ball_in_cup
cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
# reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
# reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
success = dist_final < 0.05 and ball_in_cup and not self._is_collided
crash = self._is_collided
else:
reward = - 1e-3 * action_cost
reward = - 1e-5 * action_cost # TODO: increase action_cost weight
success = False
crash = False
return reward, success, False
return reward, success, crash
def check_ball_in_cup(self, sim, ball_collision_id):
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]

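Note (not part of the diff): condensed, the new terminal reward can be written as the following standalone sketch (illustrative only; in the real code the distances, cup angle and action cost come from the MuJoCo simulation, and the per-step reward is just -1e-5 * action_cost):

import numpy as np

def terminal_reward(dist_final, min_dist_final, angle_min_dist, action_cost,
                    is_collided, collision_penalty=1.0):
    # squared distance terms dominate; angle and action costs act as small regularizers
    cost_angle = (angle_min_dist - np.pi / 2) ** 2
    return (- dist_final ** 2 - min_dist_final ** 2
            - 1e-4 * cost_angle - 1e-5 * action_cost
            - collision_penalty * int(is_collided))

# a near miss without collision:
print(terminal_reward(dist_final=0.08, min_dist_final=0.06,
                      angle_min_dist=np.pi / 2, action_cost=3.0,
                      is_collided=False))  # approx. -0.01003
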
View File

@@ -105,6 +105,7 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
# TODO
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
@@ -114,6 +115,10 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
[self._steps],
])
# TODO
def active_obs(self):
pass
if __name__ == "__main__":

View File

@@ -40,6 +40,16 @@ def _flatten_list(l):
return [l__ for l_ in l for l__ in l_]
class DummyDist:
def __init__(self, dim):
self.dim = dim
def sample(self, contexts):
contexts = np.atleast_2d(contexts)
n_samples = contexts.shape[0]
return np.random.normal(size=(n_samples, self.dim)), contexts
class AlrMpEnvSampler:
"""
An asynchronous sampler for non-contextual MPWrapper environments. A sampler object can be called with a set of
@@ -100,9 +110,9 @@ class AlrContextualMpEnvSampler:
if __name__ == "__main__":
env_name = "alr_envs:ALRBallInACupSimpleDMP-v0"
env_name = "alr_envs:HoleReacherDetPMP-v1"
n_cpu = 8
dim = 15
dim = 25
n_samples = 10
sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)

View File

@@ -2,28 +2,29 @@ import gym
import numpy as np
from mp_lib import det_promp
from alr_envs.utils.mps.mp_environments import MPEnv
from alr_envs.utils.mps.mp_environments import AlrEnv
from alr_envs.utils.mps.mp_wrapper import MPWrapper
class DetPMPWrapper(MPWrapper):
def __init__(self, env: MPEnv, num_dof: int, num_basis: int, width: int, duration: int = 1, dt: float = 0.01,
def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, width: float, duration: float = 1, dt: float = 0.01,
post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1.,
zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
self.duration = duration # seconds
dt = env.dt if hasattr(env, "dt") else dt
assert dt is not None
self.dt = dt
super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
self.dt = dt
action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
zero_start: bool = False, zero_goal: bool = False):
pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=0.01,
off: float = 0.01, zero_start: bool = False, zero_goal: bool = False):
pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
zero_start=zero_start, zero_goal=zero_goal)
weights = np.zeros(shape=(num_basis, num_dof))
@@ -32,10 +33,10 @@ class DetPMPWrapper(MPWrapper):
return pmp
def mp_rollout(self, action):
params = np.reshape(action, (self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
params = np.reshape(action, newshape=(self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
self.mp.set_weights(self.duration, params)
_, des_pos, des_vel, _ = self.mp.compute_trajectory(1 / self.dt, 1.)
if self.mp.zero_start:
des_pos += self.start_pos[None, :]
des_pos += self.env.start_pos[None, :]
return des_pos, des_vel

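Note (not part of the diff): the wrapper's flat Box action is turned into ProMP weights by a plain reshape plus scaling; a minimal sketch with the HoleReacherDetPMP numbers (5 dof, 5 basis functions, weights_scale 0.2):

import numpy as np

num_dof, num_basis, weights_scale = 5, 5, 0.2

action = np.random.randn(num_basis * num_dof)                       # flat action from the Box space
params = np.reshape(action, (num_basis, num_dof)) * weights_scale   # what mp_rollout hands to the ProMP
assert params.shape == (num_basis, num_dof)
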
View File

@@ -4,13 +4,13 @@ from mp_lib import dmps
from mp_lib.basis import DMPBasisGenerator
from mp_lib.phase import ExpDecayPhaseGenerator
from alr_envs.utils.mps.mp_environments import MPEnv
from alr_envs.utils.mps.mp_environments import AlrEnv
from alr_envs.utils.mps.mp_wrapper import MPWrapper
class DmpWrapper(MPWrapper):
def __init__(self, env: MPEnv, num_dof: int, num_basis: int,
def __init__(self, env: AlrEnv, num_dof: int, num_basis: int,
duration: int = 1, alpha_phase: float = 2., dt: float = None,
learn_goal: bool = False, post_traj_time: float = 0.,
weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3.,
@@ -40,7 +40,7 @@ class DmpWrapper(MPWrapper):
super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, render_mode,
num_basis=num_basis, alpha_phase=alpha_phase, bandwidth_factor=bandwidth_factor)
action_bounds = np.inf * np.ones((np.prod(self.mp.dmp_weights.shape) + (num_dof if learn_goal else 0)))
action_bounds = np.inf * np.ones((np.prod(self.mp.weights.shape) + (num_dof if learn_goal else 0)))
self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, alpha_phase: float = 2.,
@@ -51,7 +51,7 @@ class DmpWrapper(MPWrapper):
basis_bandwidth_factor=bandwidth_factor)
dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
num_time_steps=int(duration / dt), dt=dt)
duration=duration, dt=dt)
return dmp
@@ -66,7 +66,7 @@ class DmpWrapper(MPWrapper):
goal_pos = self.env.goal_pos
assert goal_pos is not None
weight_matrix = np.reshape(params, self.mp.dmp_weights.shape) # [num_basis, num_dof]
weight_matrix = np.reshape(params, self.mp.weights.shape) # [num_basis, num_dof]
return goal_pos * self.goal_scale, weight_matrix * self.weights_scale
def mp_rollout(self, action):

View File

@@ -5,7 +5,7 @@ import gym
import numpy as np
class MPEnv(gym.Env):
class AlrEnv(gym.Env):
@property
@abstractmethod

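Note (not part of the diff): only the rename MPEnv -> AlrEnv is visible in this hunk, but the rest of the PR shows what an AlrEnv is expected to provide (active_obs, start_pos, goal_pos, dt, plus the usual gym step/reset). A hypothetical minimal subclass, inferred from that usage rather than from this file:

import gym
import numpy as np
from alr_envs.utils.mps.mp_environments import AlrEnv

class ToyAlrEnv(AlrEnv):
    """Hypothetical example, not part of the PR."""

    def __init__(self):
        self.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(3,))
        self.action_space = gym.spaces.Box(-1.0, 1.0, shape=(2,))

    @property
    def dt(self):
        return 0.02

    @property
    def active_obs(self):
        return np.array([True, True, False])  # mask used as obs[active_obs] by MPWrapper

    @property
    def start_pos(self):
        return np.zeros(2)  # added to the trajectory by DetPMPWrapper when zero_start=True

    @property
    def goal_pos(self):
        return np.ones(2)   # read by DmpWrapper via self.env.goal_pos

    def step(self, action):
        return np.zeros(3), 0.0, False, {}

    def reset(self):
        return np.zeros(3)
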
View File

@@ -3,13 +3,13 @@ from abc import ABC, abstractmethod
import gym
import numpy as np
from alr_envs.utils.mps.mp_environments import MPEnv
from alr_envs.utils.mps.mp_environments import AlrEnv
from alr_envs.utils.policies import get_policy_class
class MPWrapper(gym.Wrapper, ABC):
def __init__(self, env: MPEnv, num_dof: int, dt: float, duration: int = 1, post_traj_time: float = 0.,
def __init__(self, env: AlrEnv, num_dof: int, dt: float, duration: float = 1, post_traj_time: float = 0.,
policy_type: str = None, weights_scale: float = 1., render_mode: str = None, **mp_kwargs):
super().__init__(env)
@@ -53,9 +53,6 @@ class MPWrapper(gym.Wrapper, ABC):
return obs, np.array(rewards), dones, infos
def configure(self, context):
self.env.configure(context)
def reset(self):
return self.env.reset()[self.env.active_obs]
@@ -65,7 +62,7 @@ class MPWrapper(gym.Wrapper, ABC):
if self.post_traj_steps > 0:
trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dimensions))])
velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])
# self._trajectory = trajectory
# self._velocity = velocity
@@ -105,7 +102,7 @@ class MPWrapper(gym.Wrapper, ABC):
raise NotImplementedError()
@abstractmethod
def initialize_mp(self, num_dof: int, duration: int, dt: float, **kwargs):
def initialize_mp(self, num_dof: int, duration: float, dt: float, **kwargs):
"""
Create respective instance of MP
Returns:

View File

@@ -1,6 +1,7 @@
from collections import defaultdict
import gym
import numpy as np
from alr_envs.utils.mp_env_async_sampler import AlrMpEnvSampler, AlrContextualMpEnvSampler, DummyDist
def example_mujoco():
@@ -22,9 +23,9 @@ def example_mujoco():
obs = env.reset()
def example_dmp():
def example_mp(env_name="alr_envs:HoleReacherDMP-v0"):
# env = gym.make("alr_envs:ViaPointReacherDMP-v0")
env = gym.make("alr_envs:HoleReacherDMP-v0")
env = gym.make(env_name)
rewards = 0
# env.render(mode=None)
obs = env.reset()
@@ -79,9 +80,36 @@ def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D',
print(sample(envs, 16))
def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4):
n_samples = 10
sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
dim = sampler.env.action_space.spaces[0].shape[0]
thetas = np.random.randn(n_samples, dim) # usually sampled from a search distribution
_, rewards, __, ___ = sampler(thetas)
print(rewards)
def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4):
sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu)
dim = sampler.env.action_space.spaces[0].shape[0]
dist = DummyDist(dim) # needs a sample function
n_samples = 10
new_samples, new_contexts, obs, new_rewards, done, infos = sampler(dist, n_samples)
print(new_rewards)
if __name__ == '__main__':
# example_mujoco()
# example_dmp()
example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
# env = gym.make("alr_envs:HoleReacherDMP-v0", context=0.1)
# env = gym.make("alr_envs:HoleReacherDMP-v1")
# example_dmp("alr_envs:SimpleReacherDMP-v1")
# example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
# example_async_contextual_sampler()
# env = gym.make("alr_envs:HoleReacherDetPMP-v1")
env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
# example_async_sampler(env_name)
example_mp(env_name)