Merge pull request #7 from ALRhub/mp_wrappers_and_stuff
Mp wrappers and stuff
Commit: 746d408a76
@@ -272,12 +272,20 @@ for v in versions:
         }
     )
 
-    # register(
-    #     id='HoleReacherDetPMP-v0',
-    #     entry_point='alr_envs.classic_control.hole_reacher:holereacher_detpmp',
-    #     # max_episode_steps=1,
-    #     # TODO: add mp kwargs
-    # )
+    register(
+        id=f'HoleReacherDetPMP-{v}',
+        entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+        kwargs={
+            "name": f"alr_envs:HoleReacher-{v}",
+            "num_dof": 5,
+            "num_basis": 5,
+            "duration": 2,
+            "width": 0.025,
+            "policy_type": "velocity",
+            "weights_scale": 0.2,
+            "zero_start": True
+        }
+    )
 
     # TODO: properly add final_pos
     register(
@@ -335,6 +343,41 @@ register(
     }
 )
 
+register(
+    id='ALRBallInACupSimpleDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+    kwargs={
+        "name": "alr_envs:ALRBallInACupSimple-v0",
+        "num_dof": 3,
+        "num_basis": 5,
+        "duration": 3.5,
+        "post_traj_time": 4.5,
+        "width": 0.0035,
+        # "off": -0.05,
+        "policy_type": "motor",
+        "weights_scale": 0.2,
+        "zero_start": True,
+        "zero_goal": True
+    }
+)
+
+register(
+    id='ALRBallInACupDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+    kwargs={
+        "name": "alr_envs:ALRBallInACupSimple-v0",
+        "num_dof": 7,
+        "num_basis": 5,
+        "duration": 3.5,
+        "post_traj_time": 4.5,
+        "width": 0.0035,
+        "policy_type": "motor",
+        "weights_scale": 0.2,
+        "zero_start": True,
+        "zero_goal": True
+    }
+)
+
 register(
     id='ALRBallInACupGoalDMP-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_contextual_env',
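
Reviewer note (not part of the diff): once registered, the DetPMP variants are created like any other gym id. A minimal usage sketch, assuming the alr_envs package is installed and the v1 hole reacher is among the registered versions:

import gym
import numpy as np

env = gym.make("alr_envs:HoleReacherDetPMP-v1")  # one of the ids registered above
obs = env.reset()
theta = 0.1 * np.random.randn(env.action_space.shape[0])  # flat ProMP weight vector (num_basis * num_dof)
step_result = env.step(theta)  # a single step executes the entire generated trajectory
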
@@ -7,10 +7,10 @@ from gym.utils import seeding
 from matplotlib import patches
 
 from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 
 
-class HoleReacherEnv(MPEnv):
+class HoleReacherEnv(AlrEnv):
 
     def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
                  hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
@@ -71,14 +71,15 @@ class HoleReacherEnv(MPEnv):
         A single step with an action in joint velocity space
         """
 
+        acc = (action - self._angle_velocity) / self.dt
         self._angle_velocity = action
-        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
+        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity  # + 0.001 * np.random.randn(5)
         self._update_joints()
 
-        acc = (action - self._angle_velocity) / self.dt
         reward, info = self._get_reward(acc)
 
         info.update({"is_collided": self._is_collided})
+        self.end_effector_traj.append(np.copy(self.end_effector))
 
         self._steps += 1
         done = self._is_collided
@@ -101,6 +102,7 @@ class HoleReacherEnv(MPEnv):
         self._joints = np.zeros((self.n_links + 1, 2))
         self._update_joints()
         self._steps = 0
+        self.end_effector_traj = []
 
         return self._get_obs().copy()
 
@@ -5,10 +5,10 @@ import numpy as np
 from gym import spaces
 from gym.utils import seeding
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 
 
-class SimpleReacherEnv(MPEnv):
+class SimpleReacherEnv(AlrEnv):
     """
     Simple Reaching Task without any physics simulation.
     Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
@@ -6,10 +6,10 @@ import numpy as np
 from gym.utils import seeding
 
 from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 
 
-class ViaPointReacher(MPEnv):
+class ViaPointReacher(AlrEnv):
 
     def __init__(self, n_links, random_start: bool = True, via_target: Union[None, Iterable] = None,
                  target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):
@@ -1,5 +1,6 @@
 from collections import OrderedDict
 import os
+from abc import abstractmethod
 
 
 from gym import error, spaces
@@ -142,18 +143,20 @@ class AlrMujocoEnv(gym.Env):
     # methods to override:
     # ----------------------------
 
+    @property
+    @abstractmethod
+    def active_obs(self):
+        """Returns boolean mask for each observation entry
+        whether the observation is returned for the contextual case or not.
+        This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
+        """
+        return np.ones(self.observation_space.shape, dtype=bool)
+
     def _get_obs(self):
         """Returns the observation.
        """
         raise NotImplementedError()
 
-    def configure(self, *args, **kwargs):
-        """
-        Helper method to set certain environment properties such as contexts in contextual environments since reset()
-        doesn't take arguments. Should be called before reset().
-        """
-        pass
-
     def reset_model(self):
         """
         Reset the robot degrees of freedom (qpos and qvel).
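
Reviewer note (not part of the diff): the new active_obs property is intended as a boolean index into the full step-based observation; the MPWrapper below uses it as self.env.reset()[self.env.active_obs]. A minimal sketch of that filtering with plain numpy, using made-up values:

import numpy as np

full_obs = np.array([0.1, 0.2, 0.3, 0.4])            # full step-based observation
active_obs = np.array([True, False, True, False])    # mask as returned by the property
contextual_obs = full_obs[active_obs]                 # -> array([0.1, 0.3])
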
@@ -126,6 +126,7 @@
         <geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0045" solref="-10000 -100"/>
         <!-- <geom name="cup_base_contact" pos="0 -0.025 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>-->
         <geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.02 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
+        <geom name="cup_base_contact_below" pos="0 -0.04 0.1165" euler="-1.57 0 0" type="cylinder" size="0.035 0.001" solref="-10000 -100" rgba="255 0 255 1"/>
         <!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
         <!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
         <!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->
@@ -22,7 +22,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
         self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
 
-        self.context = None
+        self.context = context
 
         utils.EzPickle.__init__(self)
         alr_mujoco_env.AlrMujocoEnv.__init__(self,
@@ -45,7 +45,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         else:
             raise ValueError("Unknown reward type")
         self.reward_function = reward_function(self.sim_steps)
-        self.configure(context)
 
     @property
     def start_pos(self):
@@ -69,10 +68,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
     def current_vel(self):
         return self.sim.data.qvel[0:7].copy()
 
-    def configure(self, context):
-        self.context = context
-        self.reward_function.reset(context)
-
     def reset_model(self):
         init_pos_all = self.init_qpos.copy()
         init_pos_robot = self._start_pos
@@ -95,26 +90,27 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         reward_ctrl = - np.square(a).sum()
 
         crash = self.do_simulation(a)
-        joint_cons_viol = self.check_traj_in_joint_limits()
+        # joint_cons_viol = self.check_traj_in_joint_limits()
 
         self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
         self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
 
         ob = self._get_obs()
 
-        if not crash and not joint_cons_viol:
-            reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
-            done = success or self._steps == self.sim_steps - 1 or stop_sim
+        if not crash:
+            reward, success, is_collided = self.reward_function.compute_reward(a, self)
+            done = success or self._steps == self.sim_steps - 1 or is_collided
             self._steps += 1
         else:
-            reward = -1000
+            reward = -2
             success = False
+            is_collided = False
             done = True
         return ob, reward, done, dict(reward_dist=reward_dist,
                                       reward_ctrl=reward_ctrl,
                                       velocity=angular_vel,
                                       traj=self._q_pos, is_success=success,
-                                      is_collided=crash or joint_cons_viol)
+                                      is_collided=is_collided, sim_crash=crash)
 
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
@@ -129,6 +125,16 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             [self._steps],
         ])
 
+    # TODO
+    @property
+    def active_obs(self):
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
     # These functions are for the task with 3 joint actuations
     def extend_des_pos(self, des_pos):
         des_pos_full = self._start_pos.copy()
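
Reviewer note (not part of the diff): with this change step() expects the reward object to implement compute_reward(action, env) and to return a (reward, success, is_collided) triple. A minimal sketch of that interface with hypothetical names, only to illustrate the assumed contract:

import numpy as np

class MinimalReward:
    # Hypothetical stand-in for an AlrReward-style reward object.
    def reset(self, context=None):
        self.dists = []

    def compute_reward(self, action, env):
        # env gives access to env.sim, env._steps, env.sim_steps, ...
        reward = -float(np.sum(np.square(action)))     # simple action cost
        success = False
        is_collided = env.check_traj_in_joint_limits() # collision / limit check on the env
        return reward, success, is_collided
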
@@ -6,7 +6,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
     def __init__(self, sim_time):
         self.sim_time = sim_time
 
-        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
+        self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below",
+                                  "wrist_palm_link_convex_geom",
                                   "wrist_pitch_link_convex_decomposition_p1_geom",
                                   "wrist_pitch_link_convex_decomposition_p2_geom",
                                   "wrist_pitch_link_convex_decomposition_p3_geom",
@@ -20,6 +21,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.goal_id = None
         self.goal_final_id = None
         self.collision_ids = None
+        self._is_collided = False
+        self.collision_penalty = 1
 
         self.ball_traj = None
         self.dists = None
@@ -36,49 +39,52 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.action_costs = []
         self.cup_angles = []
 
-    def compute_reward(self, action, sim, step, context=None):
-        self.ball_id = sim.model._body_name2id["ball"]
-        self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
-        self.goal_id = sim.model._site_name2id["cup_goal"]
-        self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
-        self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
+    def compute_reward(self, action, env):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final"]
+        self.collision_ids = [env.sim.model._geom_name2id[name] for name in self.collision_objects]
 
-        ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
+        ball_in_cup = self.check_ball_in_cup(env.sim, self.ball_collision_id)
 
         # Compute the current distance from the ball to the inner part of the cup
-        goal_pos = sim.data.site_xpos[self.goal_id]
-        ball_pos = sim.data.body_xpos[self.ball_id]
-        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
         self.dists.append(np.linalg.norm(goal_pos - ball_pos))
         self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-        self.ball_traj[step, :] = ball_pos
-        cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]])
+        self.ball_traj[env._steps, :] = ball_pos
+        cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]])
         self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
                                           1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
 
         action_cost = np.sum(np.square(action))
         self.action_costs.append(action_cost)
 
-        if self.check_collision(sim):
-            reward = - 1000
-            return reward, False, True
+        self._is_collided = self.check_collision(env.sim) or env.check_traj_in_joint_limits()
 
-        if step == self.sim_time - 1:
+        if env._steps == env.sim_steps - 1 or self._is_collided:
             t_min_dist = np.argmin(self.dists)
             angle_min_dist = self.cup_angles[t_min_dist]
             cost_angle = (angle_min_dist - np.pi / 2)**2
 
             min_dist = self.dists[t_min_dist]
             dist_final = self.dists_final[-1]
+            min_dist_final = np.min(self.dists_final)
 
-            cost = 0.5 * min_dist + 0.5 * dist_final + 0.01 * cost_angle
-            reward = np.exp(-2 * cost) - 1e-3 * action_cost
-            success = dist_final < 0.05 and ball_in_cup
+            cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight # 0.5 * min_dist +
+            # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
+            # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+            success = dist_final < 0.05 and ball_in_cup and not self._is_collided
+            crash = self._is_collided
         else:
-            reward = - 1e-3 * action_cost
+            reward = - 1e-5 * action_cost  # TODO: increase action_cost weight
             success = False
+            crash = False
 
-        return reward, success, False
+        return reward, success, crash
 
     def check_ball_in_cup(self, sim, ball_collision_id):
         cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
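
Side note on the cup angle computation kept above (not part of the diff): for a MuJoCo quaternion in (w, x, y, z) order, arctan2(2(w·x + y·z), 1 − 2(x² + y²)) is the standard roll angle about the x-axis. A small self-check, assuming that quaternion convention:

import numpy as np

theta = 0.3                                                          # rotation about the x-axis
quat = np.array([np.cos(theta / 2), np.sin(theta / 2), 0.0, 0.0])    # (w, x, y, z)
roll = np.arctan2(2 * (quat[0] * quat[1] + quat[2] * quat[3]),
                  1 - 2 * (quat[1] ** 2 + quat[2] ** 2))
assert np.isclose(roll, theta)                                       # recovers the original angle
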
@@ -105,6 +105,7 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
 
+    # TODO
     def _get_obs(self):
         theta = self.sim.data.qpos.flat[:7]
         return np.concatenate([
@@ -114,6 +115,10 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             [self._steps],
         ])
 
+    # TODO
+    def active_obs(self):
+        pass
+
 
 
 if __name__ == "__main__":
@@ -40,6 +40,16 @@ def _flatten_list(l):
     return [l__ for l_ in l for l__ in l_]
 
 
+class DummyDist:
+    def __init__(self, dim):
+        self.dim = dim
+
+    def sample(self, contexts):
+        contexts = np.atleast_2d(contexts)
+        n_samples = contexts.shape[0]
+        return np.random.normal(size=(n_samples, self.dim)), contexts
+
+
 class AlrMpEnvSampler:
     """
     An asynchronous sampler for non contextual MPWrapper environments. A sampler object can be called with a set of
@@ -100,9 +110,9 @@ class AlrContextualMpEnvSampler:
 
 
 if __name__ == "__main__":
-    env_name = "alr_envs:ALRBallInACupSimpleDMP-v0"
+    env_name = "alr_envs:HoleReacherDetPMP-v1"
     n_cpu = 8
-    dim = 15
+    dim = 25
     n_samples = 10
 
     sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
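
Quick usage sketch for the new DummyDist (added for illustration, not part of the diff): it stands in for a search distribution and returns one Gaussian parameter vector per context:

import numpy as np
from alr_envs.utils.mp_env_async_sampler import DummyDist

dist = DummyDist(dim=25)
contexts = np.array([[0.1], [0.2], [0.3]])   # three dummy contexts
thetas, ctxs = dist.sample(contexts)
print(thetas.shape)                          # (3, 25): one parameter vector per context
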
@@ -2,28 +2,29 @@ import gym
 import numpy as np
 from mp_lib import det_promp
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 from alr_envs.utils.mps.mp_wrapper import MPWrapper
 
 
 class DetPMPWrapper(MPWrapper):
-    def __init__(self, env: MPEnv, num_dof: int, num_basis: int, width: int, duration: int = 1, dt: float = 0.01,
+    def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, width: float, duration: float = 1, dt: float = 0.01,
                  post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1.,
                  zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
         self.duration = duration  # seconds
 
+        dt = env.dt if hasattr(env, "dt") else dt
+        assert dt is not None
+        self.dt = dt
+
         super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
                          width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
 
-        self.dt = dt
-
         action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
         self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
 
     def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
-                      zero_start: bool = False, zero_goal: bool = False):
-        pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=0.01,
+                      off: float = 0.01, zero_start: bool = False, zero_goal: bool = False):
+        pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
                                            zero_start=zero_start, zero_goal=zero_goal)
 
         weights = np.zeros(shape=(num_basis, num_dof))
@@ -32,10 +33,10 @@ class DetPMPWrapper(MPWrapper):
         return pmp
 
     def mp_rollout(self, action):
-        params = np.reshape(action, (self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
+        params = np.reshape(action, newshape=(self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
         self.mp.set_weights(self.duration, params)
         _, des_pos, des_vel, _ = self.mp.compute_trajectory(1 / self.dt, 1.)
         if self.mp.zero_start:
-            des_pos += self.start_pos[None, :]
+            des_pos += self.env.start_pos[None, :]
 
         return des_pos, des_vel
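
For reviewers (illustration only, not part of the diff): the flat action passed to mp_rollout is just the basis-weight matrix, so its length must equal num_basis * num_dof. A sketch of the reshape and scaling, using the HoleReacherDetPMP settings registered above:

import numpy as np

num_basis, num_dof, weights_scale = 5, 5, 0.2   # values from the registration above
action = np.random.randn(num_basis * num_dof)   # flat parameter vector from the optimizer
params = np.reshape(action, (num_basis, num_dof)) * weights_scale
print(params.shape)                             # (5, 5) weight matrix fed to the ProMP
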
@@ -4,13 +4,13 @@ from mp_lib import dmps
 from mp_lib.basis import DMPBasisGenerator
 from mp_lib.phase import ExpDecayPhaseGenerator
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 from alr_envs.utils.mps.mp_wrapper import MPWrapper
 
 
 class DmpWrapper(MPWrapper):
 
-    def __init__(self, env: MPEnv, num_dof: int, num_basis: int,
+    def __init__(self, env: AlrEnv, num_dof: int, num_basis: int,
                  duration: int = 1, alpha_phase: float = 2., dt: float = None,
                  learn_goal: bool = False, post_traj_time: float = 0.,
                  weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3.,
@@ -40,7 +40,7 @@ class DmpWrapper(MPWrapper):
         super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, render_mode,
                          num_basis=num_basis, alpha_phase=alpha_phase, bandwidth_factor=bandwidth_factor)
 
-        action_bounds = np.inf * np.ones((np.prod(self.mp.dmp_weights.shape) + (num_dof if learn_goal else 0)))
+        action_bounds = np.inf * np.ones((np.prod(self.mp.weights.shape) + (num_dof if learn_goal else 0)))
         self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
 
     def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, alpha_phase: float = 2.,
@@ -51,7 +51,7 @@ class DmpWrapper(MPWrapper):
                                            basis_bandwidth_factor=bandwidth_factor)
 
         dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
-                       num_time_steps=int(duration / dt), dt=dt)
+                       duration=duration, dt=dt)
 
         return dmp
 
@@ -66,7 +66,7 @@ class DmpWrapper(MPWrapper):
         goal_pos = self.env.goal_pos
         assert goal_pos is not None
 
-        weight_matrix = np.reshape(params, self.mp.dmp_weights.shape)  # [num_basis, num_dof]
+        weight_matrix = np.reshape(params, self.mp.weights.shape)  # [num_basis, num_dof]
         return goal_pos * self.goal_scale, weight_matrix * self.weights_scale
 
     def mp_rollout(self, action):
@@ -5,7 +5,7 @@ import gym
 import numpy as np
 
 
-class MPEnv(gym.Env):
+class AlrEnv(gym.Env):
 
     @property
     @abstractmethod
@@ -3,13 +3,13 @@ from abc import ABC, abstractmethod
 import gym
 import numpy as np
 
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
 from alr_envs.utils.policies import get_policy_class
 
 
 class MPWrapper(gym.Wrapper, ABC):
 
-    def __init__(self, env: MPEnv, num_dof: int, dt: float, duration: int = 1, post_traj_time: float = 0.,
+    def __init__(self, env: AlrEnv, num_dof: int, dt: float, duration: float = 1, post_traj_time: float = 0.,
                  policy_type: str = None, weights_scale: float = 1., render_mode: str = None, **mp_kwargs):
         super().__init__(env)
 
@@ -53,9 +53,6 @@ class MPWrapper(gym.Wrapper, ABC):
 
         return obs, np.array(rewards), dones, infos
 
-    def configure(self, context):
-        self.env.configure(context)
-
     def reset(self):
         return self.env.reset()[self.env.active_obs]
 
@@ -65,7 +62,7 @@ class MPWrapper(gym.Wrapper, ABC):
 
         if self.post_traj_steps > 0:
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
-            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dimensions))])
+            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])
 
         # self._trajectory = trajectory
         # self._velocity = velocity
@@ -105,7 +102,7 @@ class MPWrapper(gym.Wrapper, ABC):
         raise NotImplementedError()
 
     @abstractmethod
-    def initialize_mp(self, num_dof: int, duration: int, dt: float, **kwargs):
+    def initialize_mp(self, num_dof: int, duration: float, dt: float, **kwargs):
        """
         Create respective instance of MP
         Returns:
example.py (40 changed lines)
@@ -1,6 +1,7 @@
 from collections import defaultdict
 import gym
 import numpy as np
+from alr_envs.utils.mp_env_async_sampler import AlrMpEnvSampler, AlrContextualMpEnvSampler, DummyDist
 
 
 def example_mujoco():
@@ -22,9 +23,9 @@ def example_mujoco():
     obs = env.reset()
 
 
-def example_dmp():
+def example_mp(env_name="alr_envs:HoleReacherDMP-v0"):
     # env = gym.make("alr_envs:ViaPointReacherDMP-v0")
-    env = gym.make("alr_envs:HoleReacherDMP-v0")
+    env = gym.make(env_name)
     rewards = 0
     # env.render(mode=None)
     obs = env.reset()
@@ -79,9 +80,36 @@ def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D',
     print(sample(envs, 16))
 
 
+def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4):
+    n_samples = 10
+
+    sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
+    dim = sampler.env.action_space.spaces[0].shape[0]
+
+    thetas = np.random.randn(n_samples, dim)  # usually form a search distribution
+
+    _, rewards, __, ___ = sampler(thetas)
+
+    print(rewards)
+
+
+def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4):
+    sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu)
+    dim = sampler.env.action_space.spaces[0].shape[0]
+    dist = DummyDist(dim)  # needs a sample function
+
+    n_samples = 10
+    new_samples, new_contexts, obs, new_rewards, done, infos = sampler(dist, n_samples)
+
+    print(new_rewards)
+
+
 if __name__ == '__main__':
     # example_mujoco()
-    # example_dmp()
-    example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
-    # env = gym.make("alr_envs:HoleReacherDMP-v0", context=0.1)
-    # env = gym.make("alr_envs:HoleReacherDMP-v1")
+    # example_dmp("alr_envs:SimpleReacherDMP-v1")
+    # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
+    # example_async_contextual_sampler()
+    # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
+    env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
+    # example_async_sampler(env_name)
+    example_mp(env_name)