merge

2021-05-17 12:49:15 +02:00 · 2021-05-17 12:49:15 +02:00 · b39104a449
commit b39104a449
parent 7f512068c9 6ae195962c
16 changed files with 671 additions and 604 deletions
--- a/alr_envs/init.py
+++ b/alr_envs/init.py
@ -1,7 +1,8 @@
 from gym.envs.registration import register

 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
-# from alr_envs.utils.wrapper.dmp_wrapper import DmpWrapper
+
+# from alr_envs.utils.mps.dmp_wrapper import DmpWrapper

 # Mujoco

@ -71,6 +72,17 @@ register(
    }
 )

+## Balancing Reacher
+
+register(
+    id='Balancing-v0',
+    entry_point='alr_envs.mujoco:BalancingEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+    }
+)
+
 register(
    id='ALRBallInACupSimple-v0',
    entry_point='alr_envs.mujoco:ALRBallInACupEnv',
@ -101,15 +113,7 @@ register(

 # Classic control

-register(
-    id='Balancing-v0',
-    entry_point='alr_envs.mujoco:BalancingEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-    }
-)
-
+## Simple Reacher
 register(
    id='SimpleReacher-v0',
    entry_point='alr_envs.classic_control:SimpleReacherEnv',
@ -129,25 +133,6 @@ register(
    }
 )

-register(
-    id='EpisodicSimpleReacher-v0',
-    entry_point='alr_envs.classic_control:EpisodicSimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 2,
-    }
-)
-
-register(
-    id='EpisodicSimpleReacher-v1',
-    entry_point='alr_envs.classic_control:EpisodicSimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 2,
-        "random_start": False
-    }
-)
-
 register(
    id='LongSimpleReacher-v0',
    entry_point='alr_envs.classic_control:SimpleReacherEnv',
@ -157,6 +142,18 @@ register(
    }
 )

+register(
+    id='LongSimpleReacher-v1',
+    entry_point='alr_envs.classic_control:SimpleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False
+    }
+)
+
+## Viapoint Reacher
+
 register(
    id='ViaPointReacher-v0',
    entry_point='alr_envs.classic_control.viapoint_reacher:ViaPointReacher',
@ -168,27 +165,45 @@ register(
    }
 )

+## Hole Reacher
 register(
    id='HoleReacher-v0',
-    entry_point='alr_envs.classic_control.hole_reacher:HoleReacher',
+    entry_point='alr_envs.classic_control.hole_reacher:HoleReacherEnv',
    max_episode_steps=200,
    kwargs={
        "n_links": 5,
        "allow_self_collision": False,
        "allow_wall_collision": False,
-        "hole_width": 0.25,
+        "hole_width": None,
        "hole_depth": 1,
-        "hole_x": 2,
+        "hole_x": None,
+        "collision_penalty": 100,
+    }
+)
+
+register(
+    id='HoleReacher-v1',
+    entry_point='alr_envs.classic_control.hole_reacher:HoleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False,
+        "allow_self_collision": False,
+        "allow_wall_collision": False,
+        "hole_width": None,
+        "hole_depth": 1,
+        "hole_x": None,
        "collision_penalty": 100,
    }
 )

 register(
    id='HoleReacher-v2',
-    entry_point='alr_envs.classic_control.hole_reacher_v2:HoleReacher',
+    entry_point='alr_envs.classic_control.hole_reacher:HoleReacherEnv',
    max_episode_steps=200,
    kwargs={
        "n_links": 5,
+        "random_start": False,
        "allow_self_collision": False,
        "allow_wall_collision": False,
        "hole_width": 0.25,
@ -199,38 +214,24 @@ register(
 )

 # MP environments
-
-register(
-    id='SimpleReacherDMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env',
-    # max_episode_steps=1,
-    kwargs={
-        "name": "alr_envs:EpisodicSimpleReacher-v0",
-        "num_dof": 2,
-        "num_basis": 5,
-        "duration": 2,
-        "alpha_phase": 2,
-        "learn_goal": True,
-        "policy_type": "velocity",
-        "weights_scale": 50,
-    }
-)
-
-register(
-    id='SimpleReacherDMP-v1',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env',
-    # max_episode_steps=1,
-    kwargs={
-        "name": "alr_envs:EpisodicSimpleReacher-v1",
-        "num_dof": 2,
-        "num_basis": 5,
-        "duration": 2,
-        "alpha_phase": 2,
-        "learn_goal": True,
-        "policy_type": "velocity",
-        "weights_scale": 50,
-    }
-)
+reacher_envs = ["SimpleReacher-v0", "SimpleReacher-v1", "LongSimpleReacher-v0", "LongSimpleReacher-v1"]
+for env in reacher_envs:
+    name = env.split("-")
+    register(
+        id=f'{name[0]}DMP-{name[1]}',
+        entry_point='alr_envs.utils.make_env_helpers:make_dmp_env',
+        # max_episode_steps=1,
+        kwargs={
+            "name": f"alr_envs:{env}",
+            "num_dof": 2 if "long" not in env.lower() else 5 ,
+            "num_basis": 5,
+            "duration": 2,
+            "alpha_phase": 2,
+            "learn_goal": True,
+            "policy_type": "velocity",
+            "weights_scale": 50,
+        }
+    )

 register(
    id='ViaPointReacherDMP-v0',
@ -266,6 +267,24 @@ register(
    }
 )

+register(
+    id='HoleReacherDMP-v1',
+    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env',
+    # max_episode_steps=1,
+    kwargs={
+        "name": "alr_envs:HoleReacher-v1",
+        "num_dof": 5,
+        "num_basis": 5,
+        "duration": 2,
+        "learn_goal": True,
+        "alpha_phase": 2,
+        "bandwidth_factor": 2,
+        "policy_type": "velocity",
+        "weights_scale": 50,
+        "goal_scale": 0.1
+    }
+)
+
 register(
    id='HoleReacherDMP-v2',
    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env',
--- a/alr_envs/classic_control/init.py
+++ b/alr_envs/classic_control/init.py
@ -1,4 +1,4 @@
 from alr_envs.classic_control.simple_reacher import SimpleReacherEnv
 from alr_envs.classic_control.episodic_simple_reacher import EpisodicSimpleReacherEnv
 from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
-from alr_envs.classic_control.hole_reacher import HoleReacher
+from alr_envs.classic_control.hole_reacher import HoleReacherEnv
--- a/alr_envs/classic_control/episodic_simple_reacher.py
+++ b/alr_envs/classic_control/episodic_simple_reacher.py
@ -35,7 +35,7 @@ class EpisodicSimpleReacherEnv(SimpleReacherEnv):

    def _get_obs(self):
        if self.random_start:
-            theta = self._joint_angle
+            theta = self._joint_angles
            return np.hstack([
                np.cos(theta),
                np.sin(theta),
--- a/alr_envs/classic_control/hole_reacher.py
+++ b/alr_envs/classic_control/hole_reacher.py
@ -1,27 +1,36 @@
+from typing import Union
+
 import gym
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+from gym.utils import seeding
 from matplotlib import patches
+
 from alr_envs.classic_control.utils import check_self_collision
+from alr_envs.utils.mps.mp_environments import MPEnv


-class HoleReacher(gym.Env):
+class HoleReacherEnv(MPEnv):

-    def __init__(self, n_links, hole_x, hole_width, hole_depth, allow_self_collision=False,
-                 allow_wall_collision=False, collision_penalty=1000):
+    def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
+                 hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
+                 allow_wall_collision: bool = False, collision_penalty: bool = 1000):

        self.n_links = n_links
        self.link_lengths = np.ones((n_links, 1))

-        # task
-        self.hole_x = hole_x  # x-position of center of hole
-        self.hole_width = hole_width  # width of hole
-        self.hole_depth = hole_depth  # depth of hole
+        self.random_start = random_start

-        self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth])
-        self.top_center_of_hole = np.hstack([hole_x, 0])
-        self.left_wall_edge = np.hstack([hole_x - self.hole_width / 2, 0])
-        self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0])
+        # provided initial parameters
+        self._hole_x = hole_x  # x-position of center of hole
+        self._hole_width = hole_width  # width of hole
+        self._hole_depth = hole_depth  # depth of hole
+
+        # temp container for current env state
+        self._tmp_hole_x = None
+        self._tmp_hole_width = None
+        self._tmp_hole_depth = None
+        self._goal = None  # x-y coordinates for reaching the center at the bottom of the hole

        # collision
        self.allow_self_collision = allow_self_collision
@ -32,95 +41,77 @@ class HoleReacher(gym.Env):
        self._joints = None
        self._joint_angles = None
        self._angle_velocity = None
-        self.start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)])
-        self.start_vel = np.zeros(self.n_links)
+        self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)])
+        self._start_vel = np.zeros(self.n_links)

        self.dt = 0.01
-        # self.time_limit = 2

        action_bound = np.pi * np.ones((self.n_links,))
        state_bound = np.hstack([
            [np.pi] * self.n_links,  # cos
            [np.pi] * self.n_links,  # sin
            [np.inf] * self.n_links,  # velocity
+            [np.inf],  # hole width
+            [np.inf],  # hole depth
            [np.inf] * 2,  # x-y coordinates of target distance
            [np.inf]  # env steps, because reward start after n steps TODO: Maybe
        ])
        self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
        self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)

+        # containers for plotting
+        self.metadata = {'render.modes': ["human", "partial"]}
        self.fig = None
-        rect_1 = patches.Rectangle((-self.n_links, -1),
-                                   self.n_links + self.hole_x - self.hole_width / 2, 1,
-                                   fill=True, edgecolor='k', facecolor='k')
-        rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1),
-                                   self.n_links - self.hole_x + self.hole_width / 2, 1,
-                                   fill=True, edgecolor='k', facecolor='k')
-        rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width,
-                                   1 - self.hole_depth,
-                                   fill=True, edgecolor='k', facecolor='k')

-        self.patches = [rect_1, rect_2, rect_3]
+        self._steps = 0
+        self.seed()

-    @property
-    def init_qpos(self):
-        return self.start_pos
+    def step(self, action: np.ndarray):
+        """
+        A single step with an action in joint velocity space.
+
+        Args:
+            action: desired joint velocities, one entry per link
+
+        Returns: observation, reward, done flag (True when the arm collided) and info dict
+        """

-    @property
-    def end_effector(self):
-        return self._joints[self.n_links].T
+        # The acceleration has to be computed before the stored velocity is overwritten,
+        # otherwise the difference is always zero and the acceleration penalty vanishes.
+        acc = (action - self._angle_velocity) / self.dt
+        self._angle_velocity = action
+        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
+        self._update_joints()

-    def configure(self, context):
-        pass
+        reward, info = self._get_reward(acc)
+
+        info.update({"is_collided": self._is_collided})
+
+        # the step counter is incremented after the reward, which checks the pre-increment value
+        self._steps += 1
+        done = self._is_collided
+
+        return self._get_obs().copy(), reward, done, info

    def reset(self):
-        self._joint_angles = self.start_pos
-        self._angle_velocity = self.start_vel
+        if self.random_start:
+            # Maybe change more than first seed
+            first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
+            self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)])
+            self._start_pos = self._joint_angles.copy()
+        else:
+            self._joint_angles = self._start_pos
+
+        self._generate_hole()
+        self._set_patches()
+
+        self._angle_velocity = self._start_vel
        self._joints = np.zeros((self.n_links + 1, 2))
        self._update_joints()
        self._steps = 0

        return self._get_obs().copy()

-    def step(self, action: np.ndarray):
-        """
-        a single step with an action in joint velocity space
-        """
-        vel = action  # + 0.05 * np.random.randn(self.n_links)
-        acc = (vel - self._angle_velocity) / self.dt
-        self._angle_velocity = vel
-        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
-
-        self._update_joints()
-
-        # rew = self._reward()
-
-        # compute reward directly in step function
-
-        success = False
-        reward = 0
-        if not self._is_collided:
-            if self._steps == 199:
-                dist = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
-                reward = - dist ** 2
-                success = dist < 0.005
-        else:
-            dist = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
-            # if self.collision_penalty != 0:
-            #     reward = -self.collision_penalty
-            # else:
-            reward = - dist ** 2 - self.collision_penalty
-
-        reward -= 5e-8 * np.sum(acc ** 2)
-
-        info = {"is_collided": self._is_collided, "is_success": success}
-
-        self._steps += 1
-
-        # done = self._steps * self.dt > self.time_limit or self._is_collided
-        done = self._is_collided
-
-        return self._get_obs().copy(), reward, done, info
+    def _generate_hole(self):
+        """
+        Set the hole parameters for the current episode and derive the goal from them.
+        A parameter that was passed to the constructor is copied, a parameter that is None is sampled.
+        """
+        self._tmp_hole_x = self.np_random.uniform(0.5, 3.5, 1) if self._hole_x is None else np.copy(self._hole_x)
+        # low has to be smaller than high, numpy leaves the result undefined otherwise
+        self._tmp_hole_width = self.np_random.uniform(0.1, 0.5, 1) if self._hole_width is None else np.copy(
+            self._hole_width)
+        # TODO we do not want this right now.
+        self._tmp_hole_depth = self.np_random.uniform(1, 1, 1) if self._hole_depth is None else np.copy(
+            self._hole_depth)
+        # the goal is the center of the bottom of the hole
+        self._goal = np.hstack([self._tmp_hole_x, -self._tmp_hole_depth])

    def _update_joints(self):
        """
@ -128,7 +119,7 @@ class HoleReacher(gym.Env):
        Returns:

        """
-        line_points_in_taskspace = self.get_forward_kinematics(num_points_per_link=20)
+        line_points_in_taskspace = self._get_forward_kinematics(num_points_per_link=20)

        self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0]
        self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]
@ -142,48 +133,65 @@ class HoleReacher(gym.Env):
                self_collision = True

        if not self.allow_wall_collision:
-            wall_collision = self.check_wall_collision(line_points_in_taskspace)
+            wall_collision = self._check_wall_collision(line_points_in_taskspace)

        self._is_collided = self_collision or wall_collision

+    def _get_reward(self, acc: np.ndarray):
+        """
+        Compute the reward for the current step.
+
+        Args:
+            acc: joint accelerations of the current step, used for a small quadratic penalty
+
+        Returns: reward and an info dict with the key "is_success"
+        """
+        success = False
+        # Start from zero: without a collision only the acceleration penalty applies before the last step.
+        # Starting from -inf would make every collision-free step return -inf.
+        reward = 0
+        if not self._is_collided:
+            dist = 0
+            # return reward only in last time step
+            if self._steps == 199:
+                dist = np.linalg.norm(self.end_effector - self._goal)
+                success = dist < 0.005
+        else:
+            # Episode terminates when colliding, hence return reward
+            dist = np.linalg.norm(self.end_effector - self._goal)
+            reward = -self.collision_penalty
+
+        reward -= dist ** 2
+        reward -= 5e-8 * np.sum(acc ** 2)
+        info = {"is_success": success}
+
+        return reward, info
+
    def _get_obs(self):
        theta = self._joint_angles
        return np.hstack([
            np.cos(theta),
            np.sin(theta),
            self._angle_velocity,
-            self.end_effector - self.bottom_center_of_hole,
+            self._tmp_hole_width,
+            self._tmp_hole_depth,
+            self.end_effector - self._goal,
            self._steps
        ])

-    def get_forward_kinematics(self, num_points_per_link=1):
+    def _get_forward_kinematics(self, num_points_per_link=1):
        theta = self._joint_angles[:, None]

-        if num_points_per_link > 1:
-            intermediate_points = np.linspace(0, 1, num_points_per_link)
-        else:
-            intermediate_points = 1
-
+        intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1
        accumulated_theta = np.cumsum(theta, axis=0)
-
-        endeffector = np.zeros(shape=(self.n_links, num_points_per_link, 2))
+        end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2))

        x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points
        y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points

-        endeffector[0, :, 0] = x[0, :]
-        endeffector[0, :, 1] = y[0, :]
+        end_effector[0, :, 0] = x[0, :]
+        end_effector[0, :, 1] = y[0, :]

        for i in range(1, self.n_links):
-            endeffector[i, :, 0] = x[i, :] + endeffector[i - 1, -1, 0]
-            endeffector[i, :, 1] = y[i, :] + endeffector[i - 1, -1, 1]
+            end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0]
+            end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1]

-        return np.squeeze(endeffector + self._joints[0, :])
+        return np.squeeze(end_effector + self._joints[0, :])

-    def check_wall_collision(self, line_points):
+    def _check_wall_collision(self, line_points):

        # all points that are before the hole in x
-        r, c = np.where(line_points[:, :, 0] < (self.hole_x - self.hole_width / 2))
+        r, c = np.where(line_points[:, :, 0] < (self._tmp_hole_x - self._tmp_hole_width / 2))

        # check if any of those points are below surface
        nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0)
@ -192,7 +200,7 @@ class HoleReacher(gym.Env):
            return True

        # all points that are after the hole in x
-        r, c = np.where(line_points[:, :, 0] > (self.hole_x + self.hole_width / 2))
+        r, c = np.where(line_points[:, :, 0] > (self._tmp_hole_x + self._tmp_hole_width / 2))

        # check if any of those points are below surface
        nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0)
@ -201,11 +209,11 @@ class HoleReacher(gym.Env):
            return True

        # all points that are above the hole
-        r, c = np.where((line_points[:, :, 0] > (self.hole_x - self.hole_width / 2)) & (
-                line_points[:, :, 0] < (self.hole_x + self.hole_width / 2)))
+        r, c = np.where((line_points[:, :, 0] > (self._tmp_hole_x - self._tmp_hole_width / 2)) & (
+                line_points[:, :, 0] < (self._tmp_hole_x + self._tmp_hole_width / 2)))

        # check if any of those points are below surface
-        nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self.hole_depth)
+        nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_hole_depth)

        if nr_line_points_below_surface_in_hole > 0:
            return True
@ -214,64 +222,85 @@ class HoleReacher(gym.Env):

    def render(self, mode='human'):
        if self.fig is None:
+            # Create base figure once on the beginning. Afterwards only update
+            plt.ion()
            self.fig = plt.figure()
-            # plt.ion()
-            # plt.pause(0.01)
-        else:
-            plt.figure(self.fig.number)
+            ax = self.fig.add_subplot(1, 1, 1)
+
+            # limits
+            lim = np.sum(self.link_lengths) + 0.5
+            ax.set_xlim([-lim, lim])
+            ax.set_ylim([-1.1, lim])
+
+            self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
+            self._set_patches()
+            self.fig.show()
+
+        self.fig.gca().set_title(
+            f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")

        if mode == "human":
-            plt.cla()
-            plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}")

-            # Arm
-            plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
+            # arm
+            self.line.set_data(self._joints[:, 0], self._joints[:, 1])

-            # Add the patch to the Axes
-            [plt.gca().add_patch(rect) for rect in self.patches]
-
-            lim = np.sum(self.link_lengths) + 0.5
-            plt.xlim([-lim, lim])
-            plt.ylim([-1.1, lim])
-            # plt.draw()
-            plt.pause(1e-4)  # pushes window to foreground, which is annoying.
-            # self.fig.canvas.flush_events()
+            self.fig.canvas.draw()
+            self.fig.canvas.flush_events()

        elif mode == "partial":
-            if self._steps == 1:
-                # fig, ax = plt.subplots()
-                # Add the patch to the Axes
-                try:
-                    [plt.gca().add_patch(rect) for rect in self.patches]
-                except RuntimeError:
-                    pass
-                # plt.pause(0.01)
-
            if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
                # Arm
-                plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', alpha=self._steps / 200)
-                # ax.plot(line_points_in_taskspace[:, 0, 0],
-                #         line_points_in_taskspace[:, 0, 1],
-                #         line_points_in_taskspace[:, -1, 0],
-                #         line_points_in_taskspace[:, -1, 1], marker='o', color='k', alpha=t / 200)
+                plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
+                         alpha=self._steps / 200)

-                lim = np.sum(self.link_lengths) + 0.5
-                plt.xlim([-lim, lim])
-                plt.ylim([-1.1, lim])
-                plt.pause(0.01)
+    def _set_patches(self):
+        """
+        Replace the wall and hole patches of the figure with patches for the current hole parameters.
+        Does nothing as long as no figure has been created by render.
+        """
+        if self.fig is not None:
+            ax = self.fig.gca()
+            # Axes.patches cannot be reassigned in recent matplotlib versions, hence remove the old patches
+            # one by one. Iterate over a copy because removing changes the underlying list.
+            for patch in list(ax.patches):
+                patch.remove()
+            left_block = patches.Rectangle((-self.n_links, -self._tmp_hole_depth),
+                                           self.n_links + self._tmp_hole_x - self._tmp_hole_width / 2,
+                                           self._tmp_hole_depth,
+                                           fill=True, edgecolor='k', facecolor='k')
+            right_block = patches.Rectangle((self._tmp_hole_x + self._tmp_hole_width / 2, -self._tmp_hole_depth),
+                                            self.n_links - self._tmp_hole_x + self._tmp_hole_width / 2,
+                                            self._tmp_hole_depth,
+                                            fill=True, edgecolor='k', facecolor='k')
+            # NOTE(review): the height is zero for hole_depth == 1; confirm the intended geometry for other depths
+            hole_floor = patches.Rectangle((self._tmp_hole_x - self._tmp_hole_width / 2, -self._tmp_hole_depth),
+                                           self._tmp_hole_width,
+                                           1 - self._tmp_hole_depth,
+                                           fill=True, edgecolor='k', facecolor='k')

-        elif mode == "final":
-            if self._steps == 199 or self._is_collided:
-                # fig, ax = plt.subplots()
+            # Add the patch to the Axes
+            ax.add_patch(left_block)
+            ax.add_patch(right_block)
+            ax.add_patch(hole_floor)

-                # Add the patch to the Axes
-                [plt.gca().add_patch(rect) for rect in self.patches]
+    @property
+    def active_obs(self):
+        """
+        Boolean mask over the entries of the observation built in `_get_obs`, listed in the same order.
+        Joint entries follow `random_start`, the hole entries are True when the value is sampled (None provided).
+        """
+        # presumably consumed by the MP wrappers to select observation entries - confirm against MPEnv
+        return np.hstack([
+            [self.random_start] * self.n_links,  # cos
+            [self.random_start] * self.n_links,  # sin
+            [self.random_start] * self.n_links,  # velocity
+            [self._hole_width is None],  # hole width
+            [self._hole_depth is None],  # hole depth
+            [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])

-                plt.xlim(-self.n_links, self.n_links), plt.ylim(-1, self.n_links)
-                # Arm
-                plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
+    @property
+    def start_pos(self) -> Union[float, int, np.ndarray]:
+        return self._start_pos

-                plt.pause(0.01)
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray]:
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    @property
+    def end_effector(self):
+        return self._joints[self.n_links].T

    def close(self):
        if self.fig is not None:
@ -281,22 +310,20 @@ class HoleReacher(gym.Env):
 if __name__ == '__main__':
    nl = 5
    render_mode = "human"  # "human" or "partial" or "final"
-    env = HoleReacher(n_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15,
-                      hole_depth=1, hole_x=1)
-    env.reset()
-    # env.render(mode=render_mode)
+    env = HoleReacherEnv(n_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=None,
+                         hole_depth=1, hole_x=None)
+    obs = env.reset()

    for i in range(200):
        # objective.load_result("/tmp/cma")
        # test with random actions
        ac = 2 * env.action_space.sample()
-        # ac[0] += np.pi/2
        obs, rew, d, info = env.step(ac)
        env.render(mode=render_mode)

        print(rew)

        if d:
-            break
+            env.reset()

    env.close()
--- a/alr_envs/classic_control/simple_reacher.py
+++ b/alr_envs/classic_control/simple_reacher.py
@ -1,42 +1,41 @@
-import gym
+from typing import Iterable, Union
+
 import matplotlib.pyplot as plt
 import numpy as np
 from gym import spaces
 from gym.utils import seeding

-from alr_envs.utils.utils import angle_normalize
+from alr_envs.utils.mps.mp_environments import MPEnv


-# if os.environ.get("DISPLAY", None):
-#     mpl.use('Qt5Agg')
-
-
-class SimpleReacherEnv(gym.Env):
+class SimpleReacherEnv(MPEnv):
    """
    Simple Reaching Task without any physics simulation.
    Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
    towards the end of the trajectory.
    """

-    def __init__(self, n_links, random_start=True):
+    def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True):
        super().__init__()
        self.link_lengths = np.ones(n_links)
        self.n_links = n_links
-        self.dt = 0.01
+        self.dt = 0.1

        self.random_start = random_start

-        self._goal = None
-
        self._joints = None
-        self._joint_angle = None
+        self._joint_angles = None
        self._angle_velocity = None
-        self._start_pos = None
+        self._start_pos = np.zeros(self.n_links)
+        self._start_vel = np.zeros(self.n_links)

-        self.max_torque = 1  # 10
+        self._target = target  # provided target value
+        self._goal = None  # updated goal value, does not change when target != None
+
+        self.max_torque = 1
        self.steps_before_reward = 199

-        action_bound = np.ones((self.n_links,))
+        action_bound = np.ones((self.n_links,)) * self.max_torque
        state_bound = np.hstack([
            [np.pi] * self.n_links,  # cos
            [np.pi] * self.n_links,  # sin
@ -47,45 +46,76 @@ class SimpleReacherEnv(gym.Env):
        self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
        self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)

-        self.fig = None
+        # containers for plotting
        self.metadata = {'render.modes': ["human"]}
+        self.fig = None

        self._steps = 0
        self.seed()

    def step(self, action: np.ndarray):
+        """
+        A single step with action in torque space
+        """

        # action = self._add_action_noise(action)
-        # action = np.clip(action, -self.max_torque, self.max_torque)
-        vel = action
+        ac = np.clip(action, -self.max_torque, self.max_torque)

-        # self._angle_velocity = self._angle_velocity + self.dt * action
-        # self._joint_angle = angle_normalize(self._joint_angle + self.dt * self._angle_velocity)
-        self._angle_velocity = vel
-        self._joint_angle = self._joint_angle + self.dt * self._angle_velocity
+        self._angle_velocity = self._angle_velocity + self.dt * ac
+        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
        self._update_joints()
-        self._steps += 1

        reward, info = self._get_reward(action)

-        # done = np.abs(self.end_effector - self._goal_pos) < 0.1
+        self._steps += 1
        done = False

        return self._get_obs().copy(), reward, done, info

-    def _add_action_noise(self, action: np.ndarray):
-        """
-        add unobserved Gaussian Noise N(0,0.01) to the actions
-        Args:
-            action:
+    def reset(self):

-        Returns: actions with noise
+        # TODO: maybe do initialisation more random?
+        # Sample only orientation of first link, i.e. the arm is always straight.
+        if self.random_start:
+            self._joint_angles = np.hstack([[self.np_random.uniform(-np.pi, np.pi)], np.zeros(self.n_links - 1)])
+            self._start_pos = self._joint_angles.copy()
+        else:
+            self._joint_angles = self._start_pos
+
+        self._generate_goal()
+
+        self._angle_velocity = self._start_vel
+        self._joints = np.zeros((self.n_links + 1, 2))
+        self._update_joints()
+        self._steps = 0
+
+        return self._get_obs().copy()
+
+    def _update_joints(self):
+        """
+        update joints to get new end-effector position. The other links are only required for rendering.
+        Returns:

        """
-        return self.np_random.normal(0, 0.1, *action.shape) + action
+        angles = np.cumsum(self._joint_angles)
+        x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
+        self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0)
+
+    def _get_reward(self, action: np.ndarray):
+        diff = self.end_effector - self._goal
+        reward_dist = 0
+
+        if self._steps >= self.steps_before_reward:
+            reward_dist -= np.linalg.norm(diff)
+            # reward_dist = np.exp(-0.1 * diff ** 2).mean()
+            # reward_dist = - (diff ** 2).mean()
+
+        reward_ctrl = (action ** 2).sum()
+        reward = reward_dist - reward_ctrl
+        return reward, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)

    def _get_obs(self):
-        theta = self._joint_angle
+        theta = self._joint_angles
        return np.hstack([
            np.cos(theta),
            np.sin(theta),
@ -94,91 +124,108 @@ class SimpleReacherEnv(gym.Env):
            self._steps
        ])

-    def _update_joints(self):
-        """
-        update joints to get new end-effector position. The other links are only required for rendering.
-        Returns:
+    def _generate_goal(self):

-        """
-        angles = np.cumsum(self._joint_angle)
-        x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
-        self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0)
+        if self._target is None:
+            # center = self._joints[0]
+            # # Sample uniformly in circle with radius R around center of reacher.
+            # R = np.sum(self.link_lengths)
+            # r = R * np.sqrt(self.np_random.uniform())
+            # theta = self.np_random.uniform() * 2 * np.pi
+            # goal = center + r * np.stack([np.cos(theta), np.sin(theta)])

-    def _get_reward(self, action: np.ndarray):
-        diff = self.end_effector - self._goal
-        reward_dist = 0
-
-        # TODO: Is this the best option
-        if self._steps >= self.steps_before_reward:
-            reward_dist -= np.linalg.norm(diff)
-            # reward_dist = np.exp(-0.1 * diff ** 2).mean()
-            # reward_dist = - (diff ** 2).mean()
-
-        reward_ctrl = 1e-5 * (action ** 2).sum()
-        reward = reward_dist - reward_ctrl
-        return reward, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
-
-    def reset(self):
-
-        # TODO: maybe do initialisation more random?
-        # Sample only orientation of first link, i.e. the arm is always straight.
-        if self.random_start:
-            self._joint_angle = np.hstack([[self.np_random.uniform(-np.pi, np.pi)], np.zeros(self.n_links - 1)])
+            total_length = np.sum(self.link_lengths)
+            goal = np.array([total_length, total_length])
+            while np.linalg.norm(goal) >= total_length:
+                goal = self.np_random.uniform(low=-total_length, high=total_length, size=2)
        else:
-            self._joint_angle = np.zeros(self.n_links)
+            goal = np.copy(self._target)

-        self._start_pos = self._joint_angle
-        self._angle_velocity = np.zeros(self.n_links)
-        self._joints = np.zeros((self.n_links + 1, 2))
-        self._update_joints()
-        self._steps = 0
+        self._goal = goal

-        self._goal = self._get_random_goal()
-        return self._get_obs().copy()
+    def render(self, mode='human'):  # pragma: no cover
+        if self.fig is None:
+            # Create base figure once on the beginning. Afterwards only update
+            plt.ion()
+            self.fig = plt.figure()
+            ax = self.fig.add_subplot(1, 1, 1)

-    def _get_random_goal(self):
-        center = self._joints[0]
+            # limits
+            lim = np.sum(self.link_lengths) + 0.5
+            ax.set_xlim([-lim, lim])
+            ax.set_ylim([-lim, lim])

-        # Sample uniformly in circle with radius R around center of reacher.
-        R = np.sum(self.link_lengths)
-        r = R * np.sqrt(self.np_random.uniform())
-        theta = np.pi/2 + 0.001 * np.random.randn()  # self.np_random.uniform() * 2 * np.pi
-        return center + r * np.stack([np.cos(theta), np.sin(theta)])
+            self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
+            goal_pos = self._goal.T
+            self.goal_point, = ax.plot(goal_pos[0], goal_pos[1], 'gx')
+            self.goal_dist, = ax.plot([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]], 'g--')
+
+            self.fig.show()
+
+        self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
+
+        # goal
+        goal_pos = self._goal.T
+        if self._steps == 1:
+            self.goal_point.set_data(goal_pos[0], goal_pos[1])
+
+        # arm
+        self.line.set_data(self._joints[:, 0], self._joints[:, 1])
+
+        # distance between end effector and goal
+        self.goal_dist.set_data([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]])
+
+        self.fig.canvas.draw()
+        self.fig.canvas.flush_events()
+
+    @property
+    def active_obs(self):
+        return np.hstack([
+            [self.random_start] * self.n_links,  # cos
+            [self.random_start] * self.n_links,  # sin
+            [self.random_start] * self.n_links,  # velocity
+            [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
+    @property
+    def start_pos(self):
+        return self._start_pos
+
+    @property
+    def goal_pos(self):
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

-    def render(self, mode='human'):  # pragma: no cover
-        if self.fig is None:
-            self.fig = plt.figure()
-            plt.ion()
-            plt.show()
-        else:
-            plt.figure(self.fig.number)
-
-        plt.cla()
-        plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
-
-        # Arm
-        plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
-
-        # goal
-        goal_pos = self._goal.T
-        plt.plot(goal_pos[0], goal_pos[1], 'gx')
-        # distance between end effector and goal
-        plt.plot([self.end_effector[0], goal_pos[0]], [self.end_effector[1], goal_pos[1]], 'g--')
-
-        lim = np.sum(self.link_lengths) + 0.5
-        plt.xlim([-lim, lim])
-        plt.ylim([-lim, lim])
-        # plt.draw()
-        plt.pause(1e-4)  # pushes window to foreground, which is annoying.
-        # self.fig.canvas.flush_events()
-
    def close(self):
-        del self.fig
+        # closing has to be idempotent and must keep `self.fig` defined, `render()` checks it against None
+        if self.fig is not None:
+            plt.close(self.fig)
+            self.fig = None

    @property
    def end_effector(self):
        return self._joints[self.n_links].T
+
+
+if __name__ == '__main__':
+    nl = 5
+    render_mode = "human"  # "human" or "partial" or "final"
+    env = SimpleReacherEnv(n_links=nl)
+    obs = env.reset()
+    print("First", obs)
+
+    for i in range(2000):
+        # objective.load_result("/tmp/cma")
+        # test with random actions
+        ac = 2 * env.action_space.sample()
+        # ac = np.ones(env.action_space.shape)
+        obs, rew, d, info = env.step(ac)
+        env.render(mode=render_mode)
+
+        print(obs[env.active_obs].shape)
+
+        if d or i % 200 == 0:
+            env.reset()
+
+    env.close()
--- a/alr_envs/classic_control/viapoint_reacher.py
+++ b/alr_envs/classic_control/viapoint_reacher.py
@ -1,19 +1,31 @@
+from typing import Iterable, Union
+
 import gym
 import matplotlib.pyplot as plt
 import numpy as np
+from gym.utils import seeding

 from alr_envs.classic_control.utils import check_self_collision
+from alr_envs.utils.mps.mp_environments import MPEnv


-class ViaPointReacher(gym.Env):
+class ViaPointReacher(MPEnv):

-    def __init__(self, n_links, allow_self_collision=False, collision_penalty=1000):
-        self.num_links = n_links
+    def __init__(self, n_links, random_start: bool = True, via_target: Union[None, Iterable] = None,
+                 target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):
+
+        self.n_links = n_links
        self.link_lengths = np.ones((n_links, 1))

-        # task
-        self.via_point = np.ones(2)
-        self.goal_point = np.array((n_links, 0))
+        self.random_start = random_start
+
+        # provided initial parameters
+        self._target = target  # provided target value
+        self._via_target = via_target  # provided via point target value
+
+        # temp container for current env state
+        self._via_point = np.ones(2)
+        self._goal = np.array((n_links, 0))

        # collision
        self.allow_self_collision = allow_self_collision
@ -23,78 +35,74 @@ class ViaPointReacher(gym.Env):
        self._joints = None
        self._joint_angles = None
        self._angle_velocity = None
-        self.start_pos = np.hstack([[np.pi / 2], np.zeros(self.num_links - 1)])
-        self.start_vel = np.zeros(self.num_links)
+        self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)])
+        self._start_vel = np.zeros(self.n_links)
        self.weight_matrix_scale = 1

-        self._steps = 0
        self.dt = 0.01
-        # self.time_limit = 2

-        action_bound = np.pi * np.ones((self.num_links,))
+        action_bound = np.pi * np.ones((self.n_links,))
        state_bound = np.hstack([
-            [np.pi] * self.num_links,  # cos
-            [np.pi] * self.num_links,  # sin
-            [np.inf] * self.num_links,  # velocity
+            [np.pi] * self.n_links,  # cos
+            [np.pi] * self.n_links,  # sin
+            [np.inf] * self.n_links,  # velocity
+            [np.inf] * 2,  # x-y coordinates of via point distance
            [np.inf] * 2,  # x-y coordinates of target distance
-            [np.inf]  # env steps, because reward start after n steps TODO: Maybe
+            [np.inf]  # env steps, because reward start after n steps
        ])
        self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
        self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)

+        # containers for plotting
+        self.metadata = {'render.modes': ["human", "partial"]}
        self.fig = None

-    @property
-    def end_effector(self):
-        return self._joints[self.num_links].T
-
-    def configure(self, context):
-        pass
-
-    def reset(self):
-        self._joint_angles = self.start_pos
-        self._angle_velocity = self.start_vel
-        self._joints = np.zeros((self.num_links + 1, 2))
-        self._update_joints()
        self._steps = 0
-
-        return self._get_obs().copy()
+        self.seed()

    def step(self, action: np.ndarray):
        """
        a single step with an action in joint velocity space
        """
        vel = action
+        # acceleration has to be computed before the stored velocity is overwritten, otherwise it is always zero
        acc = (vel - self._angle_velocity) / self.dt
        self._angle_velocity = vel
        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
-
        self._update_joints()

-        dist_reward = 0
-        if not self._is_collided:
-            if self._steps == 100:
-                dist_reward = np.linalg.norm(self.end_effector - self.via_point)
-            elif self._steps == 199:
-                dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
+        reward, info = self._get_reward(acc)

-        # TODO: Do we need that?
-        reward = - dist_reward ** 2
-
-        reward -= 5e-8 * np.sum(acc**2)
-
-        if self._is_collided:
-            reward -= self.collision_penalty
-
-        info = {"is_collided": self._is_collided}
+        info.update({"is_collided": self._is_collided})

        self._steps += 1
-
-        # done = self._steps * self.dt > self.time_limit or self._is_collided
        done = self._is_collided

        return self._get_obs().copy(), reward, done, info

+    def reset(self):
+        """
+        Reset joint angles, velocities and the step counter, call `_generate_goal()` and return the first observation.
+
+        Returns: copy of the observation after the reset
+        """
+
+        if self.random_start:
+            # Only the first joint angle is sampled, all remaining joints start at zero.
+            # Maybe randomize more than just the first joint
+            first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
+            self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)])
+            self._start_pos = self._joint_angles.copy()
+        else:
+            self._joint_angles = self._start_pos
+
+        self._generate_goal()
+
+        self._angle_velocity = self._start_vel
+        self._joints = np.zeros((self.n_links + 1, 2))
+        self._update_joints()
+        self._steps = 0
+
+        return self._get_obs().copy()
+
+    def _generate_goal(self):
+        """
+        Set the via point and the goal used by the reward and the observation.
+
+        A provided `_via_target` / `_target` is copied, otherwise both coordinates are sampled uniformly
+        with the random generator of the environment.
+        """
+        self._via_point = self.np_random.uniform(0.5, 3.5, 2) if self._via_target is None else np.copy(self._via_target)
+        # bounds have to be ordered (low, high), the reversed order is documented as undefined by numpy
+        # NOTE(review): interval [0.1, 0.5] kept from the original arguments, confirm the intended goal range
+        self._goal = self.np_random.uniform(0.1, 0.5, 2) if self._target is None else np.copy(self._target)
+        # raise NotImplementedError("How to properly sample points??")
+
    def _update_joints(self):
        """
        update _joints to get new end effector position. The other links are only required for rendering.
@ -115,14 +123,38 @@ class ViaPointReacher(gym.Env):

        self._is_collided = self_collision

+    def _get_reward(self, acc):
+        """
+        Compute the reward for the current step.
+
+        The squared distance is only penalized at step 100 (via point), at step 199 (goal) or on collision
+        (goal distance plus `collision_penalty`). All other steps only carry the acceleration cost.
+
+        Args:
+            acc: array of accelerations, its squared sum is penalized
+
+        Returns: tuple of the reward and an info dict with the key `is_success`
+        """
+        success = False
+        # start from zero, -inf/inf defaults would turn the reward of every collision free step into -inf
+        reward = 0.
+        dist = 0.
+        if not self._is_collided:
+            # return intermediate reward for via point
+            if self._steps == 100:
+                dist = np.linalg.norm(self.end_effector - self._via_point)
+                success = dist < 0.005
+            # return reward in last time step for goal
+            elif self._steps == 199:
+                dist = np.linalg.norm(self.end_effector - self._goal)
+                success = dist < 0.005
+        else:
+            # Episode terminates when colliding, hence return reward
+            dist = np.linalg.norm(self.end_effector - self._goal)
+            reward = -self.collision_penalty
+
+        reward -= dist ** 2
+        reward -= 5e-8 * np.sum(acc ** 2)
+        info = {"is_success": success}
+
+        return reward, info
+
    def _get_obs(self):
        theta = self._joint_angles
        return np.hstack([
            np.cos(theta),
            np.sin(theta),
            self._angle_velocity,
-            self.end_effector - self.via_point,
-            self.end_effector - self.goal_point,
+            self.end_effector - self._via_point,
+            self.end_effector - self._goal,
            self._steps
        ])

@ -133,7 +165,7 @@ class ViaPointReacher(gym.Env):

        accumulated_theta = np.cumsum(theta, axis=0)

-        endeffector = np.zeros(shape=(self.num_links, num_points_per_link, 2))
+        endeffector = np.zeros(shape=(self.n_links, num_points_per_link, 2))

        x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points
        y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points
@ -141,33 +173,46 @@ class ViaPointReacher(gym.Env):
        endeffector[0, :, 0] = x[0, :]
        endeffector[0, :, 1] = y[0, :]

-        for i in range(1, self.num_links):
+        for i in range(1, self.n_links):
            endeffector[i, :, 0] = x[i, :] + endeffector[i - 1, -1, 0]
            endeffector[i, :, 1] = y[i, :] + endeffector[i - 1, -1, 1]

        return np.squeeze(endeffector + self._joints[0, :])

    def render(self, mode='human'):
+        goal_pos = self._goal.T
+        via_pos = self._via_point.T
+
        if self.fig is None:
+            # Create base figure once on the beginning. Afterwards only update
+            plt.ion()
            self.fig = plt.figure()
-            # plt.ion()
-            # plt.pause(0.01)
-        else:
-            plt.figure(self.fig.number)
+            ax = self.fig.add_subplot(1, 1, 1)
+
+            # limits
+            lim = np.sum(self.link_lengths) + 0.5
+            ax.set_xlim([-lim, lim])
+            ax.set_ylim([-lim, lim])
+
+            self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
+            self.goal_point_plot, = ax.plot(goal_pos[0], goal_pos[1], 'go')
+            self.via_point_plot, = ax.plot(via_pos[0], via_pos[1], 'gx')
+
+            self.fig.show()
+
+        self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")

        if mode == "human":
-            plt.cla()
-            plt.title(f"Iteration: {self._steps}")
+            # goal and via point markers, refreshed on the first step after a reset
+            if self._steps == 1:
+                self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
+                self.via_point_plot.set_data(via_pos[0], via_pos[1])

-            # Arm
-            plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
+            # arm
+            self.line.set_data(self._joints[:, 0], self._joints[:, 1])

-            lim = np.sum(self.link_lengths) + 0.5
-            plt.xlim([-lim, lim])
-            plt.ylim([-lim, lim])
-            # plt.draw()
-            plt.pause(1e-4)  # pushes window to foreground, which is annoying.
-            # self.fig.canvas.flush_events()
+            self.fig.canvas.draw()
+            self.fig.canvas.flush_events()

        elif mode == "partial":
            if self._steps == 1:
@ -196,12 +241,39 @@ class ViaPointReacher(gym.Env):
                # Add the patch to the Axes
                [plt.gca().add_patch(rect) for rect in self.patches]

-                plt.xlim(-self.num_links, self.num_links), plt.ylim(-1, self.num_links)
+                plt.xlim(-self.n_links, self.n_links), plt.ylim(-1, self.n_links)
                # Arm
                plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')

                plt.pause(0.01)

+    @property
+    def active_obs(self):
+        return np.hstack([
+            [self.random_start] * self.n_links,  # cos
+            [self.random_start] * self.n_links,  # sin
+            [self.random_start] * self.n_links,  # velocity
+            [self._via_target is None] * 2,  # x-y coordinates of via point distance
+            [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
+    @property
+    def start_pos(self) -> Union[float, int, np.ndarray]:
+        return self._start_pos
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray]:
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    @property
+    def end_effector(self):
+        return self._joints[self.n_links].T
+
    def close(self):
        if self.fig is not None:
            plt.close(self.fig)
--- a/alr_envs/mujoco/ball_in_a_cup/utils.py
+++ b/alr_envs/mujoco/ball_in_a_cup/utils.py
@ -1,5 +1,5 @@
-from alr_envs.utils.wrapper.detpmp_wrapper import DetPMPWrapper
-from alr_envs.utils.wrapper.dmp_wrapper import DmpWrapper
+from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
+from alr_envs.utils.mps.dmp_wrapper import DmpWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv


@ -17,19 +17,8 @@ def make_contextual_env(rank, seed=0):
    def _init():
        env = ALRBallInACupEnv(reward_type="contextual_goal")

-        env = DetPMPWrapper(env,
-                            num_dof=7,
-                            num_basis=5,
-                            width=0.005,
-                            policy_type="motor",
-                            start_pos=env.start_pos,
-                            duration=3.5,
-                            post_traj_time=4.5,
-                            dt=env.dt,
-                            weights_scale=0.5,
-                            zero_start=True,
-                            zero_goal=True
-                            )
+        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
+                            policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True)

        env.seed(seed + rank)
        return env
@ -51,19 +40,8 @@ def make_env(rank, seed=0):
    def _init():
        env = ALRBallInACupEnv(reward_type="simple")

-        env = DetPMPWrapper(env,
-                            num_dof=7,
-                            num_basis=5,
-                            width=0.005,
-                            policy_type="motor",
-                            start_pos=env.start_pos,
-                            duration=3.5,
-                            post_traj_time=4.5,
-                            dt=env.dt,
-                            weights_scale=0.2,
-                            zero_start=True,
-                            zero_goal=True
-                            )
+        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
+                            policy_type="motor", weights_scale=0.2, zero_start=True, zero_goal=True)

        env.seed(seed + rank)
        return env
@ -85,20 +63,8 @@ def make_simple_env(rank, seed=0):
    def _init():
        env = ALRBallInACupEnv(reward_type="simple")

-        env = DetPMPWrapper(env,
-                            num_dof=3,
-                            num_basis=5,
-                            width=0.005,
-                            off=-0.1,
-                            policy_type="motor",
-                            start_pos=env.start_pos[1::2],
-                            duration=3.5,
-                            post_traj_time=4.5,
-                            dt=env.dt,
-                            weights_scale=0.25,
-                            zero_start=True,
-                            zero_goal=True
-                            )
+        env = DetPMPWrapper(env, num_dof=3, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
+                            policy_type="motor", weights_scale=0.25, zero_start=True, zero_goal=True, off=-0.1)

        env.seed(seed + rank)
        return env
--- a/alr_envs/mujoco/beerpong/utils.py
+++ b/alr_envs/mujoco/beerpong/utils.py
@ -1,4 +1,4 @@
-from alr_envs.utils.wrapper.detpmp_wrapper import DetPMPWrapper
+from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
 from alr_envs.mujoco.beerpong.beerpong import ALRBeerpongEnv
 from alr_envs.mujoco.beerpong.beerpong_simple import ALRBeerpongEnv as ALRBeerpongEnvSimple

@ -17,19 +17,8 @@ def make_contextual_env(rank, seed=0):
    def _init():
        env = ALRBeerpongEnv()

-        env = DetPMPWrapper(env,
-                            num_dof=7,
-                            num_basis=5,
-                            width=0.005,
-                            policy_type="motor",
-                            start_pos=env.start_pos,
-                            duration=3.5,
-                            post_traj_time=4.5,
-                            dt=env.dt,
-                            weights_scale=0.5,
-                            zero_start=True,
-                            zero_goal=True
-                            )
+        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
+                            policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True)

        env.seed(seed + rank)
        return env
@ -51,19 +40,8 @@ def make_env(rank, seed=0):
    def _init():
        env = ALRBeerpongEnvSimple()

-        env = DetPMPWrapper(env,
-                            num_dof=7,
-                            num_basis=5,
-                            width=0.005,
-                            policy_type="motor",
-                            start_pos=env.start_pos,
-                            duration=3.5,
-                            post_traj_time=4.5,
-                            dt=env.dt,
-                            weights_scale=0.25,
-                            zero_start=True,
-                            zero_goal=True
-                            )
+        env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
+                            policy_type="motor", weights_scale=0.25, zero_start=True, zero_goal=True)

        env.seed(seed + rank)
        return env
@ -85,19 +63,8 @@ def make_simple_env(rank, seed=0):
    def _init():
        env = ALRBeerpongEnvSimple()

-        env = DetPMPWrapper(env,
-                            num_dof=3,
-                            num_basis=5,
-                            width=0.005,
-                            policy_type="motor",
-                            start_pos=env.start_pos[1::2],
-                            duration=3.5,
-                            post_traj_time=4.5,
-                            dt=env.dt,
-                            weights_scale=0.5,
-                            zero_start=True,
-                            zero_goal=True
-                            )
+        env = DetPMPWrapper(env, num_dof=3, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5,
+                            policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True)

        env.seed(seed + rank)
        return env
--- a/alr_envs/utils/legacy/utils.py
+++ b/alr_envs/utils/legacy/utils.py
@ -1,7 +1,7 @@
 import alr_envs.classic_control.hole_reacher as hr
 import alr_envs.classic_control.viapoint_reacher as vpr
-from alr_envs.utils.wrapper.dmp_wrapper import DmpWrapper
-from alr_envs.utils.wrapper.detpmp_wrapper import DetPMPWrapper
+from alr_envs.utils.mps.dmp_wrapper import DmpWrapper
+from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
 import numpy as np


@ -49,13 +49,13 @@ def make_holereacher_env(rank, seed=0):
    """

    def _init():
-        _env = hr.HoleReacher(n_links=5,
-                           allow_self_collision=False,
-                           allow_wall_collision=False,
-                           hole_width=0.25,
-                           hole_depth=1,
-                           hole_x=2,
-                           collision_penalty=100)
+        _env = hr.HoleReacherEnv(n_links=5,
+                                 allow_self_collision=False,
+                                 allow_wall_collision=False,
+                                 hole_width=0.25,
+                                 hole_depth=1,
+                                 hole_x=2,
+                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
@ -65,7 +65,7 @@ def make_holereacher_env(rank, seed=0):
                          dt=_env.dt,
                          learn_goal=True,
                          alpha_phase=2,
-                          start_pos=_env.start_pos,
+                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=0.1
@ -89,13 +89,13 @@ def make_holereacher_fix_goal_env(rank, seed=0):
    """

    def _init():
-        _env = hr.HoleReacher(n_links=5,
-                           allow_self_collision=False,
-                           allow_wall_collision=False,
-                           hole_width=0.15,
-                           hole_depth=1,
-                           hole_x=1,
-                           collision_penalty=100)
+        _env = hr.HoleReacherEnv(n_links=5,
+                                 allow_self_collision=False,
+                                 allow_wall_collision=False,
+                                 hole_width=0.15,
+                                 hole_depth=1,
+                                 hole_x=1,
+                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
@ -105,7 +105,7 @@ def make_holereacher_fix_goal_env(rank, seed=0):
                          learn_goal=False,
                          final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476,  0.02012344]),
                          alpha_phase=2,
-                          start_pos=_env.start_pos,
+                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=1
@ -129,27 +129,16 @@ def make_holereacher_env_pmp(rank, seed=0):
    """

    def _init():
-        _env = hr.HoleReacher(n_links=5,
-                           allow_self_collision=False,
-                           allow_wall_collision=False,
-                           hole_width=0.15,
-                           hole_depth=1,
-                           hole_x=1,
-                           collision_penalty=1000)
+        _env = hr.HoleReacherEnv(n_links=5,
+                                 allow_self_collision=False,
+                                 allow_wall_collision=False,
+                                 hole_width=0.15,
+                                 hole_depth=1,
+                                 hole_x=1,
+                                 collision_penalty=1000)

-        _env = DetPMPWrapper(_env,
-                             num_dof=5,
-                             num_basis=5,
-                             width=0.02,
-                             policy_type="velocity",
-                             start_pos=_env.start_pos,
-                             duration=2,
-                             post_traj_time=0,
-                             dt=_env.dt,
-                             weights_scale=0.2,
-                             zero_start=True,
-                             zero_goal=False
-                             )
+        _env = DetPMPWrapper(_env, num_dof=5, num_basis=5, width=0.02, duration=2, dt=_env.dt, post_traj_time=0,
+                             policy_type="velocity", weights_scale=0.2, zero_start=True, zero_goal=False)
        _env.seed(seed + rank)
        return _env

--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@ -1,5 +1,5 @@
-from alr_envs.utils.wrapper.dmp_wrapper import DmpWrapper
-from alr_envs.utils.wrapper.detpmp_wrapper import DetPMPWrapper
+from alr_envs.utils.mps.dmp_wrapper import DmpWrapper
+from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
 import gym
 from gym.vector.utils import write_to_shared_memory
 import sys
--- a/alr_envs/utils/wrapper/init.py
+++ b/alr_envs/utils/wrapper/init.py
--- a/alr_envs/utils/wrapper/detpmp_wrapper.py
+++ b/alr_envs/utils/wrapper/detpmp_wrapper.py
@ -2,26 +2,27 @@ import gym
 import numpy as np
 from mp_lib import det_promp

-from alr_envs.utils.wrapper.mp_wrapper import MPWrapper
+from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_wrapper import MPWrapper


 class DetPMPWrapper(MPWrapper):
-    def __init__(self, env, num_dof, num_basis, width, start_pos=None, duration=1, dt=0.01, post_traj_time=0.,
-                 policy_type=None, weights_scale=1, zero_start=False, zero_goal=False, **mp_kwargs):
-        # self.duration = duration  # seconds
+    def __init__(self, env: MPEnv, num_dof: int, num_basis: int, width: int, duration: int = 1, dt: float = 0.01,
+                 post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1.,
+                 zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
+        self.duration = duration  # seconds

-        super().__init__(env, num_dof, duration, dt, post_traj_time, policy_type, weights_scale,
-                         num_basis=num_basis, width=width, start_pos=start_pos, zero_start=zero_start,
-                         zero_goal=zero_goal)
+        super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
+                         width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
+
+        self.dt = dt

        action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
        self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)

-        self.start_pos = start_pos
-        self.dt = dt

    def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
-                      start_pos: np.ndarray = None, zero_start: bool = False, zero_goal: bool = False):
+                      zero_start: bool = False, zero_goal: bool = False):
        pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=0.01,
                                           zero_start=zero_start, zero_goal=zero_goal)

--- a/alr_envs/utils/wrapper/dmp_wrapper.py
+++ b/alr_envs/utils/wrapper/dmp_wrapper.py
@ -1,19 +1,18 @@
-from mp_lib.phase import ExpDecayPhaseGenerator
-from mp_lib.basis import DMPBasisGenerator
-from mp_lib import dmps
-import numpy as np
 import gym
+import numpy as np
+from mp_lib import dmps
+from mp_lib.basis import DMPBasisGenerator
+from mp_lib.phase import ExpDecayPhaseGenerator

-from alr_envs.utils.wrapper.mp_wrapper import MPWrapper
+from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_wrapper import MPWrapper


 class DmpWrapper(MPWrapper):

-    def __init__(self, env: gym.Env, num_dof: int, num_basis: int,
-                 # start_pos: np.ndarray = None,
-                 # final_pos: np.ndarray = None,
+    def __init__(self, env: MPEnv, num_dof: int, num_basis: int,
                 duration: int = 1, alpha_phase: float = 2., dt: float = None,
-                 learn_goal: bool = False, return_to_start: bool = False, post_traj_time: float = 0.,
+                 learn_goal: bool = False, post_traj_time: float = 0.,
                 weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3.,
                 policy_type: str = None, render_mode: str = None):

@ -23,8 +22,6 @@ class DmpWrapper(MPWrapper):
            env:
            num_dof:
            num_basis:
-            start_pos:
-            final_pos:
            duration:
            alpha_phase:
            dt:
@ -37,30 +34,17 @@ class DmpWrapper(MPWrapper):
        self.learn_goal = learn_goal
        dt = env.dt if hasattr(env, "dt") else dt
        assert dt is not None
-        # start_pos = start_pos if start_pos is not None else env.start_pos if hasattr(env, "start_pos") else None
-        # TODO: assert start_pos is not None  # start_pos will be set in initialize, do we need this here?
-        # if learn_goal:
-            # final_pos = np.zeros_like(start_pos)  # arbitrary, will be learned
-            # final_pos = np.zeros((1, num_dof))  # arbitrary, will be learned
-        # else:
-        #     final_pos = final_pos if final_pos is not None else start_pos if return_to_start else None
-        # assert final_pos is not None
        self.t = np.linspace(0, duration, int(duration / dt))
        self.goal_scale = goal_scale

-        super().__init__(env, num_dof, duration, dt, post_traj_time, policy_type, weights_scale, render_mode,
-                         num_basis=num_basis,
-                         # start_pos=start_pos, final_pos=final_pos,
-                         alpha_phase=alpha_phase,
-                         bandwidth_factor=bandwidth_factor)
+        super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, render_mode,
+                         num_basis=num_basis, alpha_phase=alpha_phase, bandwidth_factor=bandwidth_factor)

        action_bounds = np.inf * np.ones((np.prod(self.mp.dmp_weights.shape) + (num_dof if learn_goal else 0)))
        self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)

-    def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5,
-                      # start_pos: np.ndarray = None,
-                      # final_pos: np.ndarray = None,
-                      alpha_phase: float = 2., bandwidth_factor: float = 3.):
+    def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, alpha_phase: float = 2.,
+                      bandwidth_factor: int = 3):

        phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
        basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=num_basis,
@ -69,15 +53,6 @@ class DmpWrapper(MPWrapper):
        dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
                       num_time_steps=int(duration / dt), dt=dt)

-        # dmp.dmp_start_pos = start_pos.reshape((1, num_dof))
-        # in a contextual environment, the start_pos may be not fixed, set in mp_rollout?
-        # TODO: Should we set start_pos in init at all? It's only used after calling rollout anyway...
-        # dmp.dmp_start_pos = start_pos.reshape((1, num_dof)) if start_pos is not None else np.zeros((1, num_dof))
-
-        # weights = np.zeros((num_basis, num_dof))
-        # goal_pos = np.zeros(num_dof) if self.learn_goal else final_pos
-
-        # dmp.set_weights(weights, goal_pos)
        return dmp

    def goal_and_weights(self, params):
@ -87,18 +62,15 @@ class DmpWrapper(MPWrapper):
        if self.learn_goal:
            goal_pos = params[0, -self.mp.num_dimensions:]  # [num_dof]
            params = params[:, :-self.mp.num_dimensions]  # [1,num_dof]
-            # weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
        else:
-            goal_pos = self.env.goal_pos  # self.mp.dmp_goal_pos.flatten()
+            goal_pos = self.env.goal_pos
            assert goal_pos is not None
-            # weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])

-        weight_matrix = np.reshape(params, self.mp.dmp_weights.shape)
+        weight_matrix = np.reshape(params, self.mp.dmp_weights.shape)  # [num_basis, num_dof]
        return goal_pos * self.goal_scale, weight_matrix * self.weights_scale

    def mp_rollout(self, action):
-        # if self.mp.start_pos is None:
-        self.mp.dmp_start_pos = self.env.init_qpos.reshape((1, self.num_dof))  # start_pos
+        self.mp.dmp_start_pos = self.env.start_pos
        goal_pos, weight_matrix = self.goal_and_weights(action)
        self.mp.set_weights(weight_matrix, goal_pos)
        return self.mp.reference_trajectory(self.t)
--- a/alr_envs/utils/mps/mp_environments.py
+++ b/alr_envs/utils/mps/mp_environments.py
@ -0,0 +1,33 @@
+from abc import abstractmethod
+from typing import Union
+
+import gym
+import numpy as np
+
+
+class MPEnv(gym.Env):
+    # NOTE(review): @abstractmethod is only enforced when the metaclass derives from ABCMeta - confirm for gym.Env
+    @property
+    @abstractmethod
+    def active_obs(self):
+        """Return a boolean mask with one entry per observation dimension.
+        Entries marked True are kept in the observation exposed by the MP wrapper (contextual case);
+        all other entries are filtered out of the full step-based observation.
+        """
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def start_pos(self) -> Union[float, int, np.ndarray]:
+        """
+        Return the current position of the joints; the DMP wrapper uses it as the start of a rollout.
+        """
+        raise NotImplementedError()
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray]:
+        """
+        Return the final position of the joints that the MP should reach.
+        Defaults to the start position; override this property to provide a different goal.
+        """
+        return self.start_pos
--- a/alr_envs/utils/wrapper/mp_wrapper.py
+++ b/alr_envs/utils/wrapper/mp_wrapper.py
@ -1,32 +1,24 @@
 from abc import ABC, abstractmethod
-from collections import defaultdict

 import gym
 import numpy as np

+from alr_envs.utils.mps.mp_environments import MPEnv
 from alr_envs.utils.policies import get_policy_class


 class MPWrapper(gym.Wrapper, ABC):

-    def __init__(self,
-                 env: gym.Env,
-                 num_dof: int,
-                 duration: int = 1,
-                 dt: float = None,
-                 post_traj_time: float = 0.,
-                 policy_type: str = None,
-                 weights_scale: float = 1.,
-                 render_mode: str = None,
-                 **mp_kwargs
-                 ):
+    def __init__(self, env: MPEnv, num_dof: int, dt: float, duration: int = 1, post_traj_time: float = 0.,
+                 policy_type: str = None, weights_scale: float = 1., render_mode: str = None, **mp_kwargs):
        super().__init__(env)

-        self.num_dof = num_dof
-        # self.num_basis = num_basis
-        # self.duration = duration  # seconds
+        # adjust observation space to the reduced version (only the entries selected by active_obs)
+        obs_sp = self.env.observation_space
+        self.observation_space = gym.spaces.Box(low=obs_sp.low[self.env.active_obs],
+                                                high=obs_sp.high[self.env.active_obs],
+                                                dtype=obs_sp.dtype)

-        # dt = env.dt if hasattr(env, "dt") else dt
        assert dt is not None  # this should never happen as MPWrapper is a base class
        self.post_traj_steps = int(post_traj_time / dt)

@ -40,8 +32,11 @@ class MPWrapper(gym.Wrapper, ABC):
        self.render_mode = render_mode
        self.render_kwargs = {}

-    # TODO: not yet final
+    # TODO: @Max I think this should not be in this class; this functionality should be part of your sampler.
    def __call__(self, params, contexts=None):
+        """
+        Roll out a batch of parameter sets: each row of `params` is passed to `step` in turn.
+        """
        params = np.atleast_2d(params)
        obs = []
        rewards = []
@ -50,7 +45,6 @@ class MPWrapper(gym.Wrapper, ABC):
        # for p, c in zip(params, contexts):
        for p in params:
            # self.configure(c)
-            # context = self.reset()
            ob, reward, done, info = self.step(p)
            obs.append(ob)
            rewards.append(reward)
@ -63,8 +57,7 @@ class MPWrapper(gym.Wrapper, ABC):
        self.env.configure(context)

    def reset(self):
-        obs = self.env.reset()
-        return obs
+        return self.env.reset()[self.env.active_obs]

    def step(self, action: np.ndarray):
        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
@ -78,15 +71,9 @@ class MPWrapper(gym.Wrapper, ABC):
        # self._velocity = velocity

        rewards = 0
-        # infos = defaultdict(list)
-
-        # TODO: @Max Why do we need this configure, states should be part of the model
-        # TODO: Ask Onur if the context distribution needs to be outside the environment
-        # TODO: For now create a new env with each context
-        # TODO: Explicitly call reset before step to obtain context from obs?
-        # self.env.configure(context)
-        # obs = self.env.reset()
        info = {}
+        # initialize obs with a random sample as a placeholder; reset is called externally, not inside step
+        obs = self.env.observation_space.sample()

        for t, pos_vel in enumerate(zip(trajectory, velocity)):
            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
@ -100,7 +87,7 @@ class MPWrapper(gym.Wrapper, ABC):
                break

        done = True
-        return obs, rewards, done, info
+        return obs[self.env.active_obs], rewards, done, info

    def render(self, mode='human', **kwargs):
        """Only set render options here, such that they can be used during the rollout.
@ -108,18 +95,6 @@ class MPWrapper(gym.Wrapper, ABC):
        self.render_mode = mode
        self.render_kwargs = kwargs

-    # def __call__(self, actions):
-    #     return self.step(actions)
-        # params = np.atleast_2d(params)
-        # rewards = []
-        # infos = []
-        # for p, c in zip(params, contexts):
-        #     reward, info = self.rollout(p, c)
-        #     rewards.append(reward)
-        #     infos.append(info)
-        #
-        # return np.array(rewards), infos
-
    @abstractmethod
    def mp_rollout(self, action):
        """
--- a/example.py
+++ b/example.py
@ -46,7 +46,7 @@ def example_dmp():
            obs = env.reset()


-def example_async(n_cpu=4, seed=int('533D', 16)):
+def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)):
    def make_env(env_id, seed, rank):
        env = gym.make(env_id)
        env.seed(seed + rank)
@ -73,7 +73,7 @@ def example_async(n_cpu=4, seed=int('533D', 16)):
        # do not return values above threshold
        return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),)

-    envs = gym.vector.AsyncVectorEnv([make_env("alr_envs:HoleReacherDMP-v0", seed, i) for i in range(n_cpu)])
+    envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed, i) for i in range(n_cpu)])

    obs = envs.reset()
    print(sample(envs, 16))
@ -82,7 +82,6 @@ def example_async(n_cpu=4, seed=int('533D', 16)):
 if __name__ == '__main__':
    # example_mujoco()
    # example_dmp()
-    # example_async()
-    env = gym.make("alr_envs:HoleReacherDMP-v0")
-    # env = gym.make("alr_envs:SimpleReacherDMP-v1")
-    print()
+    example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
+    # env = gym.make("alr_envs:HoleReacherDMP-v0", context=0.1)
+    # env = gym.make("alr_envs:HoleReacherDMP-v1")