diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 986265c..708852a 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -1,3 +1,4 @@
+import numpy as np
 from gym.envs.registration import register

 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
@@ -321,7 +322,9 @@ register(
         "bandwidth_factor": 2.5,
         "policy_type": "motor",
         "weights_scale": 100,
-        "return_to_start": True
+        "return_to_start": True,
+        "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]),
+        "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
     }
 )

@@ -339,7 +342,9 @@ register(
         "bandwidth_factor": 2.5,
         "policy_type": "motor",
         "weights_scale": 100,
-        "return_to_start": True
+        "return_to_start": True,
+        "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]),
+        "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
     }
 )

@@ -357,7 +362,9 @@ register(
         "policy_type": "motor",
         "weights_scale": 0.2,
         "zero_start": True,
-        "zero_goal": True
+        "zero_goal": True,
+        "p_gains": np.array([4./3., 2.4, 2.5, 5./3., 2., 2., 1.25]),
+        "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
     }
 )

@@ -374,7 +381,9 @@ register(
         "policy_type": "motor",
         "weights_scale": 0.2,
         "zero_start": True,
-        "zero_goal": True
+        "zero_goal": True,
+        "p_gains": np.array([4./3., 2.4, 2.5, 5./3., 2., 2., 1.25]),
+        "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
     }
 )

@@ -392,7 +401,9 @@ register(
         "bandwidth_factor": 2.5,
         "policy_type": "motor",
         "weights_scale": 50,
-        "goal_scale": 0.1
+        "goal_scale": 0.1,
+        "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]),
+        "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
     }
 )
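The registrations above now carry explicit PD gains for the 7-DoF arm instead of relying on gains stored inside the environment (compare the ball_in_a_cup and policies changes further down). A minimal sketch of what these arrays mean once they reach the PD controller; the setpoints below are made up purely for illustration:

import numpy as np

p_gains = np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25])
d_gains = np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])

# dummy desired and current joint states for a 7-DoF arm (illustrative values only)
des_pos, des_vel = np.ones(7), np.zeros(7)
cur_pos, cur_vel = np.zeros(7), np.zeros(7)

# same response the PDController computes for each trajectory setpoint
trq = p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)
assert trq.shape == (7,)
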
diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/classic_control/hole_reacher.py
index 730e7bf..281a771 100644
--- a/alr_envs/classic_control/hole_reacher.py
+++ b/alr_envs/classic_control/hole_reacher.py
@@ -7,7 +7,7 @@ from gym.utils import seeding
 from matplotlib import patches

 from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import AlrEnv
+from alr_envs.utils.mps.alr_env import AlrEnv


 class HoleReacherEnv(AlrEnv):
diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/classic_control/simple_reacher.py
index 5b04aad..43b0381 100644
--- a/alr_envs/classic_control/simple_reacher.py
+++ b/alr_envs/classic_control/simple_reacher.py
@@ -5,7 +5,7 @@ import numpy as np
 from gym import spaces
 from gym.utils import seeding

-from alr_envs.utils.mps.mp_environments import AlrEnv
+from alr_envs.utils.mps.alr_env import AlrEnv


 class SimpleReacherEnv(AlrEnv):
diff --git a/alr_envs/classic_control/viapoint_reacher.py b/alr_envs/classic_control/viapoint_reacher.py
index 8edcc69..afaa26e 100644
--- a/alr_envs/classic_control/viapoint_reacher.py
+++ b/alr_envs/classic_control/viapoint_reacher.py
@@ -6,7 +6,7 @@ import numpy as np
 from gym.utils import seeding

 from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import AlrEnv
+from alr_envs.utils.mps.alr_env import AlrEnv


 class ViaPointReacher(AlrEnv):
diff --git a/alr_envs/mujoco/alr_mujoco_env.py b/alr_envs/mujoco/alr_mujoco_env.py
index 8dbdabb..a95165c 100644
--- a/alr_envs/mujoco/alr_mujoco_env.py
+++ b/alr_envs/mujoco/alr_mujoco_env.py
@@ -7,7 +7,9 @@ from gym import error, spaces
 from gym.utils import seeding
 import numpy as np
 from os import path
-import gym
+
+from alr_envs.utils.mps.alr_env import AlrEnv
+from alr_envs.utils.positional_env import PositionalEnv

 try:
     import mujoco_py
@@ -33,7 +35,7 @@ def convert_observation_to_space(observation):
     return space


-class AlrMujocoEnv(gym.Env):
+class AlrMujocoEnv(PositionalEnv, AlrEnv):
     """
     Superclass for all MuJoCo environments.
     """
@@ -44,7 +46,7 @@ class AlrMujocoEnv(gym.Env):
         Args:
             model_path: path to xml file
             n_substeps: how many steps mujoco does per call to env.step
-            use_servo: use actuator defined in xml, use False for direct torque control
+            apply_gravity_comp: Whether gravity compensation should be active
         """
         if model_path.startswith("/"):
             fullpath = model_path
@@ -73,10 +75,6 @@ class AlrMujocoEnv(gym.Env):

         self._set_action_space()

-        # action = self.action_space.sample()
-        # observation, _reward, done, _info = self.step(action)
-        # assert not done
-
         observation = self._get_obs()  # TODO: is calling get_obs enough? should we call reset, or even step?

         self._set_observation_space(observation)
@@ -204,7 +202,7 @@ class AlrMujocoEnv(gym.Env):

         try:
             self.sim.step()
-        except mujoco_py.builder.MujocoException as e:
+        except mujoco_py.builder.MujocoException:
             error_in_sim = True

         return error_in_sim
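AlrMujocoEnv now inherits from PositionalEnv and AlrEnv, so concrete MuJoCo environments are expected to expose their current joint state. A minimal sketch of how a subclass might satisfy that interface, assuming the usual mujoco_py sim object; the class name and the 7-joint slice are illustrative assumptions, not part of the diff:

import numpy as np

from alr_envs.mujoco.alr_mujoco_env import AlrMujocoEnv


class MySevenDofEnv(AlrMujocoEnv):
    """Hypothetical subclass; only the PositionalEnv properties are shown."""

    @property
    def current_pos(self) -> np.ndarray:
        # joint positions taken directly from the MuJoCo state
        return self.sim.data.qpos[:7].copy()

    @property
    def current_vel(self) -> np.ndarray:
        # joint velocities taken directly from the MuJoCo state
        return self.sim.data.qvel[:7].copy()
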
""" return self.start_pos + + @property + @abstractmethod + def dt(self) -> Union[float, int]: + """ + Returns the time between two simulated steps of the environment + """ + raise NotImplementedError() \ No newline at end of file diff --git a/alr_envs/utils/mps/detpmp_wrapper.py b/alr_envs/utils/mps/detpmp_wrapper.py index 8661781..724a50c 100644 --- a/alr_envs/utils/mps/detpmp_wrapper.py +++ b/alr_envs/utils/mps/detpmp_wrapper.py @@ -2,29 +2,36 @@ import gym import numpy as np from mp_lib import det_promp -from alr_envs.utils.mps.mp_environments import AlrEnv +from alr_envs.utils.mps.alr_env import AlrEnv from alr_envs.utils.mps.mp_wrapper import MPWrapper class DetPMPWrapper(MPWrapper): - def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, width: float, duration: float = 1, dt: float = 0.01, - post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1., - zero_start: bool = False, zero_goal: bool = False, **mp_kwargs): + def __init__(self, env: AlrEnv, num_dof, num_basis, width, start_pos=None, duration=1, post_traj_time=0., + policy_type=None, weights_scale=1, zero_start=False, zero_goal=False, learn_mp_length: bool =True, + **mp_kwargs): self.duration = duration # seconds - dt = env.dt if hasattr(env, "dt") else dt - assert dt is not None - self.dt = dt + super().__init__(env=env, num_dof=num_dof, duration=duration, post_traj_time=post_traj_time, + policy_type=policy_type, weights_scale=weights_scale, num_basis=num_basis, + width=width, zero_start=zero_start, zero_goal=zero_goal, + **mp_kwargs) - super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis, - width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs) + self.learn_mp_length = learn_mp_length + if self.learn_mp_length: + parameter_space_shape = (1+num_basis*num_dof,) + else: + parameter_space_shape = (num_basis * num_dof,) + self.min_param = -np.inf + self.max_param = np.inf + self.parameterization_space = gym.spaces.Box(low=self.min_param, high=self.max_param, + shape=parameter_space_shape, dtype=np.float32) - action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof)) - self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32) + self.start_pos = start_pos - def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None, - off: float = 0.01, zero_start: bool = False, zero_goal: bool = False): - pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off, + def initialize_mp(self, num_dof: int, duration: int, num_basis: int = 5, width: float = None, + zero_start: bool = False, zero_goal: bool = False, **kwargs): + pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=0.01, zero_start=zero_start, zero_goal=zero_goal) weights = np.zeros(shape=(num_basis, num_dof)) @@ -33,10 +40,15 @@ class DetPMPWrapper(MPWrapper): return pmp def mp_rollout(self, action): - params = np.reshape(action, newshape=(self.mp.n_basis, self.mp.n_dof)) * self.weights_scale - self.mp.set_weights(self.duration, params) - _, des_pos, des_vel, _ = self.mp.compute_trajectory(1 / self.dt, 1.) 
diff --git a/alr_envs/utils/mps/detpmp_wrapper.py b/alr_envs/utils/mps/detpmp_wrapper.py
index 8661781..724a50c 100644
--- a/alr_envs/utils/mps/detpmp_wrapper.py
+++ b/alr_envs/utils/mps/detpmp_wrapper.py
@@ -2,29 +2,36 @@ import gym
 import numpy as np
 from mp_lib import det_promp

-from alr_envs.utils.mps.mp_environments import AlrEnv
+from alr_envs.utils.mps.alr_env import AlrEnv
 from alr_envs.utils.mps.mp_wrapper import MPWrapper


 class DetPMPWrapper(MPWrapper):
-    def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, width: float, duration: float = 1, dt: float = 0.01,
-                 post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1.,
-                 zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
+    def __init__(self, env: AlrEnv, num_dof, num_basis, width, start_pos=None, duration=1, post_traj_time=0.,
+                 policy_type=None, weights_scale=1, zero_start=False, zero_goal=False, learn_mp_length: bool = True,
+                 **mp_kwargs):
         self.duration = duration  # seconds

-        dt = env.dt if hasattr(env, "dt") else dt
-        assert dt is not None
-        self.dt = dt
+        super().__init__(env=env, num_dof=num_dof, duration=duration, post_traj_time=post_traj_time,
+                         policy_type=policy_type, weights_scale=weights_scale, num_basis=num_basis,
+                         width=width, zero_start=zero_start, zero_goal=zero_goal,
+                         **mp_kwargs)

-        super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
-                         width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
+        self.learn_mp_length = learn_mp_length
+        if self.learn_mp_length:
+            parameter_space_shape = (1 + num_basis * num_dof,)
+        else:
+            parameter_space_shape = (num_basis * num_dof,)
+        self.min_param = -np.inf
+        self.max_param = np.inf
+        self.parameterization_space = gym.spaces.Box(low=self.min_param, high=self.max_param,
+                                                     shape=parameter_space_shape, dtype=np.float32)

-        action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
-        self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
+        self.start_pos = start_pos

-    def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
-                      off: float = 0.01, zero_start: bool = False, zero_goal: bool = False):
-        pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
+    def initialize_mp(self, num_dof: int, duration: int, num_basis: int = 5, width: float = None,
+                      zero_start: bool = False, zero_goal: bool = False, **kwargs):
+        pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=0.01,
                                            zero_start=zero_start, zero_goal=zero_goal)

         weights = np.zeros(shape=(num_basis, num_dof))
@@ -33,10 +40,15 @@ class DetPMPWrapper(MPWrapper):
         return pmp

     def mp_rollout(self, action):
-        params = np.reshape(action, newshape=(self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
-        self.mp.set_weights(self.duration, params)
-        _, des_pos, des_vel, _ = self.mp.compute_trajectory(1 / self.dt, 1.)
+        if self.learn_mp_length:
+            duration = max(1, self.duration * abs(action[0]))
+            params = np.reshape(action[1:], (self.mp.n_basis, -1)) * self.weights_scale  # TODO: Fix Bug when zero_start is true
+        else:
+            duration = self.duration
+            params = np.reshape(action, (self.mp.n_basis, -1)) * self.weights_scale  # TODO: Fix Bug when zero_start is true
+        self.mp.set_weights(1., params)
+        _, des_pos, des_vel, _ = self.mp.compute_trajectory(frequency=max(1, duration))
         if self.mp.zero_start:
-            des_pos += self.env.start_pos[None, :]
+            des_pos += self.start_pos

         return des_pos, des_vel
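With learn_mp_length enabled, the first entry of the action vector scales the trajectory duration and the remaining entries become the ProMP weights, so the flat parameter vector has 1 + num_basis * num_dof entries. A shape-only sketch of that split; the basis and DoF counts are arbitrary:

import numpy as np

num_basis, num_dof = 5, 7
action = np.random.randn(1 + num_basis * num_dof)

length_factor = abs(action[0])                     # scales self.duration in mp_rollout
weights = np.reshape(action[1:], (num_basis, -1))  # becomes the ProMP weight matrix
assert weights.shape == (num_basis, num_dof)
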
diff --git a/alr_envs/utils/mps/dmp_wrapper.py b/alr_envs/utils/mps/dmp_wrapper.py
index 4958a98..8dbd26d 100644
--- a/alr_envs/utils/mps/dmp_wrapper.py
+++ b/alr_envs/utils/mps/dmp_wrapper.py
@@ -4,7 +4,7 @@ from mp_lib import dmps
 from mp_lib.basis import DMPBasisGenerator
 from mp_lib.phase import ExpDecayPhaseGenerator

-from alr_envs.utils.mps.mp_environments import AlrEnv
+from alr_envs.utils.mps.alr_env import AlrEnv
 from alr_envs.utils.mps.mp_wrapper import MPWrapper


@@ -32,26 +32,26 @@ class DmpWrapper(MPWrapper):
             goal_scale:
         """
         self.learn_goal = learn_goal
-        dt = env.dt if hasattr(env, "dt") else dt
-        assert dt is not None
+
         self.t = np.linspace(0, duration, int(duration / dt))
         self.goal_scale = goal_scale

-        super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, render_mode,
+        super().__init__(env=env, num_dof=num_dof, duration=duration, post_traj_time=post_traj_time,
+                         policy_type=policy_type, weights_scale=weights_scale, render_mode=render_mode,
                          num_basis=num_basis, alpha_phase=alpha_phase, bandwidth_factor=bandwidth_factor)

         action_bounds = np.inf * np.ones((np.prod(self.mp.weights.shape) + (num_dof if learn_goal else 0)))
         self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)

-    def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, alpha_phase: float = 2.,
-                      bandwidth_factor: int = 3):
+    def initialize_mp(self, num_dof: int, duration: int, num_basis: int, alpha_phase: float = 2.,
+                      bandwidth_factor: int = 3, **kwargs):
         phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
         basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=num_basis,
                                             basis_bandwidth_factor=bandwidth_factor)

         dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
-                       duration=duration, dt=dt)
+                       dt=self.dt)

         return dmp
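For the DMP wrapper the parameter layout is unchanged: the flat action holds every basis weight plus, when learn_goal is set, one goal entry per DoF; only the dt handling moved onto the wrapped environment. A shape-only sketch of the resulting action dimension, assuming the DMP stores one weight per basis function and DoF:

import numpy as np

num_dof, num_basis, learn_goal = 7, 5, True
weights_shape = (num_basis, num_dof)  # assumed shape of self.mp.weights
action_dim = int(np.prod(weights_shape)) + (num_dof if learn_goal else 0)
assert action_dim == 42
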
diff --git a/alr_envs/utils/mps/mp_wrapper.py b/alr_envs/utils/mps/mp_wrapper.py
index c31072f..84bffd1 100644
--- a/alr_envs/utils/mps/mp_wrapper.py
+++ b/alr_envs/utils/mps/mp_wrapper.py
@@ -1,16 +1,36 @@
 from abc import ABC, abstractmethod
+from typing import Union

 import gym
 import numpy as np

-from alr_envs.utils.mps.mp_environments import AlrEnv
-from alr_envs.utils.policies import get_policy_class
+from alr_envs.utils.mps.alr_env import AlrEnv
+from alr_envs.utils.policies import get_policy_class, BaseController


 class MPWrapper(gym.Wrapper, ABC):
+    """
+    Base class for movement primitive based gym.Wrapper implementations.

-    def __init__(self, env: AlrEnv, num_dof: int, dt: float, duration: float = 1, post_traj_time: float = 0.,
-                 policy_type: str = None, weights_scale: float = 1., render_mode: str = None, **mp_kwargs):
+    :param env: The (wrapped) environment this wrapper is applied on
+    :param num_dof: Dimension of the action space of the wrapped env
+    :param duration: Number of timesteps in the trajectory of the movement primitive
+    :param post_traj_time: Time for which the last position of the trajectory is fed to the environment to continue
+                           simulation
+    :param policy_type: Type or object defining the policy that is used to generate action based on the trajectory
+    :param weights_scale: Scaling parameter for the actions given to this wrapper
+    :param render_mode: Equivalent to gym render mode
+    """
+    def __init__(self,
+                 env: AlrEnv,
+                 num_dof: int,
+                 duration: int = 1,
+                 post_traj_time: float = 0.,
+                 policy_type: Union[str, BaseController] = None,
+                 weights_scale: float = 1.,
+                 render_mode: str = None,
+                 **mp_kwargs
+                 ):
         super().__init__(env)

         # adjust observation space to reduce version
@@ -19,14 +39,15 @@ class MPWrapper(gym.Wrapper, ABC):
                                             high=obs_sp.high[self.env.active_obs],
                                             dtype=obs_sp.dtype)

-        assert dt is not None  # this should never happen as MPWrapper is a base class
-        self.post_traj_steps = int(post_traj_time / dt)
+        self.post_traj_steps = int(post_traj_time / env.dt)

-        self.mp = self.initialize_mp(num_dof, duration, dt, **mp_kwargs)
+        self.mp = self.initialize_mp(num_dof=num_dof, duration=duration, **mp_kwargs)
         self.weights_scale = weights_scale

-        policy_class = get_policy_class(policy_type)
-        self.policy = policy_class(env)
+        if type(policy_type) is str:
+            self.policy = get_policy_class(policy_type, env, mp_kwargs)
+        else:
+            self.policy = policy_type

         # rendering
         self.render_mode = render_mode
@@ -62,29 +83,30 @@ class MPWrapper(gym.Wrapper, ABC):

         if self.post_traj_steps > 0:
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
-            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])
-
-        # self._trajectory = trajectory
-        # self._velocity = velocity
-
-        rewards = 0
-        info = {}
-        # create random obs as the reset function is called externally
-        obs = self.env.observation_space.sample()
+            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dimensions))])

+        trajectory_length = len(trajectory)
+        actions = np.zeros(shape=(trajectory_length, self.mp.num_dimensions))
+        observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape)
+        rewards = np.zeros(shape=(trajectory_length,))
+        trajectory_return = 0
+        infos = dict(step_infos=[])
         for t, pos_vel in enumerate(zip(trajectory, velocity)):
-            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
-            obs, rew, done, info = self.env.step(ac)
-            rewards += rew
-            # TODO return all dicts?
-            # [infos[k].append(v) for k, v in info.items()]
+            actions[t, :] = self.policy.get_action(pos_vel[0], pos_vel[1])
+            observations[t, :], rewards[t], done, info = self.env.step(actions[t, :])
+            trajectory_return += rewards[t]
+            infos['step_infos'].append(info)
             if self.render_mode:
                 self.env.render(mode=self.render_mode, **self.render_kwargs)
             if done:
                 break
+        infos['step_actions'] = actions[:t + 1]
+        infos['step_observations'] = observations[:t + 1]
+        infos['step_rewards'] = rewards[:t + 1]
+        infos['trajectory_length'] = t + 1
         done = True
-        return obs[self.env.active_obs], rewards, done, info
+        return observations[t][self.env.active_obs], trajectory_return, done, infos


     def render(self, mode='human', **kwargs):
         """Only set render options here, such that they can be used during the rollout.
@@ -102,7 +124,7 @@ class MPWrapper(gym.Wrapper, ABC):
         raise NotImplementedError()

     @abstractmethod
-    def initialize_mp(self, num_dof: int, duration: float, dt: float, **kwargs):
+    def initialize_mp(self, num_dof: int, duration: float, **kwargs):
         """
         Create respective instance of MP
         Returns:
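The rollout in MPWrapper.step now returns the summed trajectory return together with an info dict that keeps the full per-step history, instead of a randomly sampled observation. A small sketch of consuming those new keys; the helper below is hypothetical and only reads what the diff stores:

def summarize_rollout(info: dict) -> str:
    """Summarize the per-step data an MPWrapper rollout stores in its info dict."""
    t = info["trajectory_length"]        # number of env steps actually executed
    mean_reward = info["step_rewards"].mean()
    action_shape = info["step_actions"].shape
    assert len(info["step_infos"]) == t  # one wrapped-env info dict per step
    return f"{t} steps, mean step reward {mean_reward:.3f}, actions {action_shape}"
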
diff --git a/alr_envs/utils/policies.py b/alr_envs/utils/policies.py
index e8874d6..3c0dbf8 100644
--- a/alr_envs/utils/policies.py
+++ b/alr_envs/utils/policies.py
@@ -1,10 +1,12 @@
+from typing import Tuple, Union
+
 from gym import Env

-from alr_envs.mujoco.alr_mujoco_env import AlrMujocoEnv
+from alr_envs.utils.positional_env import PositionalEnv


 class BaseController:
-    def __init__(self, env: Env):
+    def __init__(self, env: Env, **kwargs):
         self.env = env

     def get_action(self, des_pos, des_vel):
@@ -22,27 +24,38 @@ class VelController(BaseController):


 class PDController(BaseController):
-    def __init__(self, env: AlrMujocoEnv):
-        self.p_gains = env.p_gains
-        self.d_gains = env.d_gains
-        super(PDController, self).__init__(env)
+    """
+    A PD-Controller. Using position and velocity information from a provided positional environment,
+    the controller calculates a response based on the desired position and velocity.
+
+    :param env: A positional environment
+    :param p_gains: Factors for the proportional gains
+    :param d_gains: Factors for the differential gains
+    """
+    def __init__(self,
+                 env: PositionalEnv,
+                 p_gains: Union[float, Tuple],
+                 d_gains: Union[float, Tuple]):
+        self.p_gains = p_gains
+        self.d_gains = d_gains
+        super(PDController, self).__init__(env)

     def get_action(self, des_pos, des_vel):
-        # TODO: make standardized ALRenv such that all of them have current_pos/vel attributes
         cur_pos = self.env.current_pos
         cur_vel = self.env.current_vel
-        if len(des_pos) != len(cur_pos):
-            des_pos = self.env.extend_des_pos(des_pos)
-        if len(des_vel) != len(cur_vel):
-            des_vel = self.env.extend_des_vel(des_vel)
+        assert des_pos.shape == cur_pos.shape, \
+            "Mismatch in dimension between desired position {} and current position {}".format(des_pos.shape, cur_pos.shape)
+        assert des_vel.shape == cur_vel.shape, \
+            "Mismatch in dimension between desired velocity {} and current velocity {}".format(des_vel.shape,
+                                                                                               cur_vel.shape)
         trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
         return trq


-def get_policy_class(policy_type):
+def get_policy_class(policy_type, env, mp_kwargs, **kwargs):
     if policy_type == "motor":
-        return PDController
+        return PDController(env, p_gains=mp_kwargs['p_gains'], d_gains=mp_kwargs['d_gains'])
     elif policy_type == "velocity":
-        return VelController
+        return VelController(env)
     elif policy_type == "position":
-        return PosController
+        return PosController(env)
diff --git a/alr_envs/utils/positional_env.py b/alr_envs/utils/positional_env.py
new file mode 100644
index 0000000..37473df
--- /dev/null
+++ b/alr_envs/utils/positional_env.py
@@ -0,0 +1,22 @@
+from abc import abstractmethod
+from typing import Union, Tuple
+
+import numpy as np
+from gym import Env
+
+
+class PositionalEnv(Env):
+    """A position and velocity based environment. It functions just as any regular OpenAI Gym
+    environment but additionally provides position and velocity information. This usually means that the
+    corresponding information from the simulation is forwarded via these properties.
+    PD-Controller based policies, for example, require this interface to calculate state dependent actions.
+    """
+
+    @property
+    @abstractmethod
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        raise NotImplementedError
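get_policy_class now returns a ready controller instance and the PD gains come from mp_kwargs rather than from the environment; alternatively, a pre-built BaseController can be handed to MPWrapper as policy_type. A minimal sketch with a dummy PositionalEnv standing in for a real robot environment:

import numpy as np

from alr_envs.utils.policies import PDController, get_policy_class
from alr_envs.utils.positional_env import PositionalEnv


class DummyPositionalEnv(PositionalEnv):
    """Hypothetical stand-in that reports a fixed 7-DoF state."""

    @property
    def current_pos(self) -> np.ndarray:
        return np.zeros(7)

    @property
    def current_vel(self) -> np.ndarray:
        return np.zeros(7)


env = DummyPositionalEnv()
p_gains = np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25])
d_gains = np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])

# via the factory, as MPWrapper does for policy_type="motor"
controller = get_policy_class("motor", env, {"p_gains": p_gains, "d_gains": d_gains})

# or constructed directly and passed to the wrapper as policy_type
controller = PDController(env, p_gains=p_gains, d_gains=d_gains)
trq = controller.get_action(np.ones(7), np.zeros(7))
assert trq.shape == (7,)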