commit 2cc1ab759c ("last version of")
parent a26b9f463b
@@ -391,8 +391,7 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": False,
"cup_goal_pos": [0.1, -2.0],
"learn_release_step": True
"cup_goal_pos": [0.1, -2.0]
}
)
@@ -404,8 +403,7 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": True,
"cup_goal_pos": [-0.3, -1.2],
"learn_release_step": True
"cup_goal_pos": [-0.3, -1.2]
}
)
@@ -646,7 +644,7 @@ for _v in _versions:
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'num_dof': 7
'action_dim': 7
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
@@ -658,7 +656,9 @@ for _v in _versions:
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
# "p_gains": 0.125*np.array([200, 300, 100, 100, 10, 10, 2.5]),
"d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
# "d_gains": 0.025*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
@@ -753,29 +753,92 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

# ## HopperJump
# _versions = ["v0", "v1"]
# for _v in _versions:
# _env_id = f'ALRHopperJumpProMP-{_v}'
# register(
# id= _env_id,
# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
# kwargs={
# "name": f"alr_envs:ALRHopperJump-{_v}",
# "wrappers": [mujoco.hopper_jump.MPWrapper],
# "mp_kwargs": {
# "num_dof": 3,
# "num_basis": 5,
# "duration": 2,
# "post_traj_time": 0,
# "policy_type": "motor",
# "weights_scale": 1.0,
# "zero_start": True,
# "zero_goal": False,
# "policy_kwargs": {
# "p_gains": np.ones(3),
# "d_gains": 0.1*np.ones(3)
# }
# }
# }
# )
# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

# ## HopperJump
# register(
# id= "ALRHopperJumpProMP-v2",
# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
# kwargs={
# "name": f"alr_envs:ALRHopperJump-v2",
# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
# "mp_kwargs": {
# "num_dof": 3,
# "num_basis": 5,
# "duration": 2,
# "post_traj_time": 0,
# "policy_type": "motor",
# "weights_scale": 1.0,
# "zero_start": True,
# "zero_goal": False,
# "policy_kwargs": {
# "p_gains": np.ones(3),
# "d_gains": 0.1*np.ones(3)
# }
# }
# }
# )
# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")

## HopperJump
_versions = ["v0", "v1"]
for _v in _versions:
_env_id = f'ALRHopperJumpProMP-{_v}'
register(
id= _env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-{_v}",
"wrappers": [mujoco.hopper_jump.MPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
}
"wrappers": [mujoco.hopper_jump.NewMPWrapper],
"ep_wrapper_kwargs": {
"weight_scale": 1
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'action_dim': 3
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
'delay': 0,
'tau': 2, # initial value
'learn_tau': False,
'learn_delay': False
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 2
}
}
)
@@ -784,25 +847,35 @@ for _v in _versions:
## HopperJump
register(
id= "ALRHopperJumpProMP-v2",
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-v2",
"wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
"ep_wrapper_kwargs": {
"weight_scale": 1
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'action_dim': 3
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
'delay': 0,
'tau': 2, # initial value
'learn_tau': False,
'learn_delay': False
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
"d_gains": 0.1*np.ones(3),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 2
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
@@ -917,10 +990,3 @@ for _v in _versions:
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)


# --------------------- Testing new mp wrapper -----------------------------------------------------

# register(
# id='ALRReacherProMP-v0'
# )
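Illustration (not part of the diff): a minimal sketch of how one of the ProMP ids registered above would be constructed and rolled out, assuming the ids are registered when the alr_envs package is imported. The episodic wrapper consumes one parameter vector per episode instead of per-step torques.

import gym
import alr_envs  # assumed to trigger the register() calls above

env = gym.make("ALRHopperJumpProMP-v0")
obs = env.reset()
params = env.action_space.sample()           # one ProMP parameter vector for the whole episode
obs, reward, done, info = env.step(params)   # the wrapper rolls out the full trajectory internally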
@@ -1,4 +1,3 @@
from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
@@ -11,3 +10,4 @@ from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
from .reacher.alr_reacher import ALRReacherEnv
@@ -3,7 +3,6 @@ import os

import numpy as np
from gym import utils
from gym import spaces
from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward

@@ -18,7 +17,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])

class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
rndm_goal=False, cup_goal_pos=None):
cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
if cup_goal_pos.shape[0]==2:
cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
@@ -43,7 +42,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):

# self._release_step = 175 # time step of ball release
# self._release_step = 130 # time step of ball release
self._release_step = 100 # time step of ball release
self.release_step = 100 # time step of ball release

self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
self.cup_table_id = 10
@@ -52,7 +51,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.noise_std = 0.01
else:
self.noise_std = 0
self.learn_release_step = learn_release_step
reward_function = BeerPongReward
self.reward_function = reward_function()

@@ -63,13 +61,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def start_pos(self):
return self._start_pos

def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space

@property
def start_vel(self):
return self._start_vel
@@ -109,21 +100,22 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()

def step(self, a):
self._release_step = a[-1] if self.learn_release_step else self._release_step
self._release_step = np.clip(self._release_step, self.action_space.low[-1], self.action_space.high[-1]) \
if self.learn_release_step else self._release_step
# if a.shape[0] == 8: # we learn also when to release the ball
# self._release_step = a[-1]
# self._release_step = np.clip(self._release_step, 50, 250)
# self.release_step = 0.5/self.dt
reward_dist = 0.0
angular_vel = 0.0
applied_action = a[:a.shape[0]-int(self.learn_release_step)]
applied_action = a
reward_ctrl = - np.square(applied_action).sum()
if self.apply_gravity_comp:
applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
try:
self.do_simulation(applied_action, self.frame_skip)
if self._steps < self._release_step:
if self._steps < self.release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
elif self._steps == self._release_step and self.add_noise:
elif self._steps == self.release_step and self.add_noise:
self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
crash = False
except mujoco_py.builder.MujocoException:
@@ -160,7 +152,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
ball_pos=ball_pos,
ball_vel=ball_vel,
success=success,
is_collided=is_collided, sim_crash=crash)
is_collided=is_collided, sim_crash=crash,
table_contact_first=int(not self.reward_function.ball_ground_contact_first))
infos.update(reward_infos)

return ob, reward, done, infos
@@ -162,12 +162,16 @@ class BeerPongReward:
min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
1e-4 * np.mean(action_cost)
if env.learn_release_step and not self.ball_in_cup:
too_small = (env._release_step<50)*(env._release_step-50)**2
too_big = (env._release_step>200)*0.2*(env._release_step-200)**2
reward = reward - too_small -too_big
# 1e-7*np.mean(action_cost)
# release step punishment
min_time_bound = 0.1
max_time_bound = 1.0
release_time = env.release_step*env.dt
release_time_rew = int(release_time<min_time_bound)*(-30-10*(release_time-min_time_bound)**2) \
+int(release_time>max_time_bound)*(-30-10*(release_time-max_time_bound)**2)
reward += release_time_rew
success = self.ball_in_cup
# print('release time :', release_time)
else:
reward = - 1e-2 * action_cost
# reward = - 1e-4 * action_cost
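Quick numerical check of the release-time term introduced above (illustration only, not part of the diff; dt = 0.005 is taken from the ep_length comment in the environment):

dt = 0.005
release_time = 100 * dt  # release_step = 100  ->  0.5 s
# 0.1 <= 0.5 <= 1.0, so both indicator terms are zero and no penalty is applied
release_time_rew = int(release_time < 0.1) * (-30 - 10 * (release_time - 0.1) ** 2) \
                   + int(release_time > 1.0) * (-30 - 10 * (release_time - 1.0) ** 2)
assert release_time_rew == 0
# a release at 0.05 s instead would be penalised with roughly -30.0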
@@ -21,24 +21,35 @@ class NewMPWrapper(EpisodicWrapper):
[False] # env steps
])

def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
if self.env.learn_release_step:
return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
else:
return step_action
# def set_mp_action_space(self):
# min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
# if self.mp.learn_tau:
# min_action_bounds[0] = 20*self.env.dt
# max_action_bounds[0] = 260*self.env.dt
# mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
# dtype=np.float32)
# return mp_action_space

# def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
# if self.mp.learn_tau:
# return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
# else:
# return step_action

def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
if self.env.learn_release_step:
return action[:-1], action[-1] # mp_params, release step
if self.mp.learn_tau:
self.env.env.release_step = action[0]/self.env.dt # Tau value
# self.env.env.release_step = np.clip(action[0]/self.env.dt, 20, 260) # Tau value
return action, None
else:
return action, None

def set_action_space(self):
if self.env.learn_release_step:
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
return self.action_space
else:
return super(NewMPWrapper, self).set_action_space()
# def set_action_space(self):
# if self.mp.learn_tau:
# min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
# min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
# max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
# self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
# return self.action_space
# else:
# return super(NewMPWrapper, self).set_action_space()
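Illustration (not part of the diff) of the tau-to-release-step conversion in _episode_callback above, assuming the simulation timestep dt = 0.005 used elsewhere in this commit. When learn_tau is enabled, the first action entry is the trajectory duration tau, which doubles as the ball-release time:

dt = 0.005
tau = 0.5                 # hypothetical first entry of the action vector
release_step = tau / dt   # 0.5 s / 0.005 s -> the ball is released at env step 100
# the full action vector (tau included) is still handed to the movement primitive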
@@ -1 +1,2 @@
from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
from .new_mp_wrapper import NewMPWrapper, NewHighCtxtMPWrapper
@@ -80,8 +80,6 @@ class ALRHopperJumpEnv(HopperEnv):

# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale

qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
@@ -104,14 +102,26 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def reset_model(self):
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
noise_low[1] = 0
noise_low[2] = -0.3
noise_low[3] = -0.1
noise_low[4] = -1.1
noise_low[5] = -0.785

noise_high = np.ones(self.model.nq)*self._reset_noise_scale
noise_high[1] = 0
noise_high[2] = 0.3
noise_high[3] = 0
noise_high[4] = 0
noise_high[5] = 0.785

rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
# rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
# can not recover
rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
# rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
qpos = self.init_qpos + rnd_vec
qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
qvel = self.init_qvel

self.set_state(qpos, qvel)

@@ -167,16 +177,19 @@ if __name__ == '__main__':
env = ALRHopperJumpRndmPosEnv()
obs = env.reset()

for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
# if i % 10 == 0:
# env.render(mode=render_mode)
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
for k in range(10):
obs = env.reset()
print('observation :', obs[:6])
for i in range(200):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
# if i % 10 == 0:
# env.render(mode=render_mode)
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()

env.close()
alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py (new file, 29 lines)
@@ -0,0 +1,29 @@
from alr_envs.mp.episodic_wrapper import EpisodicWrapper
from typing import Union, Tuple
import numpy as np


class NewMPWrapper(EpisodicWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[3:6].copy()

@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:6].copy()

def set_active_obs(self):
return np.hstack([
[False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
[False] * 6, # velocity
[True]
])


class NewHighCtxtMPWrapper(NewMPWrapper):
def set_active_obs(self):
return np.hstack([
[True] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
[False] * 6, # velocity
[False]
])
@@ -68,35 +68,35 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0

# def reset_model(self):
# qpos = self.init_qpos
# if not hasattr(self, "goal"):
# self.goal = np.array([-0.25, 0.25])
# # self.goal = self.init_qpos.copy()[:2] + 0.05
# qpos[-2:] = self.goal
# qvel = self.init_qvel
# qvel[-2:] = 0
# self.set_state(qpos, qvel)
# self._steps = 0
#
# return self._get_obs()

def reset_model(self):
qpos = self.init_qpos.copy()
while True:
self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
# self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
# self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
if np.linalg.norm(self.goal) < self.n_links / 10:
break
qpos = self.init_qpos
if not hasattr(self, "goal"):
self.goal = np.array([-0.25, 0.25])
# self.goal = self.init_qpos.copy()[:2] + 0.05
qpos[-2:] = self.goal
qvel = self.init_qvel.copy()
qvel = self.init_qvel
qvel[-2:] = 0
self.set_state(qpos, qvel)
self._steps = 0

return self._get_obs()

# def reset_model(self):
# qpos = self.init_qpos.copy()
# while True:
# self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
# # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
# # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
# if np.linalg.norm(self.goal) < self.n_links / 10:
# break
# qpos[-2:] = self.goal
# qvel = self.init_qvel.copy()
# qvel[-2:] = 0
# self.set_state(qpos, qvel)
# self._steps = 0
#
# return self._get_obs()

# def reset_model(self):
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
# while True:
@@ -50,13 +50,10 @@ class EpisodicWrapper(gym.Env, ABC):
# rendering
self.render_mode = render_mode
self.render_kwargs = {}
# self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
self.time_steps = np.linspace(0, self.duration, self.traj_steps)
self.mp.set_mp_times(self.time_steps)
# action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
min_action_bounds, max_action_bounds = mp.get_param_bounds()
self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=np.float32)
self.mp_action_space = self.set_mp_action_space()

self.action_space = self.set_action_space()
self.active_obs = self.set_active_obs()
@@ -65,7 +62,7 @@ class EpisodicWrapper(gym.Env, ABC):
dtype=self.env.observation_space.dtype)

def get_trajectory(self, action: np.ndarray) -> Tuple:
# TODO: this follows the implementation of the mp_pytorch library which includes the paramters tau and delay at
# TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
# the beginning of the array.
ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
scaled_mp_params = action.copy()
@@ -84,6 +81,13 @@ class EpisodicWrapper(gym.Env, ABC):

return trajectory, velocity

def set_mp_action_space(self):
"""This function can be used to set up an individual space for the parameters of the mp."""
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=np.float32)
return mp_action_space

def set_action_space(self):
"""
This function can be used to modify the action space for considering actions which are not learned via motion
@@ -179,6 +183,7 @@ class EpisodicWrapper(gym.Env, ABC):
step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
# print('step/clipped action ratio: ', step_action/c_action)
obs, c_reward, done, info = self.env.step(c_action)
if self.verbose >= 2:
actions[t, :] = c_action
@@ -156,12 +156,11 @@ def make_mp_from_kwargs(
ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
if phase_kwargs.get('tau', None) is None:
phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
action_dim = mp_kwargs.pop('num_dof', None)
action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
phase_gen = get_phase_generator(**phase_kwargs)
basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
controller = get_controller(**controller_kwargs)
mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
return _env
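Illustration (not part of the diff): after this change the number of controlled joints is passed as 'action_dim' inside movement_primitives_kwargs instead of a separate 'num_dof' entry, matching the updated register() calls above; if it is omitted, the helper falls back to the flattened shape of the wrapped environment's action space.

movement_primitives_kwargs = {
    'movement_primitives_type': 'promp',
    'action_dim': 7,   # previously 'num_dof': 7
}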