From b04e7c1dcd813e1ad5238a38f54a05fe1969811e Mon Sep 17 00:00:00 2001
From: philipp
Date: Wed, 13 Jul 2022 13:28:39 +0200
Subject: [PATCH] removed dependency on mujoco-py

---
 alr_envs/envs/mujoco/ant_jump/ant_jump.py    | 15 +++++--
 alr_envs/envs/mujoco/ant_jump/mp_wrapper.py  |  4 +-
 .../assets/beerpong_wo_cup_big_table.xml     |  5 +++
 alr_envs/envs/mujoco/beerpong/beerpong.py    | 39 +++++++++---------
 alr_envs/envs/mujoco/beerpong/mp_wrapper.py  | 10 ++---
 .../half_cheetah_jump/half_cheetah_jump.py   | 10 +++--
 .../mujoco/half_cheetah_jump/mp_wrapper.py   |  4 +-
 .../envs/mujoco/hopper_jump/hopper_jump.py   | 40 +++++++++++++------
 .../mujoco/hopper_jump/hopper_jump_on_box.py | 10 ++---
 .../envs/mujoco/hopper_jump/mp_wrapper.py    |  4 +-
 .../envs/mujoco/hopper_throw/hopper_throw.py | 15 +++++--
 .../hopper_throw/hopper_throw_in_basket.py   | 21 ++++++----
 .../envs/mujoco/hopper_throw/mp_wrapper.py   |  4 +-
 alr_envs/envs/mujoco/reacher/mp_wrapper.py   | 10 ++---
 alr_envs/envs/mujoco/reacher/reacher.py      |  5 ++-
 .../envs/mujoco/walker_2d_jump/mp_wrapper.py |  4 +-
 .../mujoco/walker_2d_jump/walker_2d_jump.py  | 14 +++++--
 17 files changed, 134 insertions(+), 80 deletions(-)

diff --git a/alr_envs/envs/mujoco/ant_jump/ant_jump.py b/alr_envs/envs/mujoco/ant_jump/ant_jump.py
index 74a66a3..7c00225 100644
--- a/alr_envs/envs/mujoco/ant_jump/ant_jump.py
+++ b/alr_envs/envs/mujoco/ant_jump/ant_jump.py
@@ -2,7 +2,7 @@ from typing import Tuple, Union, Optional
 
 import numpy as np
 from gym.core import ObsType
-from gym.envs.mujoco.ant_v3 import AntEnv
+from gym.envs.mujoco.ant_v4 import AntEnv
 
 MAX_EPISODE_STEPS_ANTJUMP = 200
 
@@ -36,9 +36,15 @@ class AntJumpEnv(AntEnv):
         self.current_step = 0
         self.max_height = 0
         self.goal = 0
-        super().__init__(xml_file, ctrl_cost_weight, contact_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_z_range, contact_force_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         contact_cost_weight=contact_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_z_range=healthy_z_range,
+                         contact_force_range=contact_force_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
         self.current_step += 1
@@ -90,6 +96,7 @@ class AntJumpEnv(AntEnv):
 
     # reset_model had to be implemented in every env to make it deterministic
     def reset_model(self):
+        # Todo remove if not needed
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
 
diff --git a/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py b/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
index a481d6f..c1dfbde 100644
--- a/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
@@ -16,8 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.sim.data.qpos[7:15].copy()
+        return self.data.qpos[7:15].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qvel[6:14].copy()
+        return self.data.qvel[6:14].copy()
diff --git a/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
index 756e3d2..4497818 100644
--- a/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
+++ b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
@@ -185,4 +185,9 @@
+
+
+
+
+
diff --git a/alr_envs/envs/mujoco/beerpong/beerpong.py b/alr_envs/envs/mujoco/beerpong/beerpong.py
index b49c180..60d442a 100644
--- a/alr_envs/envs/mujoco/beerpong/beerpong.py
+++ b/alr_envs/envs/mujoco/beerpong/beerpong.py
@@ -1,7 +1,6 @@
 import os
 from typing import Optional
 
-import mujoco_py.builder
 import numpy as np
 from gym import utils
 from gym.envs.mujoco import MujocoEnv
@@ -50,9 +49,9 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         self.repeat_action = frame_skip
         # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
         self.model = None
-        self.site_id = lambda x: self.model.site_name2id(x)
-        self.body_id = lambda x: self.model.body_name2id(x)
-        self.geom_id = lambda x: self.model.geom_name2id(x)
+        self.geom_id = lambda x: self._mujoco_bindings.mj_name2id(self.model,
+                                                                  self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
+                                                                  x)
 
         # for reward calculation
         self.dists = []
@@ -65,7 +64,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         self.ball_in_cup = False
         self.dist_ground_cup = -1  # distance floor to cup if first floor contact
 
-        MujocoEnv.__init__(self, self.xml_path, frame_skip=1, mujoco_bindings="mujoco_py")
+        MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=1, mujoco_bindings="mujoco")
         utils.EzPickle.__init__(self)
 
     @property
@@ -100,13 +99,13 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
 
         # TODO: Ask Max why we need to set the state twice.
         self.set_state(start_pos, init_vel)
-        start_pos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
+        start_pos[7::] = self.data.site("init_ball_pos").xpos.copy()
         self.set_state(start_pos, init_vel)
         xy = self.np_random.uniform(self._cup_pos_min, self._cup_pos_max)
         xyz = np.zeros(3)
         xyz[:2] = xy
         xyz[-1] = 0.840
-        self.model.body_pos[self.body_id("cup_table")] = xyz
+        self.model.body("cup_table").pos[:] = xyz
         return self._get_obs()
 
     def step(self, a):
@@ -117,10 +116,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
             self.do_simulation(applied_action, self.frame_skip)
             # self.reward_function.check_contacts(self.sim)  # I assume this is not important?
             if self._steps < self.release_step:
-                self.data.qpos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
-                self.data.qvel[7::] = self.data.site_xvelp[self.site_id("init_ball_pos"), :].copy()
+                self.data.qpos[7::] = self.data.site('init_ball_pos').xpos.copy()
+                self.data.qvel[7::] = self.data.sensor('init_ball_vel').data.copy()
             crash = False
-        except mujoco_py.builder.MujocoException:
+        except Exception as e:
             crash = True
 
         ob = self._get_obs()
@@ -145,18 +144,18 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         return ob, reward, done, infos
 
     def _get_obs(self):
-        theta = self.data.qpos.flat[:7]
-        theta_dot = self.data.qvel.flat[:7]
-        ball_pos = self.data.get_body_xpos("ball").copy()
-        cup_goal_diff_final = ball_pos - self.data.get_site_xpos("cup_goal_final_table").copy()
-        cup_goal_diff_top = ball_pos - self.data.get_site_xpos("cup_goal_table").copy()
+        theta = self.data.qpos.flat[:7].copy()
+        theta_dot = self.data.qvel.flat[:7].copy()
+        ball_pos = self.data.qpos.flat[7:].copy()
+        cup_goal_diff_final = ball_pos - self.data.site("cup_goal_final_table").xpos.copy()
+        cup_goal_diff_top = ball_pos - self.data.site("cup_goal_table").xpos.copy()
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
             theta_dot,
             cup_goal_diff_final,
             cup_goal_diff_top,
-            self.model.body_pos[self.body_id("cup_table")][:2].copy(),
+            self.model.body("cup_table").pos[:2].copy(),
             # [self._steps], # Use TimeAwareObservation Wrapper instead ....
         ])
 
@@ -165,10 +164,10 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         return super(BeerPongEnv, self).dt * self.repeat_action
 
     def _get_reward(self, action):
-        goal_pos = self.data.get_site_xpos("cup_goal_table")
-        ball_pos = self.data.get_body_xpos("ball")
-        ball_vel = self.data.get_body_xvelp("ball")
-        goal_final_pos = self.data.get_site_xpos("cup_goal_final_table")
+        goal_pos = self.data.site("cup_goal_table").xpos
+        goal_final_pos = self.data.site("cup_goal_final_table").xpos
+        ball_pos = self.data.qpos.flat[7:].copy()
+        ball_vel = self.data.qvel.flat[7:].copy()
 
         self._check_contacts()
         self.dists.append(np.linalg.norm(goal_pos - ball_pos))
diff --git a/alr_envs/envs/mujoco/beerpong/mp_wrapper.py b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
index 5b53e77..22c50f7 100644
--- a/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
@@ -21,16 +21,16 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qpos[0:7].copy()
+        return self.data.qpos[0:7].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel[0:7].copy()
+        return self.data.qvel[0:7].copy()
 
     # TODO: Fix this
     def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
         if mp.learn_tau:
-            self.env.env.release_step = action[0] / self.env.dt  # Tau value
+            self.release_step = action[0] / self.dt  # Tau value
             return action, None
         else:
             return action, None
@@ -39,5 +39,5 @@
         xyz = np.zeros(3)
         xyz[:2] = context
         xyz[-1] = 0.840
-        self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
-        return self.get_observation_from_step(self.env.env._get_obs())
+        self.model.body_pos[self.cup_table_id] = xyz
+        return self.get_observation_from_step(self.get_obs())
diff --git a/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
index 7916d0c..3d948ac 100644
--- a/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
+++ b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py
@@ -2,7 +2,7 @@ import os
 from typing import Tuple, Union, Optional
 
 from gym.core import ObsType
-from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
+from gym.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
 import numpy as np
 
 MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100
@@ -27,8 +27,11 @@ class HalfCheetahJumpEnv(HalfCheetahEnv):
         self.goal = 0
         self.context = context
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
 
@@ -74,6 +77,7 @@ class HalfCheetahJumpEnv(HalfCheetahEnv):
 
     # overwrite reset_model to make it deterministic
     def reset_model(self):
+        # TODO remove if not needed!
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
 
diff --git a/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py
index 298bf53..9681b3a 100644
--- a/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py
@@ -15,8 +15,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.sim.data.qpos[3:9].copy()
+        return self.data.qpos[3:9].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qvel[3:9].copy()
+        return self.data.qvel[3:9].copy()
diff --git a/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py
index 74643f3..5a334a2 100644
--- a/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py
@@ -2,8 +2,7 @@ import copy
 import os
 
 import numpy as np
-from gym.envs.mujoco.hopper_v3 import HopperEnv
-
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 
 MAX_EPISODE_STEPS_HOPPERJUMP = 250
 
@@ -50,9 +49,16 @@ class HopperJumpEnv(HopperEnv):
         self.contact_dist = None
 
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_state_range=healthy_state_range,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
         # increase initial height
         self.init_qpos[1] = 1.5
@@ -67,7 +73,8 @@ class HopperJumpEnv(HopperEnv):
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
-        site_pos_after = self.data.get_site_xpos('foot_site')
+        # site_pos_after = self.data.get_site_xpos('foot_site')
+        site_pos_after = self.data.site('foot_site').xpos
         self.max_height = max(height_after, self.max_height)
 
         has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
@@ -111,7 +118,8 @@ class HopperJumpEnv(HopperEnv):
         return observation, reward, done, info
 
     def _get_obs(self):
-        goal_dist = self.data.get_site_xpos('foot_site') - self.goal
+        # goal_dist = self.data.get_site_xpos('foot_site') - self.goal
+        goal_dist = self.data.site('foot_site').xpos - self.goal
         return np.concatenate((super(HopperJumpEnv, self)._get_obs(), goal_dist.copy(), self.goal[:1]))
 
     def reset_model(self):
@@ -119,7 +127,8 @@
         # self.goal = self.np_random.uniform(0.3, 1.35, 1)[0]
         self.goal = np.concatenate([self.np_random.uniform(0.3, 1.35, 1), np.zeros(2, )])
-        self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = self.goal
+        # self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = self.goal
+        self.model.body('goal_site_body').pos[:] = np.copy(self.goal)
         self.max_height = 0
         self._steps = 0
 
@@ -153,8 +162,15 @@ class HopperJumpEnv(HopperEnv):
         return observation
 
     def _is_floor_foot_contact(self):
-        floor_geom_id = self.model.geom_name2id('floor')
-        foot_geom_id = self.model.geom_name2id('foot_geom')
+        # floor_geom_id = self.model.geom_name2id('floor')
+        # foot_geom_id = self.model.geom_name2id('foot_geom')
+        # TODO: do this properly over a sensor in the xml file, see dmc hopper
+        floor_geom_id = self._mujoco_bindings.mj_name2id(self.model,
+                                                         self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
+                                                         'floor')
+        foot_geom_id = self._mujoco_bindings.mj_name2id(self.model,
+                                                        self._mujoco_bindings.mjtObj.mjOBJ_GEOM,
+                                                        'foot_geom')
         for i in range(self.data.ncon):
             contact = self.data.contact[i]
             collision = contact.geom1 == floor_geom_id and contact.geom2 == foot_geom_id
@@ -163,7 +179,7 @@
                 return True
         return False
 
-
+# TODO is that needed? if so test it
 class HopperJumpStepEnv(HopperJumpEnv):
 
     def __init__(self,
@@ -193,7 +209,7 @@
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
-        site_pos_after = self.data.get_site_xpos('foot_site')
+        site_pos_after = self.data.site('foot_site').xpos.copy()
         self.max_height = max(height_after, self.max_height)
 
         ctrl_cost = self.control_cost(action)
diff --git a/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py
index 845edaa..e184723 100644
--- a/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py
+++ b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py
@@ -1,4 +1,4 @@
-from gym.envs.mujoco.hopper_v3 import HopperEnv
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 import numpy as np
 import os
 
@@ -46,7 +46,7 @@
         foot_pos = self.get_body_com("foot")
         self.max_height = max(height_after, self.max_height)
 
-        vx, vz, vangle = self.sim.data.qvel[0:3]
+        vx, vz, vangle = self.data.qvel[0:3]
 
         s = self.state_vector()
         fell_over = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height_after > .7))
@@ -67,7 +67,8 @@
         is_on_box_y = True  # is y always true because he can only move in x and z direction?
         is_on_box_z = box_height - 0.02 <= foot_pos[2] <= box_height + 0.02
         is_on_box = is_on_box_x and is_on_box_y and is_on_box_z
-        if is_on_box: self.hopper_on_box = True
+        if is_on_box:
+            self.hopper_on_box = True
 
         ctrl_cost = self.control_cost(action)
 
@@ -133,9 +134,8 @@
         self.current_step = 0
         self.hopper_on_box = False
         if self.context:
-            box_id = self.sim.model.body_name2id("box")
             self.box_x = self.np_random.uniform(1, 3, 1)
-            self.sim.model.body_pos[box_id] = [self.box_x, 0, 0]
+            self.model.body("box").pos = [self.box_x[0], 0, 0]
         return super().reset()
 
     # overwrite reset_model to make it deterministic
diff --git a/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py
index 8e91bea..0f5e8a8 100644
--- a/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py
@@ -20,8 +20,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.sim.data.qpos[3:6].copy()
+        return self.data.qpos[3:6].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.sim.data.qvel[3:6].copy()
+        return self.data.qvel[3:6].copy()
diff --git a/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py
index c2bda19..3b2ce46 100644
--- a/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py
+++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py
@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 
-from gym.envs.mujoco.hopper_v3 import HopperEnv
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 import numpy as np
 
 MAX_EPISODE_STEPS_HOPPERTHROW = 250
@@ -36,9 +36,16 @@ class HopperThrowEnv(HopperEnv):
         self.max_episode_steps = max_episode_steps
         self.context = context
         self.goal = 0
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_state_range=healthy_state_range,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
         self.current_step += 1
diff --git a/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py
index b1fa3ef..fb46809 100644
--- a/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py
+++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py
@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 
-from gym.envs.mujoco.hopper_v3 import HopperEnv
+from gym.envs.mujoco.hopper_v4 import HopperEnv
 import numpy as np
 
 
@@ -44,9 +44,16 @@ class HopperThrowInBasketEnv(HopperEnv):
         self.penalty = penalty
         self.basket_x = 5
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_state_range=healthy_state_range,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
 
@@ -109,13 +116,13 @@ class HopperThrowInBasketEnv(HopperEnv):
         self.current_step = 0
         self.ball_in_basket = False
         if self.context:
-            basket_id = self.sim.model.body_name2id("basket_ground")
-            self.basket_x = self.np_random.uniform(3, 7, 1)
-            self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
+            self.basket_x = self.np_random.uniform(low=3, high=7, size=1)
+            self.model.body("basket_ground").pos[:] = [self.basket_x[0], 0, 0]
         return super().reset()
 
     # overwrite reset_model to make it deterministic
     def reset_model(self):
+        # Todo remove if not needed!
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
 
diff --git a/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
index 78c2f51..247668f 100644
--- a/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
@@ -16,8 +16,8 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.data.qpos[3:6].copy()
+        return self.data.qpos[3:6].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel[3:6].copy()
+        return self.data.qvel[3:6].copy()
diff --git a/alr_envs/envs/mujoco/reacher/mp_wrapper.py b/alr_envs/envs/mujoco/reacher/mp_wrapper.py
index 2ef7c78..1c1e2f1 100644
--- a/alr_envs/envs/mujoco/reacher/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/reacher/mp_wrapper.py
@@ -10,18 +10,18 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def context_mask(self):
-        return np.concatenate([[False] * self.env.n_links,  # cos
-                               [False] * self.env.n_links,  # sin
+        return np.concatenate([[False] * self.n_links,  # cos
+                               [False] * self.n_links,  # sin
                                [True] * 2,  # goal position
-                               [False] * self.env.n_links,  # angular velocity
+                               [False] * self.n_links,  # angular velocity
                                [False] * 3,  # goal distance
                                # [False],  # step
                                ])
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qpos.flat[:self.env.n_links]
+        return self.data.qpos.flat[:self.n_links]
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel.flat[:self.env.n_links]
+        return self.data.qvel.flat[:self.n_links]
diff --git a/alr_envs/envs/mujoco/reacher/reacher.py b/alr_envs/envs/mujoco/reacher/reacher.py
index 3ebc6e8..3d52be7 100644
--- a/alr_envs/envs/mujoco/reacher/reacher.py
+++ b/alr_envs/envs/mujoco/reacher/reacher.py
@@ -23,7 +23,10 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
 
         file_name = f'reacher_{n_links}links.xml'
 
-        MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
+        MujocoEnv.__init__(self,
+                           model_path=os.path.join(os.path.dirname(__file__), "assets", file_name),
+                           frame_skip=2,
+                           mujoco_bindings="mujoco")
 
     def step(self, action):
         self._steps += 1
diff --git a/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
index 63432a7..f91d494 100644
--- a/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
@@ -16,8 +16,8 @@
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.data.qpos[3:9].copy()
+        return self.data.qpos[3:9].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel[3:9].copy()
+        return self.data.qvel[3:9].copy()
diff --git a/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
index 76cb688..dadf8fd 100644
--- a/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
+++ b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 
-from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
+from gym.envs.mujoco.walker2d_v4 import Walker2dEnv
 import numpy as np
 
 MAX_EPISODE_STEPS_WALKERJUMP = 300
@@ -36,9 +36,15 @@ class Walker2dJumpEnv(Walker2dEnv):
         self._penalty = penalty
         self.goal = 0
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
-        super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
-                         healthy_z_range, healthy_angle_range, reset_noise_scale,
-                         exclude_current_positions_from_observation)
+        super().__init__(xml_file=xml_file,
+                         forward_reward_weight=forward_reward_weight,
+                         ctrl_cost_weight=ctrl_cost_weight,
+                         healthy_reward=healthy_reward,
+                         terminate_when_unhealthy=terminate_when_unhealthy,
+                         healthy_z_range=healthy_z_range,
+                         healthy_angle_range=healthy_angle_range,
+                         reset_noise_scale=reset_noise_scale,
+                         exclude_current_positions_from_observation=exclude_current_positions_from_observation)
 
     def step(self, action):
         self.current_step += 1
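
Migration notes (illustrative only, not part of the diff above): the patch swaps the mujoco-py accessors for the official mujoco bindings that gym's MujocoEnv loads when mujoco_bindings="mujoco" is passed. The sketch below shows the three recurring patterns on a minimal stand-in model; the XML string and the names "ball", "ball_geom", "ball_site" and "floor" are placeholders for illustration and are not taken from the alr_envs assets.

    # Standalone sketch of the binding changes applied above (assumed toy model,
    # placeholder names; not one of the alr_envs assets).
    import mujoco
    import numpy as np

    XML = """
    <mujoco>
      <worldbody>
        <geom name="floor" type="plane" size="1 1 0.1"/>
        <body name="ball" pos="0 0 0.5">
          <freejoint/>
          <geom name="ball_geom" type="sphere" size="0.05"/>
          <site name="ball_site"/>
        </body>
      </worldbody>
    </mujoco>
    """

    model = mujoco.MjModel.from_xml_string(XML)
    data = mujoco.MjData(model)
    mujoco.mj_forward(model, data)

    # mujoco-py: sim.data.get_site_xpos("ball_site") / sim.data.get_body_xpos("ball")
    # mujoco:    named views on MjData
    site_pos = data.site("ball_site").xpos.copy()
    body_pos = data.body("ball").xpos.copy()

    # mujoco-py: sim.model.body_pos[sim.model.body_name2id("ball")] = ...
    # mujoco:    named views on MjModel (model fields survive mj_forward)
    model.body("ball").pos[:] = [0.1, 0.0, 0.5]

    # mujoco-py: sim.model.geom_name2id("floor")
    # mujoco:    mj_name2id with an explicit object type
    floor_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_GEOM, "floor")
    ball_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_GEOM, "ball_geom")

    def floor_ball_contact(data: mujoco.MjData) -> bool:
        # contact scan in the style of HopperJumpEnv._is_floor_foot_contact
        for i in range(data.ncon):
            con = data.contact[i]
            if {con.geom1, con.geom2} == {floor_id, ball_id}:
                return True
        return False

    print(site_pos, body_pos, floor_id, ball_id, floor_ball_contact(data))

Relocating bodies (cup_table, box, basket_ground, goal_site_body) goes through model.body(name).pos rather than fields on data, because derived quantities such as xpos are recomputed from the model and qpos on every mj_forward / set_state call.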