diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index a40c81f..8c0fa23 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -289,3 +289,25 @@ register(
         'goal_switching_step': 99
     }
 )
+
+# Air Hockey environments
+for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]:
+    register(
+        id=f'fancy/AirHockey-{env_mode}-v0',
+        entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
+        max_episode_steps=500,
+        add_mp_types=[],
+        kwargs={
+            'env_mode': env_mode
+        }
+    )
+
+register(
+    id=f'fancy/AirHockey-tournament-v0',
+    entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
+    max_episode_steps=15000,
+    add_mp_types=[],
+    kwargs={
+        'env_mode': 'tournament'
+    }
+)
diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
index 4e5caaf..70b9bfd 100644
--- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
+++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
@@ -178,9 +178,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
 
         return False
 
-    def render(self, mode=None):
-        if mode==None:
-            mode = self.render_mode
+    def render(self):
         if self.fig is None:
             # Create base figure once on the beginning. Afterwards only update
             plt.ion()
@@ -199,7 +197,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
         self.fig.gca().set_title(
             f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}")
 
-        if mode == "human":
+        if self.render_mode == "human":
 
             # arm
             self.line.set_data(self._joints[:, 0], self._joints[:, 1])
@@ -207,7 +205,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
             self.fig.canvas.draw()
             self.fig.canvas.flush_events()
 
-        elif mode == "partial":
+        elif self.render_mode == "partial":
             if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
                 # Arm
                 plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
index 40a8153..9264b39 100644
--- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
+++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
@@ -98,9 +98,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
     def _check_collisions(self) -> bool:
         return self._check_self_collision()
 
-    def render(self, mode=None):  # pragma: no cover
-        if mode==None:
-            mode = self.render_mode
+    def render(self):  # pragma: no cover
         if self.fig is None:
             # Create base figure once on the beginning. Afterwards only update
             plt.ion()
diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
index 932f50a..4003319 100644
--- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -123,9 +123,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
     def _check_collisions(self) -> bool:
         return self._check_self_collision()
 
-    def render(self, mode=None):
-        if mode==None:
-            mode = self.render_mode
+    def render(self):
         goal_pos = self._goal.T
         via_pos = self._via_point.T
 
@@ -148,7 +146,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
 
         self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
 
-        if mode == "human":
+        if self.render_mode == "human":
             # goal
             if self._steps == 1:
                 self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
@@ -160,7 +158,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
             self.fig.canvas.draw()
             self.fig.canvas.flush_events()
 
-        elif mode == "partial":
+        elif self.render_mode == "partial":
             if self._steps == 1:
                 # fig, ax = plt.subplots()
                 # Add the patch to the Axes
@@ -180,7 +178,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
                 plt.ylim([-1.1, lim])
                 plt.pause(0.01)
 
-        elif mode == "final":
+        elif self.render_mode == "final":
             if self._steps == 199 or self._is_collided:
                 # fig, ax = plt.subplots()
 
diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py
index ff51711..4afdcce 100644
--- a/fancy_gym/envs/mujoco/__init__.py
+++ b/fancy_gym/envs/mujoco/__init__.py
@@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
 from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
 from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching
+
+try:
+    from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv
+except ModuleNotFoundError:
+    print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)")
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/LICENSE b/fancy_gym/envs/mujoco/air_hockey/LICENSE
new file mode 100644
index 0000000..27c6bbb
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/LICENSE
@@ -0,0 +1,26 @@
+MIT License
+
+Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+---
+
+This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge).
+The changes are mostly focused on adapting the provided environments to fancy_gym.
diff --git a/fancy_gym/envs/mujoco/air_hockey/__init__.py b/fancy_gym/envs/mujoco/air_hockey/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
new file mode 100644
index 0000000..0e8e31e
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@@ -0,0 +1,180 @@
+from copy import deepcopy
+import numpy as np
+from gymnasium import spaces
+
+import fancy_gym.envs.mujoco.air_hockey.constraints as constraints
+from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position
+from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
+from mushroom_rl.core import Environment
+
+class AirHockeyEnv(Environment):
+    metadata = {"render_modes": ["human"], "render_fps": 50}
+
+    def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
+        """
+        Environment Constructor
+
+        Args:
+            env [string]:
+                The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament].
+            interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
+                    polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
+                    Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
+                    of position, velocity and acceleration of the shape (20, 3, n_joints)
+        """
+
+        env_dict = {
+            "tournament": position.IiwaPositionTournament,
+
+            "7dof-hit": position.IiwaPositionHit,
+            "7dof-defend": position.IiwaPositionDefend,
+
+            "3dof-hit": position.PlanarPositionHit,
+            "3dof-defend": position.PlanarPositionDefend
+        }
+
+        if env_mode not in env_dict:
+            raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!")
+
+        if env_mode == "tournament" and type(interpolation_order) != tuple:
+            interpolation_order = (interpolation_order, interpolation_order)
+
+        self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
+        self.env_name = env_mode
+        self.env_info = self.base_env.env_info
+        single_robot_obs_size = len(self.base_env.info.observation_space.low)
+        if env_mode == "tournament":
+            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
+        else:
+            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
+        robot_info = self.env_info["robot"]
+
+        if env_mode != "tournament":
+            if interpolation_order in [1, 2]:
+                self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+            if interpolation_order in [3, 4, -1]:
+                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+            if interpolation_order in [5]:
+                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+        else:
+            acts = [None, None]
+            for i in range(2):
+                if interpolation_order[i] in [1, 2]:
+                    acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+                if interpolation_order[i] in [3, 4, -1]:
+                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+                if interpolation_order[i] in [5]:
+                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+            self.action_space = spaces.Tuple((acts[0], acts[1]))
+
+        constraint_list = constraints.ConstraintList()
+        constraint_list.add(constraints.JointPositionConstraint(self.env_info))
+        constraint_list.add(constraints.JointVelocityConstraint(self.env_info))
+        constraint_list.add(constraints.EndEffectorConstraint(self.env_info))
+        if "7dof" in self.env_name or self.env_name == "tournament":
+            constraint_list.add(constraints.LinkConstraint(self.env_info))
+
+        self.env_info['constraints'] = constraint_list
+        self.env_info['env_name'] = self.env_name
+
+        self.render_mode = render_mode
+        self.render_human_active = False
+
+        super().__init__(self.base_env.info)
+
+    def step(self, action):
+        obs, reward, done, info = self.base_env.step(action)
+
+        if "tournament" in self.env_name:
+            info["constraints_value"] = list()
+            info["jerk"] = list()
+            for i in range(2):
+                obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)]
+                info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun(
+                    obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']])))
+                info["jerk"].append(
+                    self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][
+                        'n_joints']])
+
+            info["score"] = self.base_env.score
+            info["faults"] = self.base_env.faults
+
+        else:
+            info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']],
+                                                                                  obs[self.env_info['joint_vel_ids']]))
+            info["jerk"] = self.base_env.jerk
+            info["success"] = self.check_success(obs)
+
+        if self.env_info['env_name'] == "tournament":
+            obs = np.array(np.split(obs, 2))
+        
+        if self.render_human_active:
+            self.base_env.render()
+
+        return obs, reward, done, False, info
+
+    def render(self):
+        self.render_human_active = True
+
+    def reset(self, seed=None, options={}):
+        self.base_env.seed(seed)
+        obs = self.base_env.reset()
+        if self.env_info['env_name'] == "tournament":
+            obs = np.array(np.split(obs, 2))
+        return obs, {}
+
+    def check_success(self, obs):
+        puck_pos, puck_vel = self.base_env.get_puck(obs)
+
+        puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos)
+        success = 0
+
+        if "hit" in self.env_name:
+            if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \
+                    np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0:
+                success = 1
+
+        elif "defend" in self.env_name:
+            if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1:
+                success = 1
+
+        elif "prepare" in self.env_name:
+            if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1:
+                success = 1
+        return success
+
+    @property
+    def unwrapped(self):
+        return self
+    
+    def close(self):
+        self.base_env.stop()
+
+
+if __name__ == "__main__":
+    env = AirHockeyEnv(env_mode="7dof-hit")
+    env.reset()
+
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    while True:
+        action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3
+        observation, reward, done, info = env.step(action)
+        env.render()
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
new file mode 100644
index 0000000..4486dce
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
@@ -0,0 +1 @@
+from .constraints import *
diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
new file mode 100644
index 0000000..399b35b
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
@@ -0,0 +1,212 @@
+import copy
+
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian
+
+
+class Constraint:
+    def __init__(self, env_info, output_dim, **kwargs):
+        """
+        Constructor
+
+        Args
+        ----
+        env_info: dict
+            A dictionary contains information about the environment;
+        output_dim: int
+            The output dimension of the constraints.
+        **kwargs: dict
+            A dictionary contains agent related information.
+        """
+        self._env_info = env_info
+        self._name = None
+
+        self.output_dim = output_dim
+
+        self._fun_value = np.zeros(self.output_dim)
+        self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"]))
+        self._q_prev = None
+        self._dq_prev = None
+
+    @property
+    def name(self):
+        """
+        The name of the constraints
+
+        """
+        return self._name
+
+    def fun(self, q, dq):
+        """
+        The function of the constraint.
+
+        Args
+        ----
+        q: numpy.ndarray, (num_joints,)
+            The joint position of the robot
+        dq: numpy.ndarray, (num_joints,)
+            The joint velocity of the robot
+
+        Returns
+        -------
+        numpy.ndarray, (out_dim,):
+            The value computed by the constraints function.
+        """
+        if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
+            return self._fun_value
+        else:
+            self._jacobian(q, dq)
+            return self._fun(q, dq)
+
+    def jacobian(self, q, dq):
+        """
+        Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity.
+
+        Args
+        ----
+        q: ndarray, (num_joints,)
+            The joint position of the robot
+        dq: ndarray, (num_joints,)
+            The joint velocity of the robot
+
+        Returns
+        -------
+        numpy.ndarray, (dim_output, num_joints * 2):
+            The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot]
+
+        """
+        if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
+            return self._fun_value
+        else:
+            self._fun(q, dq)
+            return self._jacobian(q, dq)
+
+    def _fun(self, q, dq):
+        raise NotImplementedError
+
+    def _jacobian(self, q, dq):
+        raise NotImplementedError
+
+
+class ConstraintList:
+    def __init__(self):
+        self.constraints = dict()
+
+    def keys(self):
+        return self.constraints.keys()
+
+    def get(self, key):
+        return self.constraints.get(key)
+
+    def add(self, c):
+        self.constraints.update({c.name: c})
+
+    def delete(self, name):
+        del self.constraints[name]
+
+    def fun(self, q, dq):
+        return {key: self.constraints[key].fun(q, dq) for key in self.constraints}
+
+    def jacobian(self, q, dq):
+        return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints}
+
+
+class JointPositionConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
+        self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95
+        self._name = 'joint_pos_constr'
+
+    def _fun(self, q, dq):
+        self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1]
+        self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye(
+            self._env_info['robot']['n_joints'])
+        self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye(
+            self._env_info['robot']['n_joints'])
+        return self._jac_value
+
+
+class JointVelocityConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
+        self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95
+        self._name = 'joint_vel_constr'
+
+    def _fun(self, q, dq):
+        self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1]
+        self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye(
+            self._env_info['robot']['n_joints'])
+        self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye(
+            self._env_info['robot']['n_joints'])
+        return self._jac_value
+
+
+class EndEffectorConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        # 1 Dimension on x direction: x > x_lb
+        # 2 Dimension on y direction: y > y_lb, y < y_ub
+        # 2 Dimension on z direction: z > z_lb, z < z_ub
+        super().__init__(env_info, output_dim=5, **kwargs)
+        self._name = "ee_constr"
+        tolerance = 0.02
+
+        self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
+        self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
+
+        self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - (
+                self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius'])
+        self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
+        self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
+        self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance
+        self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance
+
+    def _fun(self, q, dq):
+        ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q)
+        self._fun_value = np.array([-ee_pos[0] + self.x_lb,
+                                    -ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub,
+                                    -ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub])
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        jac = jacobian(self.robot_model, self.robot_data, q)
+        dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]])
+        self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']]
+        return self._jac_value
+
+
+class LinkConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        # 1 Dimension: wrist_z > minimum_height
+        # 2 Dimension: elbow_z > minimum_height
+        super().__init__(env_info, output_dim=2, **kwargs)
+        self._name = "link_constr"
+
+        self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
+        self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
+
+        self.z_lb = 0.25
+
+    def _fun(self, q, dq):
+        wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7")
+        elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4")
+        self._fun_value = np.array([-wrist_pos[2] + self.z_lb,
+                                    -elbow_pos[2] + self.z_lb])
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7")
+        jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4")
+        self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([
+            -jac_wrist[2, :self._env_info['robot']['n_joints']],
+            -jac_elbow[2, :self._env_info['robot']['n_joints']],
+        ])
+        return self._jac_value
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
new file mode 100644
index 0000000..b2dd80e
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl
new file mode 100644
index 0000000..28ad13f
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
new file mode 100644
index 0000000..e057ea1
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
new file mode 100644
index 0000000..4eaa051
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
new file mode 100644
index 0000000..2eb3709
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
new file mode 100644
index 0000000..2892530
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
new file mode 100644
index 0000000..dda08fe
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
new file mode 100644
index 0000000..6095451
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
new file mode 100644
index 0000000..6224ddb
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
new file mode 100644
index 0000000..359cb48
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
new file mode 100644
index 0000000..12f4b3d
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
@@ -0,0 +1,58 @@
+
+    
+
+    
+
+    
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+
+        
+
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+
+        
+    
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
new file mode 100644
index 0000000..044aae5
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
@@ -0,0 +1,108 @@
+
+    
+
+    
+        
+            
+        
+        
+            
+        
+    
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+    
+        
+            
+            
+                
+                
+                
+                
+                    
+                    
+                    
+                    
+                        
+                        
+                        
+                        
+                            
+                            
+                            
+                            
+                                
+                                
+                                
+                                
+                                    
+                                    
+                                    
+                                    
+                                        
+                                        
+                                        
+                                        
+                                        
+                                            
+                                            
+                                                
+                                                
+                                                
+                                                
+                                                
+                                            
+                                        
+                                    
+                                
+                            
+                        
+                    
+                
+            
+        
+    
+
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
new file mode 100644
index 0000000..450ef8f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
@@ -0,0 +1,89 @@
+
+    
+
+    
+        
+            
+            
+                
+                
+                
+                
+                    
+                    
+                    
+                    
+                        
+                        
+                        
+                        
+                            
+                            
+                            
+                            
+                                
+                                
+                                
+                                
+                                    
+                                    
+                                    
+                                    
+                                        
+                                        
+                                        
+                                        
+                                        
+                                            
+                                            
+                                                
+                                                
+                                                
+                                                
+                                                
+                                            
+                                        
+                                    
+                                
+                            
+                        
+                    
+                
+            
+        
+    
+
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
new file mode 100644
index 0000000..6d9ca5f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
@@ -0,0 +1,94 @@
+
+    
+
+    
+        
+            
+        
+        
+            
+        
+    
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+    
+        
+            
+            
+                
+                
+                
+                
+                    
+                    
+                    
+                    
+                        
+                        
+                        
+                        
+                            
+                            
+                            
+                            
+                                
+                                
+                                
+                                
+                                    
+                                    
+                                    
+                                    
+                                        
+                                        
+                                        
+                                        
+                                        
+                                            
+                                        
+                                    
+                                
+                            
+                        
+                    
+                
+            
+        
+    
+
+
+    
+        
+        
+        
+        
+        
+        
+        
+    
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl
new file mode 100644
index 0000000..e057ea1
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl
new file mode 100644
index 0000000..2deabe0
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl
new file mode 100644
index 0000000..3187389
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl
new file mode 100644
index 0000000..6b172ff
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl
new file mode 100644
index 0000000..f1d761f
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl
new file mode 100644
index 0000000..f2d1ec7
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl
new file mode 100644
index 0000000..df26d8f
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl
new file mode 100644
index 0000000..5e0eba9
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl
new file mode 100644
index 0000000..ea2d9d2
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl
new file mode 100644
index 0000000..062ceb7
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl
new file mode 100644
index 0000000..12af33a
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl
new file mode 100644
index 0000000..a9f30b7
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl
new file mode 100644
index 0000000..b2dd80e
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl
new file mode 100644
index 0000000..7595fb0
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl
new file mode 100644
index 0000000..c03f7b2
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl
new file mode 100644
index 0000000..e057ea1
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl
new file mode 100644
index 0000000..4eaa051
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl
new file mode 100644
index 0000000..2eb3709
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl
new file mode 100644
index 0000000..2892530
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl
new file mode 100644
index 0000000..dda08fe
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl
new file mode 100644
index 0000000..6095451
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl
new file mode 100644
index 0000000..6224ddb
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl
new file mode 100644
index 0000000..359cb48
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl
new file mode 100644
index 0000000..3d1760b
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
new file mode 100644
index 0000000..9eb57a8
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
@@ -0,0 +1,32 @@
+
+
+    
+    
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+
+        
+    
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
new file mode 100644
index 0000000..73fdfb4
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
@@ -0,0 +1,37 @@
+
+
+    
+
+    
+
+    
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
new file mode 100644
index 0000000..a8d057c
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
@@ -0,0 +1,61 @@
+
+    
+    
+        
+        
+    
+
+    
+        
+            
+        
+
+        
+            
+        
+    
+
+    
+        
+            
+            
+            
+                
+                
+                
+                
+                    
+                    
+                    
+                    
+                    
+                        
+                        
+                        
+                        
+                        
+                            
+                            
+                            
+                            
+                                
+                                
+                            
+                        
+                    
+                
+            
+        
+    
+
+    
+        
+        
+        
+    
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
new file mode 100644
index 0000000..c14d75a
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
@@ -0,0 +1,46 @@
+
+    
+
+    
+        
+            
+            
+            
+                
+                
+                
+                
+                    
+                    
+                    
+                    
+                    
+                        
+                        
+                        
+                        
+                        
+                            
+                            
+                            
+                            
+                                
+                                
+                            
+                        
+                    
+                
+            
+        
+    
+
+    
+        
+        
+        
+    
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
new file mode 100644
index 0000000..1f63668
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
@@ -0,0 +1,24 @@
+
+
+
+    
+
+    
+
+
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/table.xml b/fancy_gym/envs/mujoco/air_hockey/data/table.xml
new file mode 100644
index 0000000..578af15
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/table.xml
@@ -0,0 +1,89 @@
+
+
+    
+
+    
+        
+        
+        
+        
+        
+
+        
+        
+        
+    
+
+    
+        
+        
+            
+        
+    
+
+    
+        
+            
+                
+            
+            
+
+
+
+                
+                
+                
+                
+                
+                
+
+                
+
+                
+                
+
+
+
+
+                
+                
+                
+                
+                
+                
+                
+            
+        
+
+        
+            
+            
+            
+            
+                
+                
+                
+            
+        
+
+        
+        
+    
+
+    
+        
+    
+
+    
+        
+        
+    
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
new file mode 100644
index 0000000..91e38e1
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
@@ -0,0 +1,270 @@
+from collections import deque
+
+import mujoco
+import numpy as np
+import scipy
+
+from fancy_gym.envs.mujoco.air_hockey import seven_dof
+from fancy_gym.envs.mujoco.air_hockey import three_dof
+
+
+class PositionControl:
+    def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs):
+        """
+            Mixin that adds position controller to mujoco environments.
+
+            Args:
+                p_gain (float): Proportional controller gain
+                d_gain (float): Differential controller gain
+                i_gain (float): Integral controller gain
+                interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
+                    polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
+                    Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
+                    of position, velocity and acceleration of the shape (20, 3, n_joints)
+                    In the case of 2 agents it is a tuple, which describes the interpolation order for each agent
+                debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the
+                    entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk.
+            """
+
+        self.debug = debug
+
+        super(PositionControl, self).__init__(*args, **kwargs)
+
+        self.robot_model = self.env_info['robot']['robot_model']
+        self.robot_data = self.env_info['robot']['robot_data']
+
+        self.p_gain = np.array(p_gain * self.n_agents)
+        self.d_gain = np.array(d_gain * self.n_agents)
+        self.i_gain = np.array(i_gain * self.n_agents)
+
+        self.prev_pos = np.zeros(len(self.actuator_joint_ids))
+        self.prev_vel = np.zeros(len(self.actuator_joint_ids))
+        self.prev_acc = np.zeros(len(self.actuator_joint_ids))
+        self.i_error = np.zeros(len(self.actuator_joint_ids))
+        self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids))
+
+        self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,)
+
+        self._num_env_joints = len(self.actuator_joint_ids)
+        self.n_robot_joints = self.env_info['robot']["n_joints"]
+
+        self.action_shape = [None] * self.n_agents
+
+        for i in range(self.n_agents):
+            if self.interp_order[i] is None:
+                self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints)
+            elif self.interp_order[i] in [1, 2]:
+                self.action_shape[i] = (self.n_robot_joints,)
+            elif self.interp_order[i] in [3, 4, -1]:
+                self.action_shape[i] = (2, self.n_robot_joints)
+            elif self.interp_order[i] == 5:
+                self.action_shape[i] = (3, self.n_robot_joints)
+
+        self.traj = None
+
+        self.jerk = np.zeros(self._num_env_joints)
+
+        if self.debug:
+            self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
+
+    def _enforce_safety_limits(self, desired_pos, desired_vel):
+        # ROS safe controller
+        pos = self.prev_controller_cmd_pos
+        k = 20
+
+        joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents))
+        joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents))
+
+        min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1])
+
+        max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1])
+
+        clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel)
+
+        min_pos = pos + min_vel * self._timestep
+        max_pos = pos + max_vel * self._timestep
+
+        clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos)
+        self.prev_controller_cmd_pos = clipped_pos.copy()
+
+        return clipped_pos, clipped_vel
+
+    def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel):
+        clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel)
+
+        error = (clipped_pos - current_pos)
+
+        self.i_error += self.i_gain * error * self._timestep
+        torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error
+
+        # Acceleration FeedForward
+        tau_ff = np.zeros(self.robot_model.nv)
+        for i in range(self.n_agents):
+            robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i
+            self.robot_data.qpos = current_pos[robot_joint_ids]
+            self.robot_data.qvel = current_vel[robot_joint_ids]
+            acc_ff = desired_acc[robot_joint_ids]
+            mujoco.mj_forward(self.robot_model, self.robot_data)
+
+            mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff)
+            torque[robot_joint_ids] += tau_ff
+
+            # Gravity Compensation and Coriolis and Centrifugal force
+            torque[robot_joint_ids] += self.robot_data.qfrc_bias
+
+            torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids],
+                                                            self.robot_model.actuator_ctrlrange[:, 0]),
+                                                 self.robot_model.actuator_ctrlrange[:, 1])
+
+        if self.debug:
+            self.controller_record.append(
+                np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk]))
+
+        return torque
+
+    def _interpolate_trajectory(self, interp_order, action, i=0):
+        tf = self.dt
+        prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+        prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+        prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+        if interp_order == 1 and action.ndim == 1:
+            coef = np.array([[1, 0], [1, tf]])
+            results = np.vstack([prev_pos, action])
+        elif interp_order == 2 and action.ndim == 1:
+            coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]])
+            if np.linalg.norm(action - prev_pos) < 1e-3:
+                prev_vel = np.zeros_like(prev_vel)
+            results = np.vstack([prev_pos, action, prev_vel])
+        elif interp_order == 3 and action.shape[0] == 2:
+            coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]])
+            results = np.vstack([prev_pos, action[0], prev_vel, action[1]])
+        elif interp_order == 4 and action.shape[0] == 2:
+            coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4],
+                             [0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3],
+                             [0, 0, 2, 0, 0]])
+            results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc])
+        elif interp_order == 5 and action.shape[0] == 3:
+            coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5],
+                             [0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4],
+                             [0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]])
+            results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]])
+        elif interp_order == -1:
+            # Interpolate position and velocity linearly
+            pass
+        else:
+            raise ValueError("Undefined interpolator order or the action dimension does not match!")
+
+        if interp_order > 0:
+            A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints)
+            y = results.reshape(-2, order='F')
+            weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1)
+            weights_d = np.polynomial.polynomial.polyder(weights, axis=1)
+            weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
+        elif interp_order == -1:
+            weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T
+            weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T
+            weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
+
+        if interp_order in [3, 4, 5]:
+            self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep
+        else:
+            self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf
+
+        self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T)
+        self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T)
+        self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T)
+
+        for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps):
+            q = np.polynomial.polynomial.polyval(t, weights.T)
+            qd = np.polynomial.polynomial.polyval(t, weights_d.T)
+            qdd = np.polynomial.polynomial.polyval(t, weights_dd.T)
+            yield q, qd, qdd
+
+    def reset(self, obs=None):
+        obs = super(PositionControl, self).reset(obs)
+        self.prev_pos = self._data.qpos[self.actuator_joint_ids]
+        self.prev_vel = self._data.qvel[self.actuator_joint_ids]
+        self.prev_acc = np.zeros(len(self.actuator_joint_ids))
+        self.i_error = np.zeros(len(self.actuator_joint_ids))
+        self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids]
+
+        if self.debug:
+            self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
+        return obs
+
+    def _step_init(self, obs, action):
+        super(PositionControl, self)._step_init(obs, action)
+
+        if self.n_agents == 1:
+            self.traj = self._create_traj(self.interp_order[0], action)
+        else:
+            def _traj():
+                traj_1 = self._create_traj(self.interp_order[0], action[0], 0)
+                traj_2 = self._create_traj(self.interp_order[1], action[1], 1)
+
+                for a1, a2 in zip(traj_1, traj_2):
+                    yield np.hstack([a1, a2])
+
+            self.traj = _traj()
+
+    def _create_traj(self, interp_order, action, i=0):
+        if interp_order is None:
+            return iter(action)
+        return self._interpolate_trajectory(interp_order, action, i)
+
+    def _compute_action(self, obs, action):
+        cur_pos, cur_vel = self.get_joints(obs)
+
+        desired_pos, desired_vel, desired_acc = next(self.traj)
+
+        return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel)
+
+    def _preprocess_action(self, action):
+        action = super(PositionControl, self)._preprocess_action(action)
+
+        if self.n_agents == 1:
+            assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \
+                                                      f" {action.shape}"
+        else:
+            for i in range(self.n_agents):
+                assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \
+                                                          f" {action[i].shape}"
+
+        return action
+
+
+class PositionControlIIWA(PositionControl):
+    def __init__(self, *args, **kwargs):
+        p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.]
+        d_gain = [60, 80, 60, 30, 10, 1, 0.5]
+        i_gain = [0, 0, 0, 0, 0, 0, 0]
+
+        super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
+
+
+class PositionControlPlanar(PositionControl):
+    def __init__(self, *args, **kwargs):
+        p_gain = [960, 480, 240]
+        d_gain = [60, 20, 4]
+        i_gain = [0, 0, 0]
+        super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
+
+
+class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit):
+    pass
+
+
+class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
+    pass
+
+
+class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
+    pass
+
+
+class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
+    pass
+
+
+class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
+    pass
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
new file mode 100644
index 0000000..ef48d24
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
@@ -0,0 +1,4 @@
+from .env_base import AirHockeyBase
+from .tournament import AirHockeyTournament
+from .hit import AirHockeyHit
+from .defend import AirHockeyDefend
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
new file mode 100644
index 0000000..0ffdf66
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
@@ -0,0 +1,75 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+
+
+class AirHockeyDefend(AirHockeySingle):
+    """
+        Class for the air hockey defending task.
+        The agent should stop the puck at the line x=-0.6.
+    """
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+        self.init_velocity_range = (1, 3)
+
+        self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+    def setup(self, obs):
+        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+        angle = np.random.uniform(-0.5, 0.5)
+
+        puck_vel = np.zeros(3)
+        puck_vel[0] = -np.cos(angle) * lin_vel
+        puck_vel[1] = np.sin(angle) * lin_vel
+        puck_vel[2] = np.random.uniform(-10, 10)
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+        self._write_data("puck_x_vel", puck_vel[0])
+        self._write_data("puck_y_vel", puck_vel[1])
+        self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyDefend, self).setup(obs)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, state):
+        puck_pos, puck_vel = self.get_puck(state)
+        # If puck is over the middle line and moving towards opponent
+        if puck_pos[0] > 0 and puck_vel[0] > 0:
+            return True
+        if np.linalg.norm(puck_vel[:2]) < 0.1:
+            return True
+        return super().is_absorbing(state)
+
+
+if __name__ == '__main__':
+    env = AirHockeyDefend()
+
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    env.reset()
+    env.render()
+    while True:
+        # action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8
+        action = np.zeros(7)
+        observation, reward, done, info = env.step(action)
+        env.render()
+        print(observation)
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
new file mode 100644
index 0000000..fef5394
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
@@ -0,0 +1,258 @@
+import os
+
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path
+from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin
+from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
+from mushroom_rl.utils.spaces import Box
+
+
+"""
+    Abstract class for all AirHockey Environments.
+
+"""
+class AirHockeyBase(MuJoCo):
+    def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
+                 n_agents=1, viewer_params={}):
+
+        """
+            Constructor.
+
+            Args:
+                n_agents (int, 1): number of agent to be used in the environment (one or two)
+        """
+
+        self.n_agents = n_agents
+
+        action_spec = []
+        observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                            ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                            ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                            ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                            ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                            ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                           ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                           ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                           ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                           ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                           ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        collision_spec = [("puck", ["puck"]),
+                          ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
+                          ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
+
+        if 1 <= self.n_agents <= 2:
+            scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
+
+            action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
+                            "iiwa_1/joint_6", "iiwa_1/joint_7"]
+
+            observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)]
+
+            additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS),
+                                ("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS),
+                                ("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL),
+                                ("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL),
+                                ("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS),
+                                ("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL),
+                                ("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)]
+
+            collision_spec += [("robot_1/ee", ["iiwa_1/ee"])]
+
+            if self.n_agents == 2:
+                scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
+
+                observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)]
+
+                action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4",
+                                "iiwa_2/joint_5",
+                                "iiwa_2/joint_6", "iiwa_2/joint_7"]
+
+                observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)]
+
+                observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)]
+
+                additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS),
+                                    ("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS),
+                                    ("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL),
+                                    ("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL),
+                                    ("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS),
+                                    ("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL),
+                                    ("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)]
+
+                collision_spec += [("robot_2/ee", ["iiwa_2/ee"])]
+        else:
+            raise ValueError('n_agents should be 1 or 2')
+
+        self.env_info = dict()
+        self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
+        self.env_info['puck'] = {"radius": 0.03165}
+        self.env_info['mallet'] = {"radius": 0.04815}
+        self.env_info['n_agents'] = self.n_agents
+        self.env_info['robot'] = {
+            "n_joints": 7,
+            "ee_desired_height": 0.1645,
+            "joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
+                                         [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi,
+            "joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
+                                         [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10,
+            "base_frame": [],
+            "universal_height": 0.0645,
+            "control_frequency": 50,
+        }
+
+        self.env_info['puck_pos_ids'] = [0, 1, 2]
+        self.env_info['puck_vel_ids'] = [3, 4, 5]
+        self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12]
+        self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19]
+        if self.n_agents == 2:
+            self.env_info['opponent_ee_ids'] = [20, 21, 22]
+        else:
+            self.env_info['opponent_ee_ids'] = []
+
+        max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents
+
+        super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
+                         n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
+
+        # Construct the mujoco model at origin
+        robot_model = mujoco.MjModel.from_xml_path(
+            os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml"))
+        robot_model.body('iiwa_1/base').pos = np.zeros(3)
+        robot_data = mujoco.MjData(robot_model)
+
+        # Add env_info that requires mujoco models
+        self.env_info['dt'] = self.dt
+        self.env_info["robot"]["joint_pos_limit"] = np.array(
+            [self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T
+        self.env_info["robot"]["robot_model"] = robot_model
+        self.env_info["robot"]["robot_data"] = robot_data
+        self.env_info["rl_info"] = self.info
+
+        frame_T = np.eye(4)
+        temp = np.zeros((9, 1))
+        mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat)
+        frame_T[:3, :3] = temp.reshape(3, 3)
+        frame_T[:3, 3] = self._model.body("iiwa_1/base").pos
+        self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        if self.n_agents == 2:
+            mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat)
+            frame_T[:3, :3] = temp.reshape(3, 3)
+            frame_T[:3, 3] = self._model.body("iiwa_2/base").pos
+            self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        # Ids of the joint, which are controller by the action space
+        self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
+
+        self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info)
+
+    def _modify_mdp_info(self, mdp_info):
+        obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
+                            *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0]
+                                       for i in range(self.env_info['robot']['n_joints'])]),
+                            *self.env_info['robot']['joint_vel_limit'][0]])
+        obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
+                             *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1]
+                                        for i in range(self.env_info['robot']['n_joints'])]),
+                             *self.env_info['robot']['joint_vel_limit'][1]])
+        if self.n_agents == 2:
+            obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
+            obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
+        mdp_info.observation_space = Box(obs_low, obs_high)
+        return mdp_info
+
+    def _simulation_pre_step(self):
+        self.universal_joint_plugin.update()
+
+    def is_absorbing(self, obs):
+        boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
+            return True
+        return False
+
+    @staticmethod
+    def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
+        if type == 'pose':
+            puck_w = np.eye(4)
+            puck_w[:2, 3] = puck_in[:2]
+            puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
+
+            puck_r = np.linalg.inv(robot_frame) @ puck_w
+            puck_out = np.concatenate([puck_r[:2, 3],
+                                       R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
+
+        if type == 'vel':
+            rot_mat = robot_frame[:3, :3]
+
+            vel_lin = np.array([*puck_in[:2], 0])
+            vel_ang = np.array([0., 0., puck_in[2]])
+
+            vel_lin_r = rot_mat.T @ vel_lin
+            vel_ang_r = rot_mat.T @ vel_ang
+
+            puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
+        return puck_out
+
+    def get_puck(self, obs):
+        """
+        Getting the puck properties from the observations
+        Args:
+            obs: The current observation
+
+        Returns:
+            ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
+
+        """
+        puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
+        puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
+        return puck_pos, puck_vel
+
+    def get_ee(self):
+        raise NotImplementedError
+
+    def get_joints(self, obs):
+        raise NotImplementedError
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
new file mode 100644
index 0000000..54a5252
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
@@ -0,0 +1,171 @@
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
+
+class AirHockeyDouble(AirHockeyBase):
+    """
+    Base class for two agents air hockey tasks.
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params)
+
+        self._compute_init_state()
+
+        self.filter_ratio = 0.274
+        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
+        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
+
+    def _compute_init_state(self):
+        init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
+
+        success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
+                                                      self.env_info['robot']['robot_data'],
+                                                      np.array([0.65, 0., 0.1645]),
+                                                      R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
+                                                      initial_q=init_state)
+
+        assert success is True
+
+    def get_ee(self, robot=1):
+        """
+        Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics
+        on the current joint position, this function exists to avoid redundant computations.
+        Args:
+            robot: ID of robot, either 1 or 2
+
+        Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+        """
+        ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos")
+
+        ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel")
+
+        return ee_pos, ee_vel
+
+    def get_joints(self, obs, agent=None):
+        """
+        Get joint position and velocity of the robots
+        Can choose the robot with agent = 1 / 2. If agent is None both are returned
+        """
+        if agent:
+            q_pos = np.zeros(7)
+            q_vel = np.zeros(7)
+            for i in range(7):
+                q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0]
+                q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0]
+        else:
+            q_pos = np.zeros(14)
+            q_vel = np.zeros(14)
+            for i in range(7):
+                q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+                q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+                q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0]
+                q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0]
+
+        return q_pos, q_vel
+
+    def _create_observation(self, obs):
+        # Filter the joint velocity
+        q_pos, q_vel = self.get_joints(obs)
+        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+        self.q_pos_prev = q_pos
+        self.q_vel_prev = q_vel_filter
+
+        for i in range(7):
+            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+            self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7]
+
+        # Wrap puck's rotation angle to [-pi, pi)
+        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+        return obs
+
+    def _modify_observation(self, obs):
+        new_obs = obs.copy()
+
+        puck_pos, puck_vel = self.get_puck(new_obs)
+
+        puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+        puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2]
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2]
+
+        opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')
+        self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \
+            (np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3]
+
+        puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1])
+        puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2]
+
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2]
+
+        opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')
+        self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \
+            (np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3]
+
+        return new_obs
+
+    def setup(self, obs):
+        for i in range(7):
+            self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+            self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i]
+
+            self.q_pos_prev[i] = self.init_state[i]
+            self.q_pos_prev[i + 7] = self.init_state[i]
+            self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
+            self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0]
+
+        self.universal_joint_plugin.reset()
+
+        super().setup(obs)
+        # Update body positions, needed for _compute_universal_joint
+        mujoco.mj_fwdPosition(self._model, self._data)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+
+def main():
+    env = AirHockeyDouble(viewer_params={'start_paused': True})
+    env.reset()
+    env.render()
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    while True:
+        action = np.zeros(14)
+        observation, reward, done, info = env.step(action)
+        env.render()
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
new file mode 100644
index 0000000..accfc83
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
@@ -0,0 +1,104 @@
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
+
+class AirHockeySingle(AirHockeyBase):
+    """
+    Base class for single agent air hockey tasks.
+
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
+
+        self._compute_init_state()
+
+        self.filter_ratio = 0.274
+        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
+        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
+
+    def _compute_init_state(self):
+        init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
+
+        success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
+                                                      self.env_info['robot']['robot_data'],
+                                                      np.array([0.65, 0., 0.1645]),
+                                                      R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
+                                                      initial_q=init_state)
+
+        assert success is True
+
+    def get_ee(self):
+        """
+        Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
+        on the current joint position, this function exists to avoid redundant computations.
+
+        Returns:
+            ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+        """
+        ee_pos = self._read_data("robot_1/ee_pos")
+
+        ee_vel = self._read_data("robot_1/ee_vel")
+
+        return ee_pos, ee_vel
+
+    def get_joints(self, obs):
+        """
+        Get joint position and velocity of the robot
+        """
+        q_pos = np.zeros(7)
+        q_vel = np.zeros(7)
+        for i in range(7):
+            q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+            q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+        return q_pos, q_vel
+
+    def _create_observation(self, obs):
+        # Filter the joint velocity
+        q_pos, q_vel = self.get_joints(obs)
+        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+        self.q_pos_prev = q_pos
+        self.q_vel_prev = q_vel_filter
+
+        for i in range(7):
+            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+
+        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+        return obs
+
+    def _modify_observation(self, obs):
+        new_obs = obs.copy()
+        puck_pos, puck_vel = self.get_puck(new_obs)
+
+        puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+
+        puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
+
+        return new_obs
+
+    def setup(self, obs):
+        for i in range(7):
+            self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+            self.q_pos_prev[i] = self.init_state[i]
+            self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
+
+        self.universal_joint_plugin.reset()
+
+        super().setup(obs)
+
+        # Update body positions, needed for _compute_universal_joint
+        mujoco.mj_fwdPosition(self._model, self._data)
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
new file mode 100644
index 0000000..59386d5
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
@@ -0,0 +1,74 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+
+
+class AirHockeyHit(AirHockeySingle):
+    """
+        Class for the air hockey hitting task.
+    """
+    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
+        """
+            Constructor
+            Args:
+                opponent_agent(Agent, None): Agent which controls the opponent
+                moving_init(bool, False): If true, initialize the puck with inital velocity.
+        """
+        self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+        self.moving_init = moving_init
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+
+
+    def setup(self, obs):
+        # Initial position of the puck
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        if self.moving_init:
+            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+            puck_vel = np.zeros(3)
+            puck_vel[0] = -np.cos(angle) * lin_vel
+            puck_vel[1] = np.sin(angle) * lin_vel
+            puck_vel[2] = np.random.uniform(-2, 2)
+
+            self._write_data("puck_x_vel", puck_vel[0])
+            self._write_data("puck_y_vel", puck_vel[1])
+            self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyHit, self).setup(obs)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # Stop if the puck bounces back on the opponents wall
+        if puck_pos[0] > 0 and puck_vel[0] < 0:
+            return True
+        return super(AirHockeyHit, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+    env = AirHockeyHit(moving_init=True)
+    env.reset()
+
+    steps = 0
+    while True:
+        action = np.zeros(7)
+
+        observation, reward, done, info = env.step(action)
+        env.render()
+        if done or steps > env.info.horizon:
+            steps = 0
+            env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
new file mode 100644
index 0000000..49713a0
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
@@ -0,0 +1,111 @@
+import mujoco
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble
+
+
+class AirHockeyTournament(AirHockeyDouble):
+    """
+        Class for the air hockey tournament. Consists of 2 robots which should play against each other.
+        When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty.
+        If a player accumulates 3 penalties his score is reduced by 1.
+    """
+    def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"):
+        self.agent_name = agent_name
+        self.opponent_name = opponent_name
+
+        self.score = [0, 0]
+        self.faults = [0, 0]
+        self.start_side = None
+
+        self.timer = 0
+
+        def custom_render_callback(viewport, context):
+            names = f"Agents \nScores \nFaults "
+            data = f"{self.agent_name} - {self.opponent_name}\n "
+            data += f"{self.score[0]} - {self.score[1]}\n "
+            data += f"{self.faults[0]} - {self.faults[1]}"
+            mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context)
+
+        viewer_params["custom_render_callback"] = custom_render_callback
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+
+    def setup(self, obs):
+        if self.start_side == None:
+            self.start_side = np.random.choice([1, -1])
+            self.prev_side = self.start_side
+
+        # Initial position of the puck
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        self._write_data("puck_x_pos", puck_pos[0] * self.start_side)
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        self.prev_side = self.start_side
+        self.timer = 0
+
+        super(AirHockeyTournament, self).setup(obs)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        # Puck stuck on one side for more than 15s
+        if np.sign(puck_pos[0]) == self.prev_side:
+            self.timer += self.dt
+        else:
+            self.prev_side *= -1
+            self.timer = 0
+
+        if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15:
+            if self.prev_side == -1:
+                self.faults[0] += 1
+                self.start_side = -1
+                if self.faults[0] % 3 == 0:
+                    self.score[1] += 1
+            else:
+                self.faults[1] += 1
+                self.start_side = 1
+                if self.faults[1] % 3 == 0:
+                    self.score[0] += 1
+
+            return True
+
+        # Puck in Goal
+        if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0:
+            if puck_pos[0] > self.env_info['table']['length'] / 2:
+                self.score[0] += 1
+                self.start_side = -1
+                return True
+
+            if puck_pos[0] < -self.env_info['table']['length'] / 2:
+                self.score[1] += 1
+                self.start_side = 1
+                return True
+
+        # Puck stuck in the middle
+        if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025:
+            return True
+        return super(AirHockeyTournament, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+    env = AirHockeyTournament()
+    env.reset()
+
+    steps = 0
+    while True:
+        action = np.zeros(14)
+        steps += 1
+        observation, reward, done, info = env.step(action)
+        env.render()
+        if done or steps > env.info.horizon:
+            steps = 0
+            env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
new file mode 100644
index 0000000..db285f1
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
@@ -0,0 +1,3 @@
+from .env_base import AirHockeyBase
+from .defend import AirHockeyDefend
+from .hit import AirHockeyHit
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
new file mode 100644
index 0000000..1f1224f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
+
+
+class AirHockeyDefend(AirHockeySingle):
+    """
+    Class for the air hockey defending task.
+    The agent should stop the puck at the line x=-0.6.
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        self.init_velocity_range = (1, 3)
+
+        self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+    def setup(self, state=None):
+        # Set initial puck parameters
+        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+        angle = np.random.uniform(-0.5, 0.5)
+
+        puck_vel = np.zeros(3)
+        puck_vel[0] = -np.cos(angle) * lin_vel
+        puck_vel[1] = np.sin(angle) * lin_vel
+        puck_vel[2] = np.random.uniform(-10, 10)
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+        self._write_data("puck_x_vel", puck_vel[0])
+        self._write_data("puck_y_vel", puck_vel[1])
+        self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyDefend, self).setup(state)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, state):
+        puck_pos, puck_vel = self.get_puck(state)
+        # If puck is over the middle line and moving towards opponent
+        if puck_pos[0] > 0 and puck_vel[0] > 0:
+            return True
+        if np.linalg.norm(puck_vel[:2]) < 0.1:
+            return True
+        return super().is_absorbing(state)
+
+
+if __name__ == '__main__':
+    env = AirHockeyDefend()
+
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    env.reset()
+    while True:
+        action = np.zeros(3)
+        observation, reward, done, info = env.step(action)
+        env.render()
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
new file mode 100644
index 0000000..d9747f3
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
@@ -0,0 +1,224 @@
+import os
+
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path
+from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
+from mushroom_rl.utils.spaces import Box
+
+
+class AirHockeyBase(MuJoCo):
+    """
+    Abstract class for all AirHockey Environments.
+
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
+                 n_agents=1, viewer_params={}):
+        """
+        Constructor.
+
+        Args:
+            n_agents (int, 1): number of agent to be used in the environment (one or two)
+        """
+
+        self.n_agents = n_agents
+
+        action_spec = []
+
+        observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                            ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                            ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                            ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                            ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                            ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                           ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                           ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                           ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                           ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                           ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        collision_spec = [("puck", ["puck"]),
+                          ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
+                          ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
+
+        if 1 <= self.n_agents <= 2:
+            scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
+
+            action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"]
+
+            observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)]
+
+            additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS),
+                                ("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)]
+
+            collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])]
+
+            if self.n_agents == 2:
+                scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
+
+                observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)]
+
+                action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"]
+                # Add puck pos/vel again to transform into second agents frame
+                observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)]
+
+                observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)]
+
+                additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS),
+                                    ("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)]
+
+                collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])]
+        else:
+            raise ValueError('n_agents should be 1 or 2')
+
+        self.env_info = dict()
+        self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
+        self.env_info['puck'] = {"radius": 0.03165}
+        self.env_info['mallet'] = {"radius": 0.04815}
+        self.env_info['n_agents'] = self.n_agents
+        self.env_info['robot'] = {
+            "n_joints": 3,
+            "ee_desired_height": 0.1,
+            "joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3],
+                                         [np.pi / 2, np.pi / 2, np.pi * 2 / 3]]),
+
+            "joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi],
+                                         [2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]),
+            "base_frame": [],
+            "control_frequency": 50,
+        }
+
+        self.env_info['puck_pos_ids'] = [0, 1, 2]
+        self.env_info['puck_vel_ids'] = [3, 4, 5]
+        self.env_info['joint_pos_ids'] = [6, 7, 8]
+        self.env_info['joint_vel_ids'] = [9, 10, 11]
+        if self.n_agents == 2:
+            self.env_info['opponent_ee_ids'] = [13, 14, 15]
+        else:
+            self.env_info['opponent_ee_ids'] = []
+
+        max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents
+
+        super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
+                         n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
+
+        # Construct the mujoco model at origin
+        robot_model = mujoco.MjModel.from_xml_path(
+            os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml"))
+        robot_model.body('planar_robot_1/base').pos = np.zeros(3)
+        robot_data = mujoco.MjData(robot_model)
+
+        # Add env_info that requires mujoco models
+        self.env_info['dt'] = self.dt
+        self.env_info["robot"]["joint_pos_limit"] = np.array(
+            [self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T
+        self.env_info["robot"]["robot_model"] = robot_model
+        self.env_info["robot"]["robot_data"] = robot_data
+        self.env_info["rl_info"] = self.info
+
+        frame_T = np.eye(4)
+        temp = np.zeros((9, 1))
+        mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat)
+        frame_T[:3, :3] = temp.reshape(3, 3)
+        frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos
+        self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        if self.n_agents == 2:
+            mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat)
+            frame_T[:3, :3] = temp.reshape(3, 3)
+            frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos
+            self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        # Ids of the joint, which are controller by the action space
+        self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
+
+    def _modify_mdp_info(self, mdp_info):
+        obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
+                            *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0]
+                                       for i in range(self.env_info['robot']['n_joints'])]),
+                            *self.env_info['robot']['joint_vel_limit'][0]])
+        obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
+                             *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1]
+                                        for i in range(self.env_info['robot']['n_joints'])]),
+                             *self.env_info['robot']['joint_vel_limit'][1]])
+        if self.n_agents == 2:
+            obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
+            obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
+        mdp_info.observation_space = Box(obs_low, obs_high)
+        return mdp_info
+
+    def is_absorbing(self, obs):
+        boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
+            return True
+        return False
+
+    @staticmethod
+    def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
+        if type == 'pose':
+            puck_w = np.eye(4)
+            puck_w[:2, 3] = puck_in[:2]
+            puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
+
+            puck_r = np.linalg.inv(robot_frame) @ puck_w
+            puck_out = np.concatenate([puck_r[:2, 3],
+                                       R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
+
+        if type == 'vel':
+            rot_mat = robot_frame[:3, :3]
+
+            vel_lin = np.array([*puck_in[:2], 0])
+            vel_ang = np.array([0., 0., puck_in[2]])
+
+            vel_lin_r = rot_mat.T @ vel_lin
+            vel_ang_r = rot_mat.T @ vel_ang
+
+            puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
+        return puck_out
+
+    def get_puck(self, obs):
+        """
+        Getting the puck properties from the observations
+        Args:
+            obs: The current observation
+
+        Returns:
+            ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
+
+        """
+        puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
+        puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
+        return puck_pos, puck_vel
+
+    def get_ee(self):
+        raise NotImplementedError
+
+    def get_joints(self, obs):
+        raise NotImplementedError
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
new file mode 100644
index 0000000..4dfa117
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
@@ -0,0 +1,91 @@
+import mujoco
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase
+
+
+class AirHockeySingle(AirHockeyBase):
+    """
+    Base class for single agent air hockey tasks.
+
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        """
+        Constructor.
+
+        """
+        self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414])
+        super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
+
+        self.filter_ratio = 0.274
+        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
+        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
+
+    def get_ee(self):
+        """
+        Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
+        on the current joint position, this function exists to avoid redundant computations.
+
+        Returns:
+            ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+        """
+        ee_pos = self._read_data("robot_1/ee_pos")
+
+        ee_vel = self._read_data("robot_1/ee_vel")
+
+        return ee_pos, ee_vel
+
+    def get_joints(self, obs):
+        """
+        Get joint position and velocity of the robot
+        """
+        q_pos = np.zeros(3)
+        q_vel = np.zeros(3)
+        for i in range(3):
+            q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+            q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+        return q_pos, q_vel
+
+    def _modify_observation(self, obs):
+        new_obs = obs.copy()
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+
+        puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
+
+        return new_obs
+
+    def setup(self, state=None):
+        for i in range(3):
+            self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+            self.q_pos_prev[i] = self.init_state[i]
+            self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0]
+
+        mujoco.mj_fwdPosition(self._model, self._data)
+        super().setup(state)
+
+    def _create_observation(self, obs):
+        # Filter the joint velocity
+        q_pos, q_vel = self.get_joints(obs)
+        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+        self.q_pos_prev = q_pos
+        self.q_vel_prev = q_vel_filter
+
+        for i in range(3):
+            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+
+        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+        return obs
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
new file mode 100644
index 0000000..6124fba
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
@@ -0,0 +1,84 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
+
+class AirHockeyHit(AirHockeySingle):
+    """
+    Class for the air hockey hitting task.
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}):
+        """
+        Constructor
+        Args:
+            moving_init(bool, False): If true, initialize the puck with inital velocity.
+        """
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+        self.moving_init = moving_init
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+
+    def setup(self, state=None):
+        # Initial position of the puck
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        # self.init_state = np.array([-0.9273, 0.9273, np.pi / 2])
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        if self.moving_init:
+            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+            puck_vel = np.zeros(3)
+            puck_vel[0] = -np.cos(angle) * lin_vel
+            puck_vel[1] = np.sin(angle) * lin_vel
+            puck_vel[2] = np.random.uniform(-2, 2, 1)
+
+            self._write_data("puck_x_vel", puck_vel[0])
+            self._write_data("puck_y_vel", puck_vel[1])
+            self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyHit, self).setup(state)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # Stop if the puck bounces back on the opponents wall
+        if puck_pos[0] > 0 and puck_vel[0] < 0:
+            return True
+        return super(AirHockeyHit, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+    env = AirHockeyHit(moving_init=False)
+
+    env.reset()
+    env.render()
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    while True:
+        action = np.zeros(3)
+
+        observation, reward, done, info = env.step(action)
+        env.render()
+
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
new file mode 100644
index 0000000..64c297a
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
@@ -0,0 +1,2 @@
+from .kinematics import inverse_kinematics, forward_kinematics, jacobian
+from .transformations import robot_to_world, world_to_robot
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
new file mode 100644
index 0000000..2156d48
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
@@ -0,0 +1,248 @@
+import mujoco
+import numpy as np
+
+
+def forward_kinematics(mj_model, mj_data, q, link="ee"):
+    """
+    Compute the forward kinematics of the robots.
+
+    IMPORTANT:
+        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
+        case this function will return the wrong values.
+
+    Coordinate System:
+        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+        coordinate system
+
+    Args:
+        mj_model (mujoco.MjModel):
+            mujoco MjModel of the robot-only model
+        mj_data (mujoco.MjData):
+            mujoco MjData object generated from the model
+        q (np.array):
+            joint configuration for which the forward kinematics are computed
+        link (string, "ee"):
+            Link for which the forward kinematics is calculated. When using the iiwas the choices are
+            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+
+    Returns
+    -------
+    position: numpy.ndarray, (3,)
+        Position of the link in robot's base frame
+    orientation: numpy.ndarray, (3, 3)
+        Orientation of the link in robot's base frame
+    """
+
+    return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
+
+
+def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"):
+    """
+    Compute the inverse kinematics of the robots.
+
+    IMPORTANT:
+        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is
+        the case this function will return the wrong values.
+
+    Coordinate System:
+        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+        coordinate system
+
+    Args:
+        mj_model (mujoco.MjModel):
+            mujoco MjModel of the robot-only model
+        mj_data (mujoco.MjData):
+            mujoco MjData object generated from the model
+        desired_position (numpy.ndarray, (3,)):
+            The desired position of the selected link.
+        desired_rotation (optional, numpy.array, (3,3)):
+            The desired rotation of the selected link.
+        initial_q (numpy.ndarray, None):
+            The initial configuration of the algorithm, if set to None it will take the initial configuration of the
+            mj_data.
+        link (str, "ee"):
+            Link for which the inverse kinematics is calculated. When using the iiwas the choices are
+            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+    """
+    q_init = np.zeros(mj_model.nq)
+    if initial_q is None:
+        q_init = mj_data.qpos
+    else:
+        q_init[:initial_q.size] = initial_q
+
+    q_l = mj_model.jnt_range[:, 0]
+    q_h = mj_model.jnt_range[:, 1]
+    lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2
+    upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2
+
+    desired_quat = None
+    if desired_rotation is not None:
+        desired_quat = np.zeros(4)
+        mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1))
+
+    return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model,
+                        mj_data, lower_limit, upper_limit)
+
+
+def jacobian(mj_model, mj_data, q, link="ee"):
+    """
+    Compute the Jacobian of the robots.
+
+    IMPORTANT:
+        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
+        case this function will return the wrong values.
+
+    Coordinate System:
+        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+        coordinate system
+
+    Args:
+        mj_model (mujoco.MjModel):
+            mujoco MjModel of the robot-only model
+        mj_data (mujoco.MjData):
+            mujoco MjData object generated from the model
+        q (numpy.ndarray):
+            joint configuration for which the forward kinematics are computed
+        link (string, "ee"):
+            Link for which the forward kinematics is calculated. When using the iiwas the choices are
+            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+
+    Returns
+    -------
+    numpy.ndarray, (6, num_joints):
+        The Jacobian matrix for the robot kinematics.
+    """
+    return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
+
+
+def link_to_xml_name(mj_model, link):
+    try:
+        mj_model.body('iiwa_1/base')
+        link_to_frame_idx = {
+            "1": "iiwa_1/link_1",
+            "2": "iiwa_1/link_2",
+            "3": "iiwa_1/link_3",
+            "4": "iiwa_1/link_4",
+            "5": "iiwa_1/link_5",
+            "6": "iiwa_1/link_6",
+            "7": "iiwa_1/link_7",
+            "ee": "iiwa_1/striker_joint_link",
+        }
+    except:
+        link_to_frame_idx = {
+            "1": "planar_robot_1/body_1",
+            "2": "planar_robot_1/body_2",
+            "3": "planar_robot_1/body_3",
+            "ee": "planar_robot_1/body_ee",
+        }
+    return link_to_frame_idx[link]
+
+
+def _mujoco_fk(q, name, model, data):
+    data.qpos[:len(q)] = q
+    mujoco.mj_fwdPosition(model, data)
+    return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy()
+
+
+def _mujoco_jac(q, name, model, data):
+    data.qpos[:len(q)] = q
+    dtype = data.qpos.dtype
+    jac = np.empty((6, model.nv), dtype=dtype)
+    jac_pos, jac_rot = jac[:3], jac[3:]
+    mujoco.mj_fwdPosition(model, data)
+    mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
+    return jac
+
+
+def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit):
+    IT_MAX = 1000
+    eps = 1e-4
+    damp = 1e-3
+    progress_thresh = 20.0
+    max_update_norm = 0.1
+    rot_weight = 1
+    i = 0
+
+    dtype = data.qpos.dtype
+
+    data.qpos = initial_q
+
+    neg_x_quat = np.empty(4, dtype=dtype)
+    error_x_quat = np.empty(4, dtype=dtype)
+
+    if desired_pos is not None and desired_quat is not None:
+        jac = np.empty((6, model.nv), dtype=dtype)
+        err = np.empty(6, dtype=dtype)
+        jac_pos, jac_rot = jac[:3], jac[3:]
+        err_pos, err_rot = err[:3], err[3:]
+    else:
+        jac = np.empty((3, model.nv), dtype=dtype)
+        err = np.empty(3, dtype=dtype)
+        if desired_pos is not None:
+            jac_pos, jac_rot = jac, None
+            err_pos, err_rot = err, None
+        elif desired_quat is not None:
+            jac_pos, jac_rot = None, jac
+            err_pos, err_rot = None, err
+        else:
+            raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics")
+
+    while True:
+        # forward kinematics
+        mujoco.mj_fwdPosition(model, data)
+
+        x_pos = data.body(name).xpos
+        x_quat = data.body(name).xquat
+
+        error_norm = 0
+        if desired_pos is not None:
+            err_pos[:] = desired_pos - x_pos
+            error_norm += np.linalg.norm(err_pos)
+
+        if desired_quat is not None:
+            mujoco.mju_negQuat(neg_x_quat, x_quat)
+            mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat)
+            mujoco.mju_quat2Vel(err_rot, error_x_quat, 1)
+            error_norm += np.linalg.norm(err_rot) * rot_weight
+
+        if error_norm < eps:
+            success = True
+            break
+        if i >= IT_MAX:
+            success = False
+            break
+
+        mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
+
+        hess_approx = jac.T.dot(jac)
+        joint_delta = jac.T.dot(err)
+
+        hess_approx += np.eye(hess_approx.shape[0]) * damp
+        update_joints = np.linalg.solve(hess_approx, joint_delta)
+
+        update_norm = np.linalg.norm(update_joints)
+
+        # Check whether we are still making enough progress, and halt if not.
+        progress_criterion = error_norm / update_norm
+        if progress_criterion > progress_thresh:
+            success = False
+            break
+
+        if update_norm > max_update_norm:
+            update_joints *= max_update_norm / update_norm
+
+        mujoco.mj_integratePos(model, data.qpos, update_joints, 1)
+        data.qpos = np.clip(data.qpos, lower_limit, upper_limit)
+        i += 1
+    q_cur = data.qpos.copy()
+
+    return success, q_cur
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
new file mode 100644
index 0000000..6a6ee84
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
@@ -0,0 +1,65 @@
+import numpy as np
+
+
+def robot_to_world(base_frame, translation, rotation=None):
+    """
+    Transform position or rotation optional from the robot base frame to the world frame
+
+    Args
+    ----
+    base_frame: numpy.ndarray, (4,4)
+        The transformation matrix from the world to robot base frame
+    translation: ndarray, (3,)
+        The 3D position to be transformed
+    rotation: optional, ndarray, (3, 3)
+        The rotation in the matrix form to be transformed
+
+    Returns
+    -------
+    position: ndarray (3,)
+        The transformed 3D position
+    rotation: ndarray (3, 3)
+        The transformed rotation in the matrix form
+
+    """
+
+    target = np.eye(4)
+    target[:len(translation), 3] = translation
+    if rotation is not None:
+        target[:3, :3] = rotation
+
+    target_frame = base_frame @ target
+
+    return target_frame[:len(translation), 3], target_frame[:3, :3]
+
+
+def world_to_robot(base_frame, translation, rotation=None):
+    """
+    Transfrom position and rotation (optional) from the world frame to the robot's base frame
+
+    Args
+    ----
+    base_frame: ndarray, (4,4)
+        The transformation matrix from the world to robot base frame
+    translation: ndarray, (3,)
+        The 3D position to be transformed
+    rotation: optional, ndarray, (3, 3)
+        The rotation in the matrix form to be tranformed
+
+    Returns
+    -------
+    position: ndarray, (3,)
+        The transformed 3D position
+    rotation: ndarray, (3, 3)
+        The transformed rotation in the matrix form
+
+    """
+
+    target = np.eye(4)
+    target[:len(translation), 3] = translation
+    if rotation is not None:
+        target[:3, :3] = rotation
+
+    target_frame = np.linalg.inv(base_frame) @ target
+
+    return target_frame[:len(translation), 3], target_frame[:3, :3]
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
new file mode 100644
index 0000000..20b43ac
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
@@ -0,0 +1,120 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics
+
+
+class UniversalJointPlugin:
+    def __init__(self, env_model, env_data, env_info):
+        self.env_info = env_info
+        self.env_model = env_model
+        self.env_data = env_data
+        self.Kp = 20
+        self.Kd = 0.31
+
+        self.universal_joint_ids = []
+        self.universal_joint_ctrl_ids = []
+        self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id,
+                                     env_model.joint("iiwa_1/striker_joint_2").id]
+        self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id,
+                                          env_model.actuator("iiwa_1/striker_joint_2").id]
+        action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
+                       "iiwa_1/joint_6", "iiwa_1/joint_7"]
+
+        if self.env_info['n_agents'] >= 2:
+            self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id,
+                                         env_model.joint("iiwa_2/striker_joint_2").id]
+            self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id,
+                                              env_model.actuator("iiwa_2/striker_joint_2").id]
+            action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5",
+                            "iiwa_2/joint_6", "iiwa_2/joint_7"]
+
+        self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec]
+
+        self.filter_ratio = 0.273
+        self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents'])
+        self.u_joint_pos_prev = None
+        self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents'])
+
+    def reset(self):
+        self.u_joint_pos_prev = None
+        self._control_universal_joint()
+        for i in range(self.env_info['n_agents']):
+            self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids]
+
+        self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des
+
+    def update(self):
+        self._control_universal_joint()
+
+    def _control_universal_joint(self):
+        self._compute_universal_joint()
+        self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids]
+        self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \
+                                (1 - self.filter_ratio) * self.u_joint_vel_prev
+
+        Kp = 4
+        Kd = 0.31
+        torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev
+        self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque
+
+    def _compute_universal_joint(self):
+        for i in range(self.env_info['n_agents']):
+            q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]]
+            # Have to exclude the puck joints
+            pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'],
+                                              self.env_info['robot']['robot_data'], q)
+
+            v_x = rot_mat[:, 0]
+            v_y = rot_mat[:, 1]
+
+            # The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T
+            # and the current y-axis. (0, 0, 1).T x v_y
+            x_desired = np.array([-v_y[1], v_y[0], 0])
+
+            # Find the signed angle from the current to the desired x-axis around the y-axis
+            # https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane
+            q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired)
+            if self.u_joint_pos_prev is not None:
+                if q1 - self.u_joint_pos_prev[0] > np.pi:
+                    q1 -= np.pi * 2
+                elif q1 - self.u_joint_pos_prev[0] < -np.pi:
+                    q1 += np.pi * 2
+
+            # Rotate the X axis by the calculated amount
+            w = np.array([[0, -v_y[2], v_y[1]],
+                          [v_y[2], 0, -v_y[0]],
+                          [-v_y[1], v_y[0], 0]])
+
+            r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1))
+            v_x_rotated = r @ v_x
+
+            # The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current
+            # x-axis, which is already rotated around y. The negative is there because the x-axis is flipped.
+            # -((0, 0, 1).T x v_x))
+            y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0])
+
+            # Find the signed angle from the current to the desired y-axis around the new rotated x-axis
+            q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired)
+
+            if self.u_joint_pos_prev is not None:
+                if q2 - self.u_joint_pos_prev[1] > np.pi:
+                    q2 -= np.pi * 2
+                elif q2 - self.u_joint_pos_prev[1] < -np.pi:
+                    q2 += np.pi * 2
+
+            alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
+            alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
+
+            if self.u_joint_pos_prev is None:
+                self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
+            else:
+                self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum(
+                    10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]),
+                    -np.pi * 0.01), np.pi * 0.01)
+
+            self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
+            
+        return self.u_joint_pos_des
+
+    def _cross_3d(self, a, b):
+        return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]])
diff --git a/setup.py b/setup.py
index 1daa568..53ee8f1 100644
--- a/setup.py
+++ b/setup.py
@@ -7,11 +7,12 @@ from setuptools import setup, find_packages
 # Environment-specific dependencies for dmc and metaworld
 extras = {
     'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'],
-    'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'],
+    'metaworld': ['mujoco==2.3.3', 'metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'],
     'box2d': ['gymnasium[box2d]>=0.26.0'],
     'mujoco': ['mujoco==2.3.3', 'gymnasium[mujoco]>0.26.0'],
     'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'],
     'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"],
+    'mushroom-rl': ['mushroom-rl'],
 }
 
 # All dependencies
diff --git a/test/test_black_box.py b/test/test_black_box.py
index 8cdc543..c1a760a 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -41,7 +41,7 @@ class ToyEnv(gym.Env):
         obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
         return obs, reward, terminated, truncated, info
 
-    def render(self, mode="human"):
+    def render(self):
         pass
 
 
diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py
index aad076b..849b36f 100644
--- a/test/test_fancy_registry.py
+++ b/test/test_fancy_registry.py
@@ -33,7 +33,7 @@ class ToyEnv(gym.Env):
         obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
         return obs, reward, terminated, truncated, info
 
-    def render(self, mode="human"):
+    def render(self):
         pass
 
 
diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index c2edf42..db463c8 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -37,7 +37,7 @@ class ToyEnv(gym.Env):
         obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
         return obs, reward, terminated, truncated, info
 
-    def render(self, mode="human"):
+    def render(self):
         pass