Merge branch 'master' into pypi-package

2023-11-21 20:08:35 +01:00 · 2023-11-21 20:08:35 +01:00 · 99ab2344ad
commit 99ab2344ad
parent 74bede4ba3 2f4b6fcea4
72 changed files with 3065 additions and 0 deletions
--- a/fancy_gym/envs/init.py
+++ b/fancy_gym/envs/init.py
@ -289,3 +289,25 @@ register(
        'goal_switching_step': 99
    }
 )
+
+# Air Hockey environments
+for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]:
+    register(
+        id=f'fancy/AirHockey-{env_mode}-v0',
+        entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
+        max_episode_steps=500,
+        add_mp_types=[],
+        kwargs={
+            'env_mode': env_mode
+        }
+    )
+
+register(
+    id=f'fancy/AirHockey-tournament-v0',
+    entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
+    max_episode_steps=15000,
+    add_mp_types=[],
+    kwargs={
+        'env_mode': 'tournament'
+    }
+)
--- a/fancy_gym/envs/mujoco/init.py
+++ b/fancy_gym/envs/mujoco/init.py
@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
 from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
 from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching
+
+try:
+    from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv
+except ModuleNotFoundError:
+    print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)")
--- a/fancy_gym/envs/mujoco/air_hockey/LICENSE
+++ b/fancy_gym/envs/mujoco/air_hockey/LICENSE
@ -0,0 +1,26 @@
+MIT License
+
+Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+---
+
+This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge).
+The changes are mostly focused on adapting the provided environments to fancy_gym.
--- a/fancy_gym/envs/mujoco/air_hockey/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@ -0,0 +1,180 @@
+from copy import deepcopy
+import numpy as np
+from gymnasium import spaces
+
+import fancy_gym.envs.mujoco.air_hockey.constraints as constraints
+from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position
+from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
+from mushroom_rl.core import Environment
+
+class AirHockeyEnv(Environment):
+    metadata = {"render_modes": ["human"], "render_fps": 50}
+
+    def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
+        """
+        Environment Constructor
+
+        Args:
+            env [string]:
+                The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament].
+            interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
+                    polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
+                    Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
+                    of position, velocity and acceleration of the shape (20, 3, n_joints)
+        """
+
+        env_dict = {
+            "tournament": position.IiwaPositionTournament,
+
+            "7dof-hit": position.IiwaPositionHit,
+            "7dof-defend": position.IiwaPositionDefend,
+
+            "3dof-hit": position.PlanarPositionHit,
+            "3dof-defend": position.PlanarPositionDefend
+        }
+
+        if env_mode not in env_dict:
+            raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!")
+
+        if env_mode == "tournament" and type(interpolation_order) != tuple:
+            interpolation_order = (interpolation_order, interpolation_order)
+
+        self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
+        self.env_name = env_mode
+        self.env_info = self.base_env.env_info
+        single_robot_obs_size = len(self.base_env.info.observation_space.low)
+        if env_mode == "tournament":
+            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
+        else:
+            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
+        robot_info = self.env_info["robot"]
+
+        if env_mode != "tournament":
+            if interpolation_order in [1, 2]:
+                self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+            if interpolation_order in [3, 4, -1]:
+                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+            if interpolation_order in [5]:
+                self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+        else:
+            acts = [None, None]
+            for i in range(2):
+                if interpolation_order[i] in [1, 2]:
+                    acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+                if interpolation_order[i] in [3, 4, -1]:
+                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+                if interpolation_order[i] in [5]:
+                    acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), 
+                                            high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+            self.action_space = spaces.Tuple((acts[0], acts[1]))
+
+        constraint_list = constraints.ConstraintList()
+        constraint_list.add(constraints.JointPositionConstraint(self.env_info))
+        constraint_list.add(constraints.JointVelocityConstraint(self.env_info))
+        constraint_list.add(constraints.EndEffectorConstraint(self.env_info))
+        if "7dof" in self.env_name or self.env_name == "tournament":
+            constraint_list.add(constraints.LinkConstraint(self.env_info))
+
+        self.env_info['constraints'] = constraint_list
+        self.env_info['env_name'] = self.env_name
+
+        self.render_mode = render_mode
+        self.render_human_active = False
+
+        super().__init__(self.base_env.info)
+
+    def step(self, action):
+        obs, reward, done, info = self.base_env.step(action)
+
+        if "tournament" in self.env_name:
+            info["constraints_value"] = list()
+            info["jerk"] = list()
+            for i in range(2):
+                obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)]
+                info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun(
+                    obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']])))
+                info["jerk"].append(
+                    self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][
+                        'n_joints']])
+
+            info["score"] = self.base_env.score
+            info["faults"] = self.base_env.faults
+
+        else:
+            info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']],
+                                                                                  obs[self.env_info['joint_vel_ids']]))
+            info["jerk"] = self.base_env.jerk
+            info["success"] = self.check_success(obs)
+
+        if self.env_info['env_name'] == "tournament":
+            obs = np.array(np.split(obs, 2))
+        
+        if self.render_human_active:
+            self.base_env.render()
+
+        return obs, reward, done, False, info
+
+    def render(self):
+        self.render_human_active = True
+
+    def reset(self, seed=None, options={}):
+        self.base_env.seed(seed)
+        obs = self.base_env.reset()
+        if self.env_info['env_name'] == "tournament":
+            obs = np.array(np.split(obs, 2))
+        return obs, {}
+
+    def check_success(self, obs):
+        puck_pos, puck_vel = self.base_env.get_puck(obs)
+
+        puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos)
+        success = 0
+
+        if "hit" in self.env_name:
+            if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \
+                    np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0:
+                success = 1
+
+        elif "defend" in self.env_name:
+            if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1:
+                success = 1
+
+        elif "prepare" in self.env_name:
+            if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1:
+                success = 1
+        return success
+
+    @property
+    def unwrapped(self):
+        return self
+    
+    def close(self):
+        self.base_env.stop()
+
+
+if __name__ == "__main__":
+    env = AirHockeyEnv(env_mode="7dof-hit")
+    env.reset()
+
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    while True:
+        action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3
+        observation, reward, done, info = env.step(action)
+        env.render()
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/constraints/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/init.py
@ -0,0 +1 @@
+from .constraints import *
--- a/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
@ -0,0 +1,212 @@
+import copy
+
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian
+
+
+class Constraint:
+    def __init__(self, env_info, output_dim, **kwargs):
+        """
+        Constructor
+
+        Args
+        ----
+        env_info: dict
+            A dictionary contains information about the environment;
+        output_dim: int
+            The output dimension of the constraints.
+        **kwargs: dict
+            A dictionary contains agent related information.
+        """
+        self._env_info = env_info
+        self._name = None
+
+        self.output_dim = output_dim
+
+        self._fun_value = np.zeros(self.output_dim)
+        self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"]))
+        self._q_prev = None
+        self._dq_prev = None
+
+    @property
+    def name(self):
+        """
+        The name of the constraints
+
+        """
+        return self._name
+
+    def fun(self, q, dq):
+        """
+        The function of the constraint.
+
+        Args
+        ----
+        q: numpy.ndarray, (num_joints,)
+            The joint position of the robot
+        dq: numpy.ndarray, (num_joints,)
+            The joint velocity of the robot
+
+        Returns
+        -------
+        numpy.ndarray, (out_dim,):
+            The value computed by the constraints function.
+        """
+        if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
+            return self._fun_value
+        else:
+            self._jacobian(q, dq)
+            return self._fun(q, dq)
+
+    def jacobian(self, q, dq):
+        """
+        Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity.
+
+        Args
+        ----
+        q: ndarray, (num_joints,)
+            The joint position of the robot
+        dq: ndarray, (num_joints,)
+            The joint velocity of the robot
+
+        Returns
+        -------
+        numpy.ndarray, (dim_output, num_joints * 2):
+            The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot]
+
+        """
+        if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
+            return self._fun_value
+        else:
+            self._fun(q, dq)
+            return self._jacobian(q, dq)
+
+    def _fun(self, q, dq):
+        raise NotImplementedError
+
+    def _jacobian(self, q, dq):
+        raise NotImplementedError
+
+
+class ConstraintList:
+    def __init__(self):
+        self.constraints = dict()
+
+    def keys(self):
+        return self.constraints.keys()
+
+    def get(self, key):
+        return self.constraints.get(key)
+
+    def add(self, c):
+        self.constraints.update({c.name: c})
+
+    def delete(self, name):
+        del self.constraints[name]
+
+    def fun(self, q, dq):
+        return {key: self.constraints[key].fun(q, dq) for key in self.constraints}
+
+    def jacobian(self, q, dq):
+        return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints}
+
+
+class JointPositionConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
+        self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95
+        self._name = 'joint_pos_constr'
+
+    def _fun(self, q, dq):
+        self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1]
+        self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye(
+            self._env_info['robot']['n_joints'])
+        self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye(
+            self._env_info['robot']['n_joints'])
+        return self._jac_value
+
+
+class JointVelocityConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
+        self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95
+        self._name = 'joint_vel_constr'
+
+    def _fun(self, q, dq):
+        self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1]
+        self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye(
+            self._env_info['robot']['n_joints'])
+        self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye(
+            self._env_info['robot']['n_joints'])
+        return self._jac_value
+
+
+class EndEffectorConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        # 1 Dimension on x direction: x > x_lb
+        # 2 Dimension on y direction: y > y_lb, y < y_ub
+        # 2 Dimension on z direction: z > z_lb, z < z_ub
+        super().__init__(env_info, output_dim=5, **kwargs)
+        self._name = "ee_constr"
+        tolerance = 0.02
+
+        self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
+        self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
+
+        self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - (
+                self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius'])
+        self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
+        self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
+        self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance
+        self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance
+
+    def _fun(self, q, dq):
+        ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q)
+        self._fun_value = np.array([-ee_pos[0] + self.x_lb,
+                                    -ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub,
+                                    -ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub])
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        jac = jacobian(self.robot_model, self.robot_data, q)
+        dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]])
+        self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']]
+        return self._jac_value
+
+
+class LinkConstraint(Constraint):
+    def __init__(self, env_info, **kwargs):
+        # 1 Dimension: wrist_z > minimum_height
+        # 2 Dimension: elbow_z > minimum_height
+        super().__init__(env_info, output_dim=2, **kwargs)
+        self._name = "link_constr"
+
+        self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
+        self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
+
+        self.z_lb = 0.25
+
+    def _fun(self, q, dq):
+        wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7")
+        elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4")
+        self._fun_value = np.array([-wrist_pos[2] + self.z_lb,
+                                    -elbow_pos[2] + self.z_lb])
+        return self._fun_value
+
+    def _jacobian(self, q, dq):
+        jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7")
+        jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4")
+        self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([
+            -jac_wrist[2, :self._env_info['robot']['n_joints']],
+            -jac_elbow[2, :self._env_info['robot']['n_joints']],
+        ])
+        return self._jac_value
--- a/fancy_gym/envs/mujoco/air_hockey/data/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/data/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
@ -0,0 +1,58 @@
+<mujoco model="AirHockeySingle">
+    <include file="iiwa1.xml"/>
+
+    <include file="iiwa2.xml"/>
+
+    <include file="../table.xml"/>
+
+    <contact>
+        <exclude body1="table_surface" body2="iiwa_1/base"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_1"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_2"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_3"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_4"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_5"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_6"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_7"/>
+        <exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
+        <exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
+
+        <exclude body1="rim" body2="iiwa_1/base"/>
+        <exclude body1="rim" body2="iiwa_1/link_1"/>
+        <exclude body1="rim" body2="iiwa_1/link_2"/>
+        <exclude body1="rim" body2="iiwa_1/link_3"/>
+        <exclude body1="rim" body2="iiwa_1/link_4"/>
+        <exclude body1="rim" body2="iiwa_1/link_5"/>
+        <exclude body1="rim" body2="iiwa_1/link_6"/>
+        <exclude body1="rim" body2="iiwa_1/link_7"/>
+        <exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
+        <exclude body1="rim" body2="iiwa_1/striker_mallet"/>
+
+        <exclude body1="world" body2="iiwa_1/striker_mallet"/>
+
+        <exclude body1="table_surface" body2="iiwa_2/base"/>
+        <exclude body1="table_surface" body2="iiwa_2/link_1"/>
+        <exclude body1="table_surface" body2="iiwa_2/link_2"/>
+        <exclude body1="table_surface" body2="iiwa_2/link_3"/>
+        <exclude body1="table_surface" body2="iiwa_2/link_4"/>
+        <exclude body1="table_surface" body2="iiwa_2/link_5"/>
+        <exclude body1="table_surface" body2="iiwa_2/link_6"/>
+        <exclude body1="table_surface" body2="iiwa_2/link_7"/>
+        <exclude body1="table_surface" body2="iiwa_2/striker_joint_link"/>
+        <exclude body1="table_surface" body2="iiwa_2/striker_mallet"/>
+
+        <exclude body1="rim" body2="iiwa_2/base"/>
+        <exclude body1="rim" body2="iiwa_2/link_1"/>
+        <exclude body1="rim" body2="iiwa_2/link_2"/>
+        <exclude body1="rim" body2="iiwa_2/link_3"/>
+        <exclude body1="rim" body2="iiwa_2/link_4"/>
+        <exclude body1="rim" body2="iiwa_2/link_5"/>
+        <exclude body1="rim" body2="iiwa_2/link_6"/>
+        <exclude body1="rim" body2="iiwa_2/link_7"/>
+        <exclude body1="rim" body2="iiwa_2/striker_joint_link"/>
+        <exclude body1="rim" body2="iiwa_2/striker_mallet"/>
+
+        <exclude body1="world" body2="iiwa_2/striker_mallet"/>
+    </contact>
+
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
@ -0,0 +1,108 @@
+<mujoco model="iiwa_1">
+    <compiler angle="radian" autolimits="true" meshdir="assets"/>
+
+    <default>
+        <default class="vis">
+            <geom contype="0" conaffinity="0"/>
+        </default>
+        <default class="robot">
+            <geom condim="4" solref="0.02 0.3" priority="2"/>
+        </default>
+    </default>
+    <asset>
+        <mesh name="link_0" file="link_0.stl"/>
+        <mesh name="link_1" file="link_1.stl"/>
+        <mesh name="link_2" file="link_2.stl"/>
+        <mesh name="link_3" file="link_3.stl"/>
+        <mesh name="link_4" file="link_4.stl"/>
+        <mesh name="link_5" file="link_5.stl"/>
+        <mesh name="link_6" file="link_6.stl"/>
+        <mesh name="link_7" file="link_7.stl"/>
+        <mesh name="EE_arm" file="EE_arm.stl"/>
+        <mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
+    </asset>
+    <worldbody>
+        <body name="iiwa_1/base" pos="-1.51 0 -0.1">
+            <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
+            <body name="iiwa_1/link_1" pos="0 0 0.1575">
+                <inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
+                          fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
+                <joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
+                       frictionloss="0.384477"/>
+                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
+                <body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
+                    <inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
+                              fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
+                    <joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
+                           frictionloss="0.496333"/>
+                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
+                    <body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
+                        <inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
+                                  fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
+                        <joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
+                               frictionloss="0.173951"/>
+                        <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
+                        <body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
+                            <inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
+                                      fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
+                            <joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
+                                   damping="0.219041" frictionloss="0.3751"/>
+                            <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
+                            <body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
+                                <inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
+                                          fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
+                                <joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
+                                       damping="0.185923" frictionloss="0.481099"/>
+                                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
+                                <body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
+                                    <inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
+                                              fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
+                                    <joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
+                                           damping="0.1" frictionloss="0.196149"/>
+                                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
+                                    <body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
+                                        <inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
+                                                  fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
+                                        <joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
+                                               damping="0.1" frictionloss="0.299238" armature="0.01"/>
+                                        <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
+                                        <geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
+                                              class="vis"/>
+                                        <body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
+                                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
+                                            <body name="iiwa_1/striker_mallet" pos="0 0 0">
+                                                <inertial pos="0 0 0.0682827" mass="0.283"
+                                                          diaginertia="0.005 0.005 0.005"/>
+                                                <joint name="iiwa_1/striker_joint_1" pos="0 0 0" axis="0 1 0"
+                                                       range="-1.5708 1.5708" damping="0.0"/>
+                                                <joint name="iiwa_1/striker_joint_2" pos="0 0 0" axis="1 0 0"
+                                                       range="-1.5708 1.5708" damping="0.0"/>
+                                                <geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
+                                                      class="vis"/>
+                                                <geom name="iiwa_1/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
+                                                      size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
+                                            </body>
+                                        </body>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+
+
+    <actuator>
+        <motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
+        <motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
+        <motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
+        <motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
+        <motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
+        <motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
+        <motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
+        <motor name="iiwa_1/striker_joint_1" joint="iiwa_1/striker_joint_1" ctrlrange="-10 10"/>
+        <motor name="iiwa_1/striker_joint_2" joint="iiwa_1/striker_joint_2" ctrlrange="-10 10"/>
+    </actuator>
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
@ -0,0 +1,89 @@
+<mujoco model="iiwa_2">
+    <compiler angle="radian" autolimits="true" meshdir="assets"/>
+
+    <worldbody>
+        <body name="iiwa_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
+            <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
+            <body name="iiwa_2/link_1" pos="0 0 0.1575">
+                <inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
+                          fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
+                <joint name="iiwa_2/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
+                       frictionloss="0.384477"/>
+                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
+                <body name="iiwa_2/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
+                    <inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
+                              fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
+                    <joint name="iiwa_2/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
+                           frictionloss="0.496333"/>
+                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
+                    <body name="iiwa_2/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
+                        <inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
+                                  fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
+                        <joint name="iiwa_2/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
+                               frictionloss="0.173951"/>
+                        <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
+                        <body name="iiwa_2/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
+                            <inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
+                                      fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
+                            <joint name="iiwa_2/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
+                                   damping="0.219041" frictionloss="0.3751"/>
+                            <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
+                            <body name="iiwa_2/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
+                                <inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
+                                          fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
+                                <joint name="iiwa_2/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
+                                       damping="0.185923" frictionloss="0.481099"/>
+                                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
+                                <body name="iiwa_2/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
+                                    <inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
+                                              fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
+                                    <joint name="iiwa_2/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
+                                           damping="0.1" frictionloss="0.196149"/>
+                                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
+                                    <body name="iiwa_2/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
+                                        <inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
+                                                  fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
+                                        <joint name="iiwa_2/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
+                                               damping="0.1" frictionloss="0.299238" armature="0.01"/>
+                                        <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
+                                        <geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
+                                              class="vis"/>
+                                        <body name="iiwa_2/striker_joint_link" pos="0 0 0.585">
+                                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
+                                            <body name="iiwa_2/striker_mallet" pos="0 0 0">
+                                                <inertial pos="0 0 0.0682827" mass="0.283"
+                                                          diaginertia="0.005 0.005 0.005"/>
+                                                <joint name="iiwa_2/striker_joint_1" pos="0 0 0" axis="0 1 0"
+                                                       range="-1.5708 1.5708" damping="0.0"/>
+                                                <joint name="iiwa_2/striker_joint_2" pos="0 0 0" axis="1 0 0"
+                                                       range="-1.5708 1.5708" damping="0.0"/>
+                                                <geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
+                                                      class="vis"/>
+                                                <geom name="iiwa_2/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
+                                                      size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
+                                            </body>
+                                        </body>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+
+
+    <actuator>
+        <motor name="iiwa_2/joint_1" joint="iiwa_2/joint_1" ctrlrange="-320 320"/>
+        <motor name="iiwa_2/joint_2" joint="iiwa_2/joint_2" ctrlrange="-320 320"/>
+        <motor name="iiwa_2/joint_3" joint="iiwa_2/joint_3" ctrlrange="-176 176"/>
+        <motor name="iiwa_2/joint_4" joint="iiwa_2/joint_4" ctrlrange="-176 176"/>
+        <motor name="iiwa_2/joint_5" joint="iiwa_2/joint_5" ctrlrange="-110 110"/>
+        <motor name="iiwa_2/joint_6" joint="iiwa_2/joint_6" ctrlrange="-40 40"/>
+        <motor name="iiwa_2/joint_7" joint="iiwa_2/joint_7" ctrlrange="-40 40"/>
+        <motor name="iiwa_2/striker_joint_1" joint="iiwa_2/striker_joint_1" ctrlrange="-10 10"/>
+        <motor name="iiwa_2/striker_joint_2" joint="iiwa_2/striker_joint_2" ctrlrange="-10 10"/>
+    </actuator>
+
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
@ -0,0 +1,94 @@
+<mujoco model="iiwa_1">
+    <compiler angle="radian" autolimits="true" meshdir="assets"/>
+
+    <default>
+        <default class="vis">
+            <geom contype="0" conaffinity="0"/>
+        </default>
+        <default class="robot">
+            <geom condim="4" solref="0.02 0.3" priority="2"/>
+        </default>
+    </default>
+    <asset>
+        <mesh name="link_0" file="link_0.stl"/>
+        <mesh name="link_1" file="link_1.stl"/>
+        <mesh name="link_2" file="link_2.stl"/>
+        <mesh name="link_3" file="link_3.stl"/>
+        <mesh name="link_4" file="link_4.stl"/>
+        <mesh name="link_5" file="link_5.stl"/>
+        <mesh name="link_6" file="link_6.stl"/>
+        <mesh name="link_7" file="link_7.stl"/>
+        <mesh name="EE_arm" file="EE_arm.stl"/>
+        <mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
+    </asset>
+    <worldbody>
+        <body name="iiwa_1/base" pos="0.0 0 0.0">
+            <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
+            <body name="iiwa_1/link_1" pos="0 0 0.1575">
+                <inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
+                          fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
+                <joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
+                       frictionloss="0.384477"/>
+                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
+                <body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
+                    <inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
+                              fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
+                    <joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
+                           frictionloss="0.496333"/>
+                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
+                    <body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
+                        <inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
+                                  fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
+                        <joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
+                               frictionloss="0.173951"/>
+                        <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
+                        <body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
+                            <inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
+                                      fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
+                            <joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
+                                   damping="0.219041" frictionloss="0.3751"/>
+                            <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
+                            <body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
+                                <inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
+                                          fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
+                                <joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
+                                       damping="0.185923" frictionloss="0.481099"/>
+                                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
+                                <body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
+                                    <inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
+                                              fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
+                                    <joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
+                                           damping="0.1" frictionloss="0.196149"/>
+                                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
+                                    <body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
+                                        <inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
+                                                  fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
+                                        <joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
+                                               damping="0.1" frictionloss="0.299238" armature="0.01"/>
+                                        <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
+                                        <geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
+                                              class="vis"/>
+                                        <body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
+                                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
+                                        </body>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+
+
+    <actuator>
+        <motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
+        <motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
+        <motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
+        <motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
+        <motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
+        <motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
+        <motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
+    </actuator>
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
@ -0,0 +1,32 @@
+<mujoco model="AirHockeySingle">
+
+    <include file="iiwa1.xml"/>
+    <include file="../table.xml"/>
+
+    <contact>
+        <exclude body1="table_surface" body2="iiwa_1/base"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_1"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_2"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_3"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_4"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_5"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_6"/>
+        <exclude body1="table_surface" body2="iiwa_1/link_7"/>
+        <exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
+        <exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
+
+        <exclude body1="rim" body2="iiwa_1/base"/>
+        <exclude body1="rim" body2="iiwa_1/link_1"/>
+        <exclude body1="rim" body2="iiwa_1/link_2"/>
+        <exclude body1="rim" body2="iiwa_1/link_3"/>
+        <exclude body1="rim" body2="iiwa_1/link_4"/>
+        <exclude body1="rim" body2="iiwa_1/link_5"/>
+        <exclude body1="rim" body2="iiwa_1/link_6"/>
+        <exclude body1="rim" body2="iiwa_1/link_7"/>
+        <exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
+        <exclude body1="rim" body2="iiwa_1/striker_mallet"/>
+
+        <exclude body1="world" body2="iiwa_1/striker_mallet"/>
+    </contact>
+
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
@ -0,0 +1,37 @@
+<mujoco model="AirHockeySingle">
+
+    <include file="planar_robot_1.xml"/>
+
+    <include file="planar_robot_2.xml"/>
+
+    <include file="../table.xml"/>
+
+    <contact>
+        <exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_3" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_2" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_1" body2="table_surface"/>
+        <exclude body1="planar_robot_1/base" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_ee" body2="rim"/>
+        <exclude body1="planar_robot_1/body_hand" body2="rim"/>
+        <exclude body1="planar_robot_1/body_3" body2="rim"/>
+        <exclude body1="planar_robot_1/body_2" body2="rim"/>
+        <exclude body1="planar_robot_1/body_1" body2="rim"/>
+        <exclude body1="planar_robot_1/base" body2="rim"/>
+
+        <exclude body1="planar_robot_2/body_ee" body2="table_surface"/>
+        <exclude body1="planar_robot_2/body_hand" body2="table_surface"/>
+        <exclude body1="planar_robot_2/body_3" body2="table_surface"/>
+        <exclude body1="planar_robot_2/body_2" body2="table_surface"/>
+        <exclude body1="planar_robot_2/body_1" body2="table_surface"/>
+        <exclude body1="planar_robot_2/base" body2="table_surface"/>
+        <exclude body1="planar_robot_2/body_ee" body2="rim"/>
+        <exclude body1="planar_robot_2/body_hand" body2="rim"/>
+        <exclude body1="planar_robot_2/body_3" body2="rim"/>
+        <exclude body1="planar_robot_2/body_2" body2="rim"/>
+        <exclude body1="planar_robot_2/body_1" body2="rim"/>
+        <exclude body1="planar_robot_2/base" body2="rim"/>
+    </contact>
+
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
@ -0,0 +1,61 @@
+<mujoco model="planar_robot_1">
+    <compiler autolimits="true" angle="radian"/>
+    <asset>
+        <material name="dark_red" rgba="0.58 0.03 0.25 1"/>
+        <material name="black" rgba="0.1 0.1 0.11 1"/>
+    </asset>
+
+    <default>
+        <default class="visual">
+            <geom contype="0" conaffinity="0"/>
+        </default>
+
+        <default class="robot">
+            <geom condim="4" solref="0.02 0.3" priority="2"/>
+        </default>
+    </default>
+
+    <worldbody>
+        <body name="planar_robot_1/base" pos="-1.51 0 -0.1">
+            <geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
+            <inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
+            <body name="planar_robot_1/body_1" pos="0 0 0.25">
+                <joint name="planar_robot_1/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
+                <inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
+                <geom class="robot" type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0"
+                      pos="0.275 0 0"/>
+                <body name="planar_robot_1/body_2" pos="0.55 0 0">
+                    <joint name="planar_robot_1/joint_2" axis="0 0 1" range="-1.8 1.8"/>
+                    <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
+                    <geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
+                          pos="0.22 0 0"/>
+                    <geom class="robot" type="sphere" material="dark_red" size="0.05"/>
+                    <body name="planar_robot_1/body_3" pos="0.44 0 0">
+                        <joint name="planar_robot_1/joint_3" axis="0 0 1"
+                               range="-2.0943951023931953 2.0943951023931953"/>
+                        <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
+                        <geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22"
+                              euler="0 1.57079632679 0" pos="0.22 0 0"/>
+                        <geom class="robot" type="sphere" material="dark_red" size="0.05"/>
+                        <body name="planar_robot_1/body_hand" pos="0.44 0 0">
+                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
+                            <geom class="robot" type="sphere" material="dark_red" size="0.05"/>
+                            <geom class="robot" type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
+                            <body name="planar_robot_1/body_ee" pos="0 0 -0.15">
+                                <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
+                                <geom class="robot" name="planar_robot_1/ee" type="cylinder" material="black"
+                                      size="0.04815 0.01" pos="0 0 0.01" friction="0 0 0"/>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+
+    <actuator>
+        <motor name="planar_robot_1/joint_1" joint="planar_robot_1/joint_1" ctrlrange="-100 100"/>
+        <motor name="planar_robot_1/joint_2" joint="planar_robot_1/joint_2" ctrlrange="-50 50"/>
+        <motor name="planar_robot_1/joint_3" joint="planar_robot_1/joint_3" ctrlrange="-30 30"/>
+    </actuator>
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
@ -0,0 +1,46 @@
+<mujoco model="planar_robot_2">
+    <compiler autolimits="true" angle="radian"/>
+
+    <worldbody>
+        <body name="planar_robot_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
+            <geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
+            <inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
+            <body name="planar_robot_2/body_1" pos="0 0 0.25">
+                <joint name="planar_robot_2/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
+                <inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
+                <geom type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0" pos="0.275 0 0"/>
+                <body name="planar_robot_2/body_2" pos="0.55 0 0">
+                    <joint name="planar_robot_2/joint_2" axis="0 0 1" range="-1.8 1.8"/>
+                    <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
+                    <geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
+                          pos="0.22 0 0"/>
+                    <geom type="sphere" material="dark_red" size="0.05"/>
+                    <body name="planar_robot_2/body_3" pos="0.44 0 0">
+                        <joint name="planar_robot_2/joint_3" axis="0 0 1"
+                               range="-2.0943951023931953 2.0943951023931953"/>
+                        <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
+                        <geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
+                              pos="0.22 0 0"/>
+                        <geom type="sphere" material="dark_red" size="0.05"/>
+                        <body name="planar_robot_2/body_hand" pos="0.44 0 0">
+                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
+                            <geom type="sphere" material="dark_red" size="0.05"/>
+                            <geom type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
+                            <body name="planar_robot_2/body_ee" pos="0 0 -0.15">
+                                <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
+                                <geom name="planar_robot_2/ee" type="cylinder" material="black" size="0.04815 0.01"
+                                      pos="0 0 0.01" friction="0 0 0"/>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+    </worldbody>
+
+    <actuator>
+        <motor name="planar_robot_2/joint_1" joint="planar_robot_2/joint_1" ctrlrange="-100 100"/>
+        <motor name="planar_robot_2/joint_2" joint="planar_robot_2/joint_2" ctrlrange="-50 50"/>
+        <motor name="planar_robot_2/joint_3" joint="planar_robot_2/joint_3" ctrlrange="-30 30"/>
+    </actuator>
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
@ -0,0 +1,24 @@
+<mujoco model="AirHockeySingle">
+
+
+    <include file="planar_robot_1.xml"/>
+
+    <include file="../table.xml"/>
+
+
+    <contact>
+        <exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_3" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_2" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_1" body2="table_surface"/>
+        <exclude body1="planar_robot_1/base" body2="table_surface"/>
+        <exclude body1="planar_robot_1/body_ee" body2="rim"/>
+        <exclude body1="planar_robot_1/body_hand" body2="rim"/>
+        <exclude body1="planar_robot_1/body_3" body2="rim"/>
+        <exclude body1="planar_robot_1/body_2" body2="rim"/>
+        <exclude body1="planar_robot_1/body_1" body2="rim"/>
+        <exclude body1="planar_robot_1/base" body2="rim"/>
+    </contact>
+
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/table.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/table.xml
@ -0,0 +1,89 @@
+<mujoco model="table">
+
+    <option timestep="0.001" cone="elliptic" impratio="1"/>
+
+    <asset>
+        <material name="grey" specular="0.5" shininess="0.25" rgba="0.8 0.8 0.8 1"/>
+        <material name="white" specular="0.5" shininess="0.25" rgba="1.0 1.0 1.0 1"/>
+        <material name="red" specular="0.5" shininess="0.25" rgba="1.0 0.0 0.0 1"/>
+        <material name="blue" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 1"/>
+        <material name="transparent" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 0"/>
+
+        <texture type="skybox" builtin="gradient" rgb1="0.3 0.5 0.7" rgb2="0 0 0" width="512"
+                 height="3072"/>
+        <texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.2 0.3 0.4"
+                 rgb2="0.1 0.2 0.3" markrgb="0.8 0.8 0.8" width="300" height="300"/>
+        <material name="groundplane" texture="groundplane" texuniform="true" texrepeat="5 5"
+                  reflectance="0.2"/>
+    </asset>
+
+    <default>
+        <geom condim="4" solref="0.02 0.3"/>
+        <default class="rim">
+            <geom type="box" material="grey" condim="6" friction="10000 0.0 0.0" priority="1"
+                  solref="-2000000 -250" solimp="0.99 0.999 0.001 0.5 2"/>
+        </default>
+    </default>
+
+    <worldbody>
+        <body name="table">
+            <body name="table_surface">
+                <geom name="surface" type="box" material="white" size="1.064 0.609 0.0505" pos="0 0 -0.0505"
+                      condim="4" friction="0.08 0.08 0.0" priority="1"
+                      solref="0.0125 0.5" solimp="0.9 0.999 0.001 0.5 2"/>
+            </body>
+            <body name="rim">
+<!--                <geom name="rim_home_l" class="rim" size="0.045 0.197 0.005" pos="-1.019 0.322 0.005"/>-->
+<!--                <geom name="rim_home_r" class="rim" size="0.045 0.197 0.005" pos="-1.019 -0.322 0.005"/>-->
+
+                <geom name="rim_home_l" class="rim" size="0.045 0.1945 0.005" pos="-1.019 0.3245 0.005"/>
+                <geom name="rim_home_r" class="rim" size="0.045 0.1945 0.005" pos="-1.019 -0.3245 0.005"/>
+                <geom name="rim_home_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 0.13 0.005"/>
+                <geom name="rim_home_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 0.13 0.005"/>
+                <geom name="rim_home_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 -0.13 0.005"/>
+                <geom name="rim_home_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 -0.13 0.005"/>
+
+                <geom name="rim_home_top" class="rim" size="0.045 0.519 0.01" pos="-1.019 0 0.02"/>
+
+                <geom name="rim_left" class="rim" size="1.064 0.045 0.015" pos="0 0.564 0.015"/>
+                <geom name="rim_right" class="rim" size="1.064 0.045 0.015" pos="0 -0.564 0.015"/>
+
+<!--                <geom name="rim_away_l" class="rim" size="0.045 0.197 0.005" pos="1.019 0.322 0.005"/>-->
+<!--                <geom name="rim_away_r" class="rim" size="0.045 0.197 0.005" pos="1.019 -0.322 0.005"/>-->
+
+                <geom name="rim_away_l" class="rim" size="0.045 0.1945 0.005" pos="1.019 0.3245 0.005"/>
+                <geom name="rim_away_r" class="rim" size="0.045 0.1945 0.005" pos="1.019 -0.3245 0.005"/>
+                <geom name="rim_away_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 0.13 0.005"/>
+                <geom name="rim_away_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 0.13 0.005"/>
+                <geom name="rim_away_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 -0.13 0.005"/>
+                <geom name="rim_away_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 -0.13 0.005"/>
+                <geom name="rim_away_top" class="rim" size="0.045 0.519 0.01" pos="1.019 0 0.02"/>
+            </body>
+        </body>
+
+        <body name="base" pos="0 0 0">
+            <joint name="puck_x" type="slide" axis="1 0 0" damping="0.005" limited="false"/>
+            <joint name="puck_y" type="slide" axis="0 1 0" damping="0.005" limited="false"/>
+            <joint name="puck_yaw" type="hinge" axis="0 0 1" damping="2e-6" limited="false"/>
+            <body name="puck">
+                <geom pos="0 0 0" name="puck" type="cylinder" material="red" size="0.03165 0.003"
+                      condim="4" priority="0"/>
+                <geom pos="0.02 0 0" type="cylinder" material="blue" size="0.01 0.0031"
+                      condim="4" contype="0" conaffinity="0"/>
+                <inertial pos="0 0 0" mass="0.01" diaginertia="2.5e-6 2.5e-6 5e-6"/>
+            </body>
+        </body>
+
+        <site name="puck_vis" type="ellipsoid" size="0.03165 0.03165 0.004" pos="0 0 -0.05"/>
+        <site name="puck_vis_rot" type="cylinder" size="0.0045 0.006" rgba="1 0 0 1" pos="0 0 -0.05"/>
+    </worldbody>
+
+    <contact>
+        <exclude body1="puck" body2="table_surface"/>
+    </contact>
+
+    <worldbody>
+        <light pos="0 0 3" dir="0 0 -1" directional="true"/>
+        <geom pos="0 0 -0.1" name="floor" size="0 0 0.05" type="plane" material="groundplane"/>
+    </worldbody>
+</mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
@ -0,0 +1,270 @@
+from collections import deque
+
+import mujoco
+import numpy as np
+import scipy
+
+from fancy_gym.envs.mujoco.air_hockey import seven_dof
+from fancy_gym.envs.mujoco.air_hockey import three_dof
+
+
+class PositionControl:
+    def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs):
+        """
+            Mixin that adds position controller to mujoco environments.
+
+            Args:
+                p_gain (float): Proportional controller gain
+                d_gain (float): Differential controller gain
+                i_gain (float): Integral controller gain
+                interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
+                    polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
+                    Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
+                    of position, velocity and acceleration of the shape (20, 3, n_joints)
+                    In the case of 2 agents it is a tuple, which describes the interpolation order for each agent
+                debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the
+                    entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk.
+            """
+
+        self.debug = debug
+
+        super(PositionControl, self).__init__(*args, **kwargs)
+
+        self.robot_model = self.env_info['robot']['robot_model']
+        self.robot_data = self.env_info['robot']['robot_data']
+
+        self.p_gain = np.array(p_gain * self.n_agents)
+        self.d_gain = np.array(d_gain * self.n_agents)
+        self.i_gain = np.array(i_gain * self.n_agents)
+
+        self.prev_pos = np.zeros(len(self.actuator_joint_ids))
+        self.prev_vel = np.zeros(len(self.actuator_joint_ids))
+        self.prev_acc = np.zeros(len(self.actuator_joint_ids))
+        self.i_error = np.zeros(len(self.actuator_joint_ids))
+        self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids))
+
+        self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,)
+
+        self._num_env_joints = len(self.actuator_joint_ids)
+        self.n_robot_joints = self.env_info['robot']["n_joints"]
+
+        self.action_shape = [None] * self.n_agents
+
+        for i in range(self.n_agents):
+            if self.interp_order[i] is None:
+                self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints)
+            elif self.interp_order[i] in [1, 2]:
+                self.action_shape[i] = (self.n_robot_joints,)
+            elif self.interp_order[i] in [3, 4, -1]:
+                self.action_shape[i] = (2, self.n_robot_joints)
+            elif self.interp_order[i] == 5:
+                self.action_shape[i] = (3, self.n_robot_joints)
+
+        self.traj = None
+
+        self.jerk = np.zeros(self._num_env_joints)
+
+        if self.debug:
+            self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
+
+    def _enforce_safety_limits(self, desired_pos, desired_vel):
+        # ROS safe controller
+        pos = self.prev_controller_cmd_pos
+        k = 20
+
+        joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents))
+        joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents))
+
+        min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1])
+
+        max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1])
+
+        clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel)
+
+        min_pos = pos + min_vel * self._timestep
+        max_pos = pos + max_vel * self._timestep
+
+        clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos)
+        self.prev_controller_cmd_pos = clipped_pos.copy()
+
+        return clipped_pos, clipped_vel
+
+    def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel):
+        clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel)
+
+        error = (clipped_pos - current_pos)
+
+        self.i_error += self.i_gain * error * self._timestep
+        torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error
+
+        # Acceleration FeedForward
+        tau_ff = np.zeros(self.robot_model.nv)
+        for i in range(self.n_agents):
+            robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i
+            self.robot_data.qpos = current_pos[robot_joint_ids]
+            self.robot_data.qvel = current_vel[robot_joint_ids]
+            acc_ff = desired_acc[robot_joint_ids]
+            mujoco.mj_forward(self.robot_model, self.robot_data)
+
+            mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff)
+            torque[robot_joint_ids] += tau_ff
+
+            # Gravity Compensation and Coriolis and Centrifugal force
+            torque[robot_joint_ids] += self.robot_data.qfrc_bias
+
+            torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids],
+                                                            self.robot_model.actuator_ctrlrange[:, 0]),
+                                                 self.robot_model.actuator_ctrlrange[:, 1])
+
+        if self.debug:
+            self.controller_record.append(
+                np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk]))
+
+        return torque
+
+    def _interpolate_trajectory(self, interp_order, action, i=0):
+        tf = self.dt
+        prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+        prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+        prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+        if interp_order == 1 and action.ndim == 1:
+            coef = np.array([[1, 0], [1, tf]])
+            results = np.vstack([prev_pos, action])
+        elif interp_order == 2 and action.ndim == 1:
+            coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]])
+            if np.linalg.norm(action - prev_pos) < 1e-3:
+                prev_vel = np.zeros_like(prev_vel)
+            results = np.vstack([prev_pos, action, prev_vel])
+        elif interp_order == 3 and action.shape[0] == 2:
+            coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]])
+            results = np.vstack([prev_pos, action[0], prev_vel, action[1]])
+        elif interp_order == 4 and action.shape[0] == 2:
+            coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4],
+                             [0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3],
+                             [0, 0, 2, 0, 0]])
+            results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc])
+        elif interp_order == 5 and action.shape[0] == 3:
+            coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5],
+                             [0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4],
+                             [0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]])
+            results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]])
+        elif interp_order == -1:
+            # Interpolate position and velocity linearly
+            pass
+        else:
+            raise ValueError("Undefined interpolator order or the action dimension does not match!")
+
+        if interp_order > 0:
+            A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints)
+            y = results.reshape(-2, order='F')
+            weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1)
+            weights_d = np.polynomial.polynomial.polyder(weights, axis=1)
+            weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
+        elif interp_order == -1:
+            weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T
+            weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T
+            weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
+
+        if interp_order in [3, 4, 5]:
+            self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep
+        else:
+            self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf
+
+        self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T)
+        self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T)
+        self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T)
+
+        for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps):
+            q = np.polynomial.polynomial.polyval(t, weights.T)
+            qd = np.polynomial.polynomial.polyval(t, weights_d.T)
+            qdd = np.polynomial.polynomial.polyval(t, weights_dd.T)
+            yield q, qd, qdd
+
+    def reset(self, obs=None):
+        obs = super(PositionControl, self).reset(obs)
+        self.prev_pos = self._data.qpos[self.actuator_joint_ids]
+        self.prev_vel = self._data.qvel[self.actuator_joint_ids]
+        self.prev_acc = np.zeros(len(self.actuator_joint_ids))
+        self.i_error = np.zeros(len(self.actuator_joint_ids))
+        self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids]
+
+        if self.debug:
+            self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
+        return obs
+
+    def _step_init(self, obs, action):
+        super(PositionControl, self)._step_init(obs, action)
+
+        if self.n_agents == 1:
+            self.traj = self._create_traj(self.interp_order[0], action)
+        else:
+            def _traj():
+                traj_1 = self._create_traj(self.interp_order[0], action[0], 0)
+                traj_2 = self._create_traj(self.interp_order[1], action[1], 1)
+
+                for a1, a2 in zip(traj_1, traj_2):
+                    yield np.hstack([a1, a2])
+
+            self.traj = _traj()
+
+    def _create_traj(self, interp_order, action, i=0):
+        if interp_order is None:
+            return iter(action)
+        return self._interpolate_trajectory(interp_order, action, i)
+
+    def _compute_action(self, obs, action):
+        cur_pos, cur_vel = self.get_joints(obs)
+
+        desired_pos, desired_vel, desired_acc = next(self.traj)
+
+        return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel)
+
+    def _preprocess_action(self, action):
+        action = super(PositionControl, self)._preprocess_action(action)
+
+        if self.n_agents == 1:
+            assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \
+                                                      f" {action.shape}"
+        else:
+            for i in range(self.n_agents):
+                assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \
+                                                          f" {action[i].shape}"
+
+        return action
+
+
+class PositionControlIIWA(PositionControl):
+    def __init__(self, *args, **kwargs):
+        p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.]
+        d_gain = [60, 80, 60, 30, 10, 1, 0.5]
+        i_gain = [0, 0, 0, 0, 0, 0, 0]
+
+        super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
+
+
+class PositionControlPlanar(PositionControl):
+    def __init__(self, *args, **kwargs):
+        p_gain = [960, 480, 240]
+        d_gain = [60, 20, 4]
+        i_gain = [0, 0, 0]
+        super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
+
+
+class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit):
+    pass
+
+
+class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
+    pass
+
+
+class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
+    pass
+
+
+class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
+    pass
+
+
+class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
+    pass
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/init.py
@ -0,0 +1,4 @@
+from .env_base import AirHockeyBase
+from .tournament import AirHockeyTournament
+from .hit import AirHockeyHit
+from .defend import AirHockeyDefend
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
@ -0,0 +1,75 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+
+
+class AirHockeyDefend(AirHockeySingle):
+    """
+        Class for the air hockey defending task.
+        The agent should stop the puck at the line x=-0.6.
+    """
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+        self.init_velocity_range = (1, 3)
+
+        self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+    def setup(self, obs):
+        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+        angle = np.random.uniform(-0.5, 0.5)
+
+        puck_vel = np.zeros(3)
+        puck_vel[0] = -np.cos(angle) * lin_vel
+        puck_vel[1] = np.sin(angle) * lin_vel
+        puck_vel[2] = np.random.uniform(-10, 10)
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+        self._write_data("puck_x_vel", puck_vel[0])
+        self._write_data("puck_y_vel", puck_vel[1])
+        self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyDefend, self).setup(obs)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, state):
+        puck_pos, puck_vel = self.get_puck(state)
+        # If puck is over the middle line and moving towards opponent
+        if puck_pos[0] > 0 and puck_vel[0] > 0:
+            return True
+        if np.linalg.norm(puck_vel[:2]) < 0.1:
+            return True
+        return super().is_absorbing(state)
+
+
+if __name__ == '__main__':
+    env = AirHockeyDefend()
+
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    env.reset()
+    env.render()
+    while True:
+        # action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8
+        action = np.zeros(7)
+        observation, reward, done, info = env.step(action)
+        env.render()
+        print(observation)
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
@ -0,0 +1,258 @@
+import os
+
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path
+from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin
+from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
+from mushroom_rl.utils.spaces import Box
+
+
+"""
+    Abstract class for all AirHockey Environments.
+
+"""
+class AirHockeyBase(MuJoCo):
+    def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
+                 n_agents=1, viewer_params={}):
+
+        """
+            Constructor.
+
+            Args:
+                n_agents (int, 1): number of agent to be used in the environment (one or two)
+        """
+
+        self.n_agents = n_agents
+
+        action_spec = []
+        observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                            ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                            ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                            ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                            ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                            ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                           ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                           ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                           ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                           ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                           ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        collision_spec = [("puck", ["puck"]),
+                          ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
+                          ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
+
+        if 1 <= self.n_agents <= 2:
+            scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
+
+            action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
+                            "iiwa_1/joint_6", "iiwa_1/joint_7"]
+
+            observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)]
+
+            additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS),
+                                ("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS),
+                                ("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL),
+                                ("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL),
+                                ("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS),
+                                ("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL),
+                                ("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)]
+
+            collision_spec += [("robot_1/ee", ["iiwa_1/ee"])]
+
+            if self.n_agents == 2:
+                scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
+
+                observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)]
+
+                action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4",
+                                "iiwa_2/joint_5",
+                                "iiwa_2/joint_6", "iiwa_2/joint_7"]
+
+                observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)]
+
+                observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)]
+
+                additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS),
+                                    ("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS),
+                                    ("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL),
+                                    ("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL),
+                                    ("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS),
+                                    ("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL),
+                                    ("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)]
+
+                collision_spec += [("robot_2/ee", ["iiwa_2/ee"])]
+        else:
+            raise ValueError('n_agents should be 1 or 2')
+
+        self.env_info = dict()
+        self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
+        self.env_info['puck'] = {"radius": 0.03165}
+        self.env_info['mallet'] = {"radius": 0.04815}
+        self.env_info['n_agents'] = self.n_agents
+        self.env_info['robot'] = {
+            "n_joints": 7,
+            "ee_desired_height": 0.1645,
+            "joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
+                                         [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi,
+            "joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
+                                         [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10,
+            "base_frame": [],
+            "universal_height": 0.0645,
+            "control_frequency": 50,
+        }
+
+        self.env_info['puck_pos_ids'] = [0, 1, 2]
+        self.env_info['puck_vel_ids'] = [3, 4, 5]
+        self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12]
+        self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19]
+        if self.n_agents == 2:
+            self.env_info['opponent_ee_ids'] = [20, 21, 22]
+        else:
+            self.env_info['opponent_ee_ids'] = []
+
+        max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents
+
+        super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
+                         n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
+
+        # Construct the mujoco model at origin
+        robot_model = mujoco.MjModel.from_xml_path(
+            os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml"))
+        robot_model.body('iiwa_1/base').pos = np.zeros(3)
+        robot_data = mujoco.MjData(robot_model)
+
+        # Add env_info that requires mujoco models
+        self.env_info['dt'] = self.dt
+        self.env_info["robot"]["joint_pos_limit"] = np.array(
+            [self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T
+        self.env_info["robot"]["robot_model"] = robot_model
+        self.env_info["robot"]["robot_data"] = robot_data
+        self.env_info["rl_info"] = self.info
+
+        frame_T = np.eye(4)
+        temp = np.zeros((9, 1))
+        mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat)
+        frame_T[:3, :3] = temp.reshape(3, 3)
+        frame_T[:3, 3] = self._model.body("iiwa_1/base").pos
+        self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        if self.n_agents == 2:
+            mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat)
+            frame_T[:3, :3] = temp.reshape(3, 3)
+            frame_T[:3, 3] = self._model.body("iiwa_2/base").pos
+            self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        # Ids of the joint, which are controller by the action space
+        self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
+
+        self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info)
+
+    def _modify_mdp_info(self, mdp_info):
+        obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
+                            *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0]
+                                       for i in range(self.env_info['robot']['n_joints'])]),
+                            *self.env_info['robot']['joint_vel_limit'][0]])
+        obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
+                             *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1]
+                                        for i in range(self.env_info['robot']['n_joints'])]),
+                             *self.env_info['robot']['joint_vel_limit'][1]])
+        if self.n_agents == 2:
+            obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
+            obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
+        mdp_info.observation_space = Box(obs_low, obs_high)
+        return mdp_info
+
+    def _simulation_pre_step(self):
+        self.universal_joint_plugin.update()
+
+    def is_absorbing(self, obs):
+        boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
+            return True
+        return False
+
+    @staticmethod
+    def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
+        if type == 'pose':
+            puck_w = np.eye(4)
+            puck_w[:2, 3] = puck_in[:2]
+            puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
+
+            puck_r = np.linalg.inv(robot_frame) @ puck_w
+            puck_out = np.concatenate([puck_r[:2, 3],
+                                       R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
+
+        if type == 'vel':
+            rot_mat = robot_frame[:3, :3]
+
+            vel_lin = np.array([*puck_in[:2], 0])
+            vel_ang = np.array([0., 0., puck_in[2]])
+
+            vel_lin_r = rot_mat.T @ vel_lin
+            vel_ang_r = rot_mat.T @ vel_ang
+
+            puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
+        return puck_out
+
+    def get_puck(self, obs):
+        """
+        Getting the puck properties from the observations
+        Args:
+            obs: The current observation
+
+        Returns:
+            ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
+
+        """
+        puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
+        puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
+        return puck_pos, puck_vel
+
+    def get_ee(self):
+        raise NotImplementedError
+
+    def get_joints(self, obs):
+        raise NotImplementedError
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
@ -0,0 +1,171 @@
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
+
+class AirHockeyDouble(AirHockeyBase):
+    """
+    Base class for two agents air hockey tasks.
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params)
+
+        self._compute_init_state()
+
+        self.filter_ratio = 0.274
+        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
+        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
+
+    def _compute_init_state(self):
+        init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
+
+        success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
+                                                      self.env_info['robot']['robot_data'],
+                                                      np.array([0.65, 0., 0.1645]),
+                                                      R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
+                                                      initial_q=init_state)
+
+        assert success is True
+
+    def get_ee(self, robot=1):
+        """
+        Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics
+        on the current joint position, this function exists to avoid redundant computations.
+        Args:
+            robot: ID of robot, either 1 or 2
+
+        Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+        """
+        ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos")
+
+        ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel")
+
+        return ee_pos, ee_vel
+
+    def get_joints(self, obs, agent=None):
+        """
+        Get joint position and velocity of the robots
+        Can choose the robot with agent = 1 / 2. If agent is None both are returned
+        """
+        if agent:
+            q_pos = np.zeros(7)
+            q_vel = np.zeros(7)
+            for i in range(7):
+                q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0]
+                q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0]
+        else:
+            q_pos = np.zeros(14)
+            q_vel = np.zeros(14)
+            for i in range(7):
+                q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+                q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+                q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0]
+                q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0]
+
+        return q_pos, q_vel
+
+    def _create_observation(self, obs):
+        # Filter the joint velocity
+        q_pos, q_vel = self.get_joints(obs)
+        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+        self.q_pos_prev = q_pos
+        self.q_vel_prev = q_vel_filter
+
+        for i in range(7):
+            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+            self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7]
+
+        # Wrap puck's rotation angle to [-pi, pi)
+        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+        return obs
+
+    def _modify_observation(self, obs):
+        new_obs = obs.copy()
+
+        puck_pos, puck_vel = self.get_puck(new_obs)
+
+        puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+        puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2]
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2]
+
+        opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')
+        self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \
+            (np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3]
+
+        puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1])
+        puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2]
+
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1]
+        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2]
+
+        opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')
+        self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \
+            (np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3]
+
+        return new_obs
+
+    def setup(self, obs):
+        for i in range(7):
+            self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+            self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i]
+
+            self.q_pos_prev[i] = self.init_state[i]
+            self.q_pos_prev[i + 7] = self.init_state[i]
+            self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
+            self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0]
+
+        self.universal_joint_plugin.reset()
+
+        super().setup(obs)
+        # Update body positions, needed for _compute_universal_joint
+        mujoco.mj_fwdPosition(self._model, self._data)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+
+def main():
+    env = AirHockeyDouble(viewer_params={'start_paused': True})
+    env.reset()
+    env.render()
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    while True:
+        action = np.zeros(14)
+        observation, reward, done, info = env.step(action)
+        env.render()
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
+
+
+if __name__ == '__main__':
+    main()
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
@ -0,0 +1,104 @@
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
+
+class AirHockeySingle(AirHockeyBase):
+    """
+    Base class for single agent air hockey tasks.
+
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
+
+        self._compute_init_state()
+
+        self.filter_ratio = 0.274
+        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
+        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
+
+    def _compute_init_state(self):
+        init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
+
+        success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
+                                                      self.env_info['robot']['robot_data'],
+                                                      np.array([0.65, 0., 0.1645]),
+                                                      R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
+                                                      initial_q=init_state)
+
+        assert success is True
+
+    def get_ee(self):
+        """
+        Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
+        on the current joint position, this function exists to avoid redundant computations.
+
+        Returns:
+            ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+        """
+        ee_pos = self._read_data("robot_1/ee_pos")
+
+        ee_vel = self._read_data("robot_1/ee_vel")
+
+        return ee_pos, ee_vel
+
+    def get_joints(self, obs):
+        """
+        Get joint position and velocity of the robot
+        """
+        q_pos = np.zeros(7)
+        q_vel = np.zeros(7)
+        for i in range(7):
+            q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+            q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+        return q_pos, q_vel
+
+    def _create_observation(self, obs):
+        # Filter the joint velocity
+        q_pos, q_vel = self.get_joints(obs)
+        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+        self.q_pos_prev = q_pos
+        self.q_vel_prev = q_vel_filter
+
+        for i in range(7):
+            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+
+        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+        return obs
+
+    def _modify_observation(self, obs):
+        new_obs = obs.copy()
+        puck_pos, puck_vel = self.get_puck(new_obs)
+
+        puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+
+        puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
+
+        return new_obs
+
+    def setup(self, obs):
+        for i in range(7):
+            self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+            self.q_pos_prev[i] = self.init_state[i]
+            self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
+
+        self.universal_joint_plugin.reset()
+
+        super().setup(obs)
+
+        # Update body positions, needed for _compute_universal_joint
+        mujoco.mj_fwdPosition(self._model, self._data)
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
@ -0,0 +1,74 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+
+
+class AirHockeyHit(AirHockeySingle):
+    """
+        Class for the air hockey hitting task.
+    """
+    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
+        """
+            Constructor
+            Args:
+                opponent_agent(Agent, None): Agent which controls the opponent
+                moving_init(bool, False): If true, initialize the puck with inital velocity.
+        """
+        self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+        self.moving_init = moving_init
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+
+
+    def setup(self, obs):
+        # Initial position of the puck
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        if self.moving_init:
+            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+            puck_vel = np.zeros(3)
+            puck_vel[0] = -np.cos(angle) * lin_vel
+            puck_vel[1] = np.sin(angle) * lin_vel
+            puck_vel[2] = np.random.uniform(-2, 2)
+
+            self._write_data("puck_x_vel", puck_vel[0])
+            self._write_data("puck_y_vel", puck_vel[1])
+            self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyHit, self).setup(obs)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # Stop if the puck bounces back on the opponents wall
+        if puck_pos[0] > 0 and puck_vel[0] < 0:
+            return True
+        return super(AirHockeyHit, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+    env = AirHockeyHit(moving_init=True)
+    env.reset()
+
+    steps = 0
+    while True:
+        action = np.zeros(7)
+
+        observation, reward, done, info = env.step(action)
+        env.render()
+        if done or steps > env.info.horizon:
+            steps = 0
+            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
@ -0,0 +1,111 @@
+import mujoco
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble
+
+
+class AirHockeyTournament(AirHockeyDouble):
+    """
+        Class for the air hockey tournament. Consists of 2 robots which should play against each other.
+        When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty.
+        If a player accumulates 3 penalties his score is reduced by 1.
+    """
+    def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"):
+        self.agent_name = agent_name
+        self.opponent_name = opponent_name
+
+        self.score = [0, 0]
+        self.faults = [0, 0]
+        self.start_side = None
+
+        self.timer = 0
+
+        def custom_render_callback(viewport, context):
+            names = f"Agents \nScores \nFaults "
+            data = f"{self.agent_name} - {self.opponent_name}\n "
+            data += f"{self.score[0]} - {self.score[1]}\n "
+            data += f"{self.faults[0]} - {self.faults[1]}"
+            mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context)
+
+        viewer_params["custom_render_callback"] = custom_render_callback
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+
+    def setup(self, obs):
+        if self.start_side == None:
+            self.start_side = np.random.choice([1, -1])
+            self.prev_side = self.start_side
+
+        # Initial position of the puck
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        self._write_data("puck_x_pos", puck_pos[0] * self.start_side)
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        self.prev_side = self.start_side
+        self.timer = 0
+
+        super(AirHockeyTournament, self).setup(obs)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        # Puck stuck on one side for more than 15s
+        if np.sign(puck_pos[0]) == self.prev_side:
+            self.timer += self.dt
+        else:
+            self.prev_side *= -1
+            self.timer = 0
+
+        if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15:
+            if self.prev_side == -1:
+                self.faults[0] += 1
+                self.start_side = -1
+                if self.faults[0] % 3 == 0:
+                    self.score[1] += 1
+            else:
+                self.faults[1] += 1
+                self.start_side = 1
+                if self.faults[1] % 3 == 0:
+                    self.score[0] += 1
+
+            return True
+
+        # Puck in Goal
+        if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0:
+            if puck_pos[0] > self.env_info['table']['length'] / 2:
+                self.score[0] += 1
+                self.start_side = -1
+                return True
+
+            if puck_pos[0] < -self.env_info['table']['length'] / 2:
+                self.score[1] += 1
+                self.start_side = 1
+                return True
+
+        # Puck stuck in the middle
+        if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025:
+            return True
+        return super(AirHockeyTournament, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+    env = AirHockeyTournament()
+    env.reset()
+
+    steps = 0
+    while True:
+        action = np.zeros(14)
+        steps += 1
+        observation, reward, done, info = env.step(action)
+        env.render()
+        if done or steps > env.info.horizon:
+            steps = 0
+            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/init.py
@ -0,0 +1,3 @@
+from .env_base import AirHockeyBase
+from .defend import AirHockeyDefend
+from .hit import AirHockeyHit
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
@ -0,0 +1,76 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
+
+
+class AirHockeyDefend(AirHockeySingle):
+    """
+    Class for the air hockey defending task.
+    The agent should stop the puck at the line x=-0.6.
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        self.init_velocity_range = (1, 3)
+
+        self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+    def setup(self, state=None):
+        # Set initial puck parameters
+        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+        angle = np.random.uniform(-0.5, 0.5)
+
+        puck_vel = np.zeros(3)
+        puck_vel[0] = -np.cos(angle) * lin_vel
+        puck_vel[1] = np.sin(angle) * lin_vel
+        puck_vel[2] = np.random.uniform(-10, 10)
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+        self._write_data("puck_x_vel", puck_vel[0])
+        self._write_data("puck_y_vel", puck_vel[1])
+        self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyDefend, self).setup(state)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, state):
+        puck_pos, puck_vel = self.get_puck(state)
+        # If puck is over the middle line and moving towards opponent
+        if puck_pos[0] > 0 and puck_vel[0] > 0:
+            return True
+        if np.linalg.norm(puck_vel[:2]) < 0.1:
+            return True
+        return super().is_absorbing(state)
+
+
+if __name__ == '__main__':
+    env = AirHockeyDefend()
+
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    env.reset()
+    while True:
+        action = np.zeros(3)
+        observation, reward, done, info = env.step(action)
+        env.render()
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
@ -0,0 +1,224 @@
+import os
+
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path
+from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
+from mushroom_rl.utils.spaces import Box
+
+
+class AirHockeyBase(MuJoCo):
+    """
+    Abstract class for all AirHockey Environments.
+
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
+                 n_agents=1, viewer_params={}):
+        """
+        Constructor.
+
+        Args:
+            n_agents (int, 1): number of agent to be used in the environment (one or two)
+        """
+
+        self.n_agents = n_agents
+
+        action_spec = []
+
+        observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                            ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                            ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                            ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                            ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                            ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                           ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                           ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                           ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                           ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                           ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+        collision_spec = [("puck", ["puck"]),
+                          ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
+                          ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
+
+        if 1 <= self.n_agents <= 2:
+            scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
+
+            action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"]
+
+            observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS),
+                                 ("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL),
+                                 ("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)]
+
+            additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS),
+                                ("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)]
+
+            collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])]
+
+            if self.n_agents == 2:
+                scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
+
+                observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)]
+
+                action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"]
+                # Add puck pos/vel again to transform into second agents frame
+                observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+                                     ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+                                     ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS),
+                                     ("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL),
+                                     ("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)]
+
+                observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)]
+
+                additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS),
+                                    ("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)]
+
+                collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])]
+        else:
+            raise ValueError('n_agents should be 1 or 2')
+
+        self.env_info = dict()
+        self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
+        self.env_info['puck'] = {"radius": 0.03165}
+        self.env_info['mallet'] = {"radius": 0.04815}
+        self.env_info['n_agents'] = self.n_agents
+        self.env_info['robot'] = {
+            "n_joints": 3,
+            "ee_desired_height": 0.1,
+            "joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3],
+                                         [np.pi / 2, np.pi / 2, np.pi * 2 / 3]]),
+
+            "joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi],
+                                         [2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]),
+            "base_frame": [],
+            "control_frequency": 50,
+        }
+
+        self.env_info['puck_pos_ids'] = [0, 1, 2]
+        self.env_info['puck_vel_ids'] = [3, 4, 5]
+        self.env_info['joint_pos_ids'] = [6, 7, 8]
+        self.env_info['joint_vel_ids'] = [9, 10, 11]
+        if self.n_agents == 2:
+            self.env_info['opponent_ee_ids'] = [13, 14, 15]
+        else:
+            self.env_info['opponent_ee_ids'] = []
+
+        max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents
+
+        super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
+                         n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
+
+        # Construct the mujoco model at origin
+        robot_model = mujoco.MjModel.from_xml_path(
+            os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml"))
+        robot_model.body('planar_robot_1/base').pos = np.zeros(3)
+        robot_data = mujoco.MjData(robot_model)
+
+        # Add env_info that requires mujoco models
+        self.env_info['dt'] = self.dt
+        self.env_info["robot"]["joint_pos_limit"] = np.array(
+            [self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T
+        self.env_info["robot"]["robot_model"] = robot_model
+        self.env_info["robot"]["robot_data"] = robot_data
+        self.env_info["rl_info"] = self.info
+
+        frame_T = np.eye(4)
+        temp = np.zeros((9, 1))
+        mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat)
+        frame_T[:3, :3] = temp.reshape(3, 3)
+        frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos
+        self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        if self.n_agents == 2:
+            mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat)
+            frame_T[:3, :3] = temp.reshape(3, 3)
+            frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos
+            self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+        # Ids of the joint, which are controller by the action space
+        self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
+
+    def _modify_mdp_info(self, mdp_info):
+        obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
+                            *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0]
+                                       for i in range(self.env_info['robot']['n_joints'])]),
+                            *self.env_info['robot']['joint_vel_limit'][0]])
+        obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
+                             *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1]
+                                        for i in range(self.env_info['robot']['n_joints'])]),
+                             *self.env_info['robot']['joint_vel_limit'][1]])
+        if self.n_agents == 2:
+            obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
+            obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
+        mdp_info.observation_space = Box(obs_low, obs_high)
+        return mdp_info
+
+    def is_absorbing(self, obs):
+        boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
+            return True
+        return False
+
+    @staticmethod
+    def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
+        if type == 'pose':
+            puck_w = np.eye(4)
+            puck_w[:2, 3] = puck_in[:2]
+            puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
+
+            puck_r = np.linalg.inv(robot_frame) @ puck_w
+            puck_out = np.concatenate([puck_r[:2, 3],
+                                       R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
+
+        if type == 'vel':
+            rot_mat = robot_frame[:3, :3]
+
+            vel_lin = np.array([*puck_in[:2], 0])
+            vel_ang = np.array([0., 0., puck_in[2]])
+
+            vel_lin_r = rot_mat.T @ vel_lin
+            vel_ang_r = rot_mat.T @ vel_ang
+
+            puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
+        return puck_out
+
+    def get_puck(self, obs):
+        """
+        Getting the puck properties from the observations
+        Args:
+            obs: The current observation
+
+        Returns:
+            ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
+
+        """
+        puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_pos"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
+        puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_y_vel"),
+                                   self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
+        return puck_pos, puck_vel
+
+    def get_ee(self):
+        raise NotImplementedError
+
+    def get_joints(self, obs):
+        raise NotImplementedError
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
@ -0,0 +1,91 @@
+import mujoco
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase
+
+
+class AirHockeySingle(AirHockeyBase):
+    """
+    Base class for single agent air hockey tasks.
+
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+        """
+        Constructor.
+
+        """
+        self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414])
+        super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
+
+        self.filter_ratio = 0.274
+        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
+        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
+
+    def get_ee(self):
+        """
+        Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
+        on the current joint position, this function exists to avoid redundant computations.
+
+        Returns:
+            ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+        """
+        ee_pos = self._read_data("robot_1/ee_pos")
+
+        ee_vel = self._read_data("robot_1/ee_vel")
+
+        return ee_pos, ee_vel
+
+    def get_joints(self, obs):
+        """
+        Get joint position and velocity of the robot
+        """
+        q_pos = np.zeros(3)
+        q_vel = np.zeros(3)
+        for i in range(3):
+            q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+            q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+        return q_pos, q_vel
+
+    def _modify_observation(self, obs):
+        new_obs = obs.copy()
+        puck_pos, puck_vel = self.get_puck(obs)
+
+        puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+
+        puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
+
+        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
+        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
+        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
+
+        return new_obs
+
+    def setup(self, state=None):
+        for i in range(3):
+            self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+            self.q_pos_prev[i] = self.init_state[i]
+            self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0]
+
+        mujoco.mj_fwdPosition(self._model, self._data)
+        super().setup(state)
+
+    def _create_observation(self, obs):
+        # Filter the joint velocity
+        q_pos, q_vel = self.get_joints(obs)
+        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+        self.q_pos_prev = q_pos
+        self.q_vel_prev = q_vel_filter
+
+        for i in range(3):
+            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+
+        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+        return obs
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
@ -0,0 +1,84 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
+
+class AirHockeyHit(AirHockeySingle):
+    """
+    Class for the air hockey hitting task.
+    """
+
+    def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}):
+        """
+        Constructor
+        Args:
+            moving_init(bool, False): If true, initialize the puck with inital velocity.
+        """
+        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+        self.moving_init = moving_init
+        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+                    self.env_info['mallet']['radius'] * 2
+        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
+        self.init_velocity_range = (0, 0.5)  # Table Frame
+        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
+
+    def setup(self, state=None):
+        # Initial position of the puck
+        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+        # self.init_state = np.array([-0.9273, 0.9273, np.pi / 2])
+
+        self._write_data("puck_x_pos", puck_pos[0])
+        self._write_data("puck_y_pos", puck_pos[1])
+
+        if self.moving_init:
+            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+            puck_vel = np.zeros(3)
+            puck_vel[0] = -np.cos(angle) * lin_vel
+            puck_vel[1] = np.sin(angle) * lin_vel
+            puck_vel[2] = np.random.uniform(-2, 2, 1)
+
+            self._write_data("puck_x_vel", puck_vel[0])
+            self._write_data("puck_y_vel", puck_vel[1])
+            self._write_data("puck_yaw_vel", puck_vel[2])
+
+        super(AirHockeyHit, self).setup(state)
+
+    def reward(self, state, action, next_state, absorbing):
+        return 0
+
+    def is_absorbing(self, obs):
+        puck_pos, puck_vel = self.get_puck(obs)
+        # Stop if the puck bounces back on the opponents wall
+        if puck_pos[0] > 0 and puck_vel[0] < 0:
+            return True
+        return super(AirHockeyHit, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+    env = AirHockeyHit(moving_init=False)
+
+    env.reset()
+    env.render()
+    R = 0.
+    J = 0.
+    gamma = 1.
+    steps = 0
+    while True:
+        action = np.zeros(3)
+
+        observation, reward, done, info = env.step(action)
+        env.render()
+
+        gamma *= env.info.gamma
+        J += gamma * reward
+        R += reward
+        steps += 1
+        if done or steps > env.info.horizon:
+            print("J: ", J, " R: ", R)
+            R = 0.
+            J = 0.
+            gamma = 1.
+            steps = 0
+            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/utils/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/init.py
@ -0,0 +1,2 @@
+from .kinematics import inverse_kinematics, forward_kinematics, jacobian
+from .transformations import robot_to_world, world_to_robot
--- a/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
@ -0,0 +1,248 @@
+import mujoco
+import numpy as np
+
+
+def forward_kinematics(mj_model, mj_data, q, link="ee"):
+    """
+    Compute the forward kinematics of the robots.
+
+    IMPORTANT:
+        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
+        case this function will return the wrong values.
+
+    Coordinate System:
+        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+        coordinate system
+
+    Args:
+        mj_model (mujoco.MjModel):
+            mujoco MjModel of the robot-only model
+        mj_data (mujoco.MjData):
+            mujoco MjData object generated from the model
+        q (np.array):
+            joint configuration for which the forward kinematics are computed
+        link (string, "ee"):
+            Link for which the forward kinematics is calculated. When using the iiwas the choices are
+            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+
+    Returns
+    -------
+    position: numpy.ndarray, (3,)
+        Position of the link in robot's base frame
+    orientation: numpy.ndarray, (3, 3)
+        Orientation of the link in robot's base frame
+    """
+
+    return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
+
+
+def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"):
+    """
+    Compute the inverse kinematics of the robots.
+
+    IMPORTANT:
+        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is
+        the case this function will return the wrong values.
+
+    Coordinate System:
+        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+        coordinate system
+
+    Args:
+        mj_model (mujoco.MjModel):
+            mujoco MjModel of the robot-only model
+        mj_data (mujoco.MjData):
+            mujoco MjData object generated from the model
+        desired_position (numpy.ndarray, (3,)):
+            The desired position of the selected link.
+        desired_rotation (optional, numpy.array, (3,3)):
+            The desired rotation of the selected link.
+        initial_q (numpy.ndarray, None):
+            The initial configuration of the algorithm, if set to None it will take the initial configuration of the
+            mj_data.
+        link (str, "ee"):
+            Link for which the inverse kinematics is calculated. When using the iiwas the choices are
+            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+    """
+    q_init = np.zeros(mj_model.nq)
+    if initial_q is None:
+        q_init = mj_data.qpos
+    else:
+        q_init[:initial_q.size] = initial_q
+
+    q_l = mj_model.jnt_range[:, 0]
+    q_h = mj_model.jnt_range[:, 1]
+    lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2
+    upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2
+
+    desired_quat = None
+    if desired_rotation is not None:
+        desired_quat = np.zeros(4)
+        mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1))
+
+    return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model,
+                        mj_data, lower_limit, upper_limit)
+
+
+def jacobian(mj_model, mj_data, q, link="ee"):
+    """
+    Compute the Jacobian of the robots.
+
+    IMPORTANT:
+        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
+        case this function will return the wrong values.
+
+    Coordinate System:
+        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+        coordinate system
+
+    Args:
+        mj_model (mujoco.MjModel):
+            mujoco MjModel of the robot-only model
+        mj_data (mujoco.MjData):
+            mujoco MjData object generated from the model
+        q (numpy.ndarray):
+            joint configuration for which the forward kinematics are computed
+        link (string, "ee"):
+            Link for which the forward kinematics is calculated. When using the iiwas the choices are
+            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+
+    Returns
+    -------
+    numpy.ndarray, (6, num_joints):
+        The Jacobian matrix for the robot kinematics.
+    """
+    return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
+
+
+def link_to_xml_name(mj_model, link):
+    try:
+        mj_model.body('iiwa_1/base')
+        link_to_frame_idx = {
+            "1": "iiwa_1/link_1",
+            "2": "iiwa_1/link_2",
+            "3": "iiwa_1/link_3",
+            "4": "iiwa_1/link_4",
+            "5": "iiwa_1/link_5",
+            "6": "iiwa_1/link_6",
+            "7": "iiwa_1/link_7",
+            "ee": "iiwa_1/striker_joint_link",
+        }
+    except:
+        link_to_frame_idx = {
+            "1": "planar_robot_1/body_1",
+            "2": "planar_robot_1/body_2",
+            "3": "planar_robot_1/body_3",
+            "ee": "planar_robot_1/body_ee",
+        }
+    return link_to_frame_idx[link]
+
+
+def _mujoco_fk(q, name, model, data):
+    data.qpos[:len(q)] = q
+    mujoco.mj_fwdPosition(model, data)
+    return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy()
+
+
+def _mujoco_jac(q, name, model, data):
+    data.qpos[:len(q)] = q
+    dtype = data.qpos.dtype
+    jac = np.empty((6, model.nv), dtype=dtype)
+    jac_pos, jac_rot = jac[:3], jac[3:]
+    mujoco.mj_fwdPosition(model, data)
+    mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
+    return jac
+
+
+def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit):
+    IT_MAX = 1000
+    eps = 1e-4
+    damp = 1e-3
+    progress_thresh = 20.0
+    max_update_norm = 0.1
+    rot_weight = 1
+    i = 0
+
+    dtype = data.qpos.dtype
+
+    data.qpos = initial_q
+
+    neg_x_quat = np.empty(4, dtype=dtype)
+    error_x_quat = np.empty(4, dtype=dtype)
+
+    if desired_pos is not None and desired_quat is not None:
+        jac = np.empty((6, model.nv), dtype=dtype)
+        err = np.empty(6, dtype=dtype)
+        jac_pos, jac_rot = jac[:3], jac[3:]
+        err_pos, err_rot = err[:3], err[3:]
+    else:
+        jac = np.empty((3, model.nv), dtype=dtype)
+        err = np.empty(3, dtype=dtype)
+        if desired_pos is not None:
+            jac_pos, jac_rot = jac, None
+            err_pos, err_rot = err, None
+        elif desired_quat is not None:
+            jac_pos, jac_rot = None, jac
+            err_pos, err_rot = None, err
+        else:
+            raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics")
+
+    while True:
+        # forward kinematics
+        mujoco.mj_fwdPosition(model, data)
+
+        x_pos = data.body(name).xpos
+        x_quat = data.body(name).xquat
+
+        error_norm = 0
+        if desired_pos is not None:
+            err_pos[:] = desired_pos - x_pos
+            error_norm += np.linalg.norm(err_pos)
+
+        if desired_quat is not None:
+            mujoco.mju_negQuat(neg_x_quat, x_quat)
+            mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat)
+            mujoco.mju_quat2Vel(err_rot, error_x_quat, 1)
+            error_norm += np.linalg.norm(err_rot) * rot_weight
+
+        if error_norm < eps:
+            success = True
+            break
+        if i >= IT_MAX:
+            success = False
+            break
+
+        mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
+
+        hess_approx = jac.T.dot(jac)
+        joint_delta = jac.T.dot(err)
+
+        hess_approx += np.eye(hess_approx.shape[0]) * damp
+        update_joints = np.linalg.solve(hess_approx, joint_delta)
+
+        update_norm = np.linalg.norm(update_joints)
+
+        # Check whether we are still making enough progress, and halt if not.
+        progress_criterion = error_norm / update_norm
+        if progress_criterion > progress_thresh:
+            success = False
+            break
+
+        if update_norm > max_update_norm:
+            update_joints *= max_update_norm / update_norm
+
+        mujoco.mj_integratePos(model, data.qpos, update_joints, 1)
+        data.qpos = np.clip(data.qpos, lower_limit, upper_limit)
+        i += 1
+    q_cur = data.qpos.copy()
+
+    return success, q_cur
--- a/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
@ -0,0 +1,65 @@
+import numpy as np
+
+
+def robot_to_world(base_frame, translation, rotation=None):
+    """
+    Transform position or rotation optional from the robot base frame to the world frame
+
+    Args
+    ----
+    base_frame: numpy.ndarray, (4,4)
+        The transformation matrix from the world to robot base frame
+    translation: ndarray, (3,)
+        The 3D position to be transformed
+    rotation: optional, ndarray, (3, 3)
+        The rotation in the matrix form to be transformed
+
+    Returns
+    -------
+    position: ndarray (3,)
+        The transformed 3D position
+    rotation: ndarray (3, 3)
+        The transformed rotation in the matrix form
+
+    """
+
+    target = np.eye(4)
+    target[:len(translation), 3] = translation
+    if rotation is not None:
+        target[:3, :3] = rotation
+
+    target_frame = base_frame @ target
+
+    return target_frame[:len(translation), 3], target_frame[:3, :3]
+
+
+def world_to_robot(base_frame, translation, rotation=None):
+    """
+    Transfrom position and rotation (optional) from the world frame to the robot's base frame
+
+    Args
+    ----
+    base_frame: ndarray, (4,4)
+        The transformation matrix from the world to robot base frame
+    translation: ndarray, (3,)
+        The 3D position to be transformed
+    rotation: optional, ndarray, (3, 3)
+        The rotation in the matrix form to be tranformed
+
+    Returns
+    -------
+    position: ndarray, (3,)
+        The transformed 3D position
+    rotation: ndarray, (3, 3)
+        The transformed rotation in the matrix form
+
+    """
+
+    target = np.eye(4)
+    target[:len(translation), 3] = translation
+    if rotation is not None:
+        target[:3, :3] = rotation
+
+    target_frame = np.linalg.inv(base_frame) @ target
+
+    return target_frame[:len(translation), 3], target_frame[:3, :3]
--- a/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
@ -0,0 +1,120 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics
+
+
+class UniversalJointPlugin:
+    def __init__(self, env_model, env_data, env_info):
+        self.env_info = env_info
+        self.env_model = env_model
+        self.env_data = env_data
+        self.Kp = 20
+        self.Kd = 0.31
+
+        self.universal_joint_ids = []
+        self.universal_joint_ctrl_ids = []
+        self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id,
+                                     env_model.joint("iiwa_1/striker_joint_2").id]
+        self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id,
+                                          env_model.actuator("iiwa_1/striker_joint_2").id]
+        action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
+                       "iiwa_1/joint_6", "iiwa_1/joint_7"]
+
+        if self.env_info['n_agents'] >= 2:
+            self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id,
+                                         env_model.joint("iiwa_2/striker_joint_2").id]
+            self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id,
+                                              env_model.actuator("iiwa_2/striker_joint_2").id]
+            action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5",
+                            "iiwa_2/joint_6", "iiwa_2/joint_7"]
+
+        self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec]
+
+        self.filter_ratio = 0.273
+        self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents'])
+        self.u_joint_pos_prev = None
+        self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents'])
+
+    def reset(self):
+        self.u_joint_pos_prev = None
+        self._control_universal_joint()
+        for i in range(self.env_info['n_agents']):
+            self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids]
+
+        self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des
+
+    def update(self):
+        self._control_universal_joint()
+
+    def _control_universal_joint(self):
+        self._compute_universal_joint()
+        self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids]
+        self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \
+                                (1 - self.filter_ratio) * self.u_joint_vel_prev
+
+        Kp = 4
+        Kd = 0.31
+        torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev
+        self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque
+
+    def _compute_universal_joint(self):
+        for i in range(self.env_info['n_agents']):
+            q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]]
+            # Have to exclude the puck joints
+            pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'],
+                                              self.env_info['robot']['robot_data'], q)
+
+            v_x = rot_mat[:, 0]
+            v_y = rot_mat[:, 1]
+
+            # The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T
+            # and the current y-axis. (0, 0, 1).T x v_y
+            x_desired = np.array([-v_y[1], v_y[0], 0])
+
+            # Find the signed angle from the current to the desired x-axis around the y-axis
+            # https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane
+            q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired)
+            if self.u_joint_pos_prev is not None:
+                if q1 - self.u_joint_pos_prev[0] > np.pi:
+                    q1 -= np.pi * 2
+                elif q1 - self.u_joint_pos_prev[0] < -np.pi:
+                    q1 += np.pi * 2
+
+            # Rotate the X axis by the calculated amount
+            w = np.array([[0, -v_y[2], v_y[1]],
+                          [v_y[2], 0, -v_y[0]],
+                          [-v_y[1], v_y[0], 0]])
+
+            r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1))
+            v_x_rotated = r @ v_x
+
+            # The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current
+            # x-axis, which is already rotated around y. The negative is there because the x-axis is flipped.
+            # -((0, 0, 1).T x v_x))
+            y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0])
+
+            # Find the signed angle from the current to the desired y-axis around the new rotated x-axis
+            q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired)
+
+            if self.u_joint_pos_prev is not None:
+                if q2 - self.u_joint_pos_prev[1] > np.pi:
+                    q2 -= np.pi * 2
+                elif q2 - self.u_joint_pos_prev[1] < -np.pi:
+                    q2 += np.pi * 2
+
+            alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
+            alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
+
+            if self.u_joint_pos_prev is None:
+                self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
+            else:
+                self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum(
+                    10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]),
+                    -np.pi * 0.01), np.pi * 0.01)
+
+            self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
+            
+        return self.u_joint_pos_des
+
+    def _cross_3d(self, a, b):
+        return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]])
--- a/setup.py
+++ b/setup.py
@ -14,6 +14,7 @@ extras = {
    'box2d': ['gymnasium[box2d]>=0.26.0'],
    'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'],
    'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"],
+    'mushroom-rl': ['mushroom-rl'],
 }

 # All dependencies