Merge branch 'bruce_port_envs' into future

2024-01-28 12:39:18 +01:00 · 2024-01-28 12:39:18 +01:00 · fbbe312bce
commit fbbe312bce
parent acc540f757 a67637c714
88 changed files with 3808 additions and 121 deletions
--- a/.github/workflows/publish-to-pypi.yml
+++ b/.github/workflows/publish-to-pypi.yml
@ -2,8 +2,6 @@ name: Publish Python package to PyPI
 on:
  push:
    branches:
      - master
    tags:
      - '*'
@ -16,6 +14,17 @@ jobs:
    steps:
    - name: Check out code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # This fetches all history for all branches and tags
    - name: Verify tag is on master branch
      run: |
        TAG_IS_ON_MASTER=$(git branch -r --contains ${{ github.ref }} | grep 'origin/master')
        if [ -z "$TAG_IS_ON_MASTER" ]; then
          echo "Tag is not on the master branch. Cancelling the workflow."
          exit 1
        fi
        echo "Tag is on the master branch. Proceeding with the workflow."
    - name: Set up Python
      uses: actions/setup-python@v4
--- a/.github/workflows/publish-to-test-pypi.yml
+++ b/.github/workflows/publish-to-test-pypi.yml
@ -2,8 +2,6 @@ name: Publish Python package to TestPyPI
 on:
  push:
    branches:
      - master
    tags:
      - '*'
@ -16,6 +14,17 @@ jobs:
    steps:
    - name: Check out code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # This fetches all history for all branches and tags
    - name: Verify tag is on master branch
      run: |
        TAG_IS_ON_MASTER=$(git branch -r --contains ${{ github.ref }} | grep 'origin/master')
        if [ -z "$TAG_IS_ON_MASTER" ]; then
          echo "Tag is not on the master branch. Cancelling the workflow."
          exit 1
        fi
        echo "Tag is on the master branch. Proceeding with the workflow."
    - name: Set up Python
      uses: actions/setup-python@v4
--- a/README.md
+++ b/README.md
@ -33,7 +33,7 @@ While the overarching objective of MP environments remains the learning of an op
 ## Installation
-We recommend installing `fancy_gym` into a virtual environments like [venv](https://docs.python.org/3/library/venv.html). 3rd party alternatives to venv like [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/) can also be used.
+We recommend installing `fancy_gym` into a virtual environment as provided by [venv](https://docs.python.org/3/library/venv.html). 3rd party alternatives to venv like [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/) can also be used.
 ### Installation from PyPI (recommended)
@ -105,17 +105,16 @@ Regular step based environments added by Fancy Gym are added into the `fancy/` n
 import gymnasium as gym
 import fancy_gym
-env = gym.make('fancy/Reacher5d-v0')
+env = gym.make('fancy/Reacher5d-v0', render_mode='human')
-# or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
+# or env = gym.make('metaworld/reach-v2', render_mode='human') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
-# or env = gym.make('dm_control/ball_in_cup-catch-v0')
+# or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human')
-# or env = gym.make('Reacher-v2')
+# or env = gym.make('Reacher-v2', render_mode='human')
 observation = env.reset(seed=1)
 env.render()
 for i in range(1000):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    if i % 5 == 0:
        env.render()
    if terminated or truncated:
        observation, info = env.reset()
@ -149,17 +148,14 @@ Just keep in mind, calling `step()` executes a full trajectory.
 import gymnasium as gym
 import fancy_gym
-env = gym.make('fancy_ProMP/Reacher5d-v0')
+env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human")
-# or env = gym.make('metaworld_ProDMP/reach-v2')
+# or env = gym.make('metaworld_ProDMP/reach-v2', render_mode="human")
-# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0')
+# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0', render_mode="human")
-# or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS
+# or env = gym.make('gym_ProMP/Reacher-v2', render_mode="human") # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS
 # render() can be called once in the beginning with all necessary arguments.
 # To turn it of again just call render() without any arguments.
 env.render(mode='human')
 # This returns the context information, not the full state observation
 observation, info = env.reset(seed=1)
 env.render()
 for i in range(5):
    action = env.action_space.sample()
--- a/fancy_gym/envs/init.py
+++ b/fancy_gym/envs/init.py
@ -26,7 +26,7 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
 from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \
    BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING
 from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \
-    MAX_EPISODE_STEPS_TABLE_TENNIS
+    MAX_EPISODE_STEPS_TABLE_TENNIS, MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER
 from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper as MPWrapper_TableTennis
 from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper_Replan as MPWrapper_TableTennis_Replan
 from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper as MPWrapper_TableTennis_VelObs
@ -135,6 +135,19 @@ register(
    }
 )
 register(
    id='fancy/HopperJumpMarkov-v0',
    entry_point='fancy_gym.envs.mujoco:HopperJumpMarkovRew',
    mp_wrapper=mujoco.hopper_jump.MPWrapper,
    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
    kwargs={
        "sparse": False,
        "healthy_reward": 1.0,
        "contact_weight": 0.0,
        "height_weight": 3.0,
    }
 )
 # TODO: Add [MPs] later when finished (old TODO I moved here during refactor)
 register(
    id='fancy/AntJump-v0',
@ -289,3 +302,53 @@ register(
        'goal_switching_step': 99
    }
 )
 register(
    id='TableTennisRndRobot-v0',
    entry_point='fancy_gym.envs.mujoco:TableTennisRandomInit',
    max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS,
    kwargs={
        'random_pos_scale': 0.1,
        'random_vel_scale': 0.0,
    }
 )
 register(
    id='TableTennisMarkovian-v0',
    entry_point='fancy_gym.envs.mujoco:TableTennisMarkovian',
    max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER,
    kwargs={
    }
 )
 register(
    id='TableTennisRndRobotMarkovian-v0',
    entry_point='fancy_gym.envs.mujoco:TableTennisMarkovian',
    max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER,
    kwargs={
        'random_pos_scale': 0.1,
        'random_vel_scale': 0.0,
    }
 )
 # Air Hockey environments
 for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]:
    register(
        id=f'fancy/AirHockey-{env_mode}-v0',
        entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
        max_episode_steps=500,
        add_mp_types=[],
        kwargs={
            'env_mode': env_mode
        }
    )
 register(
    id=f'fancy/AirHockey-tournament-v0',
    entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
    max_episode_steps=15000,
    add_mp_types=[],
    kwargs={
        'env_mode': 'tournament'
    }
 )
--- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
+++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
@ -178,9 +178,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
        return False
-    def render(self, mode=None):
+    def render(self):
        if mode==None:
            mode = self.render_mode
        if self.fig is None:
            # Create base figure once on the beginning. Afterwards only update
            plt.ion()
@ -199,7 +197,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
        self.fig.gca().set_title(
            f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}")
-        if mode == "human":
+        if self.render_mode == "human":
            # arm
            self.line.set_data(self._joints[:, 0], self._joints[:, 1])
@ -207,7 +205,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
            self.fig.canvas.draw()
            self.fig.canvas.flush_events()
-        elif mode == "partial":
+        elif self.render_mode == "partial":
            if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
                # Arm
                plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
--- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
+++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
@ -98,9 +98,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
    def _check_collisions(self) -> bool:
        return self._check_self_collision()
-    def render(self, mode=None):  # pragma: no cover
+    def render(self):  # pragma: no cover
        if mode==None:
            mode = self.render_mode
        if self.fig is None:
            # Create base figure once on the beginning. Afterwards only update
            plt.ion()
--- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
@ -123,9 +123,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
    def _check_collisions(self) -> bool:
        return self._check_self_collision()
-    def render(self, mode=None):
+    def render(self):
        if mode==None:
            mode = self.render_mode
        goal_pos = self._goal.T
        via_pos = self._via_point.T
@ -148,7 +146,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
        self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
-        if mode == "human":
+        if self.render_mode == "human":
            # goal
            if self._steps == 1:
                self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
@ -160,7 +158,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
            self.fig.canvas.draw()
            self.fig.canvas.flush_events()
-        elif mode == "partial":
+        elif self.render_mode == "partial":
            if self._steps == 1:
                # fig, ax = plt.subplots()
                # Add the patch to the Axes
@ -180,7 +178,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
                plt.ylim([-1.1, lim])
                plt.pause(0.01)
-        elif mode == "final":
+        elif self.render_mode == "final":
            if self._steps == 199 or self._is_collided:
                # fig, ax = plt.subplots()
--- a/fancy_gym/envs/mujoco/init.py
+++ b/fancy_gym/envs/mujoco/init.py
@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
 from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
 from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching
 try:
    from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv
 except ModuleNotFoundError:
    print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)")
--- a/fancy_gym/envs/mujoco/air_hockey/LICENSE
+++ b/fancy_gym/envs/mujoco/air_hockey/LICENSE
@ -0,0 +1,26 @@
 MIT License
 Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 ---
 This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge).
 The changes are mostly focused on adapting the provided environments to fancy_gym.
--- a/fancy_gym/envs/mujoco/air_hockey/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@ -0,0 +1,202 @@
 from copy import deepcopy
 import numpy as np
 from gymnasium import spaces
 import fancy_gym.envs.mujoco.air_hockey.constraints as constraints
 from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position
 from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
 from mushroom_rl.core import Environment
 class AirHockeyEnv(Environment):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
    def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, width=1920, height=1080, **kwargs):
        """
        Environment Constructor
        Args:
            env [string]:
                The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament].
            interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
                    polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
                    Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
                    of position, velocity and acceleration of the shape (20, 3, n_joints)
        """
        env_dict = {
            "tournament": position.IiwaPositionTournament,
            "7dof-hit": position.IiwaPositionHit,
            "7dof-defend": position.IiwaPositionDefend,
            "3dof-hit": position.PlanarPositionHit,
            "3dof-defend": position.PlanarPositionDefend,
            "7dof-hit-airhockit2023": position.IiwaPositionHitAirhocKIT2023,
            "7dof-defend-airhockit2023": position.IiwaPositionDefendAirhocKIT2023,
        }
        if env_mode not in env_dict:
            raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!")
        if env_mode == "tournament" and type(interpolation_order) != tuple:
            interpolation_order = (interpolation_order, interpolation_order)
        self.render_mode = render_mode
        self.render_human_active = False
        # Determine headless mode based on render_mode
        headless = self.render_mode == 'rgb_array'
        # Prepare viewer_params
        viewer_params = kwargs.get('viewer_params', {})
        viewer_params.update({'headless': headless, 'width': width, 'height': height})
        kwargs['viewer_params'] = viewer_params
        self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
        self.env_name = env_mode
        self.env_info = self.base_env.env_info
        if hasattr(self.base_env, "wrapper_obs_space") and hasattr(self.base_env, "wrapper_act_space"):
            self.observation_space = self.base_env.wrapper_obs_space
            self.action_space = self.base_env.wrapper_act_space
        else:
            single_robot_obs_size = len(self.base_env.info.observation_space.low)
            if env_mode == "tournament":
                self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
            else:
                self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
            robot_info = self.env_info["robot"]
            if env_mode != "tournament":
                if interpolation_order in [1, 2]:
                    self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
                if interpolation_order in [3, 4, -1]:
                    self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), 
                                                high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
                if interpolation_order in [5]:
                    self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), 
                                                high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
            else:
                acts = [None, None]
                for i in range(2):
                    if interpolation_order[i] in [1, 2]:
                        acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
                    if interpolation_order[i] in [3, 4, -1]:
                        acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), 
                                                high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
                    if interpolation_order[i] in [5]:
                        acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), 
                                                high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
                self.action_space = spaces.Tuple((acts[0], acts[1]))
        constraint_list = constraints.ConstraintList()
        constraint_list.add(constraints.JointPositionConstraint(self.env_info))
        constraint_list.add(constraints.JointVelocityConstraint(self.env_info))
        constraint_list.add(constraints.EndEffectorConstraint(self.env_info))
        if "7dof" in self.env_name or self.env_name == "tournament":
            constraint_list.add(constraints.LinkConstraint(self.env_info))
        self.env_info['constraints'] = constraint_list
        self.env_info['env_name'] = self.env_name
        super().__init__(self.base_env.info)
    def step(self, action):
        obs, reward, done, info = self.base_env.step(action)
        if "tournament" in self.env_name:
            info["constraints_value"] = list()
            info["jerk"] = list()
            for i in range(2):
                obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)]
                info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun(
                    obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']])))
                info["jerk"].append(
                    self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][
                        'n_joints']])
            info["score"] = self.base_env.score
            info["faults"] = self.base_env.faults
        else:
            info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']],
                                                                                  obs[self.env_info['joint_vel_ids']]))
            info["jerk"] = self.base_env.jerk
            info["success"] = self.check_success(obs)
        if self.env_info['env_name'] == "tournament":
            obs = np.array(np.split(obs, 2))
        if self.render_human_active:
            self.base_env.render()
        return obs, reward, done, False, info
    def render(self):
        if self.render_mode == 'rgb_array':
            return self.base_env.render(record = True)
        elif self.render_mode == 'human':
            self.render_human_active = True
            self.base_env.render()
        else:
            raise ValueError(f"Unsupported render mode: '{self.render_mode}'")
    def reset(self, seed=None, options={}):
        self.base_env.seed(seed)
        obs = self.base_env.reset()
        if self.env_info['env_name'] == "tournament":
            obs = np.array(np.split(obs, 2))
        return obs, {}
    def check_success(self, obs):
        puck_pos, puck_vel = self.base_env.get_puck(obs)
        puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos)
        success = 0
        if "hit" in self.env_name:
            if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \
                    np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0:
                success = 1
        elif "defend" in self.env_name:
            if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1:
                success = 1
        elif "prepare" in self.env_name:
            if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1:
                success = 1
        return success
    @property
    def unwrapped(self):
        return self
    def close(self):
        self.base_env.stop()
 if __name__ == "__main__":
    env = AirHockeyEnv(env_mode="7dof-hit")
    env.reset()
    R = 0.
    J = 0.
    gamma = 1.
    steps = 0
    while True:
        action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3
        observation, reward, done, info = env.step(action)
        env.render()
        gamma *= env.info.gamma
        J += gamma * reward
        R += reward
        steps += 1
        if done or steps > env.info.horizon:
            print("J: ", J, " R: ", R)
            R = 0.
            J = 0.
            gamma = 1.
            steps = 0
            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/constraints/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/init.py
@ -0,0 +1 @@
 from .constraints import *
--- a/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
@ -0,0 +1,212 @@
 import copy
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian
 class Constraint:
    def __init__(self, env_info, output_dim, **kwargs):
        """
        Constructor
        Args
        ----
        env_info: dict
            A dictionary contains information about the environment;
        output_dim: int
            The output dimension of the constraints.
        **kwargs: dict
            A dictionary contains agent related information.
        """
        self._env_info = env_info
        self._name = None
        self.output_dim = output_dim
        self._fun_value = np.zeros(self.output_dim)
        self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"]))
        self._q_prev = None
        self._dq_prev = None
    @property
    def name(self):
        """
        The name of the constraints
        """
        return self._name
    def fun(self, q, dq):
        """
        The function of the constraint.
        Args
        ----
        q: numpy.ndarray, (num_joints,)
            The joint position of the robot
        dq: numpy.ndarray, (num_joints,)
            The joint velocity of the robot
        Returns
        -------
        numpy.ndarray, (out_dim,):
            The value computed by the constraints function.
        """
        if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
            return self._fun_value
        else:
            self._jacobian(q, dq)
            return self._fun(q, dq)
    def jacobian(self, q, dq):
        """
        Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity.
        Args
        ----
        q: ndarray, (num_joints,)
            The joint position of the robot
        dq: ndarray, (num_joints,)
            The joint velocity of the robot
        Returns
        -------
        numpy.ndarray, (dim_output, num_joints * 2):
            The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot]
        """
        if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
            return self._fun_value
        else:
            self._fun(q, dq)
            return self._jacobian(q, dq)
    def _fun(self, q, dq):
        raise NotImplementedError
    def _jacobian(self, q, dq):
        raise NotImplementedError
 class ConstraintList:
    def __init__(self):
        self.constraints = dict()
    def keys(self):
        return self.constraints.keys()
    def get(self, key):
        return self.constraints.get(key)
    def add(self, c):
        self.constraints.update({c.name: c})
    def delete(self, name):
        del self.constraints[name]
    def fun(self, q, dq):
        return {key: self.constraints[key].fun(q, dq) for key in self.constraints}
    def jacobian(self, q, dq):
        return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints}
 class JointPositionConstraint(Constraint):
    def __init__(self, env_info, **kwargs):
        super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
        self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95
        self._name = 'joint_pos_constr'
    def _fun(self, q, dq):
        self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1]
        self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q
        return self._fun_value
    def _jacobian(self, q, dq):
        self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye(
            self._env_info['robot']['n_joints'])
        self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye(
            self._env_info['robot']['n_joints'])
        return self._jac_value
 class JointVelocityConstraint(Constraint):
    def __init__(self, env_info, **kwargs):
        super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
        self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95
        self._name = 'joint_vel_constr'
    def _fun(self, q, dq):
        self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1]
        self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq
        return self._fun_value
    def _jacobian(self, q, dq):
        self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye(
            self._env_info['robot']['n_joints'])
        self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye(
            self._env_info['robot']['n_joints'])
        return self._jac_value
 class EndEffectorConstraint(Constraint):
    def __init__(self, env_info, **kwargs):
        # 1 Dimension on x direction: x > x_lb
        # 2 Dimension on y direction: y > y_lb, y < y_ub
        # 2 Dimension on z direction: z > z_lb, z < z_ub
        super().__init__(env_info, output_dim=5, **kwargs)
        self._name = "ee_constr"
        tolerance = 0.02
        self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
        self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
        self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - (
                self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius'])
        self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
        self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
        self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance
        self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance
    def _fun(self, q, dq):
        ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q)
        self._fun_value = np.array([-ee_pos[0] + self.x_lb,
                                    -ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub,
                                    -ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub])
        return self._fun_value
    def _jacobian(self, q, dq):
        jac = jacobian(self.robot_model, self.robot_data, q)
        dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]])
        self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']]
        return self._jac_value
 class LinkConstraint(Constraint):
    def __init__(self, env_info, **kwargs):
        # 1 Dimension: wrist_z > minimum_height
        # 2 Dimension: elbow_z > minimum_height
        super().__init__(env_info, output_dim=2, **kwargs)
        self._name = "link_constr"
        self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
        self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
        self.z_lb = 0.25
    def _fun(self, q, dq):
        wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7")
        elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4")
        self._fun_value = np.array([-wrist_pos[2] + self.z_lb,
                                    -elbow_pos[2] + self.z_lb])
        return self._fun_value
    def _jacobian(self, q, dq):
        jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7")
        jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4")
        self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([
            -jac_wrist[2, :self._env_info['robot']['n_joints']],
            -jac_elbow[2, :self._env_info['robot']['n_joints']],
        ])
        return self._jac_value
--- a/fancy_gym/envs/mujoco/air_hockey/data/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/data/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
@ -0,0 +1,58 @@
 <mujoco model="AirHockeySingle">
    <include file="iiwa1.xml"/>
    <include file="iiwa2.xml"/>
    <include file="../table.xml"/>
    <contact>
        <exclude body1="table_surface" body2="iiwa_1/base"/>
        <exclude body1="table_surface" body2="iiwa_1/link_1"/>
        <exclude body1="table_surface" body2="iiwa_1/link_2"/>
        <exclude body1="table_surface" body2="iiwa_1/link_3"/>
        <exclude body1="table_surface" body2="iiwa_1/link_4"/>
        <exclude body1="table_surface" body2="iiwa_1/link_5"/>
        <exclude body1="table_surface" body2="iiwa_1/link_6"/>
        <exclude body1="table_surface" body2="iiwa_1/link_7"/>
        <exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
        <exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
        <exclude body1="rim" body2="iiwa_1/base"/>
        <exclude body1="rim" body2="iiwa_1/link_1"/>
        <exclude body1="rim" body2="iiwa_1/link_2"/>
        <exclude body1="rim" body2="iiwa_1/link_3"/>
        <exclude body1="rim" body2="iiwa_1/link_4"/>
        <exclude body1="rim" body2="iiwa_1/link_5"/>
        <exclude body1="rim" body2="iiwa_1/link_6"/>
        <exclude body1="rim" body2="iiwa_1/link_7"/>
        <exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
        <exclude body1="rim" body2="iiwa_1/striker_mallet"/>
        <exclude body1="world" body2="iiwa_1/striker_mallet"/>
        <exclude body1="table_surface" body2="iiwa_2/base"/>
        <exclude body1="table_surface" body2="iiwa_2/link_1"/>
        <exclude body1="table_surface" body2="iiwa_2/link_2"/>
        <exclude body1="table_surface" body2="iiwa_2/link_3"/>
        <exclude body1="table_surface" body2="iiwa_2/link_4"/>
        <exclude body1="table_surface" body2="iiwa_2/link_5"/>
        <exclude body1="table_surface" body2="iiwa_2/link_6"/>
        <exclude body1="table_surface" body2="iiwa_2/link_7"/>
        <exclude body1="table_surface" body2="iiwa_2/striker_joint_link"/>
        <exclude body1="table_surface" body2="iiwa_2/striker_mallet"/>
        <exclude body1="rim" body2="iiwa_2/base"/>
        <exclude body1="rim" body2="iiwa_2/link_1"/>
        <exclude body1="rim" body2="iiwa_2/link_2"/>
        <exclude body1="rim" body2="iiwa_2/link_3"/>
        <exclude body1="rim" body2="iiwa_2/link_4"/>
        <exclude body1="rim" body2="iiwa_2/link_5"/>
        <exclude body1="rim" body2="iiwa_2/link_6"/>
        <exclude body1="rim" body2="iiwa_2/link_7"/>
        <exclude body1="rim" body2="iiwa_2/striker_joint_link"/>
        <exclude body1="rim" body2="iiwa_2/striker_mallet"/>
        <exclude body1="world" body2="iiwa_2/striker_mallet"/>
    </contact>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
@ -0,0 +1,108 @@
 <mujoco model="iiwa_1">
    <compiler angle="radian" autolimits="true" meshdir="assets"/>
    <default>
        <default class="vis">
            <geom contype="0" conaffinity="0"/>
        </default>
        <default class="robot">
            <geom condim="4" solref="0.02 0.3" priority="2"/>
        </default>
    </default>
    <asset>
        <mesh name="link_0" file="link_0.stl"/>
        <mesh name="link_1" file="link_1.stl"/>
        <mesh name="link_2" file="link_2.stl"/>
        <mesh name="link_3" file="link_3.stl"/>
        <mesh name="link_4" file="link_4.stl"/>
        <mesh name="link_5" file="link_5.stl"/>
        <mesh name="link_6" file="link_6.stl"/>
        <mesh name="link_7" file="link_7.stl"/>
        <mesh name="EE_arm" file="EE_arm.stl"/>
        <mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
    </asset>
    <worldbody>
        <body name="iiwa_1/base" pos="-1.51 0 -0.1">
            <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
            <body name="iiwa_1/link_1" pos="0 0 0.1575">
                <inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
                          fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
                <joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
                       frictionloss="0.384477"/>
                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
                <body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
                    <inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
                              fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
                    <joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
                           frictionloss="0.496333"/>
                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
                    <body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
                        <inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
                                  fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
                        <joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
                               frictionloss="0.173951"/>
                        <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
                        <body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
                            <inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
                                      fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
                            <joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
                                   damping="0.219041" frictionloss="0.3751"/>
                            <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
                            <body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
                                <inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
                                          fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
                                <joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
                                       damping="0.185923" frictionloss="0.481099"/>
                                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
                                <body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
                                    <inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
                                              fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
                                    <joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
                                           damping="0.1" frictionloss="0.196149"/>
                                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
                                    <body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
                                        <inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
                                                  fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
                                        <joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
                                               damping="0.1" frictionloss="0.299238" armature="0.01"/>
                                        <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
                                        <geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
                                              class="vis"/>
                                        <body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
                                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
                                            <body name="iiwa_1/striker_mallet" pos="0 0 0">
                                                <inertial pos="0 0 0.0682827" mass="0.283"
                                                          diaginertia="0.005 0.005 0.005"/>
                                                <joint name="iiwa_1/striker_joint_1" pos="0 0 0" axis="0 1 0"
                                                       range="-1.5708 1.5708" damping="0.0"/>
                                                <joint name="iiwa_1/striker_joint_2" pos="0 0 0" axis="1 0 0"
                                                       range="-1.5708 1.5708" damping="0.0"/>
                                                <geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
                                                      class="vis"/>
                                                <geom name="iiwa_1/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
                                                      size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
                                            </body>
                                        </body>
                                    </body>
                                </body>
                            </body>
                        </body>
                    </body>
                </body>
            </body>
        </body>
    </worldbody>
    <actuator>
        <motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
        <motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
        <motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
        <motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
        <motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
        <motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
        <motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
        <motor name="iiwa_1/striker_joint_1" joint="iiwa_1/striker_joint_1" ctrlrange="-10 10"/>
        <motor name="iiwa_1/striker_joint_2" joint="iiwa_1/striker_joint_2" ctrlrange="-10 10"/>
    </actuator>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
@ -0,0 +1,89 @@
 <mujoco model="iiwa_2">
    <compiler angle="radian" autolimits="true" meshdir="assets"/>
    <worldbody>
        <body name="iiwa_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
            <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
            <body name="iiwa_2/link_1" pos="0 0 0.1575">
                <inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
                          fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
                <joint name="iiwa_2/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
                       frictionloss="0.384477"/>
                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
                <body name="iiwa_2/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
                    <inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
                              fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
                    <joint name="iiwa_2/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
                           frictionloss="0.496333"/>
                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
                    <body name="iiwa_2/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
                        <inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
                                  fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
                        <joint name="iiwa_2/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
                               frictionloss="0.173951"/>
                        <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
                        <body name="iiwa_2/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
                            <inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
                                      fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
                            <joint name="iiwa_2/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
                                   damping="0.219041" frictionloss="0.3751"/>
                            <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
                            <body name="iiwa_2/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
                                <inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
                                          fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
                                <joint name="iiwa_2/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
                                       damping="0.185923" frictionloss="0.481099"/>
                                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
                                <body name="iiwa_2/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
                                    <inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
                                              fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
                                    <joint name="iiwa_2/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
                                           damping="0.1" frictionloss="0.196149"/>
                                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
                                    <body name="iiwa_2/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
                                        <inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
                                                  fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
                                        <joint name="iiwa_2/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
                                               damping="0.1" frictionloss="0.299238" armature="0.01"/>
                                        <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
                                        <geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
                                              class="vis"/>
                                        <body name="iiwa_2/striker_joint_link" pos="0 0 0.585">
                                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
                                            <body name="iiwa_2/striker_mallet" pos="0 0 0">
                                                <inertial pos="0 0 0.0682827" mass="0.283"
                                                          diaginertia="0.005 0.005 0.005"/>
                                                <joint name="iiwa_2/striker_joint_1" pos="0 0 0" axis="0 1 0"
                                                       range="-1.5708 1.5708" damping="0.0"/>
                                                <joint name="iiwa_2/striker_joint_2" pos="0 0 0" axis="1 0 0"
                                                       range="-1.5708 1.5708" damping="0.0"/>
                                                <geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
                                                      class="vis"/>
                                                <geom name="iiwa_2/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
                                                      size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
                                            </body>
                                        </body>
                                    </body>
                                </body>
                            </body>
                        </body>
                    </body>
                </body>
            </body>
        </body>
    </worldbody>
    <actuator>
        <motor name="iiwa_2/joint_1" joint="iiwa_2/joint_1" ctrlrange="-320 320"/>
        <motor name="iiwa_2/joint_2" joint="iiwa_2/joint_2" ctrlrange="-320 320"/>
        <motor name="iiwa_2/joint_3" joint="iiwa_2/joint_3" ctrlrange="-176 176"/>
        <motor name="iiwa_2/joint_4" joint="iiwa_2/joint_4" ctrlrange="-176 176"/>
        <motor name="iiwa_2/joint_5" joint="iiwa_2/joint_5" ctrlrange="-110 110"/>
        <motor name="iiwa_2/joint_6" joint="iiwa_2/joint_6" ctrlrange="-40 40"/>
        <motor name="iiwa_2/joint_7" joint="iiwa_2/joint_7" ctrlrange="-40 40"/>
        <motor name="iiwa_2/striker_joint_1" joint="iiwa_2/striker_joint_1" ctrlrange="-10 10"/>
        <motor name="iiwa_2/striker_joint_2" joint="iiwa_2/striker_joint_2" ctrlrange="-10 10"/>
    </actuator>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
@ -0,0 +1,94 @@
 <mujoco model="iiwa_1">
    <compiler angle="radian" autolimits="true" meshdir="assets"/>
    <default>
        <default class="vis">
            <geom contype="0" conaffinity="0"/>
        </default>
        <default class="robot">
            <geom condim="4" solref="0.02 0.3" priority="2"/>
        </default>
    </default>
    <asset>
        <mesh name="link_0" file="link_0.stl"/>
        <mesh name="link_1" file="link_1.stl"/>
        <mesh name="link_2" file="link_2.stl"/>
        <mesh name="link_3" file="link_3.stl"/>
        <mesh name="link_4" file="link_4.stl"/>
        <mesh name="link_5" file="link_5.stl"/>
        <mesh name="link_6" file="link_6.stl"/>
        <mesh name="link_7" file="link_7.stl"/>
        <mesh name="EE_arm" file="EE_arm.stl"/>
        <mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
    </asset>
    <worldbody>
        <body name="iiwa_1/base" pos="0.0 0 0.0">
            <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
            <body name="iiwa_1/link_1" pos="0 0 0.1575">
                <inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
                          fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
                <joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
                       frictionloss="0.384477"/>
                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
                <body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
                    <inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
                              fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
                    <joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
                           frictionloss="0.496333"/>
                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
                    <body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
                        <inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
                                  fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
                        <joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
                               frictionloss="0.173951"/>
                        <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
                        <body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
                            <inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
                                      fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
                            <joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
                                   damping="0.219041" frictionloss="0.3751"/>
                            <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
                            <body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
                                <inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
                                          fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
                                <joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
                                       damping="0.185923" frictionloss="0.481099"/>
                                <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
                                <body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
                                    <inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
                                              fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
                                    <joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
                                           damping="0.1" frictionloss="0.196149"/>
                                    <geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
                                    <body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
                                        <inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
                                                  fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
                                        <joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
                                               damping="0.1" frictionloss="0.299238" armature="0.01"/>
                                        <geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
                                        <geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
                                              class="vis"/>
                                        <body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
                                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
                                        </body>
                                    </body>
                                </body>
                            </body>
                        </body>
                    </body>
                </body>
            </body>
        </body>
    </worldbody>
    <actuator>
        <motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
        <motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
        <motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
        <motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
        <motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
        <motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
        <motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
    </actuator>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl
--- a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
@ -0,0 +1,32 @@
 <mujoco model="AirHockeySingle">
    <include file="iiwa1.xml"/>
    <include file="../table.xml"/>
    <contact>
        <exclude body1="table_surface" body2="iiwa_1/base"/>
        <exclude body1="table_surface" body2="iiwa_1/link_1"/>
        <exclude body1="table_surface" body2="iiwa_1/link_2"/>
        <exclude body1="table_surface" body2="iiwa_1/link_3"/>
        <exclude body1="table_surface" body2="iiwa_1/link_4"/>
        <exclude body1="table_surface" body2="iiwa_1/link_5"/>
        <exclude body1="table_surface" body2="iiwa_1/link_6"/>
        <exclude body1="table_surface" body2="iiwa_1/link_7"/>
        <exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
        <exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
        <exclude body1="rim" body2="iiwa_1/base"/>
        <exclude body1="rim" body2="iiwa_1/link_1"/>
        <exclude body1="rim" body2="iiwa_1/link_2"/>
        <exclude body1="rim" body2="iiwa_1/link_3"/>
        <exclude body1="rim" body2="iiwa_1/link_4"/>
        <exclude body1="rim" body2="iiwa_1/link_5"/>
        <exclude body1="rim" body2="iiwa_1/link_6"/>
        <exclude body1="rim" body2="iiwa_1/link_7"/>
        <exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
        <exclude body1="rim" body2="iiwa_1/striker_mallet"/>
        <exclude body1="world" body2="iiwa_1/striker_mallet"/>
    </contact>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/init.py
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
@ -0,0 +1,37 @@
 <mujoco model="AirHockeySingle">
    <include file="planar_robot_1.xml"/>
    <include file="planar_robot_2.xml"/>
    <include file="../table.xml"/>
    <contact>
        <exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_3" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_2" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_1" body2="table_surface"/>
        <exclude body1="planar_robot_1/base" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_ee" body2="rim"/>
        <exclude body1="planar_robot_1/body_hand" body2="rim"/>
        <exclude body1="planar_robot_1/body_3" body2="rim"/>
        <exclude body1="planar_robot_1/body_2" body2="rim"/>
        <exclude body1="planar_robot_1/body_1" body2="rim"/>
        <exclude body1="planar_robot_1/base" body2="rim"/>
        <exclude body1="planar_robot_2/body_ee" body2="table_surface"/>
        <exclude body1="planar_robot_2/body_hand" body2="table_surface"/>
        <exclude body1="planar_robot_2/body_3" body2="table_surface"/>
        <exclude body1="planar_robot_2/body_2" body2="table_surface"/>
        <exclude body1="planar_robot_2/body_1" body2="table_surface"/>
        <exclude body1="planar_robot_2/base" body2="table_surface"/>
        <exclude body1="planar_robot_2/body_ee" body2="rim"/>
        <exclude body1="planar_robot_2/body_hand" body2="rim"/>
        <exclude body1="planar_robot_2/body_3" body2="rim"/>
        <exclude body1="planar_robot_2/body_2" body2="rim"/>
        <exclude body1="planar_robot_2/body_1" body2="rim"/>
        <exclude body1="planar_robot_2/base" body2="rim"/>
    </contact>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
@ -0,0 +1,61 @@
 <mujoco model="planar_robot_1">
    <compiler autolimits="true" angle="radian"/>
    <asset>
        <material name="dark_red" rgba="0.58 0.03 0.25 1"/>
        <material name="black" rgba="0.1 0.1 0.11 1"/>
    </asset>
    <default>
        <default class="visual">
            <geom contype="0" conaffinity="0"/>
        </default>
        <default class="robot">
            <geom condim="4" solref="0.02 0.3" priority="2"/>
        </default>
    </default>
    <worldbody>
        <body name="planar_robot_1/base" pos="-1.51 0 -0.1">
            <geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
            <inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
            <body name="planar_robot_1/body_1" pos="0 0 0.25">
                <joint name="planar_robot_1/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
                <inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
                <geom class="robot" type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0"
                      pos="0.275 0 0"/>
                <body name="planar_robot_1/body_2" pos="0.55 0 0">
                    <joint name="planar_robot_1/joint_2" axis="0 0 1" range="-1.8 1.8"/>
                    <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
                    <geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
                          pos="0.22 0 0"/>
                    <geom class="robot" type="sphere" material="dark_red" size="0.05"/>
                    <body name="planar_robot_1/body_3" pos="0.44 0 0">
                        <joint name="planar_robot_1/joint_3" axis="0 0 1"
                               range="-2.0943951023931953 2.0943951023931953"/>
                        <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
                        <geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22"
                              euler="0 1.57079632679 0" pos="0.22 0 0"/>
                        <geom class="robot" type="sphere" material="dark_red" size="0.05"/>
                        <body name="planar_robot_1/body_hand" pos="0.44 0 0">
                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
                            <geom class="robot" type="sphere" material="dark_red" size="0.05"/>
                            <geom class="robot" type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
                            <body name="planar_robot_1/body_ee" pos="0 0 -0.15">
                                <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
                                <geom class="robot" name="planar_robot_1/ee" type="cylinder" material="black"
                                      size="0.04815 0.01" pos="0 0 0.01" friction="0 0 0"/>
                            </body>
                        </body>
                    </body>
                </body>
            </body>
        </body>
    </worldbody>
    <actuator>
        <motor name="planar_robot_1/joint_1" joint="planar_robot_1/joint_1" ctrlrange="-100 100"/>
        <motor name="planar_robot_1/joint_2" joint="planar_robot_1/joint_2" ctrlrange="-50 50"/>
        <motor name="planar_robot_1/joint_3" joint="planar_robot_1/joint_3" ctrlrange="-30 30"/>
    </actuator>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
@ -0,0 +1,46 @@
 <mujoco model="planar_robot_2">
    <compiler autolimits="true" angle="radian"/>
    <worldbody>
        <body name="planar_robot_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
            <geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
            <inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
            <body name="planar_robot_2/body_1" pos="0 0 0.25">
                <joint name="planar_robot_2/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
                <inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
                <geom type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0" pos="0.275 0 0"/>
                <body name="planar_robot_2/body_2" pos="0.55 0 0">
                    <joint name="planar_robot_2/joint_2" axis="0 0 1" range="-1.8 1.8"/>
                    <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
                    <geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
                          pos="0.22 0 0"/>
                    <geom type="sphere" material="dark_red" size="0.05"/>
                    <body name="planar_robot_2/body_3" pos="0.44 0 0">
                        <joint name="planar_robot_2/joint_3" axis="0 0 1"
                               range="-2.0943951023931953 2.0943951023931953"/>
                        <inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
                        <geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
                              pos="0.22 0 0"/>
                        <geom type="sphere" material="dark_red" size="0.05"/>
                        <body name="planar_robot_2/body_hand" pos="0.44 0 0">
                            <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
                            <geom type="sphere" material="dark_red" size="0.05"/>
                            <geom type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
                            <body name="planar_robot_2/body_ee" pos="0 0 -0.15">
                                <inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
                                <geom name="planar_robot_2/ee" type="cylinder" material="black" size="0.04815 0.01"
                                      pos="0 0 0.01" friction="0 0 0"/>
                            </body>
                        </body>
                    </body>
                </body>
            </body>
        </body>
    </worldbody>
    <actuator>
        <motor name="planar_robot_2/joint_1" joint="planar_robot_2/joint_1" ctrlrange="-100 100"/>
        <motor name="planar_robot_2/joint_2" joint="planar_robot_2/joint_2" ctrlrange="-50 50"/>
        <motor name="planar_robot_2/joint_3" joint="planar_robot_2/joint_3" ctrlrange="-30 30"/>
    </actuator>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
@ -0,0 +1,24 @@
 <mujoco model="AirHockeySingle">
    <include file="planar_robot_1.xml"/>
    <include file="../table.xml"/>
    <contact>
        <exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_3" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_2" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_1" body2="table_surface"/>
        <exclude body1="planar_robot_1/base" body2="table_surface"/>
        <exclude body1="planar_robot_1/body_ee" body2="rim"/>
        <exclude body1="planar_robot_1/body_hand" body2="rim"/>
        <exclude body1="planar_robot_1/body_3" body2="rim"/>
        <exclude body1="planar_robot_1/body_2" body2="rim"/>
        <exclude body1="planar_robot_1/body_1" body2="rim"/>
        <exclude body1="planar_robot_1/base" body2="rim"/>
    </contact>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/data/table.xml
+++ b/fancy_gym/envs/mujoco/air_hockey/data/table.xml
@ -0,0 +1,89 @@
 <mujoco model="table">
    <option timestep="0.001" cone="elliptic" impratio="1"/>
    <asset>
        <material name="grey" specular="0.5" shininess="0.25" rgba="0.8 0.8 0.8 1"/>
        <material name="white" specular="0.5" shininess="0.25" rgba="1.0 1.0 1.0 1"/>
        <material name="red" specular="0.5" shininess="0.25" rgba="1.0 0.0 0.0 1"/>
        <material name="blue" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 1"/>
        <material name="transparent" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 0"/>
        <texture type="skybox" builtin="gradient" rgb1="0.3 0.5 0.7" rgb2="0 0 0" width="512"
                 height="3072"/>
        <texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.2 0.3 0.4"
                 rgb2="0.1 0.2 0.3" markrgb="0.8 0.8 0.8" width="300" height="300"/>
        <material name="groundplane" texture="groundplane" texuniform="true" texrepeat="5 5"
                  reflectance="0.2"/>
    </asset>
    <default>
        <geom condim="4" solref="0.02 0.3"/>
        <default class="rim">
            <geom type="box" material="grey" condim="6" friction="10000 0.0 0.0" priority="1"
                  solref="-2000000 -250" solimp="0.99 0.999 0.001 0.5 2"/>
        </default>
    </default>
    <worldbody>
        <body name="table">
            <body name="table_surface">
                <geom name="surface" type="box" material="white" size="1.064 0.609 0.0505" pos="0 0 -0.0505"
                      condim="4" friction="0.08 0.08 0.0" priority="1"
                      solref="0.0125 0.5" solimp="0.9 0.999 0.001 0.5 2"/>
            </body>
            <body name="rim">
 <!--                <geom name="rim_home_l" class="rim" size="0.045 0.197 0.005" pos="-1.019 0.322 0.005"/>-->
 <!--                <geom name="rim_home_r" class="rim" size="0.045 0.197 0.005" pos="-1.019 -0.322 0.005"/>-->
                <geom name="rim_home_l" class="rim" size="0.045 0.1945 0.005" pos="-1.019 0.3245 0.005"/>
                <geom name="rim_home_r" class="rim" size="0.045 0.1945 0.005" pos="-1.019 -0.3245 0.005"/>
                <geom name="rim_home_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 0.13 0.005"/>
                <geom name="rim_home_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 0.13 0.005"/>
                <geom name="rim_home_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 -0.13 0.005"/>
                <geom name="rim_home_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 -0.13 0.005"/>
                <geom name="rim_home_top" class="rim" size="0.045 0.519 0.01" pos="-1.019 0 0.02"/>
                <geom name="rim_left" class="rim" size="1.064 0.045 0.015" pos="0 0.564 0.015"/>
                <geom name="rim_right" class="rim" size="1.064 0.045 0.015" pos="0 -0.564 0.015"/>
 <!--                <geom name="rim_away_l" class="rim" size="0.045 0.197 0.005" pos="1.019 0.322 0.005"/>-->
 <!--                <geom name="rim_away_r" class="rim" size="0.045 0.197 0.005" pos="1.019 -0.322 0.005"/>-->
                <geom name="rim_away_l" class="rim" size="0.045 0.1945 0.005" pos="1.019 0.3245 0.005"/>
                <geom name="rim_away_r" class="rim" size="0.045 0.1945 0.005" pos="1.019 -0.3245 0.005"/>
                <geom name="rim_away_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 0.13 0.005"/>
                <geom name="rim_away_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 0.13 0.005"/>
                <geom name="rim_away_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 -0.13 0.005"/>
                <geom name="rim_away_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 -0.13 0.005"/>
                <geom name="rim_away_top" class="rim" size="0.045 0.519 0.01" pos="1.019 0 0.02"/>
            </body>
        </body>
        <body name="base" pos="0 0 0">
            <joint name="puck_x" type="slide" axis="1 0 0" damping="0.005" limited="false"/>
            <joint name="puck_y" type="slide" axis="0 1 0" damping="0.005" limited="false"/>
            <joint name="puck_yaw" type="hinge" axis="0 0 1" damping="2e-6" limited="false"/>
            <body name="puck">
                <geom pos="0 0 0" name="puck" type="cylinder" material="red" size="0.03165 0.003"
                      condim="4" priority="0"/>
                <geom pos="0.02 0 0" type="cylinder" material="blue" size="0.01 0.0031"
                      condim="4" contype="0" conaffinity="0"/>
                <inertial pos="0 0 0" mass="0.01" diaginertia="2.5e-6 2.5e-6 5e-6"/>
            </body>
        </body>
        <site name="puck_vis" type="ellipsoid" size="0.03165 0.03165 0.004" pos="0 0 -0.05"/>
        <site name="puck_vis_rot" type="cylinder" size="0.0045 0.006" rgba="1 0 0 1" pos="0 0 -0.05"/>
    </worldbody>
    <contact>
        <exclude body1="puck" body2="table_surface"/>
    </contact>
    <worldbody>
        <light pos="0 0 3" dir="0 0 -1" directional="true"/>
        <geom pos="0 0 -0.1" name="floor" size="0 0 0.05" type="plane" material="groundplane"/>
    </worldbody>
 </mujoco>
--- a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
+++ b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
@ -0,0 +1,274 @@
 from collections import deque
 import mujoco
 import numpy as np
 import scipy
 from fancy_gym.envs.mujoco.air_hockey import seven_dof
 from fancy_gym.envs.mujoco.air_hockey import three_dof
 class PositionControl:
    def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs):
        """
            Mixin that adds position controller to mujoco environments.
            Args:
                p_gain (float): Proportional controller gain
                d_gain (float): Differential controller gain
                i_gain (float): Integral controller gain
                interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
                    polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
                    Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
                    of position, velocity and acceleration of the shape (20, 3, n_joints)
                    In the case of 2 agents it is a tuple, which describes the interpolation order for each agent
                debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the
                    entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk.
            """
        self.debug = debug
        super(PositionControl, self).__init__(*args, **kwargs)
        self.robot_model = self.env_info['robot']['robot_model']
        self.robot_data = self.env_info['robot']['robot_data']
        self.p_gain = np.array(p_gain * self.n_agents)
        self.d_gain = np.array(d_gain * self.n_agents)
        self.i_gain = np.array(i_gain * self.n_agents)
        self.prev_pos = np.zeros(len(self.actuator_joint_ids))
        self.prev_vel = np.zeros(len(self.actuator_joint_ids))
        self.prev_acc = np.zeros(len(self.actuator_joint_ids))
        self.i_error = np.zeros(len(self.actuator_joint_ids))
        self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids))
        self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,)
        self._num_env_joints = len(self.actuator_joint_ids)
        self.n_robot_joints = self.env_info['robot']["n_joints"]
        self.action_shape = [None] * self.n_agents
        for i in range(self.n_agents):
            if self.interp_order[i] is None:
                self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints)
            elif self.interp_order[i] in [1, 2]:
                self.action_shape[i] = (self.n_robot_joints,)
            elif self.interp_order[i] in [3, 4, -1]:
                self.action_shape[i] = (2, self.n_robot_joints)
            elif self.interp_order[i] == 5:
                self.action_shape[i] = (3, self.n_robot_joints)
        self.traj = None
        self.jerk = np.zeros(self._num_env_joints)
        if self.debug:
            self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
    def _enforce_safety_limits(self, desired_pos, desired_vel):
        # ROS safe controller
        pos = self.prev_controller_cmd_pos
        k = 20
        joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents))
        joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents))
        min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1])
        max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1])
        clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel)
        min_pos = pos + min_vel * self._timestep
        max_pos = pos + max_vel * self._timestep
        clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos)
        self.prev_controller_cmd_pos = clipped_pos.copy()
        return clipped_pos, clipped_vel
    def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel):
        clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel)
        error = (clipped_pos - current_pos)
        self.i_error += self.i_gain * error * self._timestep
        torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error
        # Acceleration FeedForward
        tau_ff = np.zeros(self.robot_model.nv)
        for i in range(self.n_agents):
            robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i
            self.robot_data.qpos = current_pos[robot_joint_ids]
            self.robot_data.qvel = current_vel[robot_joint_ids]
            acc_ff = desired_acc[robot_joint_ids]
            mujoco.mj_forward(self.robot_model, self.robot_data)
            mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff)
            torque[robot_joint_ids] += tau_ff
            # Gravity Compensation and Coriolis and Centrifugal force
            torque[robot_joint_ids] += self.robot_data.qfrc_bias
            torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids],
                                                            self.robot_model.actuator_ctrlrange[:, 0]),
                                                 self.robot_model.actuator_ctrlrange[:, 1])
        if self.debug:
            self.controller_record.append(
                np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk]))
        return torque
    def _interpolate_trajectory(self, interp_order, action, i=0):
        tf = self.dt
        prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
        prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
        prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
        if interp_order == 1 and action.ndim == 1:
            coef = np.array([[1, 0], [1, tf]])
            results = np.vstack([prev_pos, action])
        elif interp_order == 2 and action.ndim == 1:
            coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]])
            if np.linalg.norm(action - prev_pos) < 1e-3:
                prev_vel = np.zeros_like(prev_vel)
            results = np.vstack([prev_pos, action, prev_vel])
        elif interp_order == 3 and action.shape[0] == 2:
            coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]])
            results = np.vstack([prev_pos, action[0], prev_vel, action[1]])
        elif interp_order == 4 and action.shape[0] == 2:
            coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4],
                             [0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3],
                             [0, 0, 2, 0, 0]])
            results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc])
        elif interp_order == 5 and action.shape[0] == 3:
            coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5],
                             [0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4],
                             [0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]])
            results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]])
        elif interp_order == -1:
            # Interpolate position and velocity linearly
            pass
        else:
            raise ValueError("Undefined interpolator order or the action dimension does not match!")
        if interp_order > 0:
            A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints)
            y = results.reshape(-2, order='F')
            weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1)
            weights_d = np.polynomial.polynomial.polyder(weights, axis=1)
            weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
        elif interp_order == -1:
            weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T
            weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T
            weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
        if interp_order in [3, 4, 5]:
            self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep
        else:
            self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf
        self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T)
        self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T)
        self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T)
        for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps):
            q = np.polynomial.polynomial.polyval(t, weights.T)
            qd = np.polynomial.polynomial.polyval(t, weights_d.T)
            qdd = np.polynomial.polynomial.polyval(t, weights_dd.T)
            yield q, qd, qdd
    def reset(self, obs=None):
        obs = super(PositionControl, self).reset(obs)
        self.prev_pos = self._data.qpos[self.actuator_joint_ids]
        self.prev_vel = self._data.qvel[self.actuator_joint_ids]
        self.prev_acc = np.zeros(len(self.actuator_joint_ids))
        self.i_error = np.zeros(len(self.actuator_joint_ids))
        self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids]
        if self.debug:
            self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
        return obs
    def _step_init(self, obs, action):
        super(PositionControl, self)._step_init(obs, action)
        if self.n_agents == 1:
            self.traj = self._create_traj(self.interp_order[0], action)
        else:
            def _traj():
                traj_1 = self._create_traj(self.interp_order[0], action[0], 0)
                traj_2 = self._create_traj(self.interp_order[1], action[1], 1)
                for a1, a2 in zip(traj_1, traj_2):
                    yield np.hstack([a1, a2])
            self.traj = _traj()
    def _create_traj(self, interp_order, action, i=0):
        if interp_order is None:
            return iter(action)
        return self._interpolate_trajectory(interp_order, action, i)
    def _compute_action(self, obs, action):
        cur_pos, cur_vel = self.get_joints(obs)
        desired_pos, desired_vel, desired_acc = next(self.traj)
        return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel)
    def _preprocess_action(self, action):
        action = super(PositionControl, self)._preprocess_action(action)
        if self.n_agents == 1:
            assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \
                                                      f" {action.shape}"
        else:
            for i in range(self.n_agents):
                assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \
                                                          f" {action[i].shape}"
        return action
 class PositionControlIIWA(PositionControl):
    def __init__(self, *args, **kwargs):
        p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.]
        d_gain = [60, 80, 60, 30, 10, 1, 0.5]
        i_gain = [0, 0, 0, 0, 0, 0, 0]
        super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
 class PositionControlPlanar(PositionControl):
    def __init__(self, *args, **kwargs):
        p_gain = [960, 480, 240]
        d_gain = [60, 20, 4]
        i_gain = [0, 0, 0]
        super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
 class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit):
    pass
 class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
    pass
 class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
    pass
 class IiwaPositionHitAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyHitAirhocKIT2023):
    pass
 class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
    pass
 class IiwaPositionDefendAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyDefendAirhocKIT2023):
    pass
 class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
    pass
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/init.py
@ -0,0 +1,4 @@
 from .env_base import AirHockeyBase
 from .tournament import AirHockeyTournament
 from .hit import AirHockeyHit, AirHockeyHitAirhocKIT2023
 from .defend import AirHockeyDefend, AirHockeyDefendAirhocKIT2023
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
@ -0,0 +1,114 @@
 import numpy as np
 from gymnasium import spaces
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
 from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian
 class AirhocKIT2023BaseEnv(AirHockeySingle):
    def __init__(self, noise=False, **kwargs):
        super().__init__(**kwargs)
        obs_low = np.hstack([[-np.inf] * 37])
        obs_high = np.hstack([[np.inf] * 37])
        self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
        self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6))
        self.noise = noise
    # We don't need puck yaw observations
    def filter_obs(self, obs):
        obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]])
        return obs
    def add_noise(self, obs):
        if not self.noise:
            return
        obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3)
        obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3)
    def reset(self):
        self.last_acceleration = np.repeat(0., 6)
        obs = super().reset()
        self.add_noise(obs)
        self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1]
        self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1]
        self.last_planned_world_pos = self._fk(self.interp_pos)
        obs = np.hstack([
            obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
        ])
        return self.filter_obs(obs)
    def step(self, action):
        action /= 10
        new_vel = self.interp_vel + action
        jerk = 2 * (new_vel - self.interp_vel - self.last_acceleration * 0.02) / (0.02 ** 2)
        new_pos = self.interp_pos + self.interp_vel * 0.02 + (1/2) * self.last_acceleration * (0.02 ** 2) + (1/6) * jerk * (0.02 ** 3)
        abs_action = np.vstack([np.hstack([new_pos, 0]), np.hstack([new_vel, 0])])
        self.interp_pos = new_pos
        self.interp_vel = new_vel
        self.last_acceleration += jerk * 0.02
        obs, rew, done, info = super().step(abs_action)
        self.add_noise(obs)
        self.last_planned_world_pos = self._fk(self.interp_pos)
        obs = np.hstack([
            obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
        ])
        fatal_rew = self.check_fatal(obs)
        if fatal_rew != 0:
            return self.filter_obs(obs), fatal_rew, True, info
        return self.filter_obs(obs), rew, done, info
    def check_constraints(self, constraint_values):
        fatal_rew = 0
        j_pos_constr = constraint_values["joint_pos_constr"]
        if j_pos_constr.max() > 0:
            fatal_rew += j_pos_constr.max()
        j_vel_constr = constraint_values["joint_vel_constr"]
        if j_vel_constr.max() > 0:
            fatal_rew += j_vel_constr.max()
        ee_constr = constraint_values["ee_constr"]
        if ee_constr.max() > 0:
            fatal_rew += ee_constr.max()
        link_constr = constraint_values["link_constr"]
        if link_constr.max() > 0:
            fatal_rew += link_constr.max()
        return -fatal_rew
    def check_fatal(self, obs):
        fatal_rew = 0
        q = obs[self.env_info["joint_pos_ids"]]
        qd = obs[self.env_info["joint_vel_ids"]]
        constraint_values_obs = self.env_info["constraints"].fun(q, qd)
        fatal_rew += self.check_constraints(constraint_values_obs)
        return -fatal_rew
    def _fk(self, pos):
        res, _ = forward_kinematics(self.env_info["robot"]["robot_model"],
                                    self.env_info["robot"]["robot_data"], pos)
        return res.astype(np.float32)
    def _ik(self, world_pos, init_q=None):
        success, pos = inverse_kinematics(self.env_info["robot"]["robot_model"],
                                          self.env_info["robot"]["robot_data"],
                                          world_pos,
                                          initial_q=init_q)
        pos = pos.astype(np.float32)
        assert success
        return pos
    def _jacobian(self, pos):
        return jacobian(self.env_info["robot"]["robot_model"],
                        self.env_info["robot"]["robot_data"],
                        pos).astype(np.float32)
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
@ -0,0 +1,166 @@
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv
 class AirHockeyDefend(AirHockeySingle):
    """
        Class for the air hockey defending task.
        The agent should stop the puck at the line x=-0.6.
    """
    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
        self.init_velocity_range = (1, 3)
        self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
    def setup(self, obs):
        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
        angle = np.random.uniform(-0.5, 0.5)
        puck_vel = np.zeros(3)
        puck_vel[0] = -np.cos(angle) * lin_vel
        puck_vel[1] = np.sin(angle) * lin_vel
        puck_vel[2] = np.random.uniform(-10, 10)
        self._write_data("puck_x_pos", puck_pos[0])
        self._write_data("puck_y_pos", puck_pos[1])
        self._write_data("puck_x_vel", puck_vel[0])
        self._write_data("puck_y_vel", puck_vel[1])
        self._write_data("puck_yaw_vel", puck_vel[2])
        super().setup(obs)
    def reward(self, state, action, next_state, absorbing):
        return 0
    def is_absorbing(self, state):
        puck_pos, puck_vel = self.get_puck(state)
        # If puck is over the middle line and moving towards opponent
        if puck_pos[0] > 0 and puck_vel[0] > 0:
            return True
        if np.linalg.norm(puck_vel[:2]) < 0.1:
            return True
        return super().is_absorbing(state)
 class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv):
    def __init__(self, gamma=0.99, horizon=200, viewer_params={}, **kwargs):
        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
        self.init_velocity_range = (1, 3)
        self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]])  # Table Frame
        self._setup_metrics()
    def setup(self, obs):
        self._setup_metrics()
        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
        angle = np.random.uniform(-0.5, 0.5)
        puck_vel = np.zeros(3)
        puck_vel[0] = -np.cos(angle) * lin_vel
        puck_vel[1] = np.sin(angle) * lin_vel
        puck_vel[2] = np.random.uniform(-10, 10)
        self._write_data("puck_x_pos", puck_pos[0])
        self._write_data("puck_y_pos", puck_pos[1])
        self._write_data("puck_x_vel", puck_vel[0])
        self._write_data("puck_y_vel", puck_vel[1])
        self._write_data("puck_yaw_vel", puck_vel[2])
        super().setup(obs)
    def reset(self, *args):
        obs = super().reset()
        self.hit_step_flag = False
        self.hit_step = False
        self.received_hit_reward = False
        self.give_reward_next = False
        return obs
    def _setup_metrics(self):
        self.episode_steps = 0
        self.has_hit = False
    def _simulation_post_step(self):
        if not self.has_hit:
            self.has_hit = self._check_collision("puck", "robot_1/ee")
        super()._simulation_post_step()
    def _step_finalize(self):
        self.episode_steps += 1
        return super()._step_finalize()
    def reward(self, state, action, next_state, absorbing):
        puck_pos, puck_vel = self.get_puck(next_state)
        ee_pos, _ = self.get_ee()
        rew = 0.01
        if -0.7 < puck_pos[0] <= -0.2 and np.linalg.norm(puck_vel[:2]) < 0.1:
            assert absorbing
            rew += 70
        if self.has_hit and not self.hit_step_flag:
            self.hit_step_flag = True
            self.hit_step = True
        else:
            self.hit_step = False
        f = lambda puck_vel: 30 + 100 * (100 ** (-0.25 * np.linalg.norm(puck_vel[:2])))
        if not self.give_reward_next and not self.received_hit_reward and self.hit_step and ee_pos[0] < puck_pos[0]:
            self.hit_this_step = True
            if np.linalg.norm(puck_vel[:2]) < 0.1:
                return rew + f(puck_vel)
            self.give_reward_next = True
            return rew
        if not self.received_hit_reward and self.give_reward_next:
            self.received_hit_reward = True
            if puck_vel[0] >= -0.2:
                return rew + f(puck_vel)
            return rew
        else:
            return rew
    def is_absorbing(self, obs):
        puck_pos, puck_vel = self.get_puck(obs)
        # If puck is over the middle line and moving towards opponent
        if puck_pos[0] > 0 and puck_vel[0] > 0:
            return True
        if self.episode_steps == self._mdp_info.horizon:
            return True
        if np.linalg.norm(puck_vel[:2]) < 0.1:
            return True
        return super().is_absorbing(obs)
 if __name__ == '__main__':
    env = AirHockeyDefend()
    R = 0.
    J = 0.
    gamma = 1.
    steps = 0
    env.reset()
    env.render()
    while True:
        # action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8
        action = np.zeros(7)
        observation, reward, done, info = env.step(action)
        env.render()
        print(observation)
        gamma *= env.info.gamma
        J += gamma * reward
        R += reward
        steps += 1
        if done or steps > env.info.horizon:
            print("J: ", J, " R: ", R)
            R = 0.
            J = 0.
            gamma = 1.
            steps = 0
            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
@ -0,0 +1,258 @@
 import os
 import mujoco
 import numpy as np
 from scipy.spatial.transform import Rotation as R
 from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path
 from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin
 from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
 from mushroom_rl.utils.spaces import Box
 """
    Abstract class for all AirHockey Environments.
 """
 class AirHockeyBase(MuJoCo):
    def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
                 n_agents=1, viewer_params={}):
        """
            Constructor.
            Args:
                n_agents (int, 1): number of agent to be used in the environment (one or two)
        """
        self.n_agents = n_agents
        action_spec = []
        observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
                            ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
                            ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
                            ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
                            ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
                            ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
        additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
                           ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
                           ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
                           ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
                           ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
                           ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
        collision_spec = [("puck", ["puck"]),
                          ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
                          ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
        if 1 <= self.n_agents <= 2:
            scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
            action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
                            "iiwa_1/joint_6", "iiwa_1/joint_7"]
            observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS),
                                 ("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS),
                                 ("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS),
                                 ("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS),
                                 ("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS),
                                 ("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS),
                                 ("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS),
                                 ("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)]
            additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS),
                                ("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS),
                                ("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL),
                                ("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL),
                                ("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS),
                                ("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL),
                                ("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)]
            collision_spec += [("robot_1/ee", ["iiwa_1/ee"])]
            if self.n_agents == 2:
                scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
                observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)]
                action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4",
                                "iiwa_2/joint_5",
                                "iiwa_2/joint_6", "iiwa_2/joint_7"]
                observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
                                     ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
                                     ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
                                     ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
                                     ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
                                     ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS),
                                     ("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS),
                                     ("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS),
                                     ("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS),
                                     ("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS),
                                     ("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS),
                                     ("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS),
                                     ("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)]
                observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)]
                additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS),
                                    ("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS),
                                    ("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL),
                                    ("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL),
                                    ("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS),
                                    ("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL),
                                    ("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)]
                collision_spec += [("robot_2/ee", ["iiwa_2/ee"])]
        else:
            raise ValueError('n_agents should be 1 or 2')
        self.env_info = dict()
        self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
        self.env_info['puck'] = {"radius": 0.03165}
        self.env_info['mallet'] = {"radius": 0.04815}
        self.env_info['n_agents'] = self.n_agents
        self.env_info['robot'] = {
            "n_joints": 7,
            "ee_desired_height": 0.1645,
            "joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
                                         [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi,
            "joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
                                         [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10,
            "base_frame": [],
            "universal_height": 0.0645,
            "control_frequency": 50,
        }
        self.env_info['puck_pos_ids'] = [0, 1, 2]
        self.env_info['puck_vel_ids'] = [3, 4, 5]
        self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12]
        self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19]
        if self.n_agents == 2:
            self.env_info['opponent_ee_ids'] = [20, 21, 22]
        else:
            self.env_info['opponent_ee_ids'] = []
        max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents
        super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
                         n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
        # Construct the mujoco model at origin
        robot_model = mujoco.MjModel.from_xml_path(
            os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml"))
        robot_model.body('iiwa_1/base').pos = np.zeros(3)
        robot_data = mujoco.MjData(robot_model)
        # Add env_info that requires mujoco models
        self.env_info['dt'] = self.dt
        self.env_info["robot"]["joint_pos_limit"] = np.array(
            [self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T
        self.env_info["robot"]["robot_model"] = robot_model
        self.env_info["robot"]["robot_data"] = robot_data
        self.env_info["rl_info"] = self.info
        frame_T = np.eye(4)
        temp = np.zeros((9, 1))
        mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat)
        frame_T[:3, :3] = temp.reshape(3, 3)
        frame_T[:3, 3] = self._model.body("iiwa_1/base").pos
        self.env_info['robot']['base_frame'].append(frame_T.copy())
        if self.n_agents == 2:
            mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat)
            frame_T[:3, :3] = temp.reshape(3, 3)
            frame_T[:3, 3] = self._model.body("iiwa_2/base").pos
            self.env_info['robot']['base_frame'].append(frame_T.copy())
        # Ids of the joint, which are controller by the action space
        self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
        self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info)
    def _modify_mdp_info(self, mdp_info):
        obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
                            *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0]
                                       for i in range(self.env_info['robot']['n_joints'])]),
                            *self.env_info['robot']['joint_vel_limit'][0]])
        obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
                             *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1]
                                        for i in range(self.env_info['robot']['n_joints'])]),
                             *self.env_info['robot']['joint_vel_limit'][1]])
        if self.n_agents == 2:
            obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
            obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
        mdp_info.observation_space = Box(obs_low, obs_high)
        return mdp_info
    def _simulation_pre_step(self):
        self.universal_joint_plugin.update()
    def is_absorbing(self, obs):
        boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
        puck_pos, puck_vel = self.get_puck(obs)
        if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
            return True
        return False
    @staticmethod
    def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
        if type == 'pose':
            puck_w = np.eye(4)
            puck_w[:2, 3] = puck_in[:2]
            puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
            puck_r = np.linalg.inv(robot_frame) @ puck_w
            puck_out = np.concatenate([puck_r[:2, 3],
                                       R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
        if type == 'vel':
            rot_mat = robot_frame[:3, :3]
            vel_lin = np.array([*puck_in[:2], 0])
            vel_ang = np.array([0., 0., puck_in[2]])
            vel_lin_r = rot_mat.T @ vel_lin
            vel_ang_r = rot_mat.T @ vel_ang
            puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
        return puck_out
    def get_puck(self, obs):
        """
        Getting the puck properties from the observations
        Args:
            obs: The current observation
        Returns:
            ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
        """
        puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
                                   self.obs_helper.get_from_obs(obs, "puck_y_pos"),
                                   self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
        puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
                                   self.obs_helper.get_from_obs(obs, "puck_y_vel"),
                                   self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
        return puck_pos, puck_vel
    def get_ee(self):
        raise NotImplementedError
    def get_joints(self, obs):
        raise NotImplementedError
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
@ -0,0 +1,171 @@
 import mujoco
 import numpy as np
 from scipy.spatial.transform import Rotation as R
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
 from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
 class AirHockeyDouble(AirHockeyBase):
    """
    Base class for two agents air hockey tasks.
    """
    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
        super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params)
        self._compute_init_state()
        self.filter_ratio = 0.274
        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
    def _compute_init_state(self):
        init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
        success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
                                                      self.env_info['robot']['robot_data'],
                                                      np.array([0.65, 0., 0.1645]),
                                                      R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
                                                      initial_q=init_state)
        assert success is True
    def get_ee(self, robot=1):
        """
        Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics
        on the current joint position, this function exists to avoid redundant computations.
        Args:
            robot: ID of robot, either 1 or 2
        Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
        """
        ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos")
        ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel")
        return ee_pos, ee_vel
    def get_joints(self, obs, agent=None):
        """
        Get joint position and velocity of the robots
        Can choose the robot with agent = 1 / 2. If agent is None both are returned
        """
        if agent:
            q_pos = np.zeros(7)
            q_vel = np.zeros(7)
            for i in range(7):
                q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0]
                q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0]
        else:
            q_pos = np.zeros(14)
            q_vel = np.zeros(14)
            for i in range(7):
                q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
                q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
                q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0]
                q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0]
        return q_pos, q_vel
    def _create_observation(self, obs):
        # Filter the joint velocity
        q_pos, q_vel = self.get_joints(obs)
        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
        self.q_pos_prev = q_pos
        self.q_vel_prev = q_vel_filter
        for i in range(7):
            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
            self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7]
        # Wrap puck's rotation angle to [-pi, pi)
        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
        return obs
    def _modify_observation(self, obs):
        new_obs = obs.copy()
        puck_pos, puck_vel = self.get_puck(new_obs)
        puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
        puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0]
        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1]
        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2]
        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0]
        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1]
        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2]
        opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')
        self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \
            (np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3]
        puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1])
        puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel')
        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0]
        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1]
        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2]
        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0]
        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1]
        self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2]
        opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')
        self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \
            (np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3]
        return new_obs
    def setup(self, obs):
        for i in range(7):
            self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
            self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i]
            self.q_pos_prev[i] = self.init_state[i]
            self.q_pos_prev[i + 7] = self.init_state[i]
            self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
            self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0]
        self.universal_joint_plugin.reset()
        super().setup(obs)
        # Update body positions, needed for _compute_universal_joint
        mujoco.mj_fwdPosition(self._model, self._data)
    def reward(self, state, action, next_state, absorbing):
        return 0
 def main():
    env = AirHockeyDouble(viewer_params={'start_paused': True})
    env.reset()
    env.render()
    R = 0.
    J = 0.
    gamma = 1.
    steps = 0
    while True:
        action = np.zeros(14)
        observation, reward, done, info = env.step(action)
        env.render()
        gamma *= env.info.gamma
        J += gamma * reward
        R += reward
        steps += 1
        if done or steps > env.info.horizon:
            print("J: ", J, " R: ", R)
            R = 0.
            J = 0.
            gamma = 1.
            steps = 0
            env.reset()
 if __name__ == '__main__':
    main()
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
@ -0,0 +1,104 @@
 import mujoco
 import numpy as np
 from scipy.spatial.transform import Rotation as R
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
 from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
 class AirHockeySingle(AirHockeyBase):
    """
    Base class for single agent air hockey tasks.
    """
    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
        super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
        self._compute_init_state()
        self.filter_ratio = 0.274
        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
    def _compute_init_state(self):
        init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
        success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
                                                      self.env_info['robot']['robot_data'],
                                                      np.array([0.65, 0., 0.1645]),
                                                      R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
                                                      initial_q=init_state)
        assert success is True
    def get_ee(self):
        """
        Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
        on the current joint position, this function exists to avoid redundant computations.
        Returns:
            ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
        """
        ee_pos = self._read_data("robot_1/ee_pos")
        ee_vel = self._read_data("robot_1/ee_vel")
        return ee_pos, ee_vel
    def get_joints(self, obs):
        """
        Get joint position and velocity of the robot
        """
        q_pos = np.zeros(7)
        q_vel = np.zeros(7)
        for i in range(7):
            q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
            q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
        return q_pos, q_vel
    def _create_observation(self, obs):
        # Filter the joint velocity
        q_pos, q_vel = self.get_joints(obs)
        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
        self.q_pos_prev = q_pos
        self.q_vel_prev = q_vel_filter
        for i in range(7):
            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
        return obs
    def _modify_observation(self, obs):
        new_obs = obs.copy()
        puck_pos, puck_vel = self.get_puck(new_obs)
        puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
        puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
        return new_obs
    def setup(self, obs):
        for i in range(7):
            self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
            self.q_pos_prev[i] = self.init_state[i]
            self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
        self.universal_joint_plugin.reset()
        super().setup(obs)
        # Update body positions, needed for _compute_universal_joint
        mujoco.mj_fwdPosition(self._model, self._data)
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
@ -0,0 +1,159 @@
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv
 class AirHockeyHit(AirHockeySingle):
    """
        Class for the air hockey hitting task.
    """
    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
        """
            Constructor
            Args:
                opponent_agent(Agent, None): Agent which controls the opponent
                moving_init(bool, False): If true, initialize the puck with inital velocity.
        """
        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
        self.moving_init = moving_init
        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
                    self.env_info['mallet']['radius'] * 2
        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
        self.init_velocity_range = (0, 0.5)  # Table Frame
        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
    def setup(self, obs):
        # Initial position of the puck
        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
        self._write_data("puck_x_pos", puck_pos[0])
        self._write_data("puck_y_pos", puck_pos[1])
        if self.moving_init:
            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
            puck_vel = np.zeros(3)
            puck_vel[0] = -np.cos(angle) * lin_vel
            puck_vel[1] = np.sin(angle) * lin_vel
            puck_vel[2] = np.random.uniform(-2, 2)
            self._write_data("puck_x_vel", puck_vel[0])
            self._write_data("puck_y_vel", puck_vel[1])
            self._write_data("puck_yaw_vel", puck_vel[2])
        super(AirHockeyHit, self).setup(obs)
    def reward(self, state, action, next_state, absorbing):
        return 0
    def is_absorbing(self, obs):
        puck_pos, puck_vel = self.get_puck(obs)
        # Stop if the puck bounces back on the opponents wall
        if puck_pos[0] > 0 and puck_vel[0] < 0:
            return True
        return super(AirHockeyHit, self).is_absorbing(obs)
 class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv):
    def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}, **kwargs):
        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
        self.moving_init = moving_init
        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
                    self.env_info['mallet']['radius'] * 2
        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
        self.init_velocity_range = (0, 0.5)  # Table Frame
        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
        self._setup_metrics()
    def reset(self, *args):
        obs = super().reset()
        self.last_ee_pos = self.last_planned_world_pos.copy()
        self.last_ee_pos[0] -= 1.51
        return obs
    def setup(self, obs):
        self._setup_metrics()
        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
        self._write_data("puck_x_pos", puck_pos[0])
        self._write_data("puck_y_pos", puck_pos[1])
        if self.moving_init:
            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
            puck_vel = np.zeros(3)
            puck_vel[0] = -np.cos(angle) * lin_vel
            puck_vel[1] = np.sin(angle) * lin_vel
            puck_vel[2] = np.random.uniform(-2, 2)
            self._write_data("puck_x_vel", puck_vel[0])
            self._write_data("puck_y_vel", puck_vel[1])
            self._write_data("puck_yaw_vel", puck_vel[2])
        super().setup(obs)
    def _setup_metrics(self):
        self.episode_steps = 0
        self.has_scored = False
    def _step_finalize(self):
        cur_obs = self._create_observation(self.obs_helper._build_obs(self._data))
        puck_pos, _ = self.get_puck(cur_obs)  # world frame [x, y, z] and [x', y', z']
        if not self.has_scored:
            boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
            self.has_scored = np.any(np.abs(puck_pos[:2]) > boundary) and puck_pos[0] > 0
        self.episode_steps += 1
        return super()._step_finalize()
    def reward(self, state, action, next_state, absorbing):
        rew = 0
        puck_pos, puck_vel = self.get_puck(next_state)
        ee_pos, _ = self.get_ee()
        ee_vel = (ee_pos - self.last_ee_pos) / 0.02
        self.last_ee_pos = ee_pos
        if puck_vel[0] < 0.25 and puck_pos[0] < 0:
            ee_puck_dir = (puck_pos - ee_pos)[:2]
            ee_puck_dir = ee_puck_dir / np.linalg.norm(ee_puck_dir)
            rew += 1 * max(0, np.dot(ee_puck_dir, ee_vel[:2]))
        else:
            rew += 10 * np.linalg.norm(puck_vel[:2])
        if self.has_scored:
            rew += 2000 + 5000 * np.linalg.norm(puck_vel[:2])
        return rew
    def is_absorbing(self, obs):
        puck_pos, puck_vel = self.get_puck(obs)
        # Stop if the puck bounces back on the opponents wall
        if puck_pos[0] > 0 and puck_vel[0] < 0:
            return True
        if self.has_scored:
            return True
        if self.episode_steps == self._mdp_info.horizon:
            return True
        return super().is_absorbing(obs)
 if __name__ == '__main__':
    env = AirHockeyHit(moving_init=True)
    env.reset()
    steps = 0
    while True:
        action = np.zeros(7)
        observation, reward, done, info = env.step(action)
        env.render()
        if done or steps > env.info.horizon:
            steps = 0
            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
@ -0,0 +1,111 @@
 import mujoco
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble
 class AirHockeyTournament(AirHockeyDouble):
    """
        Class for the air hockey tournament. Consists of 2 robots which should play against each other.
        When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty.
        If a player accumulates 3 penalties his score is reduced by 1.
    """
    def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"):
        self.agent_name = agent_name
        self.opponent_name = opponent_name
        self.score = [0, 0]
        self.faults = [0, 0]
        self.start_side = None
        self.timer = 0
        def custom_render_callback(viewport, context):
            names = f"Agents \nScores \nFaults "
            data = f"{self.agent_name} - {self.opponent_name}\n "
            data += f"{self.score[0]} - {self.score[1]}\n "
            data += f"{self.faults[0]} - {self.faults[1]}"
            mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context)
        viewer_params["custom_render_callback"] = custom_render_callback
        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
                    self.env_info['mallet']['radius'] * 2
        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
    def setup(self, obs):
        if self.start_side == None:
            self.start_side = np.random.choice([1, -1])
            self.prev_side = self.start_side
        # Initial position of the puck
        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
        self._write_data("puck_x_pos", puck_pos[0] * self.start_side)
        self._write_data("puck_y_pos", puck_pos[1])
        self.prev_side = self.start_side
        self.timer = 0
        super(AirHockeyTournament, self).setup(obs)
    def reward(self, state, action, next_state, absorbing):
        return 0
    def is_absorbing(self, obs):
        puck_pos, puck_vel = self.get_puck(obs)
        # Puck stuck on one side for more than 15s
        if np.sign(puck_pos[0]) == self.prev_side:
            self.timer += self.dt
        else:
            self.prev_side *= -1
            self.timer = 0
        if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15:
            if self.prev_side == -1:
                self.faults[0] += 1
                self.start_side = -1
                if self.faults[0] % 3 == 0:
                    self.score[1] += 1
            else:
                self.faults[1] += 1
                self.start_side = 1
                if self.faults[1] % 3 == 0:
                    self.score[0] += 1
            return True
        # Puck in Goal
        if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0:
            if puck_pos[0] > self.env_info['table']['length'] / 2:
                self.score[0] += 1
                self.start_side = -1
                return True
            if puck_pos[0] < -self.env_info['table']['length'] / 2:
                self.score[1] += 1
                self.start_side = 1
                return True
        # Puck stuck in the middle
        if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025:
            return True
        return super(AirHockeyTournament, self).is_absorbing(obs)
 if __name__ == '__main__':
    env = AirHockeyTournament()
    env.reset()
    steps = 0
    while True:
        action = np.zeros(14)
        steps += 1
        observation, reward, done, info = env.step(action)
        env.render()
        if done or steps > env.info.horizon:
            steps = 0
            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/init.py
@ -0,0 +1,3 @@
 from .env_base import AirHockeyBase
 from .defend import AirHockeyDefend
 from .hit import AirHockeyHit
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
@ -0,0 +1,76 @@
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
 class AirHockeyDefend(AirHockeySingle):
    """
    Class for the air hockey defending task.
    The agent should stop the puck at the line x=-0.6.
    """
    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
        self.init_velocity_range = (1, 3)
        self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame
        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
    def setup(self, state=None):
        # Set initial puck parameters
        puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
        lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
        angle = np.random.uniform(-0.5, 0.5)
        puck_vel = np.zeros(3)
        puck_vel[0] = -np.cos(angle) * lin_vel
        puck_vel[1] = np.sin(angle) * lin_vel
        puck_vel[2] = np.random.uniform(-10, 10)
        self._write_data("puck_x_pos", puck_pos[0])
        self._write_data("puck_y_pos", puck_pos[1])
        self._write_data("puck_x_vel", puck_vel[0])
        self._write_data("puck_y_vel", puck_vel[1])
        self._write_data("puck_yaw_vel", puck_vel[2])
        super(AirHockeyDefend, self).setup(state)
    def reward(self, state, action, next_state, absorbing):
        return 0
    def is_absorbing(self, state):
        puck_pos, puck_vel = self.get_puck(state)
        # If puck is over the middle line and moving towards opponent
        if puck_pos[0] > 0 and puck_vel[0] > 0:
            return True
        if np.linalg.norm(puck_vel[:2]) < 0.1:
            return True
        return super().is_absorbing(state)
 if __name__ == '__main__':
    env = AirHockeyDefend()
    R = 0.
    J = 0.
    gamma = 1.
    steps = 0
    env.reset()
    while True:
        action = np.zeros(3)
        observation, reward, done, info = env.step(action)
        env.render()
        gamma *= env.info.gamma
        J += gamma * reward
        R += reward
        steps += 1
        if done or steps > env.info.horizon:
            print("J: ", J, " R: ", R)
            R = 0.
            J = 0.
            gamma = 1.
            steps = 0
            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
@ -0,0 +1,224 @@
 import os
 import mujoco
 import numpy as np
 from scipy.spatial.transform import Rotation as R
 from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path
 from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
 from mushroom_rl.utils.spaces import Box
 class AirHockeyBase(MuJoCo):
    """
    Abstract class for all AirHockey Environments.
    """
    def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
                 n_agents=1, viewer_params={}):
        """
        Constructor.
        Args:
            n_agents (int, 1): number of agent to be used in the environment (one or two)
        """
        self.n_agents = n_agents
        action_spec = []
        observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
                            ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
                            ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
                            ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
                            ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
                            ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
        additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
                           ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
                           ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
                           ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
                           ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
                           ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
        collision_spec = [("puck", ["puck"]),
                          ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
                          ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
        if 1 <= self.n_agents <= 2:
            scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
            action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"]
            observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS),
                                 ("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS),
                                 ("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS),
                                 ("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL),
                                 ("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)]
            additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS),
                                ("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)]
            collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])]
            if self.n_agents == 2:
                scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
                observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)]
                action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"]
                # Add puck pos/vel again to transform into second agents frame
                observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
                                     ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
                                     ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
                                     ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
                                     ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
                                     ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS),
                                     ("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS),
                                     ("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS),
                                     ("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL),
                                     ("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)]
                observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)]
                additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS),
                                    ("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)]
                collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])]
        else:
            raise ValueError('n_agents should be 1 or 2')
        self.env_info = dict()
        self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
        self.env_info['puck'] = {"radius": 0.03165}
        self.env_info['mallet'] = {"radius": 0.04815}
        self.env_info['n_agents'] = self.n_agents
        self.env_info['robot'] = {
            "n_joints": 3,
            "ee_desired_height": 0.1,
            "joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3],
                                         [np.pi / 2, np.pi / 2, np.pi * 2 / 3]]),
            "joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi],
                                         [2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]),
            "base_frame": [],
            "control_frequency": 50,
        }
        self.env_info['puck_pos_ids'] = [0, 1, 2]
        self.env_info['puck_vel_ids'] = [3, 4, 5]
        self.env_info['joint_pos_ids'] = [6, 7, 8]
        self.env_info['joint_vel_ids'] = [9, 10, 11]
        if self.n_agents == 2:
            self.env_info['opponent_ee_ids'] = [13, 14, 15]
        else:
            self.env_info['opponent_ee_ids'] = []
        max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents
        super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
                         n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
        # Construct the mujoco model at origin
        robot_model = mujoco.MjModel.from_xml_path(
            os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml"))
        robot_model.body('planar_robot_1/base').pos = np.zeros(3)
        robot_data = mujoco.MjData(robot_model)
        # Add env_info that requires mujoco models
        self.env_info['dt'] = self.dt
        self.env_info["robot"]["joint_pos_limit"] = np.array(
            [self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T
        self.env_info["robot"]["robot_model"] = robot_model
        self.env_info["robot"]["robot_data"] = robot_data
        self.env_info["rl_info"] = self.info
        frame_T = np.eye(4)
        temp = np.zeros((9, 1))
        mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat)
        frame_T[:3, :3] = temp.reshape(3, 3)
        frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos
        self.env_info['robot']['base_frame'].append(frame_T.copy())
        if self.n_agents == 2:
            mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat)
            frame_T[:3, :3] = temp.reshape(3, 3)
            frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos
            self.env_info['robot']['base_frame'].append(frame_T.copy())
        # Ids of the joint, which are controller by the action space
        self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
    def _modify_mdp_info(self, mdp_info):
        obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
                            *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0]
                                       for i in range(self.env_info['robot']['n_joints'])]),
                            *self.env_info['robot']['joint_vel_limit'][0]])
        obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
                             *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1]
                                        for i in range(self.env_info['robot']['n_joints'])]),
                             *self.env_info['robot']['joint_vel_limit'][1]])
        if self.n_agents == 2:
            obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
            obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
        mdp_info.observation_space = Box(obs_low, obs_high)
        return mdp_info
    def is_absorbing(self, obs):
        boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
        puck_pos, puck_vel = self.get_puck(obs)
        if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
            return True
        return False
    @staticmethod
    def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
        if type == 'pose':
            puck_w = np.eye(4)
            puck_w[:2, 3] = puck_in[:2]
            puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
            puck_r = np.linalg.inv(robot_frame) @ puck_w
            puck_out = np.concatenate([puck_r[:2, 3],
                                       R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
        if type == 'vel':
            rot_mat = robot_frame[:3, :3]
            vel_lin = np.array([*puck_in[:2], 0])
            vel_ang = np.array([0., 0., puck_in[2]])
            vel_lin_r = rot_mat.T @ vel_lin
            vel_ang_r = rot_mat.T @ vel_ang
            puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
        return puck_out
    def get_puck(self, obs):
        """
        Getting the puck properties from the observations
        Args:
            obs: The current observation
        Returns:
            ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
        """
        puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
                                   self.obs_helper.get_from_obs(obs, "puck_y_pos"),
                                   self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
        puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
                                   self.obs_helper.get_from_obs(obs, "puck_y_vel"),
                                   self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
        return puck_pos, puck_vel
    def get_ee(self):
        raise NotImplementedError
    def get_joints(self, obs):
        raise NotImplementedError
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
@ -0,0 +1,91 @@
 import mujoco
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase
 class AirHockeySingle(AirHockeyBase):
    """
    Base class for single agent air hockey tasks.
    """
    def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
        """
        Constructor.
        """
        self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414])
        super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
        self.filter_ratio = 0.274
        self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
        self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
    def get_ee(self):
        """
        Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
        on the current joint position, this function exists to avoid redundant computations.
        Returns:
            ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
        """
        ee_pos = self._read_data("robot_1/ee_pos")
        ee_vel = self._read_data("robot_1/ee_vel")
        return ee_pos, ee_vel
    def get_joints(self, obs):
        """
        Get joint position and velocity of the robot
        """
        q_pos = np.zeros(3)
        q_vel = np.zeros(3)
        for i in range(3):
            q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
            q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
        return q_pos, q_vel
    def _modify_observation(self, obs):
        new_obs = obs.copy()
        puck_pos, puck_vel = self.get_puck(obs)
        puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
        puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
        self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
        self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
        self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
        self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
        self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
        self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
        return new_obs
    def setup(self, state=None):
        for i in range(3):
            self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i]
            self.q_pos_prev[i] = self.init_state[i]
            self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0]
        mujoco.mj_fwdPosition(self._model, self._data)
        super().setup(state)
    def _create_observation(self, obs):
        # Filter the joint velocity
        q_pos, q_vel = self.get_joints(obs)
        q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
        self.q_pos_prev = q_pos
        self.q_vel_prev = q_vel_filter
        for i in range(3):
            self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
        yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
        self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
        return obs
--- a/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
@ -0,0 +1,84 @@
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
 class AirHockeyHit(AirHockeySingle):
    """
    Class for the air hockey hitting task.
    """
    def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}):
        """
        Constructor
        Args:
            moving_init(bool, False): If true, initialize the puck with inital velocity.
        """
        super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
        self.moving_init = moving_init
        hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
                    self.env_info['mallet']['radius'] * 2
        self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame
        self.init_velocity_range = (0, 0.5)  # Table Frame
        self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame
    def setup(self, state=None):
        # Initial position of the puck
        puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
        # self.init_state = np.array([-0.9273, 0.9273, np.pi / 2])
        self._write_data("puck_x_pos", puck_pos[0])
        self._write_data("puck_y_pos", puck_pos[1])
        if self.moving_init:
            lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
            angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
            puck_vel = np.zeros(3)
            puck_vel[0] = -np.cos(angle) * lin_vel
            puck_vel[1] = np.sin(angle) * lin_vel
            puck_vel[2] = np.random.uniform(-2, 2, 1)
            self._write_data("puck_x_vel", puck_vel[0])
            self._write_data("puck_y_vel", puck_vel[1])
            self._write_data("puck_yaw_vel", puck_vel[2])
        super(AirHockeyHit, self).setup(state)
    def reward(self, state, action, next_state, absorbing):
        return 0
    def is_absorbing(self, obs):
        puck_pos, puck_vel = self.get_puck(obs)
        # Stop if the puck bounces back on the opponents wall
        if puck_pos[0] > 0 and puck_vel[0] < 0:
            return True
        return super(AirHockeyHit, self).is_absorbing(obs)
 if __name__ == '__main__':
    env = AirHockeyHit(moving_init=False)
    env.reset()
    env.render()
    R = 0.
    J = 0.
    gamma = 1.
    steps = 0
    while True:
        action = np.zeros(3)
        observation, reward, done, info = env.step(action)
        env.render()
        gamma *= env.info.gamma
        J += gamma * reward
        R += reward
        steps += 1
        if done or steps > env.info.horizon:
            print("J: ", J, " R: ", R)
            R = 0.
            J = 0.
            gamma = 1.
            steps = 0
            env.reset()
--- a/fancy_gym/envs/mujoco/air_hockey/utils/init.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/init.py
@ -0,0 +1,2 @@
 from .kinematics import inverse_kinematics, forward_kinematics, jacobian
 from .transformations import robot_to_world, world_to_robot
--- a/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
@ -0,0 +1,248 @@
 import mujoco
 import numpy as np
 def forward_kinematics(mj_model, mj_data, q, link="ee"):
    """
    Compute the forward kinematics of the robots.
    IMPORTANT:
        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
        case this function will return the wrong values.
    Coordinate System:
        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
        coordinate system
    Args:
        mj_model (mujoco.MjModel):
            mujoco MjModel of the robot-only model
        mj_data (mujoco.MjData):
            mujoco MjData object generated from the model
        q (np.array):
            joint configuration for which the forward kinematics are computed
        link (string, "ee"):
            Link for which the forward kinematics is calculated. When using the iiwas the choices are
            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
    Returns
    -------
    position: numpy.ndarray, (3,)
        Position of the link in robot's base frame
    orientation: numpy.ndarray, (3, 3)
        Orientation of the link in robot's base frame
    """
    return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
 def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"):
    """
    Compute the inverse kinematics of the robots.
    IMPORTANT:
        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is
        the case this function will return the wrong values.
    Coordinate System:
        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
        coordinate system
    Args:
        mj_model (mujoco.MjModel):
            mujoco MjModel of the robot-only model
        mj_data (mujoco.MjData):
            mujoco MjData object generated from the model
        desired_position (numpy.ndarray, (3,)):
            The desired position of the selected link.
        desired_rotation (optional, numpy.array, (3,3)):
            The desired rotation of the selected link.
        initial_q (numpy.ndarray, None):
            The initial configuration of the algorithm, if set to None it will take the initial configuration of the
            mj_data.
        link (str, "ee"):
            Link for which the inverse kinematics is calculated. When using the iiwas the choices are
            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
    """
    q_init = np.zeros(mj_model.nq)
    if initial_q is None:
        q_init = mj_data.qpos
    else:
        q_init[:initial_q.size] = initial_q
    q_l = mj_model.jnt_range[:, 0]
    q_h = mj_model.jnt_range[:, 1]
    lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2
    upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2
    desired_quat = None
    if desired_rotation is not None:
        desired_quat = np.zeros(4)
        mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1))
    return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model,
                        mj_data, lower_limit, upper_limit)
 def jacobian(mj_model, mj_data, q, link="ee"):
    """
    Compute the Jacobian of the robots.
    IMPORTANT:
        For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
        parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
        where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
        case this function will return the wrong values.
    Coordinate System:
        All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
        base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
        coordinate system
    Args:
        mj_model (mujoco.MjModel):
            mujoco MjModel of the robot-only model
        mj_data (mujoco.MjData):
            mujoco MjData object generated from the model
        q (numpy.ndarray):
            joint configuration for which the forward kinematics are computed
        link (string, "ee"):
            Link for which the forward kinematics is calculated. When using the iiwas the choices are
            ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
    Returns
    -------
    numpy.ndarray, (6, num_joints):
        The Jacobian matrix for the robot kinematics.
    """
    return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
 def link_to_xml_name(mj_model, link):
    try:
        mj_model.body('iiwa_1/base')
        link_to_frame_idx = {
            "1": "iiwa_1/link_1",
            "2": "iiwa_1/link_2",
            "3": "iiwa_1/link_3",
            "4": "iiwa_1/link_4",
            "5": "iiwa_1/link_5",
            "6": "iiwa_1/link_6",
            "7": "iiwa_1/link_7",
            "ee": "iiwa_1/striker_joint_link",
        }
    except:
        link_to_frame_idx = {
            "1": "planar_robot_1/body_1",
            "2": "planar_robot_1/body_2",
            "3": "planar_robot_1/body_3",
            "ee": "planar_robot_1/body_ee",
        }
    return link_to_frame_idx[link]
 def _mujoco_fk(q, name, model, data):
    data.qpos[:len(q)] = q
    mujoco.mj_fwdPosition(model, data)
    return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy()
 def _mujoco_jac(q, name, model, data):
    data.qpos[:len(q)] = q
    dtype = data.qpos.dtype
    jac = np.empty((6, model.nv), dtype=dtype)
    jac_pos, jac_rot = jac[:3], jac[3:]
    mujoco.mj_fwdPosition(model, data)
    mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
    return jac
 def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit):
    IT_MAX = 1000
    eps = 1e-4
    damp = 1e-3
    progress_thresh = 20.0
    max_update_norm = 0.1
    rot_weight = 1
    i = 0
    dtype = data.qpos.dtype
    data.qpos = initial_q
    neg_x_quat = np.empty(4, dtype=dtype)
    error_x_quat = np.empty(4, dtype=dtype)
    if desired_pos is not None and desired_quat is not None:
        jac = np.empty((6, model.nv), dtype=dtype)
        err = np.empty(6, dtype=dtype)
        jac_pos, jac_rot = jac[:3], jac[3:]
        err_pos, err_rot = err[:3], err[3:]
    else:
        jac = np.empty((3, model.nv), dtype=dtype)
        err = np.empty(3, dtype=dtype)
        if desired_pos is not None:
            jac_pos, jac_rot = jac, None
            err_pos, err_rot = err, None
        elif desired_quat is not None:
            jac_pos, jac_rot = None, jac
            err_pos, err_rot = None, err
        else:
            raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics")
    while True:
        # forward kinematics
        mujoco.mj_fwdPosition(model, data)
        x_pos = data.body(name).xpos
        x_quat = data.body(name).xquat
        error_norm = 0
        if desired_pos is not None:
            err_pos[:] = desired_pos - x_pos
            error_norm += np.linalg.norm(err_pos)
        if desired_quat is not None:
            mujoco.mju_negQuat(neg_x_quat, x_quat)
            mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat)
            mujoco.mju_quat2Vel(err_rot, error_x_quat, 1)
            error_norm += np.linalg.norm(err_rot) * rot_weight
        if error_norm < eps:
            success = True
            break
        if i >= IT_MAX:
            success = False
            break
        mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
        hess_approx = jac.T.dot(jac)
        joint_delta = jac.T.dot(err)
        hess_approx += np.eye(hess_approx.shape[0]) * damp
        update_joints = np.linalg.solve(hess_approx, joint_delta)
        update_norm = np.linalg.norm(update_joints)
        # Check whether we are still making enough progress, and halt if not.
        progress_criterion = error_norm / update_norm
        if progress_criterion > progress_thresh:
            success = False
            break
        if update_norm > max_update_norm:
            update_joints *= max_update_norm / update_norm
        mujoco.mj_integratePos(model, data.qpos, update_joints, 1)
        data.qpos = np.clip(data.qpos, lower_limit, upper_limit)
        i += 1
    q_cur = data.qpos.copy()
    return success, q_cur
--- a/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
@ -0,0 +1,65 @@
 import numpy as np
 def robot_to_world(base_frame, translation, rotation=None):
    """
    Transform position or rotation optional from the robot base frame to the world frame
    Args
    ----
    base_frame: numpy.ndarray, (4,4)
        The transformation matrix from the world to robot base frame
    translation: ndarray, (3,)
        The 3D position to be transformed
    rotation: optional, ndarray, (3, 3)
        The rotation in the matrix form to be transformed
    Returns
    -------
    position: ndarray (3,)
        The transformed 3D position
    rotation: ndarray (3, 3)
        The transformed rotation in the matrix form
    """
    target = np.eye(4)
    target[:len(translation), 3] = translation
    if rotation is not None:
        target[:3, :3] = rotation
    target_frame = base_frame @ target
    return target_frame[:len(translation), 3], target_frame[:3, :3]
 def world_to_robot(base_frame, translation, rotation=None):
    """
    Transfrom position and rotation (optional) from the world frame to the robot's base frame
    Args
    ----
    base_frame: ndarray, (4,4)
        The transformation matrix from the world to robot base frame
    translation: ndarray, (3,)
        The 3D position to be transformed
    rotation: optional, ndarray, (3, 3)
        The rotation in the matrix form to be tranformed
    Returns
    -------
    position: ndarray, (3,)
        The transformed 3D position
    rotation: ndarray, (3, 3)
        The transformed rotation in the matrix form
    """
    target = np.eye(4)
    target[:len(translation), 3] = translation
    if rotation is not None:
        target[:3, :3] = rotation
    target_frame = np.linalg.inv(base_frame) @ target
    return target_frame[:len(translation), 3], target_frame[:3, :3]
--- a/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
@ -0,0 +1,120 @@
 import numpy as np
 from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics
 class UniversalJointPlugin:
    def __init__(self, env_model, env_data, env_info):
        self.env_info = env_info
        self.env_model = env_model
        self.env_data = env_data
        self.Kp = 20
        self.Kd = 0.31
        self.universal_joint_ids = []
        self.universal_joint_ctrl_ids = []
        self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id,
                                     env_model.joint("iiwa_1/striker_joint_2").id]
        self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id,
                                          env_model.actuator("iiwa_1/striker_joint_2").id]
        action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
                       "iiwa_1/joint_6", "iiwa_1/joint_7"]
        if self.env_info['n_agents'] >= 2:
            self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id,
                                         env_model.joint("iiwa_2/striker_joint_2").id]
            self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id,
                                              env_model.actuator("iiwa_2/striker_joint_2").id]
            action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5",
                            "iiwa_2/joint_6", "iiwa_2/joint_7"]
        self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec]
        self.filter_ratio = 0.273
        self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents'])
        self.u_joint_pos_prev = None
        self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents'])
    def reset(self):
        self.u_joint_pos_prev = None
        self._control_universal_joint()
        for i in range(self.env_info['n_agents']):
            self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids]
        self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des
    def update(self):
        self._control_universal_joint()
    def _control_universal_joint(self):
        self._compute_universal_joint()
        self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids]
        self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \
                                (1 - self.filter_ratio) * self.u_joint_vel_prev
        Kp = 4
        Kd = 0.31
        torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev
        self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque
    def _compute_universal_joint(self):
        for i in range(self.env_info['n_agents']):
            q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]]
            # Have to exclude the puck joints
            pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'],
                                              self.env_info['robot']['robot_data'], q)
            v_x = rot_mat[:, 0]
            v_y = rot_mat[:, 1]
            # The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T
            # and the current y-axis. (0, 0, 1).T x v_y
            x_desired = np.array([-v_y[1], v_y[0], 0])
            # Find the signed angle from the current to the desired x-axis around the y-axis
            # https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane
            q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired)
            if self.u_joint_pos_prev is not None:
                if q1 - self.u_joint_pos_prev[0] > np.pi:
                    q1 -= np.pi * 2
                elif q1 - self.u_joint_pos_prev[0] < -np.pi:
                    q1 += np.pi * 2
            # Rotate the X axis by the calculated amount
            w = np.array([[0, -v_y[2], v_y[1]],
                          [v_y[2], 0, -v_y[0]],
                          [-v_y[1], v_y[0], 0]])
            r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1))
            v_x_rotated = r @ v_x
            # The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current
            # x-axis, which is already rotated around y. The negative is there because the x-axis is flipped.
            # -((0, 0, 1).T x v_x))
            y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0])
            # Find the signed angle from the current to the desired y-axis around the new rotated x-axis
            q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired)
            if self.u_joint_pos_prev is not None:
                if q2 - self.u_joint_pos_prev[1] > np.pi:
                    q2 -= np.pi * 2
                elif q2 - self.u_joint_pos_prev[1] < -np.pi:
                    q2 += np.pi * 2
            alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
            alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
            if self.u_joint_pos_prev is None:
                self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
            else:
                self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum(
                    10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]),
                    -np.pi * 0.01), np.pi * 0.01)
            self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
        return self.u_joint_pos_des
    def _cross_3d(self, a, b):
        return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]])
--- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
@ -6,6 +6,7 @@ from gymnasium.envs.mujoco import MujocoEnv
 from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
 from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max
 from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat
 from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import calculate_jerk_profile, calculate_mean_squared_jerk, calculate_dimensionless_jerk, calculate_maximum_jerk
 import mujoco
@ -110,6 +111,26 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
        return obs, reward, terminated, truncated, infos
    def calculate_smoothness_metrics(self, velocity_profile, dt):
        """
        Calculates the smoothness metrics for the given velocity profile.
        param velocity_profile: np.array
                                The array containing the movement velocity profile.
        param dt: float
                  The sampling time interval of the data.
        return mean_squared_jerk: float
                                  The mean squared jerk estimate of the given movement's smoothness.
        return maximum_jerk: float
                             The maximum jerk estimate of the given movement's smoothness.
        return dimensionless_jerk: float
                                   The dimensionless jerk estimate of the given movement's smoothness.
        """
        jerk_profile = calculate_jerk_profile(velocity_profile, dt)
        mean_squared_jerk = calculate_mean_squared_jerk(jerk_profile)
        maximum_jerk = calculate_maximum_jerk(jerk_profile)
        dimensionless_jerk = calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt)
        return mean_squared_jerk, maximum_jerk, dimensionless_jerk
    def reset_model(self):
        # rest box to initial position
        self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing)
--- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py
@ -51,3 +51,19 @@ def rot_to_quat(theta, axis):
    quant[0] = np.sin(theta / 2.)
    quant[1:] = np.cos(theta / 2.) * axis
    return quant
 def calculate_jerk_profile(velocity_profile, dt):
    jerk = np.diff(velocity_profile, 2, 0) / pow(dt, 2)
    return jerk
 def calculate_mean_squared_jerk(jerk_profile):
    return np.mean(pow(jerk_profile, 2))
 def calculate_maximum_jerk(jerk_profile):
    return np.max(abs(jerk_profile))
 def calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt):
    sum_squared_jerk = np.sum(pow(jerk_profile, 2), 0)
    duration = len(velocity_profile) * dt
    peak_velocity = np.max(abs(velocity_profile), 0)
    return np.mean(sum_squared_jerk * pow(duration, 3) / pow(peak_velocity, 2))
--- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
+++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
@ -262,76 +262,100 @@ class HopperJumpEnv(HopperEnvCustomXML):
                return True
        return False
-# # TODO is that needed? if so test it
+class HopperJumpMarkovRew(HopperJumpEnv):
-# class HopperJumpStepEnv(HopperJumpEnv):
+    def step(self, action):
-#
+        self._steps += 1
-#     def __init__(self,
+
-#                  xml_file='hopper_jump.xml',
+        self.do_simulation(action, self.frame_skip)
-#                  forward_reward_weight=1.0,
+
-#                  ctrl_cost_weight=1e-3,
+        height_after = self.get_body_com("torso")[2]
-#                  healthy_reward=1.0,
+        # site_pos_after = self.data.get_site_xpos('foot_site')
-#                  height_weight=3,
+        site_pos_after = self.data.site('foot_site').xpos
-#                  dist_weight=3,
+        self.max_height = max(height_after, self.max_height)
-#                  terminate_when_unhealthy=False,
+
-#                  healthy_state_range=(-100.0, 100.0),
+        has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
-#                  healthy_z_range=(0.5, float('inf')),
+
-#                  healthy_angle_range=(-float('inf'), float('inf')),
+        if not self.init_floor_contact:
-#                  reset_noise_scale=5e-3,
+            self.init_floor_contact = has_floor_contact
-#                  exclude_current_positions_from_observation=False
+        if self.init_floor_contact and not self.has_left_floor:
-#                  ):
+            self.has_left_floor = not has_floor_contact
-#
+        if not self.contact_with_floor and self.has_left_floor:
-#         self._height_weight = height_weight
+            self.contact_with_floor = has_floor_contact
-#         self._dist_weight = dist_weight
+
-#         super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+        ctrl_cost = self.control_cost(action)
-#                          healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+        costs = ctrl_cost
-#                          exclude_current_positions_from_observation)
+        terminated = False
-#
+        truncated = False
-#     def step(self, action):
+
-#         self._steps += 1
+        goal_dist = np.linalg.norm(site_pos_after - self.goal)
-#
+        if self.contact_dist is None and self.contact_with_floor:
-#         self.do_simulation(action, self.frame_skip)
+            self.contact_dist = goal_dist
-#
+
-#         height_after = self.get_body_com("torso")[2]
+        rewards = 0
-#         site_pos_after = self.data.site('foot_site').xpos.copy()
+        if not self.sparse or (self.sparse and self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP):
-#         self.max_height = max(height_after, self.max_height)
+            healthy_reward = self.healthy_reward
-#
+            distance_reward = -goal_dist * self._dist_weight
-#         ctrl_cost = self.control_cost(action)
+            height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
-#         healthy_reward = self.healthy_reward
+            contact_reward = -(self.contact_dist or 5) * self._contact_weight
-#         height_reward = self._height_weight * height_after
+            rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
-#         goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
+
-#         goal_dist_reward = -self._dist_weight * goal_dist
+        observation = self._get_obs()
-#         dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
+
-#
+        # While loop to simulate the process after jump to make the task Markovian
-#         rewards = dist_reward + healthy_reward
+        if self.sparse and self.has_left_floor:
-#         costs = ctrl_cost
+            while self._steps < MAX_EPISODE_STEPS_HOPPERJUMP:
-#         done = False
+                # Simulate to the end of the episode
-#
+                self._steps += 1
-#         # This is only for logging the distance to goal when first having the contact
+
-#         has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
+                try:
-#
+                    self.do_simulation(np.zeros_like(action), self.frame_skip)
-#         if not self.init_floor_contact:
+                except Exception as e:
-#             self.init_floor_contact = has_floor_contact
+                    print(e)
-#         if self.init_floor_contact and not self.has_left_floor:
+
-#             self.has_left_floor = not has_floor_contact
+                height_after = self.get_body_com("torso")[2]
-#         if not self.contact_with_floor and self.has_left_floor:
+                #site_pos_after = self.data.get_site_xpos('foot_site')
-#             self.contact_with_floor = has_floor_contact
+                site_pos_after = self.data.site('foot_site').xpos
-#
+                self.max_height = max(height_after, self.max_height)
-#         if self.contact_dist is None and self.contact_with_floor:
+
-#             self.contact_dist = goal_dist
+                has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
-#
+
-#         ##############################################################
+                if not self.init_floor_contact:
-#
+                    self.init_floor_contact = has_floor_contact
-#         observation = self._get_obs()
+                if self.init_floor_contact and not self.has_left_floor:
-#         reward = rewards - costs
+                    self.has_left_floor = not has_floor_contact
-#         info = {
+                if not self.contact_with_floor and self.has_left_floor:
-#             'height': height_after,
+                    self.contact_with_floor = has_floor_contact
-#             'x_pos': site_pos_after,
+
-#             'max_height': copy.copy(self.max_height),
+                ctrl_cost = self.control_cost(action)
-#             'goal': copy.copy(self.goal),
+                costs = ctrl_cost
-#             'goal_dist': goal_dist,
+                done = False
-#             'height_rew': height_reward,
+
-#             'healthy_reward': healthy_reward,
+                goal_dist = np.linalg.norm(site_pos_after - self.goal)
-#             'healthy': copy.copy(self.is_healthy),
+                if self.contact_dist is None and self.contact_with_floor:
-#             'contact_dist': copy.copy(self.contact_dist) or 0
+                    self.contact_dist = goal_dist
-#         }
+
-#         return observation, reward, done, info
+                rewards = 0
            # Task has reached the end, compute the sparse reward
            done = True
            healthy_reward = self.healthy_reward
            distance_reward = -goal_dist * self._dist_weight
            height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
            contact_reward = -(self.contact_dist or 5) * self._contact_weight
            rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
        reward = rewards - costs
        info = dict(
            height=height_after,
            x_pos=site_pos_after,
            max_height=self.max_height,
            goal=self.goal[:1],
            goal_dist=goal_dist,
            height_rew=self.max_height,
            healthy_reward=self.healthy_reward,
            healthy=self.is_healthy,
            contact_dist=self.contact_dist or 0,
            num_steps=self._steps,
            has_left_floor=self.has_left_floor
       )
        return observation, reward, terminated, truncated, info
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@ -5,11 +5,12 @@ from gymnasium import utils, spaces
 from gymnasium.envs.mujoco import MujocoEnv
 from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force
-from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high
+from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, jnt_vel_low, jnt_vel_high
 import mujoco
 MAX_EPISODE_STEPS_TABLE_TENNIS = 350
 MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER = 300
 CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]])
 CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65],
@ -18,6 +19,9 @@ CONTEXT_BOUNDS_SWICHING = np.array([[-1.0, -0.65, -1.0, 0.],
                                    [-0.2, 0.65, -0.2, 0.65]])
 DEFAULT_ROBOT_INIT_POS = np.array([0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 1.5])
 DEFAULT_ROBOT_INIT_VEL = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
 class TableTennisEnv(MujocoEnv, utils.EzPickle):
    """
    7 DoF table tennis environment
@ -34,7 +38,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
    def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
                 goal_switching_step: int = None,
-                 enable_artificial_wind: bool = False, **kwargs):
+                 enable_artificial_wind: bool = False,
                 random_pos_scale: float = 0.0,
                 random_vel_scale: float = 0.0,
                ):
        utils.EzPickle.__init__(**locals())
        self._steps = 0
@ -48,6 +55,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
        self._id_set = False
        # initial robot state
        self._random_pos_scale = random_pos_scale
        self._random_vel_scale = random_vel_scale
        # reward calculation
        self.ball_landing_pos = None
        self._goal_pos = np.zeros(2)
@ -156,7 +167,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
            "num_steps": self._steps,
        }
-        terminated, truncated = self._terminated, False
+        terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS
        return self._get_obs(), reward, terminated, truncated, info
@ -183,8 +194,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
        self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
-        self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5])
+        robot_init_pos, robot_init_vel = self.get_initial_robot_state()
-        self.data.qvel[:7] = np.zeros(7)
+
        self.data.qpos[:7] = robot_init_pos
        self.data.qvel[:7] = robot_init_vel
        mujoco.mj_forward(self.model, self.data)
@ -257,7 +270,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
    def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs, tau_bound, delay_bound):
        obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])])  # 0 for invalid traj
        penalty = self._get_traj_invalid_penalty(action, pos_traj, tau_bound, delay_bound)
-        return obs, penalty, True, False, {
+        return obs, penalty, False, True, {
            "hit_ball": [False],
            "ball_returned_success": [False],
            "land_dist_error": [10.],
@ -274,6 +287,179 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
            return False, pos_traj, vel_traj
        return True, pos_traj, vel_traj
 class TableTennisMarkovian(TableTennisEnv):
    def _get_reward2(self, hit_now, land_now):
        # Phase 1 not hit ball
        if not self._hit_ball:
            # Not hit ball
            min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
            return 0.005 * (1 - np.tanh(min_r_b_dist**2))
        # Phase 2 hit ball now
        elif self._hit_ball and hit_now:
            return 2
        # Phase 3 hit ball already and not land yet
        elif self._hit_ball and self._ball_landing_pos is None:
            min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1))
            return 0.02 * (1 - np.tanh(min_b_des_b_dist**2))
        # Phase 4 hit ball already and land now
        elif self._hit_ball and land_now:
            over_net_bonus = int(self._ball_landing_pos[0] < 0)
            min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
            return 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
        # Phase 5 hit ball already and land already
        elif self._hit_ball and not land_now and self._ball_landing_pos is not None:
            return 0
        else:
            raise NotImplementedError
    def _get_reward(self, terminated):
        # if not terminated:
        #     return 0
        min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
        if not self._hit_ball:
            # Not hit ball
            return 0.2 * (1 - np.tanh(min_r_b_dist**2))
        elif self._ball_landing_pos is None:
            # Hit ball but not landing pos
            min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1))
            return 2 + (1 - np.tanh(min_b_des_b_dist**2))
        else:
            # Hit ball and land
            min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
            over_net_bonus = int(self._ball_landing_pos[0] < 0)
            return 2 + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
    def _get_traj_invalid_penalty(self, action, pos_traj, tau_bound, delay_bound):
        tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
        delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
        violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0))
        violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0))
        invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \
                          violate_high_bound_error + violate_low_bound_error
        return -invalid_penalty
    def get_invalid_traj_step_penalty(self, pos_traj):
        violate_high_bound_error = (
            np.maximum(pos_traj - jnt_pos_high, 0).mean())
        violate_low_bound_error = (
            np.maximum(jnt_pos_low - pos_traj, 0).mean())
        invalid_penalty = violate_high_bound_error + violate_low_bound_error
    def _update_game_state(self, action):
        for _ in range(self.frame_skip):
            if self._enable_artificial_wind:
                self.data.qfrc_applied[-2] = self._artificial_force
            try:
                self.do_simulation(action, 1)
            except Exception as e:
                print("Simulation get unstable return with MujocoException: ", e)
                unstable_simulation = True
                self._terminated = True
                break
            # Update game state
            if not self._terminated:
                if not self._hit_ball:
                    self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
                                    self._contact_checker(self._ball_contact_id, self._bat_back_id)
                    if not self._hit_ball:
                        ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id)
                        if ball_land_on_floor_no_hit:
                            self._ball_landing_pos = self.data.body("target_ball").xpos.copy()
                            self._terminated = True
                if self._hit_ball and not self._ball_contact_after_hit:
                    if self._contact_checker(self._ball_contact_id, self._floor_contact_id):  # first check contact with floor
                        self._ball_contact_after_hit = True
                        self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
                        self._terminated = True
                    elif self._contact_checker(self._ball_contact_id, self._table_contact_id):  # second check contact with table
                        self._ball_contact_after_hit = True
                        self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
                        if self._ball_landing_pos[0] < 0.:  # ball lands on the opponent side
                            self._ball_return_success = True
                        self._terminated = True
            # update ball trajectory & racket trajectory
            self._ball_traj.append(self.data.body("target_ball").xpos.copy())
            self._racket_traj.append(self.data.geom("bat").xpos.copy())
    def ball_racket_contact(self):
        return self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
               self._contact_checker(self._ball_contact_id, self._bat_back_id)
    def step(self, action):
        if not self._id_set:
            self._set_ids()
        unstable_simulation = False
        hit_already = self._hit_ball
        if self._steps == self._goal_switching_step and self.np_random.uniform() < 0.5:
                new_goal_pos = self._generate_goal_pos(random=True)
                new_goal_pos[1] = -new_goal_pos[1]
                self._goal_pos = new_goal_pos
                self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
                mujoco.mj_forward(self.model, self.data)
        self._update_game_state(action)
        self._steps += 1
        obs = self._get_obs()
        # Compute reward
        if unstable_simulation:
            reward = -25
        else:
            # reward = self._get_reward(self._terminated)
            # hit_now = not hit_already and self._hit_ball
            hit_finish = self._hit_ball and not self.ball_racket_contact()
            if hit_finish:
                # Clean the ball and racket traj before hit
                self._ball_traj = []
                self._racket_traj = []
                # Simulate the rest of the traj
                reward = self._get_reward2(True, False)
                while self._steps < MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER:
                    land_already = self._ball_landing_pos is not None
                    self._update_game_state(np.zeros_like(action))
                    self._steps += 1
                    land_now = (not land_already
                                and self._ball_landing_pos is not None)
                    temp_reward = self._get_reward2(False, land_now)
                    # print(temp_reward)
                    reward += temp_reward
                    # Uncomment the line below to visualize the sim after hit
                    # self.render(mode="human")
            else:
                reward = self._get_reward2(False, False)
        # Update ball landing error
        land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \
                            if self._ball_landing_pos is not None else 10.
        info = {
            "hit_ball": self._hit_ball,
            "ball_returned_success": self._ball_return_success,
            "land_dist_error": land_dist_err,
            "is_success": self._ball_return_success and land_dist_err < 0.2,
            "num_steps": self._steps,
        }
        terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER
        return obs, reward, terminated, truncated, info
 class TableTennisWind(TableTennisEnv):
    def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, **kwargs):
@ -296,7 +482,16 @@ class TableTennisWind(TableTennisEnv):
        ])
        return obs
 class TableTennisGoalSwitching(TableTennisEnv):
    def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99, **kwargs):
        super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step, **kwargs)
 class TableTennisRandomInit(TableTennisEnv):
    def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
                 random_pos_scale: float = 1.0,
                 random_vel_scale: float = 0.0):
        super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip,
                         random_pos_scale=random_pos_scale,
                         random_vel_scale=random_vel_scale)
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py
@ -2,8 +2,9 @@ import numpy as np
 jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
 jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
-delay_bound = [0.05, 0.15]
+
-tau_bound = [0.5, 1.5]
+jnt_vel_low = np.ones(7) * -7
 jnt_vel_high = np.ones(7) * 7
 net_height = 0.1
 table_height = 0.77
@ -48,4 +49,4 @@ def magnus_force(top_spin=0.0, side_spin=0.0, v_ball=np.zeros(3), v_wind=np.zero
    C_l = 4.68 * 10e-4 - 2.0984 * 10e-5 * (np.linalg.norm(v_ball) - 50)  # Lift force coeffient or simply 1.23
    w = np.array([0.0, top_spin, side_spin]) # Angular velocity of ball
    f_m = 0.5 * rho * A * C_l * np.linalg.norm(v_ball-v_wind) * np.cross(w, v_ball-v_wind)
-    return f_m
+    return f_m
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "fancy_gym"
-version = "0.1.0"
+version = "0.1.4"
 description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches."
 readme = "README.md"
 authors = [
--- a/setup.py
+++ b/setup.py
@ -14,6 +14,7 @@ extras = {
    'box2d': ['gymnasium[box2d]>=0.26.0'],
    'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'],
    'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"],
    'mushroom-rl': ['mushroom-rl'],
 }
 # All dependencies
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@ -41,7 +41,7 @@ class ToyEnv(gym.Env):
        obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
        return obs, reward, terminated, truncated, info
-    def render(self, mode="human"):
+    def render(self):
        pass
--- a/test/test_fancy_registry.py
+++ b/test/test_fancy_registry.py
@ -33,7 +33,7 @@ class ToyEnv(gym.Env):
        obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
        return obs, reward, terminated, truncated, info
-    def render(self, mode="human"):
+    def render(self):
        pass
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@ -37,7 +37,7 @@ class ToyEnv(gym.Env):
        obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
        return obs, reward, terminated, truncated, info
-    def render(self, mode="human"):
+    def render(self):
        pass
		`@ -0,0 +1,2 @@`
							`from .kinematics import inverse_kinematics, forward_kinematics, jacobian`
							`from .transformations import robot_to_world, world_to_robot`