diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index a40c81f..8c0fa23 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -289,3 +289,25 @@ register(
'goal_switching_step': 99
}
)
+
+# Air Hockey environments
+for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]:
+ register(
+ id=f'fancy/AirHockey-{env_mode}-v0',
+ entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
+ max_episode_steps=500,
+ add_mp_types=[],
+ kwargs={
+ 'env_mode': env_mode
+ }
+ )
+
+register(
+ id=f'fancy/AirHockey-tournament-v0',
+ entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
+ max_episode_steps=15000,
+ add_mp_types=[],
+ kwargs={
+ 'env_mode': 'tournament'
+ }
+)
diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
index 4e5caaf..70b9bfd 100644
--- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
+++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py
@@ -178,9 +178,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
return False
- def render(self, mode=None):
- if mode==None:
- mode = self.render_mode
+ def render(self):
if self.fig is None:
# Create base figure once on the beginning. Afterwards only update
plt.ion()
@@ -199,7 +197,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
self.fig.gca().set_title(
f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}")
- if mode == "human":
+ if self.render_mode == "human":
# arm
self.line.set_data(self._joints[:, 0], self._joints[:, 1])
@@ -207,7 +205,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
self.fig.canvas.draw()
self.fig.canvas.flush_events()
- elif mode == "partial":
+ elif self.render_mode == "partial":
if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
# Arm
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
index 40a8153..9264b39 100644
--- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
+++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py
@@ -98,9 +98,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
def _check_collisions(self) -> bool:
return self._check_self_collision()
- def render(self, mode=None): # pragma: no cover
- if mode==None:
- mode = self.render_mode
+ def render(self): # pragma: no cover
if self.fig is None:
# Create base figure once on the beginning. Afterwards only update
plt.ion()
diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
index 932f50a..4003319 100644
--- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -123,9 +123,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
def _check_collisions(self) -> bool:
return self._check_self_collision()
- def render(self, mode=None):
- if mode==None:
- mode = self.render_mode
+ def render(self):
goal_pos = self._goal.T
via_pos = self._via_point.T
@@ -148,7 +146,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
- if mode == "human":
+ if self.render_mode == "human":
# goal
if self._steps == 1:
self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
@@ -160,7 +158,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
self.fig.canvas.draw()
self.fig.canvas.flush_events()
- elif mode == "partial":
+ elif self.render_mode == "partial":
if self._steps == 1:
# fig, ax = plt.subplots()
# Add the patch to the Axes
@@ -180,7 +178,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
plt.ylim([-1.1, lim])
plt.pause(0.01)
- elif mode == "final":
+ elif self.render_mode == "final":
if self._steps == 199 or self._is_collided:
# fig, ax = plt.subplots()
diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py
index ff51711..4afdcce 100644
--- a/fancy_gym/envs/mujoco/__init__.py
+++ b/fancy_gym/envs/mujoco/__init__.py
@@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching
+
+try:
+ from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv
+except ModuleNotFoundError:
+ print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)")
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/LICENSE b/fancy_gym/envs/mujoco/air_hockey/LICENSE
new file mode 100644
index 0000000..27c6bbb
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/LICENSE
@@ -0,0 +1,26 @@
+MIT License
+
+Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+---
+
+This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge).
+The changes are mostly focused on adapting the provided environments to fancy_gym.
diff --git a/fancy_gym/envs/mujoco/air_hockey/__init__.py b/fancy_gym/envs/mujoco/air_hockey/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
new file mode 100644
index 0000000..0e8e31e
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
@@ -0,0 +1,180 @@
+from copy import deepcopy
+import numpy as np
+from gymnasium import spaces
+
+import fancy_gym.envs.mujoco.air_hockey.constraints as constraints
+from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position
+from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
+from mushroom_rl.core import Environment
+
+class AirHockeyEnv(Environment):
+ metadata = {"render_modes": ["human"], "render_fps": 50}
+
+ def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
+ """
+ Environment Constructor
+
+ Args:
+ env [string]:
+ The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament].
+ interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
+ polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
+ Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
+ of position, velocity and acceleration of the shape (20, 3, n_joints)
+ """
+
+ env_dict = {
+ "tournament": position.IiwaPositionTournament,
+
+ "7dof-hit": position.IiwaPositionHit,
+ "7dof-defend": position.IiwaPositionDefend,
+
+ "3dof-hit": position.PlanarPositionHit,
+ "3dof-defend": position.PlanarPositionDefend
+ }
+
+ if env_mode not in env_dict:
+ raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!")
+
+ if env_mode == "tournament" and type(interpolation_order) != tuple:
+ interpolation_order = (interpolation_order, interpolation_order)
+
+ self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
+ self.env_name = env_mode
+ self.env_info = self.base_env.env_info
+ single_robot_obs_size = len(self.base_env.info.observation_space.low)
+ if env_mode == "tournament":
+ self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
+ else:
+ self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
+ robot_info = self.env_info["robot"]
+
+ if env_mode != "tournament":
+ if interpolation_order in [1, 2]:
+ self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+ if interpolation_order in [3, 4, -1]:
+ self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
+ high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+ if interpolation_order in [5]:
+ self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
+ high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+ else:
+ acts = [None, None]
+ for i in range(2):
+ if interpolation_order[i] in [1, 2]:
+ acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
+ if interpolation_order[i] in [3, 4, -1]:
+ acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
+ high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
+ if interpolation_order[i] in [5]:
+ acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
+ high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
+ self.action_space = spaces.Tuple((acts[0], acts[1]))
+
+ constraint_list = constraints.ConstraintList()
+ constraint_list.add(constraints.JointPositionConstraint(self.env_info))
+ constraint_list.add(constraints.JointVelocityConstraint(self.env_info))
+ constraint_list.add(constraints.EndEffectorConstraint(self.env_info))
+ if "7dof" in self.env_name or self.env_name == "tournament":
+ constraint_list.add(constraints.LinkConstraint(self.env_info))
+
+ self.env_info['constraints'] = constraint_list
+ self.env_info['env_name'] = self.env_name
+
+ self.render_mode = render_mode
+ self.render_human_active = False
+
+ super().__init__(self.base_env.info)
+
+ def step(self, action):
+ obs, reward, done, info = self.base_env.step(action)
+
+ if "tournament" in self.env_name:
+ info["constraints_value"] = list()
+ info["jerk"] = list()
+ for i in range(2):
+ obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)]
+ info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun(
+ obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']])))
+ info["jerk"].append(
+ self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][
+ 'n_joints']])
+
+ info["score"] = self.base_env.score
+ info["faults"] = self.base_env.faults
+
+ else:
+ info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']],
+ obs[self.env_info['joint_vel_ids']]))
+ info["jerk"] = self.base_env.jerk
+ info["success"] = self.check_success(obs)
+
+ if self.env_info['env_name'] == "tournament":
+ obs = np.array(np.split(obs, 2))
+
+ if self.render_human_active:
+ self.base_env.render()
+
+ return obs, reward, done, False, info
+
+ def render(self):
+ self.render_human_active = True
+
+ def reset(self, seed=None, options={}):
+ self.base_env.seed(seed)
+ obs = self.base_env.reset()
+ if self.env_info['env_name'] == "tournament":
+ obs = np.array(np.split(obs, 2))
+ return obs, {}
+
+ def check_success(self, obs):
+ puck_pos, puck_vel = self.base_env.get_puck(obs)
+
+ puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos)
+ success = 0
+
+ if "hit" in self.env_name:
+ if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \
+ np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0:
+ success = 1
+
+ elif "defend" in self.env_name:
+ if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1:
+ success = 1
+
+ elif "prepare" in self.env_name:
+ if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1:
+ success = 1
+ return success
+
+ @property
+ def unwrapped(self):
+ return self
+
+ def close(self):
+ self.base_env.stop()
+
+
+if __name__ == "__main__":
+ env = AirHockeyEnv(env_mode="7dof-hit")
+ env.reset()
+
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ while True:
+ action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3
+ observation, reward, done, info = env.step(action)
+ env.render()
+ gamma *= env.info.gamma
+ J += gamma * reward
+ R += reward
+ steps += 1
+ if done or steps > env.info.horizon:
+ print("J: ", J, " R: ", R)
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ env.reset()
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
new file mode 100644
index 0000000..4486dce
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
@@ -0,0 +1 @@
+from .constraints import *
diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
new file mode 100644
index 0000000..399b35b
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
@@ -0,0 +1,212 @@
+import copy
+
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian
+
+
+class Constraint:
+ def __init__(self, env_info, output_dim, **kwargs):
+ """
+ Constructor
+
+ Args
+ ----
+ env_info: dict
+ A dictionary contains information about the environment;
+ output_dim: int
+ The output dimension of the constraints.
+ **kwargs: dict
+ A dictionary contains agent related information.
+ """
+ self._env_info = env_info
+ self._name = None
+
+ self.output_dim = output_dim
+
+ self._fun_value = np.zeros(self.output_dim)
+ self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"]))
+ self._q_prev = None
+ self._dq_prev = None
+
+ @property
+ def name(self):
+ """
+ The name of the constraints
+
+ """
+ return self._name
+
+ def fun(self, q, dq):
+ """
+ The function of the constraint.
+
+ Args
+ ----
+ q: numpy.ndarray, (num_joints,)
+ The joint position of the robot
+ dq: numpy.ndarray, (num_joints,)
+ The joint velocity of the robot
+
+ Returns
+ -------
+ numpy.ndarray, (out_dim,):
+ The value computed by the constraints function.
+ """
+ if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
+ return self._fun_value
+ else:
+ self._jacobian(q, dq)
+ return self._fun(q, dq)
+
+ def jacobian(self, q, dq):
+ """
+ Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity.
+
+ Args
+ ----
+ q: ndarray, (num_joints,)
+ The joint position of the robot
+ dq: ndarray, (num_joints,)
+ The joint velocity of the robot
+
+ Returns
+ -------
+ numpy.ndarray, (dim_output, num_joints * 2):
+ The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot]
+
+ """
+ if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
+ return self._fun_value
+ else:
+ self._fun(q, dq)
+ return self._jacobian(q, dq)
+
+ def _fun(self, q, dq):
+ raise NotImplementedError
+
+ def _jacobian(self, q, dq):
+ raise NotImplementedError
+
+
+class ConstraintList:
+ def __init__(self):
+ self.constraints = dict()
+
+ def keys(self):
+ return self.constraints.keys()
+
+ def get(self, key):
+ return self.constraints.get(key)
+
+ def add(self, c):
+ self.constraints.update({c.name: c})
+
+ def delete(self, name):
+ del self.constraints[name]
+
+ def fun(self, q, dq):
+ return {key: self.constraints[key].fun(q, dq) for key in self.constraints}
+
+ def jacobian(self, q, dq):
+ return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints}
+
+
+class JointPositionConstraint(Constraint):
+ def __init__(self, env_info, **kwargs):
+ super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
+ self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95
+ self._name = 'joint_pos_constr'
+
+ def _fun(self, q, dq):
+ self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1]
+ self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q
+ return self._fun_value
+
+ def _jacobian(self, q, dq):
+ self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye(
+ self._env_info['robot']['n_joints'])
+ self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye(
+ self._env_info['robot']['n_joints'])
+ return self._jac_value
+
+
+class JointVelocityConstraint(Constraint):
+ def __init__(self, env_info, **kwargs):
+ super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
+ self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95
+ self._name = 'joint_vel_constr'
+
+ def _fun(self, q, dq):
+ self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1]
+ self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq
+ return self._fun_value
+
+ def _jacobian(self, q, dq):
+ self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye(
+ self._env_info['robot']['n_joints'])
+ self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye(
+ self._env_info['robot']['n_joints'])
+ return self._jac_value
+
+
+class EndEffectorConstraint(Constraint):
+ def __init__(self, env_info, **kwargs):
+ # 1 Dimension on x direction: x > x_lb
+ # 2 Dimension on y direction: y > y_lb, y < y_ub
+ # 2 Dimension on z direction: z > z_lb, z < z_ub
+ super().__init__(env_info, output_dim=5, **kwargs)
+ self._name = "ee_constr"
+ tolerance = 0.02
+
+ self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
+ self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
+
+ self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - (
+ self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius'])
+ self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
+ self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
+ self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance
+ self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance
+
+ def _fun(self, q, dq):
+ ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q)
+ self._fun_value = np.array([-ee_pos[0] + self.x_lb,
+ -ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub,
+ -ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub])
+ return self._fun_value
+
+ def _jacobian(self, q, dq):
+ jac = jacobian(self.robot_model, self.robot_data, q)
+ dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]])
+ self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']]
+ return self._jac_value
+
+
+class LinkConstraint(Constraint):
+ def __init__(self, env_info, **kwargs):
+ # 1 Dimension: wrist_z > minimum_height
+ # 2 Dimension: elbow_z > minimum_height
+ super().__init__(env_info, output_dim=2, **kwargs)
+ self._name = "link_constr"
+
+ self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
+ self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
+
+ self.z_lb = 0.25
+
+ def _fun(self, q, dq):
+ wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7")
+ elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4")
+ self._fun_value = np.array([-wrist_pos[2] + self.z_lb,
+ -elbow_pos[2] + self.z_lb])
+ return self._fun_value
+
+ def _jacobian(self, q, dq):
+ jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7")
+ jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4")
+ self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([
+ -jac_wrist[2, :self._env_info['robot']['n_joints']],
+ -jac_elbow[2, :self._env_info['robot']['n_joints']],
+ ])
+ return self._jac_value
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
new file mode 100644
index 0000000..b2dd80e
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl
new file mode 100644
index 0000000..28ad13f
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
new file mode 100644
index 0000000..e057ea1
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
new file mode 100644
index 0000000..4eaa051
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
new file mode 100644
index 0000000..2eb3709
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
new file mode 100644
index 0000000..2892530
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
new file mode 100644
index 0000000..dda08fe
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
new file mode 100644
index 0000000..6095451
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
new file mode 100644
index 0000000..6224ddb
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
new file mode 100644
index 0000000..359cb48
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
new file mode 100644
index 0000000..12f4b3d
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
@@ -0,0 +1,58 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
new file mode 100644
index 0000000..044aae5
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
@@ -0,0 +1,108 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
new file mode 100644
index 0000000..450ef8f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
@@ -0,0 +1,89 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
new file mode 100644
index 0000000..6d9ca5f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
@@ -0,0 +1,94 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl
new file mode 100644
index 0000000..e057ea1
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl
new file mode 100644
index 0000000..2deabe0
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl
new file mode 100644
index 0000000..3187389
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl
new file mode 100644
index 0000000..6b172ff
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl
new file mode 100644
index 0000000..f1d761f
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl
new file mode 100644
index 0000000..f2d1ec7
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl
new file mode 100644
index 0000000..df26d8f
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl
new file mode 100644
index 0000000..5e0eba9
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl
new file mode 100644
index 0000000..ea2d9d2
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl
new file mode 100644
index 0000000..062ceb7
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl
new file mode 100644
index 0000000..12af33a
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl
new file mode 100644
index 0000000..a9f30b7
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl
new file mode 100644
index 0000000..b2dd80e
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl
new file mode 100644
index 0000000..7595fb0
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl
new file mode 100644
index 0000000..c03f7b2
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl
new file mode 100644
index 0000000..e057ea1
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl
new file mode 100644
index 0000000..4eaa051
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl
new file mode 100644
index 0000000..2eb3709
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl
new file mode 100644
index 0000000..2892530
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl
new file mode 100644
index 0000000..dda08fe
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl
new file mode 100644
index 0000000..6095451
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl
new file mode 100644
index 0000000..6224ddb
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl
new file mode 100644
index 0000000..359cb48
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl
new file mode 100644
index 0000000..3d1760b
Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl differ
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
new file mode 100644
index 0000000..9eb57a8
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
new file mode 100644
index 0000000..73fdfb4
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
new file mode 100644
index 0000000..a8d057c
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml
@@ -0,0 +1,61 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
new file mode 100644
index 0000000..c14d75a
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml
@@ -0,0 +1,46 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
new file mode 100644
index 0000000..1f63668
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/data/table.xml b/fancy_gym/envs/mujoco/air_hockey/data/table.xml
new file mode 100644
index 0000000..578af15
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/data/table.xml
@@ -0,0 +1,89 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
new file mode 100644
index 0000000..91e38e1
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
@@ -0,0 +1,270 @@
+from collections import deque
+
+import mujoco
+import numpy as np
+import scipy
+
+from fancy_gym.envs.mujoco.air_hockey import seven_dof
+from fancy_gym.envs.mujoco.air_hockey import three_dof
+
+
+class PositionControl:
+ def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs):
+ """
+ Mixin that adds position controller to mujoco environments.
+
+ Args:
+ p_gain (float): Proportional controller gain
+ d_gain (float): Differential controller gain
+ i_gain (float): Integral controller gain
+ interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
+ polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
+ Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
+ of position, velocity and acceleration of the shape (20, 3, n_joints)
+ In the case of 2 agents it is a tuple, which describes the interpolation order for each agent
+ debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the
+ entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk.
+ """
+
+ self.debug = debug
+
+ super(PositionControl, self).__init__(*args, **kwargs)
+
+ self.robot_model = self.env_info['robot']['robot_model']
+ self.robot_data = self.env_info['robot']['robot_data']
+
+ self.p_gain = np.array(p_gain * self.n_agents)
+ self.d_gain = np.array(d_gain * self.n_agents)
+ self.i_gain = np.array(i_gain * self.n_agents)
+
+ self.prev_pos = np.zeros(len(self.actuator_joint_ids))
+ self.prev_vel = np.zeros(len(self.actuator_joint_ids))
+ self.prev_acc = np.zeros(len(self.actuator_joint_ids))
+ self.i_error = np.zeros(len(self.actuator_joint_ids))
+ self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids))
+
+ self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,)
+
+ self._num_env_joints = len(self.actuator_joint_ids)
+ self.n_robot_joints = self.env_info['robot']["n_joints"]
+
+ self.action_shape = [None] * self.n_agents
+
+ for i in range(self.n_agents):
+ if self.interp_order[i] is None:
+ self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints)
+ elif self.interp_order[i] in [1, 2]:
+ self.action_shape[i] = (self.n_robot_joints,)
+ elif self.interp_order[i] in [3, 4, -1]:
+ self.action_shape[i] = (2, self.n_robot_joints)
+ elif self.interp_order[i] == 5:
+ self.action_shape[i] = (3, self.n_robot_joints)
+
+ self.traj = None
+
+ self.jerk = np.zeros(self._num_env_joints)
+
+ if self.debug:
+ self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
+
+ def _enforce_safety_limits(self, desired_pos, desired_vel):
+ # ROS safe controller
+ pos = self.prev_controller_cmd_pos
+ k = 20
+
+ joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents))
+ joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents))
+
+ min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1])
+
+ max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1])
+
+ clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel)
+
+ min_pos = pos + min_vel * self._timestep
+ max_pos = pos + max_vel * self._timestep
+
+ clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos)
+ self.prev_controller_cmd_pos = clipped_pos.copy()
+
+ return clipped_pos, clipped_vel
+
+ def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel):
+ clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel)
+
+ error = (clipped_pos - current_pos)
+
+ self.i_error += self.i_gain * error * self._timestep
+ torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error
+
+ # Acceleration FeedForward
+ tau_ff = np.zeros(self.robot_model.nv)
+ for i in range(self.n_agents):
+ robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i
+ self.robot_data.qpos = current_pos[robot_joint_ids]
+ self.robot_data.qvel = current_vel[robot_joint_ids]
+ acc_ff = desired_acc[robot_joint_ids]
+ mujoco.mj_forward(self.robot_model, self.robot_data)
+
+ mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff)
+ torque[robot_joint_ids] += tau_ff
+
+ # Gravity Compensation and Coriolis and Centrifugal force
+ torque[robot_joint_ids] += self.robot_data.qfrc_bias
+
+ torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids],
+ self.robot_model.actuator_ctrlrange[:, 0]),
+ self.robot_model.actuator_ctrlrange[:, 1])
+
+ if self.debug:
+ self.controller_record.append(
+ np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk]))
+
+ return torque
+
+ def _interpolate_trajectory(self, interp_order, action, i=0):
+ tf = self.dt
+ prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+ prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+ prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
+ if interp_order == 1 and action.ndim == 1:
+ coef = np.array([[1, 0], [1, tf]])
+ results = np.vstack([prev_pos, action])
+ elif interp_order == 2 and action.ndim == 1:
+ coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]])
+ if np.linalg.norm(action - prev_pos) < 1e-3:
+ prev_vel = np.zeros_like(prev_vel)
+ results = np.vstack([prev_pos, action, prev_vel])
+ elif interp_order == 3 and action.shape[0] == 2:
+ coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]])
+ results = np.vstack([prev_pos, action[0], prev_vel, action[1]])
+ elif interp_order == 4 and action.shape[0] == 2:
+ coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4],
+ [0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3],
+ [0, 0, 2, 0, 0]])
+ results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc])
+ elif interp_order == 5 and action.shape[0] == 3:
+ coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5],
+ [0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4],
+ [0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]])
+ results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]])
+ elif interp_order == -1:
+ # Interpolate position and velocity linearly
+ pass
+ else:
+ raise ValueError("Undefined interpolator order or the action dimension does not match!")
+
+ if interp_order > 0:
+ A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints)
+ y = results.reshape(-2, order='F')
+ weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1)
+ weights_d = np.polynomial.polynomial.polyder(weights, axis=1)
+ weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
+ elif interp_order == -1:
+ weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T
+ weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T
+ weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
+
+ if interp_order in [3, 4, 5]:
+ self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep
+ else:
+ self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf
+
+ self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T)
+ self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T)
+ self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T)
+
+ for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps):
+ q = np.polynomial.polynomial.polyval(t, weights.T)
+ qd = np.polynomial.polynomial.polyval(t, weights_d.T)
+ qdd = np.polynomial.polynomial.polyval(t, weights_dd.T)
+ yield q, qd, qdd
+
+ def reset(self, obs=None):
+ obs = super(PositionControl, self).reset(obs)
+ self.prev_pos = self._data.qpos[self.actuator_joint_ids]
+ self.prev_vel = self._data.qvel[self.actuator_joint_ids]
+ self.prev_acc = np.zeros(len(self.actuator_joint_ids))
+ self.i_error = np.zeros(len(self.actuator_joint_ids))
+ self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids]
+
+ if self.debug:
+ self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
+ return obs
+
+ def _step_init(self, obs, action):
+ super(PositionControl, self)._step_init(obs, action)
+
+ if self.n_agents == 1:
+ self.traj = self._create_traj(self.interp_order[0], action)
+ else:
+ def _traj():
+ traj_1 = self._create_traj(self.interp_order[0], action[0], 0)
+ traj_2 = self._create_traj(self.interp_order[1], action[1], 1)
+
+ for a1, a2 in zip(traj_1, traj_2):
+ yield np.hstack([a1, a2])
+
+ self.traj = _traj()
+
+ def _create_traj(self, interp_order, action, i=0):
+ if interp_order is None:
+ return iter(action)
+ return self._interpolate_trajectory(interp_order, action, i)
+
+ def _compute_action(self, obs, action):
+ cur_pos, cur_vel = self.get_joints(obs)
+
+ desired_pos, desired_vel, desired_acc = next(self.traj)
+
+ return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel)
+
+ def _preprocess_action(self, action):
+ action = super(PositionControl, self)._preprocess_action(action)
+
+ if self.n_agents == 1:
+ assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \
+ f" {action.shape}"
+ else:
+ for i in range(self.n_agents):
+ assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \
+ f" {action[i].shape}"
+
+ return action
+
+
+class PositionControlIIWA(PositionControl):
+ def __init__(self, *args, **kwargs):
+ p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.]
+ d_gain = [60, 80, 60, 30, 10, 1, 0.5]
+ i_gain = [0, 0, 0, 0, 0, 0, 0]
+
+ super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
+
+
+class PositionControlPlanar(PositionControl):
+ def __init__(self, *args, **kwargs):
+ p_gain = [960, 480, 240]
+ d_gain = [60, 20, 4]
+ i_gain = [0, 0, 0]
+ super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
+
+
+class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit):
+ pass
+
+
+class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
+ pass
+
+
+class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
+ pass
+
+
+class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
+ pass
+
+
+class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
+ pass
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
new file mode 100644
index 0000000..ef48d24
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
@@ -0,0 +1,4 @@
+from .env_base import AirHockeyBase
+from .tournament import AirHockeyTournament
+from .hit import AirHockeyHit
+from .defend import AirHockeyDefend
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
new file mode 100644
index 0000000..0ffdf66
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
@@ -0,0 +1,75 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+
+
+class AirHockeyDefend(AirHockeySingle):
+ """
+ Class for the air hockey defending task.
+ The agent should stop the puck at the line x=-0.6.
+ """
+ def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+ self.init_velocity_range = (1, 3)
+
+ self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
+ self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
+ super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+ def setup(self, obs):
+ puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+ lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+ angle = np.random.uniform(-0.5, 0.5)
+
+ puck_vel = np.zeros(3)
+ puck_vel[0] = -np.cos(angle) * lin_vel
+ puck_vel[1] = np.sin(angle) * lin_vel
+ puck_vel[2] = np.random.uniform(-10, 10)
+
+ self._write_data("puck_x_pos", puck_pos[0])
+ self._write_data("puck_y_pos", puck_pos[1])
+ self._write_data("puck_x_vel", puck_vel[0])
+ self._write_data("puck_y_vel", puck_vel[1])
+ self._write_data("puck_yaw_vel", puck_vel[2])
+
+ super(AirHockeyDefend, self).setup(obs)
+
+ def reward(self, state, action, next_state, absorbing):
+ return 0
+
+ def is_absorbing(self, state):
+ puck_pos, puck_vel = self.get_puck(state)
+ # If puck is over the middle line and moving towards opponent
+ if puck_pos[0] > 0 and puck_vel[0] > 0:
+ return True
+ if np.linalg.norm(puck_vel[:2]) < 0.1:
+ return True
+ return super().is_absorbing(state)
+
+
+if __name__ == '__main__':
+ env = AirHockeyDefend()
+
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ env.reset()
+ env.render()
+ while True:
+ # action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8
+ action = np.zeros(7)
+ observation, reward, done, info = env.step(action)
+ env.render()
+ print(observation)
+ gamma *= env.info.gamma
+ J += gamma * reward
+ R += reward
+ steps += 1
+ if done or steps > env.info.horizon:
+ print("J: ", J, " R: ", R)
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
new file mode 100644
index 0000000..fef5394
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
@@ -0,0 +1,258 @@
+import os
+
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path
+from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin
+from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
+from mushroom_rl.utils.spaces import Box
+
+
+"""
+ Abstract class for all AirHockey Environments.
+
+"""
+class AirHockeyBase(MuJoCo):
+ def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
+ n_agents=1, viewer_params={}):
+
+ """
+ Constructor.
+
+ Args:
+ n_agents (int, 1): number of agent to be used in the environment (one or two)
+ """
+
+ self.n_agents = n_agents
+
+ action_spec = []
+ observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+ ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+ ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+ ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+ ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+ ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+ additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+ ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+ ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+ ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+ ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+ ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+ collision_spec = [("puck", ["puck"]),
+ ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
+ ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
+
+ if 1 <= self.n_agents <= 2:
+ scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
+
+ action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
+ "iiwa_1/joint_6", "iiwa_1/joint_7"]
+
+ observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS),
+ ("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS),
+ ("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS),
+ ("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS),
+ ("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS),
+ ("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS),
+ ("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS),
+ ("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL),
+ ("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL),
+ ("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL),
+ ("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL),
+ ("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL),
+ ("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL),
+ ("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)]
+
+ additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS),
+ ("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS),
+ ("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL),
+ ("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL),
+ ("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS),
+ ("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL),
+ ("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)]
+
+ collision_spec += [("robot_1/ee", ["iiwa_1/ee"])]
+
+ if self.n_agents == 2:
+ scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
+
+ observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)]
+
+ action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4",
+ "iiwa_2/joint_5",
+ "iiwa_2/joint_6", "iiwa_2/joint_7"]
+
+ observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+ ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+ ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+ ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+ ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+ ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
+ ("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS),
+ ("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS),
+ ("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS),
+ ("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS),
+ ("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS),
+ ("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS),
+ ("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS),
+ ("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL),
+ ("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL),
+ ("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL),
+ ("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL),
+ ("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL),
+ ("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL),
+ ("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)]
+
+ observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)]
+
+ additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS),
+ ("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS),
+ ("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL),
+ ("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL),
+ ("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS),
+ ("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL),
+ ("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)]
+
+ collision_spec += [("robot_2/ee", ["iiwa_2/ee"])]
+ else:
+ raise ValueError('n_agents should be 1 or 2')
+
+ self.env_info = dict()
+ self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
+ self.env_info['puck'] = {"radius": 0.03165}
+ self.env_info['mallet'] = {"radius": 0.04815}
+ self.env_info['n_agents'] = self.n_agents
+ self.env_info['robot'] = {
+ "n_joints": 7,
+ "ee_desired_height": 0.1645,
+ "joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
+ [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi,
+ "joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
+ [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10,
+ "base_frame": [],
+ "universal_height": 0.0645,
+ "control_frequency": 50,
+ }
+
+ self.env_info['puck_pos_ids'] = [0, 1, 2]
+ self.env_info['puck_vel_ids'] = [3, 4, 5]
+ self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12]
+ self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19]
+ if self.n_agents == 2:
+ self.env_info['opponent_ee_ids'] = [20, 21, 22]
+ else:
+ self.env_info['opponent_ee_ids'] = []
+
+ max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents
+
+ super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
+ n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
+
+ # Construct the mujoco model at origin
+ robot_model = mujoco.MjModel.from_xml_path(
+ os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml"))
+ robot_model.body('iiwa_1/base').pos = np.zeros(3)
+ robot_data = mujoco.MjData(robot_model)
+
+ # Add env_info that requires mujoco models
+ self.env_info['dt'] = self.dt
+ self.env_info["robot"]["joint_pos_limit"] = np.array(
+ [self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T
+ self.env_info["robot"]["robot_model"] = robot_model
+ self.env_info["robot"]["robot_data"] = robot_data
+ self.env_info["rl_info"] = self.info
+
+ frame_T = np.eye(4)
+ temp = np.zeros((9, 1))
+ mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat)
+ frame_T[:3, :3] = temp.reshape(3, 3)
+ frame_T[:3, 3] = self._model.body("iiwa_1/base").pos
+ self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+ if self.n_agents == 2:
+ mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat)
+ frame_T[:3, :3] = temp.reshape(3, 3)
+ frame_T[:3, 3] = self._model.body("iiwa_2/base").pos
+ self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+ # Ids of the joint, which are controller by the action space
+ self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
+
+ self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info)
+
+ def _modify_mdp_info(self, mdp_info):
+ obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
+ *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0]
+ for i in range(self.env_info['robot']['n_joints'])]),
+ *self.env_info['robot']['joint_vel_limit'][0]])
+ obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
+ *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1]
+ for i in range(self.env_info['robot']['n_joints'])]),
+ *self.env_info['robot']['joint_vel_limit'][1]])
+ if self.n_agents == 2:
+ obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
+ obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
+ mdp_info.observation_space = Box(obs_low, obs_high)
+ return mdp_info
+
+ def _simulation_pre_step(self):
+ self.universal_joint_plugin.update()
+
+ def is_absorbing(self, obs):
+ boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+ puck_pos, puck_vel = self.get_puck(obs)
+
+ if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
+ return True
+ return False
+
+ @staticmethod
+ def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
+ if type == 'pose':
+ puck_w = np.eye(4)
+ puck_w[:2, 3] = puck_in[:2]
+ puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
+
+ puck_r = np.linalg.inv(robot_frame) @ puck_w
+ puck_out = np.concatenate([puck_r[:2, 3],
+ R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
+
+ if type == 'vel':
+ rot_mat = robot_frame[:3, :3]
+
+ vel_lin = np.array([*puck_in[:2], 0])
+ vel_ang = np.array([0., 0., puck_in[2]])
+
+ vel_lin_r = rot_mat.T @ vel_lin
+ vel_ang_r = rot_mat.T @ vel_ang
+
+ puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
+ return puck_out
+
+ def get_puck(self, obs):
+ """
+ Getting the puck properties from the observations
+ Args:
+ obs: The current observation
+
+ Returns:
+ ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
+
+ """
+ puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
+ self.obs_helper.get_from_obs(obs, "puck_y_pos"),
+ self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
+ puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
+ self.obs_helper.get_from_obs(obs, "puck_y_vel"),
+ self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
+ return puck_pos, puck_vel
+
+ def get_ee(self):
+ raise NotImplementedError
+
+ def get_joints(self, obs):
+ raise NotImplementedError
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
new file mode 100644
index 0000000..54a5252
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
@@ -0,0 +1,171 @@
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
+
+class AirHockeyDouble(AirHockeyBase):
+ """
+ Base class for two agents air hockey tasks.
+ """
+
+ def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+ super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params)
+
+ self._compute_init_state()
+
+ self.filter_ratio = 0.274
+ self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
+ self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
+
+ def _compute_init_state(self):
+ init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
+
+ success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
+ self.env_info['robot']['robot_data'],
+ np.array([0.65, 0., 0.1645]),
+ R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
+ initial_q=init_state)
+
+ assert success is True
+
+ def get_ee(self, robot=1):
+ """
+ Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics
+ on the current joint position, this function exists to avoid redundant computations.
+ Args:
+ robot: ID of robot, either 1 or 2
+
+ Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+ """
+ ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos")
+
+ ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel")
+
+ return ee_pos, ee_vel
+
+ def get_joints(self, obs, agent=None):
+ """
+ Get joint position and velocity of the robots
+ Can choose the robot with agent = 1 / 2. If agent is None both are returned
+ """
+ if agent:
+ q_pos = np.zeros(7)
+ q_vel = np.zeros(7)
+ for i in range(7):
+ q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0]
+ q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0]
+ else:
+ q_pos = np.zeros(14)
+ q_vel = np.zeros(14)
+ for i in range(7):
+ q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+ q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+ q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0]
+ q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0]
+
+ return q_pos, q_vel
+
+ def _create_observation(self, obs):
+ # Filter the joint velocity
+ q_pos, q_vel = self.get_joints(obs)
+ q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+ self.q_pos_prev = q_pos
+ self.q_vel_prev = q_vel_filter
+
+ for i in range(7):
+ self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+ self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7]
+
+ # Wrap puck's rotation angle to [-pi, pi)
+ yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+ self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+ return obs
+
+ def _modify_observation(self, obs):
+ new_obs = obs.copy()
+
+ puck_pos, puck_vel = self.get_puck(new_obs)
+
+ puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+ puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+ self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0]
+ self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1]
+ self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2]
+
+ self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0]
+ self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1]
+ self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2]
+
+ opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')
+ self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \
+ (np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3]
+
+ puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1])
+ puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel')
+
+ self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0]
+ self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1]
+ self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2]
+
+ self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0]
+ self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1]
+ self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2]
+
+ opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')
+ self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \
+ (np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3]
+
+ return new_obs
+
+ def setup(self, obs):
+ for i in range(7):
+ self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+ self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i]
+
+ self.q_pos_prev[i] = self.init_state[i]
+ self.q_pos_prev[i + 7] = self.init_state[i]
+ self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
+ self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0]
+
+ self.universal_joint_plugin.reset()
+
+ super().setup(obs)
+ # Update body positions, needed for _compute_universal_joint
+ mujoco.mj_fwdPosition(self._model, self._data)
+
+ def reward(self, state, action, next_state, absorbing):
+ return 0
+
+
+def main():
+ env = AirHockeyDouble(viewer_params={'start_paused': True})
+ env.reset()
+ env.render()
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ while True:
+ action = np.zeros(14)
+ observation, reward, done, info = env.step(action)
+ env.render()
+ gamma *= env.info.gamma
+ J += gamma * reward
+ R += reward
+ steps += 1
+ if done or steps > env.info.horizon:
+ print("J: ", J, " R: ", R)
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ env.reset()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
new file mode 100644
index 0000000..accfc83
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
@@ -0,0 +1,104 @@
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
+from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
+
+class AirHockeySingle(AirHockeyBase):
+ """
+ Base class for single agent air hockey tasks.
+
+ """
+
+ def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+ super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
+
+ self._compute_init_state()
+
+ self.filter_ratio = 0.274
+ self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
+ self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
+
+ def _compute_init_state(self):
+ init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
+
+ success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
+ self.env_info['robot']['robot_data'],
+ np.array([0.65, 0., 0.1645]),
+ R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
+ initial_q=init_state)
+
+ assert success is True
+
+ def get_ee(self):
+ """
+ Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
+ on the current joint position, this function exists to avoid redundant computations.
+
+ Returns:
+ ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+ """
+ ee_pos = self._read_data("robot_1/ee_pos")
+
+ ee_vel = self._read_data("robot_1/ee_vel")
+
+ return ee_pos, ee_vel
+
+ def get_joints(self, obs):
+ """
+ Get joint position and velocity of the robot
+ """
+ q_pos = np.zeros(7)
+ q_vel = np.zeros(7)
+ for i in range(7):
+ q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+ q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+ return q_pos, q_vel
+
+ def _create_observation(self, obs):
+ # Filter the joint velocity
+ q_pos, q_vel = self.get_joints(obs)
+ q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+ self.q_pos_prev = q_pos
+ self.q_vel_prev = q_vel_filter
+
+ for i in range(7):
+ self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+
+ yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+ self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+ return obs
+
+ def _modify_observation(self, obs):
+ new_obs = obs.copy()
+ puck_pos, puck_vel = self.get_puck(new_obs)
+
+ puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+
+ puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+ self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
+ self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
+ self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
+
+ self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
+ self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
+ self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
+
+ return new_obs
+
+ def setup(self, obs):
+ for i in range(7):
+ self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+ self.q_pos_prev[i] = self.init_state[i]
+ self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
+
+ self.universal_joint_plugin.reset()
+
+ super().setup(obs)
+
+ # Update body positions, needed for _compute_universal_joint
+ mujoco.mj_fwdPosition(self._model, self._data)
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
new file mode 100644
index 0000000..59386d5
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
@@ -0,0 +1,74 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
+
+
+class AirHockeyHit(AirHockeySingle):
+ """
+ Class for the air hockey hitting task.
+ """
+ def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
+ """
+ Constructor
+ Args:
+ opponent_agent(Agent, None): Agent which controls the opponent
+ moving_init(bool, False): If true, initialize the puck with inital velocity.
+ """
+ self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]]) # Table Frame
+ self.init_velocity_range = (0, 0.5) # Table Frame
+
+ super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+ self.moving_init = moving_init
+ hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+ self.env_info['mallet']['radius'] * 2
+ self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
+ self.init_velocity_range = (0, 0.5) # Table Frame
+ self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
+
+
+ def setup(self, obs):
+ # Initial position of the puck
+ puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+ self._write_data("puck_x_pos", puck_pos[0])
+ self._write_data("puck_y_pos", puck_pos[1])
+
+ if self.moving_init:
+ lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+ angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+ puck_vel = np.zeros(3)
+ puck_vel[0] = -np.cos(angle) * lin_vel
+ puck_vel[1] = np.sin(angle) * lin_vel
+ puck_vel[2] = np.random.uniform(-2, 2)
+
+ self._write_data("puck_x_vel", puck_vel[0])
+ self._write_data("puck_y_vel", puck_vel[1])
+ self._write_data("puck_yaw_vel", puck_vel[2])
+
+ super(AirHockeyHit, self).setup(obs)
+
+ def reward(self, state, action, next_state, absorbing):
+ return 0
+
+ def is_absorbing(self, obs):
+ puck_pos, puck_vel = self.get_puck(obs)
+ # Stop if the puck bounces back on the opponents wall
+ if puck_pos[0] > 0 and puck_vel[0] < 0:
+ return True
+ return super(AirHockeyHit, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+ env = AirHockeyHit(moving_init=True)
+ env.reset()
+
+ steps = 0
+ while True:
+ action = np.zeros(7)
+
+ observation, reward, done, info = env.step(action)
+ env.render()
+ if done or steps > env.info.horizon:
+ steps = 0
+ env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
new file mode 100644
index 0000000..49713a0
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
@@ -0,0 +1,111 @@
+import mujoco
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble
+
+
+class AirHockeyTournament(AirHockeyDouble):
+ """
+ Class for the air hockey tournament. Consists of 2 robots which should play against each other.
+ When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty.
+ If a player accumulates 3 penalties his score is reduced by 1.
+ """
+ def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"):
+ self.agent_name = agent_name
+ self.opponent_name = opponent_name
+
+ self.score = [0, 0]
+ self.faults = [0, 0]
+ self.start_side = None
+
+ self.timer = 0
+
+ def custom_render_callback(viewport, context):
+ names = f"Agents \nScores \nFaults "
+ data = f"{self.agent_name} - {self.opponent_name}\n "
+ data += f"{self.score[0]} - {self.score[1]}\n "
+ data += f"{self.faults[0]} - {self.faults[1]}"
+ mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context)
+
+ viewer_params["custom_render_callback"] = custom_render_callback
+ super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+ hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+ self.env_info['mallet']['radius'] * 2
+
+ self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
+
+ def setup(self, obs):
+ if self.start_side == None:
+ self.start_side = np.random.choice([1, -1])
+ self.prev_side = self.start_side
+
+ # Initial position of the puck
+ puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+ self._write_data("puck_x_pos", puck_pos[0] * self.start_side)
+ self._write_data("puck_y_pos", puck_pos[1])
+
+ self.prev_side = self.start_side
+ self.timer = 0
+
+ super(AirHockeyTournament, self).setup(obs)
+
+ def reward(self, state, action, next_state, absorbing):
+ return 0
+
+ def is_absorbing(self, obs):
+ puck_pos, puck_vel = self.get_puck(obs)
+
+ # Puck stuck on one side for more than 15s
+ if np.sign(puck_pos[0]) == self.prev_side:
+ self.timer += self.dt
+ else:
+ self.prev_side *= -1
+ self.timer = 0
+
+ if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15:
+ if self.prev_side == -1:
+ self.faults[0] += 1
+ self.start_side = -1
+ if self.faults[0] % 3 == 0:
+ self.score[1] += 1
+ else:
+ self.faults[1] += 1
+ self.start_side = 1
+ if self.faults[1] % 3 == 0:
+ self.score[0] += 1
+
+ return True
+
+ # Puck in Goal
+ if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0:
+ if puck_pos[0] > self.env_info['table']['length'] / 2:
+ self.score[0] += 1
+ self.start_side = -1
+ return True
+
+ if puck_pos[0] < -self.env_info['table']['length'] / 2:
+ self.score[1] += 1
+ self.start_side = 1
+ return True
+
+ # Puck stuck in the middle
+ if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025:
+ return True
+ return super(AirHockeyTournament, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+ env = AirHockeyTournament()
+ env.reset()
+
+ steps = 0
+ while True:
+ action = np.zeros(14)
+ steps += 1
+ observation, reward, done, info = env.step(action)
+ env.render()
+ if done or steps > env.info.horizon:
+ steps = 0
+ env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
new file mode 100644
index 0000000..db285f1
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
@@ -0,0 +1,3 @@
+from .env_base import AirHockeyBase
+from .defend import AirHockeyDefend
+from .hit import AirHockeyHit
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
new file mode 100644
index 0000000..1f1224f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
@@ -0,0 +1,76 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
+
+
+class AirHockeyDefend(AirHockeySingle):
+ """
+ Class for the air hockey defending task.
+ The agent should stop the puck at the line x=-0.6.
+ """
+
+ def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+ self.init_velocity_range = (1, 3)
+
+ self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
+ self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
+ super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+ def setup(self, state=None):
+ # Set initial puck parameters
+ puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
+
+ lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+ angle = np.random.uniform(-0.5, 0.5)
+
+ puck_vel = np.zeros(3)
+ puck_vel[0] = -np.cos(angle) * lin_vel
+ puck_vel[1] = np.sin(angle) * lin_vel
+ puck_vel[2] = np.random.uniform(-10, 10)
+
+ self._write_data("puck_x_pos", puck_pos[0])
+ self._write_data("puck_y_pos", puck_pos[1])
+ self._write_data("puck_x_vel", puck_vel[0])
+ self._write_data("puck_y_vel", puck_vel[1])
+ self._write_data("puck_yaw_vel", puck_vel[2])
+
+ super(AirHockeyDefend, self).setup(state)
+
+ def reward(self, state, action, next_state, absorbing):
+ return 0
+
+ def is_absorbing(self, state):
+ puck_pos, puck_vel = self.get_puck(state)
+ # If puck is over the middle line and moving towards opponent
+ if puck_pos[0] > 0 and puck_vel[0] > 0:
+ return True
+ if np.linalg.norm(puck_vel[:2]) < 0.1:
+ return True
+ return super().is_absorbing(state)
+
+
+if __name__ == '__main__':
+ env = AirHockeyDefend()
+
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ env.reset()
+ while True:
+ action = np.zeros(3)
+ observation, reward, done, info = env.step(action)
+ env.render()
+ gamma *= env.info.gamma
+ J += gamma * reward
+ R += reward
+ steps += 1
+
+ if done or steps > env.info.horizon:
+ print("J: ", J, " R: ", R)
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ env.reset()
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
new file mode 100644
index 0000000..d9747f3
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
@@ -0,0 +1,224 @@
+import os
+
+import mujoco
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+
+from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path
+from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
+from mushroom_rl.utils.spaces import Box
+
+
+class AirHockeyBase(MuJoCo):
+ """
+ Abstract class for all AirHockey Environments.
+
+ """
+
+ def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
+ n_agents=1, viewer_params={}):
+ """
+ Constructor.
+
+ Args:
+ n_agents (int, 1): number of agent to be used in the environment (one or two)
+ """
+
+ self.n_agents = n_agents
+
+ action_spec = []
+
+ observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+ ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+ ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+ ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+ ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+ ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+ additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+ ("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+ ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+ ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+ ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+ ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
+
+ collision_spec = [("puck", ["puck"]),
+ ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
+ ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
+
+ if 1 <= self.n_agents <= 2:
+ scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
+
+ action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"]
+
+ observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS),
+ ("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS),
+ ("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS),
+ ("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL),
+ ("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL),
+ ("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)]
+
+ additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS),
+ ("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)]
+
+ collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])]
+
+ if self.n_agents == 2:
+ scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
+
+ observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)]
+
+ action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"]
+ # Add puck pos/vel again to transform into second agents frame
+ observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
+ ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
+ ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
+ ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
+ ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
+ ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
+ ("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS),
+ ("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS),
+ ("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS),
+ ("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL),
+ ("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL),
+ ("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)]
+
+ observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)]
+
+ additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS),
+ ("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)]
+
+ collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])]
+ else:
+ raise ValueError('n_agents should be 1 or 2')
+
+ self.env_info = dict()
+ self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
+ self.env_info['puck'] = {"radius": 0.03165}
+ self.env_info['mallet'] = {"radius": 0.04815}
+ self.env_info['n_agents'] = self.n_agents
+ self.env_info['robot'] = {
+ "n_joints": 3,
+ "ee_desired_height": 0.1,
+ "joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3],
+ [np.pi / 2, np.pi / 2, np.pi * 2 / 3]]),
+
+ "joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi],
+ [2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]),
+ "base_frame": [],
+ "control_frequency": 50,
+ }
+
+ self.env_info['puck_pos_ids'] = [0, 1, 2]
+ self.env_info['puck_vel_ids'] = [3, 4, 5]
+ self.env_info['joint_pos_ids'] = [6, 7, 8]
+ self.env_info['joint_vel_ids'] = [9, 10, 11]
+ if self.n_agents == 2:
+ self.env_info['opponent_ee_ids'] = [13, 14, 15]
+ else:
+ self.env_info['opponent_ee_ids'] = []
+
+ max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents
+
+ super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
+ n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
+
+ # Construct the mujoco model at origin
+ robot_model = mujoco.MjModel.from_xml_path(
+ os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml"))
+ robot_model.body('planar_robot_1/base').pos = np.zeros(3)
+ robot_data = mujoco.MjData(robot_model)
+
+ # Add env_info that requires mujoco models
+ self.env_info['dt'] = self.dt
+ self.env_info["robot"]["joint_pos_limit"] = np.array(
+ [self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T
+ self.env_info["robot"]["robot_model"] = robot_model
+ self.env_info["robot"]["robot_data"] = robot_data
+ self.env_info["rl_info"] = self.info
+
+ frame_T = np.eye(4)
+ temp = np.zeros((9, 1))
+ mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat)
+ frame_T[:3, :3] = temp.reshape(3, 3)
+ frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos
+ self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+ if self.n_agents == 2:
+ mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat)
+ frame_T[:3, :3] = temp.reshape(3, 3)
+ frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos
+ self.env_info['robot']['base_frame'].append(frame_T.copy())
+
+ # Ids of the joint, which are controller by the action space
+ self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
+
+ def _modify_mdp_info(self, mdp_info):
+ obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
+ *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0]
+ for i in range(self.env_info['robot']['n_joints'])]),
+ *self.env_info['robot']['joint_vel_limit'][0]])
+ obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
+ *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1]
+ for i in range(self.env_info['robot']['n_joints'])]),
+ *self.env_info['robot']['joint_vel_limit'][1]])
+ if self.n_agents == 2:
+ obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
+ obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
+ mdp_info.observation_space = Box(obs_low, obs_high)
+ return mdp_info
+
+ def is_absorbing(self, obs):
+ boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
+ puck_pos, puck_vel = self.get_puck(obs)
+
+ if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
+ return True
+ return False
+
+ @staticmethod
+ def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
+ if type == 'pose':
+ puck_w = np.eye(4)
+ puck_w[:2, 3] = puck_in[:2]
+ puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
+
+ puck_r = np.linalg.inv(robot_frame) @ puck_w
+ puck_out = np.concatenate([puck_r[:2, 3],
+ R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
+
+ if type == 'vel':
+ rot_mat = robot_frame[:3, :3]
+
+ vel_lin = np.array([*puck_in[:2], 0])
+ vel_ang = np.array([0., 0., puck_in[2]])
+
+ vel_lin_r = rot_mat.T @ vel_lin
+ vel_ang_r = rot_mat.T @ vel_ang
+
+ puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
+ return puck_out
+
+ def get_puck(self, obs):
+ """
+ Getting the puck properties from the observations
+ Args:
+ obs: The current observation
+
+ Returns:
+ ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
+
+ """
+ puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
+ self.obs_helper.get_from_obs(obs, "puck_y_pos"),
+ self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
+ puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
+ self.obs_helper.get_from_obs(obs, "puck_y_vel"),
+ self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
+ return puck_pos, puck_vel
+
+ def get_ee(self):
+ raise NotImplementedError
+
+ def get_joints(self, obs):
+ raise NotImplementedError
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
new file mode 100644
index 0000000..4dfa117
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
@@ -0,0 +1,91 @@
+import mujoco
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase
+
+
+class AirHockeySingle(AirHockeyBase):
+ """
+ Base class for single agent air hockey tasks.
+
+ """
+
+ def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
+
+ """
+ Constructor.
+
+ """
+ self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414])
+ super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
+
+ self.filter_ratio = 0.274
+ self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
+ self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
+
+ def get_ee(self):
+ """
+ Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
+ on the current joint position, this function exists to avoid redundant computations.
+
+ Returns:
+ ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
+ """
+ ee_pos = self._read_data("robot_1/ee_pos")
+
+ ee_vel = self._read_data("robot_1/ee_vel")
+
+ return ee_pos, ee_vel
+
+ def get_joints(self, obs):
+ """
+ Get joint position and velocity of the robot
+ """
+ q_pos = np.zeros(3)
+ q_vel = np.zeros(3)
+ for i in range(3):
+ q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
+ q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
+
+ return q_pos, q_vel
+
+ def _modify_observation(self, obs):
+ new_obs = obs.copy()
+ puck_pos, puck_vel = self.get_puck(obs)
+
+ puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
+
+ puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
+
+ self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
+ self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
+ self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
+
+ self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
+ self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
+ self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
+
+ return new_obs
+
+ def setup(self, state=None):
+ for i in range(3):
+ self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i]
+ self.q_pos_prev[i] = self.init_state[i]
+ self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0]
+
+ mujoco.mj_fwdPosition(self._model, self._data)
+ super().setup(state)
+
+ def _create_observation(self, obs):
+ # Filter the joint velocity
+ q_pos, q_vel = self.get_joints(obs)
+ q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
+ self.q_pos_prev = q_pos
+ self.q_vel_prev = q_vel_filter
+
+ for i in range(3):
+ self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
+
+ yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
+ self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
+ return obs
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
new file mode 100644
index 0000000..6124fba
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
@@ -0,0 +1,84 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
+
+class AirHockeyHit(AirHockeySingle):
+ """
+ Class for the air hockey hitting task.
+ """
+
+ def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}):
+ """
+ Constructor
+ Args:
+ moving_init(bool, False): If true, initialize the puck with inital velocity.
+ """
+ super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
+
+ self.moving_init = moving_init
+ hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
+ self.env_info['mallet']['radius'] * 2
+ self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
+ self.init_velocity_range = (0, 0.5) # Table Frame
+ self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
+
+ def setup(self, state=None):
+ # Initial position of the puck
+ puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
+
+ # self.init_state = np.array([-0.9273, 0.9273, np.pi / 2])
+
+ self._write_data("puck_x_pos", puck_pos[0])
+ self._write_data("puck_y_pos", puck_pos[1])
+
+ if self.moving_init:
+ lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
+ angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
+ puck_vel = np.zeros(3)
+ puck_vel[0] = -np.cos(angle) * lin_vel
+ puck_vel[1] = np.sin(angle) * lin_vel
+ puck_vel[2] = np.random.uniform(-2, 2, 1)
+
+ self._write_data("puck_x_vel", puck_vel[0])
+ self._write_data("puck_y_vel", puck_vel[1])
+ self._write_data("puck_yaw_vel", puck_vel[2])
+
+ super(AirHockeyHit, self).setup(state)
+
+ def reward(self, state, action, next_state, absorbing):
+ return 0
+
+ def is_absorbing(self, obs):
+ puck_pos, puck_vel = self.get_puck(obs)
+ # Stop if the puck bounces back on the opponents wall
+ if puck_pos[0] > 0 and puck_vel[0] < 0:
+ return True
+ return super(AirHockeyHit, self).is_absorbing(obs)
+
+
+if __name__ == '__main__':
+ env = AirHockeyHit(moving_init=False)
+
+ env.reset()
+ env.render()
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ while True:
+ action = np.zeros(3)
+
+ observation, reward, done, info = env.step(action)
+ env.render()
+
+ gamma *= env.info.gamma
+ J += gamma * reward
+ R += reward
+ steps += 1
+ if done or steps > env.info.horizon:
+ print("J: ", J, " R: ", R)
+ R = 0.
+ J = 0.
+ gamma = 1.
+ steps = 0
+ env.reset()
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
new file mode 100644
index 0000000..64c297a
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
@@ -0,0 +1,2 @@
+from .kinematics import inverse_kinematics, forward_kinematics, jacobian
+from .transformations import robot_to_world, world_to_robot
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
new file mode 100644
index 0000000..2156d48
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
@@ -0,0 +1,248 @@
+import mujoco
+import numpy as np
+
+
+def forward_kinematics(mj_model, mj_data, q, link="ee"):
+ """
+ Compute the forward kinematics of the robots.
+
+ IMPORTANT:
+ For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+ parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+ where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
+ case this function will return the wrong values.
+
+ Coordinate System:
+ All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+ base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+ coordinate system
+
+ Args:
+ mj_model (mujoco.MjModel):
+ mujoco MjModel of the robot-only model
+ mj_data (mujoco.MjData):
+ mujoco MjData object generated from the model
+ q (np.array):
+ joint configuration for which the forward kinematics are computed
+ link (string, "ee"):
+ Link for which the forward kinematics is calculated. When using the iiwas the choices are
+ ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+
+ Returns
+ -------
+ position: numpy.ndarray, (3,)
+ Position of the link in robot's base frame
+ orientation: numpy.ndarray, (3, 3)
+ Orientation of the link in robot's base frame
+ """
+
+ return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
+
+
+def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"):
+ """
+ Compute the inverse kinematics of the robots.
+
+ IMPORTANT:
+ For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+ parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+ where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is
+ the case this function will return the wrong values.
+
+ Coordinate System:
+ All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+ base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+ coordinate system
+
+ Args:
+ mj_model (mujoco.MjModel):
+ mujoco MjModel of the robot-only model
+ mj_data (mujoco.MjData):
+ mujoco MjData object generated from the model
+ desired_position (numpy.ndarray, (3,)):
+ The desired position of the selected link.
+ desired_rotation (optional, numpy.array, (3,3)):
+ The desired rotation of the selected link.
+ initial_q (numpy.ndarray, None):
+ The initial configuration of the algorithm, if set to None it will take the initial configuration of the
+ mj_data.
+ link (str, "ee"):
+ Link for which the inverse kinematics is calculated. When using the iiwas the choices are
+ ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+ """
+ q_init = np.zeros(mj_model.nq)
+ if initial_q is None:
+ q_init = mj_data.qpos
+ else:
+ q_init[:initial_q.size] = initial_q
+
+ q_l = mj_model.jnt_range[:, 0]
+ q_h = mj_model.jnt_range[:, 1]
+ lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2
+ upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2
+
+ desired_quat = None
+ if desired_rotation is not None:
+ desired_quat = np.zeros(4)
+ mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1))
+
+ return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model,
+ mj_data, lower_limit, upper_limit)
+
+
+def jacobian(mj_model, mj_data, q, link="ee"):
+ """
+ Compute the Jacobian of the robots.
+
+ IMPORTANT:
+ For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
+ parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
+ where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
+ case this function will return the wrong values.
+
+ Coordinate System:
+ All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
+ base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
+ coordinate system
+
+ Args:
+ mj_model (mujoco.MjModel):
+ mujoco MjModel of the robot-only model
+ mj_data (mujoco.MjData):
+ mujoco MjData object generated from the model
+ q (numpy.ndarray):
+ joint configuration for which the forward kinematics are computed
+ link (string, "ee"):
+ Link for which the forward kinematics is calculated. When using the iiwas the choices are
+ ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
+
+ Returns
+ -------
+ numpy.ndarray, (6, num_joints):
+ The Jacobian matrix for the robot kinematics.
+ """
+ return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
+
+
+def link_to_xml_name(mj_model, link):
+ try:
+ mj_model.body('iiwa_1/base')
+ link_to_frame_idx = {
+ "1": "iiwa_1/link_1",
+ "2": "iiwa_1/link_2",
+ "3": "iiwa_1/link_3",
+ "4": "iiwa_1/link_4",
+ "5": "iiwa_1/link_5",
+ "6": "iiwa_1/link_6",
+ "7": "iiwa_1/link_7",
+ "ee": "iiwa_1/striker_joint_link",
+ }
+ except:
+ link_to_frame_idx = {
+ "1": "planar_robot_1/body_1",
+ "2": "planar_robot_1/body_2",
+ "3": "planar_robot_1/body_3",
+ "ee": "planar_robot_1/body_ee",
+ }
+ return link_to_frame_idx[link]
+
+
+def _mujoco_fk(q, name, model, data):
+ data.qpos[:len(q)] = q
+ mujoco.mj_fwdPosition(model, data)
+ return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy()
+
+
+def _mujoco_jac(q, name, model, data):
+ data.qpos[:len(q)] = q
+ dtype = data.qpos.dtype
+ jac = np.empty((6, model.nv), dtype=dtype)
+ jac_pos, jac_rot = jac[:3], jac[3:]
+ mujoco.mj_fwdPosition(model, data)
+ mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
+ return jac
+
+
+def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit):
+ IT_MAX = 1000
+ eps = 1e-4
+ damp = 1e-3
+ progress_thresh = 20.0
+ max_update_norm = 0.1
+ rot_weight = 1
+ i = 0
+
+ dtype = data.qpos.dtype
+
+ data.qpos = initial_q
+
+ neg_x_quat = np.empty(4, dtype=dtype)
+ error_x_quat = np.empty(4, dtype=dtype)
+
+ if desired_pos is not None and desired_quat is not None:
+ jac = np.empty((6, model.nv), dtype=dtype)
+ err = np.empty(6, dtype=dtype)
+ jac_pos, jac_rot = jac[:3], jac[3:]
+ err_pos, err_rot = err[:3], err[3:]
+ else:
+ jac = np.empty((3, model.nv), dtype=dtype)
+ err = np.empty(3, dtype=dtype)
+ if desired_pos is not None:
+ jac_pos, jac_rot = jac, None
+ err_pos, err_rot = err, None
+ elif desired_quat is not None:
+ jac_pos, jac_rot = None, jac
+ err_pos, err_rot = None, err
+ else:
+ raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics")
+
+ while True:
+ # forward kinematics
+ mujoco.mj_fwdPosition(model, data)
+
+ x_pos = data.body(name).xpos
+ x_quat = data.body(name).xquat
+
+ error_norm = 0
+ if desired_pos is not None:
+ err_pos[:] = desired_pos - x_pos
+ error_norm += np.linalg.norm(err_pos)
+
+ if desired_quat is not None:
+ mujoco.mju_negQuat(neg_x_quat, x_quat)
+ mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat)
+ mujoco.mju_quat2Vel(err_rot, error_x_quat, 1)
+ error_norm += np.linalg.norm(err_rot) * rot_weight
+
+ if error_norm < eps:
+ success = True
+ break
+ if i >= IT_MAX:
+ success = False
+ break
+
+ mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
+
+ hess_approx = jac.T.dot(jac)
+ joint_delta = jac.T.dot(err)
+
+ hess_approx += np.eye(hess_approx.shape[0]) * damp
+ update_joints = np.linalg.solve(hess_approx, joint_delta)
+
+ update_norm = np.linalg.norm(update_joints)
+
+ # Check whether we are still making enough progress, and halt if not.
+ progress_criterion = error_norm / update_norm
+ if progress_criterion > progress_thresh:
+ success = False
+ break
+
+ if update_norm > max_update_norm:
+ update_joints *= max_update_norm / update_norm
+
+ mujoco.mj_integratePos(model, data.qpos, update_joints, 1)
+ data.qpos = np.clip(data.qpos, lower_limit, upper_limit)
+ i += 1
+ q_cur = data.qpos.copy()
+
+ return success, q_cur
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
new file mode 100644
index 0000000..6a6ee84
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
@@ -0,0 +1,65 @@
+import numpy as np
+
+
+def robot_to_world(base_frame, translation, rotation=None):
+ """
+ Transform position or rotation optional from the robot base frame to the world frame
+
+ Args
+ ----
+ base_frame: numpy.ndarray, (4,4)
+ The transformation matrix from the world to robot base frame
+ translation: ndarray, (3,)
+ The 3D position to be transformed
+ rotation: optional, ndarray, (3, 3)
+ The rotation in the matrix form to be transformed
+
+ Returns
+ -------
+ position: ndarray (3,)
+ The transformed 3D position
+ rotation: ndarray (3, 3)
+ The transformed rotation in the matrix form
+
+ """
+
+ target = np.eye(4)
+ target[:len(translation), 3] = translation
+ if rotation is not None:
+ target[:3, :3] = rotation
+
+ target_frame = base_frame @ target
+
+ return target_frame[:len(translation), 3], target_frame[:3, :3]
+
+
+def world_to_robot(base_frame, translation, rotation=None):
+ """
+ Transfrom position and rotation (optional) from the world frame to the robot's base frame
+
+ Args
+ ----
+ base_frame: ndarray, (4,4)
+ The transformation matrix from the world to robot base frame
+ translation: ndarray, (3,)
+ The 3D position to be transformed
+ rotation: optional, ndarray, (3, 3)
+ The rotation in the matrix form to be tranformed
+
+ Returns
+ -------
+ position: ndarray, (3,)
+ The transformed 3D position
+ rotation: ndarray, (3, 3)
+ The transformed rotation in the matrix form
+
+ """
+
+ target = np.eye(4)
+ target[:len(translation), 3] = translation
+ if rotation is not None:
+ target[:3, :3] = rotation
+
+ target_frame = np.linalg.inv(base_frame) @ target
+
+ return target_frame[:len(translation), 3], target_frame[:3, :3]
diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
new file mode 100644
index 0000000..20b43ac
--- /dev/null
+++ b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
@@ -0,0 +1,120 @@
+import numpy as np
+
+from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics
+
+
+class UniversalJointPlugin:
+ def __init__(self, env_model, env_data, env_info):
+ self.env_info = env_info
+ self.env_model = env_model
+ self.env_data = env_data
+ self.Kp = 20
+ self.Kd = 0.31
+
+ self.universal_joint_ids = []
+ self.universal_joint_ctrl_ids = []
+ self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id,
+ env_model.joint("iiwa_1/striker_joint_2").id]
+ self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id,
+ env_model.actuator("iiwa_1/striker_joint_2").id]
+ action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
+ "iiwa_1/joint_6", "iiwa_1/joint_7"]
+
+ if self.env_info['n_agents'] >= 2:
+ self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id,
+ env_model.joint("iiwa_2/striker_joint_2").id]
+ self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id,
+ env_model.actuator("iiwa_2/striker_joint_2").id]
+ action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5",
+ "iiwa_2/joint_6", "iiwa_2/joint_7"]
+
+ self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec]
+
+ self.filter_ratio = 0.273
+ self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents'])
+ self.u_joint_pos_prev = None
+ self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents'])
+
+ def reset(self):
+ self.u_joint_pos_prev = None
+ self._control_universal_joint()
+ for i in range(self.env_info['n_agents']):
+ self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids]
+
+ self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des
+
+ def update(self):
+ self._control_universal_joint()
+
+ def _control_universal_joint(self):
+ self._compute_universal_joint()
+ self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids]
+ self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \
+ (1 - self.filter_ratio) * self.u_joint_vel_prev
+
+ Kp = 4
+ Kd = 0.31
+ torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev
+ self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque
+
+ def _compute_universal_joint(self):
+ for i in range(self.env_info['n_agents']):
+ q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]]
+ # Have to exclude the puck joints
+ pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'],
+ self.env_info['robot']['robot_data'], q)
+
+ v_x = rot_mat[:, 0]
+ v_y = rot_mat[:, 1]
+
+ # The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T
+ # and the current y-axis. (0, 0, 1).T x v_y
+ x_desired = np.array([-v_y[1], v_y[0], 0])
+
+ # Find the signed angle from the current to the desired x-axis around the y-axis
+ # https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane
+ q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired)
+ if self.u_joint_pos_prev is not None:
+ if q1 - self.u_joint_pos_prev[0] > np.pi:
+ q1 -= np.pi * 2
+ elif q1 - self.u_joint_pos_prev[0] < -np.pi:
+ q1 += np.pi * 2
+
+ # Rotate the X axis by the calculated amount
+ w = np.array([[0, -v_y[2], v_y[1]],
+ [v_y[2], 0, -v_y[0]],
+ [-v_y[1], v_y[0], 0]])
+
+ r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1))
+ v_x_rotated = r @ v_x
+
+ # The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current
+ # x-axis, which is already rotated around y. The negative is there because the x-axis is flipped.
+ # -((0, 0, 1).T x v_x))
+ y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0])
+
+ # Find the signed angle from the current to the desired y-axis around the new rotated x-axis
+ q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired)
+
+ if self.u_joint_pos_prev is not None:
+ if q2 - self.u_joint_pos_prev[1] > np.pi:
+ q2 -= np.pi * 2
+ elif q2 - self.u_joint_pos_prev[1] < -np.pi:
+ q2 += np.pi * 2
+
+ alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
+ alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
+
+ if self.u_joint_pos_prev is None:
+ self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
+ else:
+ self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum(
+ 10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]),
+ -np.pi * 0.01), np.pi * 0.01)
+
+ self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
+
+ return self.u_joint_pos_des
+
+ def _cross_3d(self, a, b):
+ return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]])
diff --git a/setup.py b/setup.py
index 1daa568..53ee8f1 100644
--- a/setup.py
+++ b/setup.py
@@ -7,11 +7,12 @@ from setuptools import setup, find_packages
# Environment-specific dependencies for dmc and metaworld
extras = {
'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'],
- 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'],
+ 'metaworld': ['mujoco==2.3.3', 'metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'],
'box2d': ['gymnasium[box2d]>=0.26.0'],
'mujoco': ['mujoco==2.3.3', 'gymnasium[mujoco]>0.26.0'],
'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'],
'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"],
+ 'mushroom-rl': ['mushroom-rl'],
}
# All dependencies
diff --git a/test/test_black_box.py b/test/test_black_box.py
index 8cdc543..c1a760a 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -41,7 +41,7 @@ class ToyEnv(gym.Env):
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
return obs, reward, terminated, truncated, info
- def render(self, mode="human"):
+ def render(self):
pass
diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py
index aad076b..849b36f 100644
--- a/test/test_fancy_registry.py
+++ b/test/test_fancy_registry.py
@@ -33,7 +33,7 @@ class ToyEnv(gym.Env):
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
return obs, reward, terminated, truncated, info
- def render(self, mode="human"):
+ def render(self):
pass
diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index c2edf42..db463c8 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -37,7 +37,7 @@ class ToyEnv(gym.Env):
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
return obs, reward, terminated, truncated, info
- def render(self, mode="human"):
+ def render(self):
pass