diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index a40c81f..8c0fa23 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -289,3 +289,25 @@ register( 'goal_switching_step': 99 } ) + +# Air Hockey environments +for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]: + register( + id=f'fancy/AirHockey-{env_mode}-v0', + entry_point='fancy_gym.envs.mujoco:AirHockeyEnv', + max_episode_steps=500, + add_mp_types=[], + kwargs={ + 'env_mode': env_mode + } + ) + +register( + id=f'fancy/AirHockey-tournament-v0', + entry_point='fancy_gym.envs.mujoco:AirHockeyEnv', + max_episode_steps=15000, + add_mp_types=[], + kwargs={ + 'env_mode': 'tournament' + } +) diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py index 4e5caaf..70b9bfd 100644 --- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py +++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py @@ -178,9 +178,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): return False - def render(self, mode=None): - if mode==None: - mode = self.render_mode + def render(self): if self.fig is None: # Create base figure once on the beginning. Afterwards only update plt.ion() @@ -199,7 +197,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): self.fig.gca().set_title( f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") - if mode == "human": + if self.render_mode == "human": # arm self.line.set_data(self._joints[:, 0], self._joints[:, 1]) @@ -207,7 +205,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): self.fig.canvas.draw() self.fig.canvas.flush_events() - elif mode == "partial": + elif self.render_mode == "partial": if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: # Arm plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index 40a8153..9264b39 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -98,9 +98,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): def _check_collisions(self) -> bool: return self._check_self_collision() - def render(self, mode=None): # pragma: no cover - if mode==None: - mode = self.render_mode + def render(self): # pragma: no cover if self.fig is None: # Create base figure once on the beginning. Afterwards only update plt.ion() diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py index 932f50a..4003319 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -123,9 +123,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): def _check_collisions(self) -> bool: return self._check_self_collision() - def render(self, mode=None): - if mode==None: - mode = self.render_mode + def render(self): goal_pos = self._goal.T via_pos = self._via_point.T @@ -148,7 +146,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}") - if mode == "human": + if self.render_mode == "human": # goal if self._steps == 1: self.goal_point_plot.set_data(goal_pos[0], goal_pos[1]) @@ -160,7 +158,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): self.fig.canvas.draw() self.fig.canvas.flush_events() - elif mode == "partial": + elif self.render_mode == "partial": if self._steps == 1: # fig, ax = plt.subplots() # Add the patch to the Axes @@ -180,7 +178,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): plt.ylim([-1.1, lim]) plt.pause(0.01) - elif mode == "final": + elif self.render_mode == "final": if self._steps == 199 or self._is_collided: # fig, ax = plt.subplots() diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py index ff51711..4afdcce 100644 --- a/fancy_gym/envs/mujoco/__init__.py +++ b/fancy_gym/envs/mujoco/__init__.py @@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching + +try: + from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv +except ModuleNotFoundError: + print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)") \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/LICENSE b/fancy_gym/envs/mujoco/air_hockey/LICENSE new file mode 100644 index 0000000..27c6bbb --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/LICENSE @@ -0,0 +1,26 @@ +MIT License + +Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--- + +This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge). +The changes are mostly focused on adapting the provided environments to fancy_gym. diff --git a/fancy_gym/envs/mujoco/air_hockey/__init__.py b/fancy_gym/envs/mujoco/air_hockey/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py new file mode 100644 index 0000000..0e8e31e --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py @@ -0,0 +1,180 @@ +from copy import deepcopy +import numpy as np +from gymnasium import spaces + +import fancy_gym.envs.mujoco.air_hockey.constraints as constraints +from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position +from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world +from mushroom_rl.core import Environment + +class AirHockeyEnv(Environment): + metadata = {"render_modes": ["human"], "render_fps": 50} + + def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs): + """ + Environment Constructor + + Args: + env [string]: + The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament]. + interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are + polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity. + Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory + of position, velocity and acceleration of the shape (20, 3, n_joints) + """ + + env_dict = { + "tournament": position.IiwaPositionTournament, + + "7dof-hit": position.IiwaPositionHit, + "7dof-defend": position.IiwaPositionDefend, + + "3dof-hit": position.PlanarPositionHit, + "3dof-defend": position.PlanarPositionDefend + } + + if env_mode not in env_dict: + raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!") + + if env_mode == "tournament" and type(interpolation_order) != tuple: + interpolation_order = (interpolation_order, interpolation_order) + + self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs) + self.env_name = env_mode + self.env_info = self.base_env.env_info + single_robot_obs_size = len(self.base_env.info.observation_space.low) + if env_mode == "tournament": + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64) + else: + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64) + robot_info = self.env_info["robot"] + + if env_mode != "tournament": + if interpolation_order in [1, 2]: + self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) + if interpolation_order in [3, 4, -1]: + self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) + if interpolation_order in [5]: + self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) + else: + acts = [None, None] + for i in range(2): + if interpolation_order[i] in [1, 2]: + acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) + if interpolation_order[i] in [3, 4, -1]: + acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) + if interpolation_order[i] in [5]: + acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) + self.action_space = spaces.Tuple((acts[0], acts[1])) + + constraint_list = constraints.ConstraintList() + constraint_list.add(constraints.JointPositionConstraint(self.env_info)) + constraint_list.add(constraints.JointVelocityConstraint(self.env_info)) + constraint_list.add(constraints.EndEffectorConstraint(self.env_info)) + if "7dof" in self.env_name or self.env_name == "tournament": + constraint_list.add(constraints.LinkConstraint(self.env_info)) + + self.env_info['constraints'] = constraint_list + self.env_info['env_name'] = self.env_name + + self.render_mode = render_mode + self.render_human_active = False + + super().__init__(self.base_env.info) + + def step(self, action): + obs, reward, done, info = self.base_env.step(action) + + if "tournament" in self.env_name: + info["constraints_value"] = list() + info["jerk"] = list() + for i in range(2): + obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)] + info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun( + obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']]))) + info["jerk"].append( + self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][ + 'n_joints']]) + + info["score"] = self.base_env.score + info["faults"] = self.base_env.faults + + else: + info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']], + obs[self.env_info['joint_vel_ids']])) + info["jerk"] = self.base_env.jerk + info["success"] = self.check_success(obs) + + if self.env_info['env_name'] == "tournament": + obs = np.array(np.split(obs, 2)) + + if self.render_human_active: + self.base_env.render() + + return obs, reward, done, False, info + + def render(self): + self.render_human_active = True + + def reset(self, seed=None, options={}): + self.base_env.seed(seed) + obs = self.base_env.reset() + if self.env_info['env_name'] == "tournament": + obs = np.array(np.split(obs, 2)) + return obs, {} + + def check_success(self, obs): + puck_pos, puck_vel = self.base_env.get_puck(obs) + + puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos) + success = 0 + + if "hit" in self.env_name: + if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \ + np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0: + success = 1 + + elif "defend" in self.env_name: + if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1: + success = 1 + + elif "prepare" in self.env_name: + if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1: + success = 1 + return success + + @property + def unwrapped(self): + return self + + def close(self): + self.base_env.stop() + + +if __name__ == "__main__": + env = AirHockeyEnv(env_mode="7dof-hit") + env.reset() + + R = 0. + J = 0. + gamma = 1. + steps = 0 + while True: + action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3 + observation, reward, done, info = env.step(action) + env.render() + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py new file mode 100644 index 0000000..4486dce --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py @@ -0,0 +1 @@ +from .constraints import * diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py new file mode 100644 index 0000000..399b35b --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py @@ -0,0 +1,212 @@ +import copy + +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian + + +class Constraint: + def __init__(self, env_info, output_dim, **kwargs): + """ + Constructor + + Args + ---- + env_info: dict + A dictionary contains information about the environment; + output_dim: int + The output dimension of the constraints. + **kwargs: dict + A dictionary contains agent related information. + """ + self._env_info = env_info + self._name = None + + self.output_dim = output_dim + + self._fun_value = np.zeros(self.output_dim) + self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"])) + self._q_prev = None + self._dq_prev = None + + @property + def name(self): + """ + The name of the constraints + + """ + return self._name + + def fun(self, q, dq): + """ + The function of the constraint. + + Args + ---- + q: numpy.ndarray, (num_joints,) + The joint position of the robot + dq: numpy.ndarray, (num_joints,) + The joint velocity of the robot + + Returns + ------- + numpy.ndarray, (out_dim,): + The value computed by the constraints function. + """ + if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev): + return self._fun_value + else: + self._jacobian(q, dq) + return self._fun(q, dq) + + def jacobian(self, q, dq): + """ + Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity. + + Args + ---- + q: ndarray, (num_joints,) + The joint position of the robot + dq: ndarray, (num_joints,) + The joint velocity of the robot + + Returns + ------- + numpy.ndarray, (dim_output, num_joints * 2): + The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot] + + """ + if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev): + return self._fun_value + else: + self._fun(q, dq) + return self._jacobian(q, dq) + + def _fun(self, q, dq): + raise NotImplementedError + + def _jacobian(self, q, dq): + raise NotImplementedError + + +class ConstraintList: + def __init__(self): + self.constraints = dict() + + def keys(self): + return self.constraints.keys() + + def get(self, key): + return self.constraints.get(key) + + def add(self, c): + self.constraints.update({c.name: c}) + + def delete(self, name): + del self.constraints[name] + + def fun(self, q, dq): + return {key: self.constraints[key].fun(q, dq) for key in self.constraints} + + def jacobian(self, q, dq): + return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints} + + +class JointPositionConstraint(Constraint): + def __init__(self, env_info, **kwargs): + super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs) + self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95 + self._name = 'joint_pos_constr' + + def _fun(self, q, dq): + self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1] + self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q + return self._fun_value + + def _jacobian(self, q, dq): + self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye( + self._env_info['robot']['n_joints']) + self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye( + self._env_info['robot']['n_joints']) + return self._jac_value + + +class JointVelocityConstraint(Constraint): + def __init__(self, env_info, **kwargs): + super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs) + self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95 + self._name = 'joint_vel_constr' + + def _fun(self, q, dq): + self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1] + self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq + return self._fun_value + + def _jacobian(self, q, dq): + self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye( + self._env_info['robot']['n_joints']) + self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye( + self._env_info['robot']['n_joints']) + return self._jac_value + + +class EndEffectorConstraint(Constraint): + def __init__(self, env_info, **kwargs): + # 1 Dimension on x direction: x > x_lb + # 2 Dimension on y direction: y > y_lb, y < y_ub + # 2 Dimension on z direction: z > z_lb, z < z_ub + super().__init__(env_info, output_dim=5, **kwargs) + self._name = "ee_constr" + tolerance = 0.02 + + self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model']) + self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data']) + + self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - ( + self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius']) + self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius']) + self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius']) + self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance + self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance + + def _fun(self, q, dq): + ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q) + self._fun_value = np.array([-ee_pos[0] + self.x_lb, + -ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub, + -ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub]) + return self._fun_value + + def _jacobian(self, q, dq): + jac = jacobian(self.robot_model, self.robot_data, q) + dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]]) + self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']] + return self._jac_value + + +class LinkConstraint(Constraint): + def __init__(self, env_info, **kwargs): + # 1 Dimension: wrist_z > minimum_height + # 2 Dimension: elbow_z > minimum_height + super().__init__(env_info, output_dim=2, **kwargs) + self._name = "link_constr" + + self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model']) + self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data']) + + self.z_lb = 0.25 + + def _fun(self, q, dq): + wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7") + elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4") + self._fun_value = np.array([-wrist_pos[2] + self.z_lb, + -elbow_pos[2] + self.z_lb]) + return self._fun_value + + def _jacobian(self, q, dq): + jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7") + jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4") + self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([ + -jac_wrist[2, :self._env_info['robot']['n_joints']], + -jac_elbow[2, :self._env_info['robot']['n_joints']], + ]) + return self._jac_value diff --git a/fancy_gym/envs/mujoco/air_hockey/data/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl new file mode 100644 index 0000000..b2dd80e Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl new file mode 100644 index 0000000..28ad13f Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl new file mode 100644 index 0000000..4eaa051 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl new file mode 100644 index 0000000..2eb3709 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl new file mode 100644 index 0000000..2892530 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl new file mode 100644 index 0000000..dda08fe Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl new file mode 100644 index 0000000..6095451 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl new file mode 100644 index 0000000..6224ddb Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl new file mode 100644 index 0000000..359cb48 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml new file mode 100644 index 0000000..12f4b3d --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml new file mode 100644 index 0000000..044aae5 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml new file mode 100644 index 0000000..450ef8f --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml new file mode 100644 index 0000000..6d9ca5f --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl new file mode 100644 index 0000000..2deabe0 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl new file mode 100644 index 0000000..3187389 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl new file mode 100644 index 0000000..6b172ff Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl new file mode 100644 index 0000000..f1d761f Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl new file mode 100644 index 0000000..f2d1ec7 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl new file mode 100644 index 0000000..df26d8f Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl new file mode 100644 index 0000000..5e0eba9 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl new file mode 100644 index 0000000..ea2d9d2 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl new file mode 100644 index 0000000..062ceb7 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl new file mode 100644 index 0000000..12af33a Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl new file mode 100644 index 0000000..a9f30b7 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl new file mode 100644 index 0000000..b2dd80e Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl new file mode 100644 index 0000000..7595fb0 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl new file mode 100644 index 0000000..c03f7b2 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl new file mode 100644 index 0000000..4eaa051 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl new file mode 100644 index 0000000..2eb3709 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl new file mode 100644 index 0000000..2892530 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl new file mode 100644 index 0000000..dda08fe Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl new file mode 100644 index 0000000..6095451 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl new file mode 100644 index 0000000..6224ddb Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl new file mode 100644 index 0000000..359cb48 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl new file mode 100644 index 0000000..3d1760b Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml new file mode 100644 index 0000000..9eb57a8 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml new file mode 100644 index 0000000..73fdfb4 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml new file mode 100644 index 0000000..a8d057c --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml new file mode 100644 index 0000000..c14d75a --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml new file mode 100644 index 0000000..1f63668 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/table.xml b/fancy_gym/envs/mujoco/air_hockey/data/table.xml new file mode 100644 index 0000000..578af15 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/table.xml @@ -0,0 +1,89 @@ + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py new file mode 100644 index 0000000..91e38e1 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py @@ -0,0 +1,270 @@ +from collections import deque + +import mujoco +import numpy as np +import scipy + +from fancy_gym.envs.mujoco.air_hockey import seven_dof +from fancy_gym.envs.mujoco.air_hockey import three_dof + + +class PositionControl: + def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs): + """ + Mixin that adds position controller to mujoco environments. + + Args: + p_gain (float): Proportional controller gain + d_gain (float): Differential controller gain + i_gain (float): Integral controller gain + interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are + polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity. + Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory + of position, velocity and acceleration of the shape (20, 3, n_joints) + In the case of 2 agents it is a tuple, which describes the interpolation order for each agent + debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the + entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk. + """ + + self.debug = debug + + super(PositionControl, self).__init__(*args, **kwargs) + + self.robot_model = self.env_info['robot']['robot_model'] + self.robot_data = self.env_info['robot']['robot_data'] + + self.p_gain = np.array(p_gain * self.n_agents) + self.d_gain = np.array(d_gain * self.n_agents) + self.i_gain = np.array(i_gain * self.n_agents) + + self.prev_pos = np.zeros(len(self.actuator_joint_ids)) + self.prev_vel = np.zeros(len(self.actuator_joint_ids)) + self.prev_acc = np.zeros(len(self.actuator_joint_ids)) + self.i_error = np.zeros(len(self.actuator_joint_ids)) + self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids)) + + self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,) + + self._num_env_joints = len(self.actuator_joint_ids) + self.n_robot_joints = self.env_info['robot']["n_joints"] + + self.action_shape = [None] * self.n_agents + + for i in range(self.n_agents): + if self.interp_order[i] is None: + self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints) + elif self.interp_order[i] in [1, 2]: + self.action_shape[i] = (self.n_robot_joints,) + elif self.interp_order[i] in [3, 4, -1]: + self.action_shape[i] = (2, self.n_robot_joints) + elif self.interp_order[i] == 5: + self.action_shape[i] = (3, self.n_robot_joints) + + self.traj = None + + self.jerk = np.zeros(self._num_env_joints) + + if self.debug: + self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps) + + def _enforce_safety_limits(self, desired_pos, desired_vel): + # ROS safe controller + pos = self.prev_controller_cmd_pos + k = 20 + + joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents)) + joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents)) + + min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1]) + + max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1]) + + clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel) + + min_pos = pos + min_vel * self._timestep + max_pos = pos + max_vel * self._timestep + + clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos) + self.prev_controller_cmd_pos = clipped_pos.copy() + + return clipped_pos, clipped_vel + + def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel): + clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel) + + error = (clipped_pos - current_pos) + + self.i_error += self.i_gain * error * self._timestep + torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error + + # Acceleration FeedForward + tau_ff = np.zeros(self.robot_model.nv) + for i in range(self.n_agents): + robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i + self.robot_data.qpos = current_pos[robot_joint_ids] + self.robot_data.qvel = current_vel[robot_joint_ids] + acc_ff = desired_acc[robot_joint_ids] + mujoco.mj_forward(self.robot_model, self.robot_data) + + mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff) + torque[robot_joint_ids] += tau_ff + + # Gravity Compensation and Coriolis and Centrifugal force + torque[robot_joint_ids] += self.robot_data.qfrc_bias + + torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids], + self.robot_model.actuator_ctrlrange[:, 0]), + self.robot_model.actuator_ctrlrange[:, 1]) + + if self.debug: + self.controller_record.append( + np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk])) + + return torque + + def _interpolate_trajectory(self, interp_order, action, i=0): + tf = self.dt + prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] + prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] + prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] + if interp_order == 1 and action.ndim == 1: + coef = np.array([[1, 0], [1, tf]]) + results = np.vstack([prev_pos, action]) + elif interp_order == 2 and action.ndim == 1: + coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]]) + if np.linalg.norm(action - prev_pos) < 1e-3: + prev_vel = np.zeros_like(prev_vel) + results = np.vstack([prev_pos, action, prev_vel]) + elif interp_order == 3 and action.shape[0] == 2: + coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]]) + results = np.vstack([prev_pos, action[0], prev_vel, action[1]]) + elif interp_order == 4 and action.shape[0] == 2: + coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4], + [0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3], + [0, 0, 2, 0, 0]]) + results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc]) + elif interp_order == 5 and action.shape[0] == 3: + coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5], + [0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4], + [0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]]) + results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]]) + elif interp_order == -1: + # Interpolate position and velocity linearly + pass + else: + raise ValueError("Undefined interpolator order or the action dimension does not match!") + + if interp_order > 0: + A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints) + y = results.reshape(-2, order='F') + weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1) + weights_d = np.polynomial.polynomial.polyder(weights, axis=1) + weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1) + elif interp_order == -1: + weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T + weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T + weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1) + + if interp_order in [3, 4, 5]: + self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep + else: + self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf + + self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T) + self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T) + self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T) + + for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps): + q = np.polynomial.polynomial.polyval(t, weights.T) + qd = np.polynomial.polynomial.polyval(t, weights_d.T) + qdd = np.polynomial.polynomial.polyval(t, weights_dd.T) + yield q, qd, qdd + + def reset(self, obs=None): + obs = super(PositionControl, self).reset(obs) + self.prev_pos = self._data.qpos[self.actuator_joint_ids] + self.prev_vel = self._data.qvel[self.actuator_joint_ids] + self.prev_acc = np.zeros(len(self.actuator_joint_ids)) + self.i_error = np.zeros(len(self.actuator_joint_ids)) + self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids] + + if self.debug: + self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps) + return obs + + def _step_init(self, obs, action): + super(PositionControl, self)._step_init(obs, action) + + if self.n_agents == 1: + self.traj = self._create_traj(self.interp_order[0], action) + else: + def _traj(): + traj_1 = self._create_traj(self.interp_order[0], action[0], 0) + traj_2 = self._create_traj(self.interp_order[1], action[1], 1) + + for a1, a2 in zip(traj_1, traj_2): + yield np.hstack([a1, a2]) + + self.traj = _traj() + + def _create_traj(self, interp_order, action, i=0): + if interp_order is None: + return iter(action) + return self._interpolate_trajectory(interp_order, action, i) + + def _compute_action(self, obs, action): + cur_pos, cur_vel = self.get_joints(obs) + + desired_pos, desired_vel, desired_acc = next(self.traj) + + return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel) + + def _preprocess_action(self, action): + action = super(PositionControl, self)._preprocess_action(action) + + if self.n_agents == 1: + assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \ + f" {action.shape}" + else: + for i in range(self.n_agents): + assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \ + f" {action[i].shape}" + + return action + + +class PositionControlIIWA(PositionControl): + def __init__(self, *args, **kwargs): + p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.] + d_gain = [60, 80, 60, 30, 10, 1, 0.5] + i_gain = [0, 0, 0, 0, 0, 0, 0] + + super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs) + + +class PositionControlPlanar(PositionControl): + def __init__(self, *args, **kwargs): + p_gain = [960, 480, 240] + d_gain = [60, 20, 4] + i_gain = [0, 0, 0] + super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs) + + +class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit): + pass + + +class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend): + pass + + +class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit): + pass + + +class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend): + pass + + +class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament): + pass diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py new file mode 100644 index 0000000..ef48d24 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py @@ -0,0 +1,4 @@ +from .env_base import AirHockeyBase +from .tournament import AirHockeyTournament +from .hit import AirHockeyHit +from .defend import AirHockeyDefend \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py new file mode 100644 index 0000000..0ffdf66 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py @@ -0,0 +1,75 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle + + +class AirHockeyDefend(AirHockeySingle): + """ + Class for the air hockey defending task. + The agent should stop the puck at the line x=-0.6. + """ + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + self.init_velocity_range = (1, 3) + + self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + def setup(self, obs): + puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0] + + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-0.5, 0.5) + + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-10, 10) + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super(AirHockeyDefend, self).setup(obs) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, state): + puck_pos, puck_vel = self.get_puck(state) + # If puck is over the middle line and moving towards opponent + if puck_pos[0] > 0 and puck_vel[0] > 0: + return True + if np.linalg.norm(puck_vel[:2]) < 0.1: + return True + return super().is_absorbing(state) + + +if __name__ == '__main__': + env = AirHockeyDefend() + + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() + env.render() + while True: + # action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8 + action = np.zeros(7) + observation, reward, done, info = env.step(action) + env.render() + print(observation) + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py new file mode 100644 index 0000000..fef5394 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py @@ -0,0 +1,258 @@ +import os + +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path +from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin +from mushroom_rl.environments.mujoco import MuJoCo, ObservationType +from mushroom_rl.utils.spaces import Box + + +""" + Abstract class for all AirHockey Environments. + +""" +class AirHockeyBase(MuJoCo): + def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1, + n_agents=1, viewer_params={}): + + """ + Constructor. + + Args: + n_agents (int, 1): number of agent to be used in the environment (one or two) + """ + + self.n_agents = n_agents + + action_spec = [] + observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + collision_spec = [("puck", ["puck"]), + ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]), + ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])] + + if 1 <= self.n_agents <= 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml") + + action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5", + "iiwa_1/joint_6", "iiwa_1/joint_7"] + + observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS), + ("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS), + ("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS), + ("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS), + ("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS), + ("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS), + ("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS), + ("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL), + ("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL), + ("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL), + ("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL), + ("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL), + ("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL), + ("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)] + + additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS), + ("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS), + ("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL), + ("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL), + ("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS), + ("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL), + ("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)] + + collision_spec += [("robot_1/ee", ["iiwa_1/ee"])] + + if self.n_agents == 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml") + + observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)] + + action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", + "iiwa_2/joint_5", + "iiwa_2/joint_6", "iiwa_2/joint_7"] + + observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL), + ("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS), + ("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS), + ("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS), + ("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS), + ("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS), + ("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS), + ("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS), + ("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL), + ("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL), + ("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL), + ("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL), + ("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL), + ("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL), + ("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)] + + observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)] + + additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS), + ("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS), + ("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL), + ("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL), + ("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS), + ("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL), + ("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)] + + collision_spec += [("robot_2/ee", ["iiwa_2/ee"])] + else: + raise ValueError('n_agents should be 1 or 2') + + self.env_info = dict() + self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25} + self.env_info['puck'] = {"radius": 0.03165} + self.env_info['mallet'] = {"radius": 0.04815} + self.env_info['n_agents'] = self.n_agents + self.env_info['robot'] = { + "n_joints": 7, + "ee_desired_height": 0.1645, + "joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135], + [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi, + "joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135], + [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10, + "base_frame": [], + "universal_height": 0.0645, + "control_frequency": 50, + } + + self.env_info['puck_pos_ids'] = [0, 1, 2] + self.env_info['puck_vel_ids'] = [3, 4, 5] + self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12] + self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19] + if self.n_agents == 2: + self.env_info['opponent_ee_ids'] = [20, 21, 22] + else: + self.env_info['opponent_ee_ids'] = [] + + max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents + + super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps, + n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params) + + # Construct the mujoco model at origin + robot_model = mujoco.MjModel.from_xml_path( + os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml")) + robot_model.body('iiwa_1/base').pos = np.zeros(3) + robot_data = mujoco.MjData(robot_model) + + # Add env_info that requires mujoco models + self.env_info['dt'] = self.dt + self.env_info["robot"]["joint_pos_limit"] = np.array( + [self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T + self.env_info["robot"]["robot_model"] = robot_model + self.env_info["robot"]["robot_data"] = robot_data + self.env_info["rl_info"] = self.info + + frame_T = np.eye(4) + temp = np.zeros((9, 1)) + mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("iiwa_1/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + if self.n_agents == 2: + mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("iiwa_2/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + # Ids of the joint, which are controller by the action space + self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec] + + self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info) + + def _modify_mdp_info(self, mdp_info): + obs_low = np.array([0, -1, -np.pi, -20., -20., -100, + *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][0]]) + obs_high = np.array([3.02, 1, np.pi, 20., 20., 100, + *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][1]]) + if self.n_agents == 2: + obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]]) + obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]]) + mdp_info.observation_space = Box(obs_low, obs_high) + return mdp_info + + def _simulation_pre_step(self): + self.universal_joint_plugin.update() + + def is_absorbing(self, obs): + boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2 + puck_pos, puck_vel = self.get_puck(obs) + + if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100: + return True + return False + + @staticmethod + def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'): + if type == 'pose': + puck_w = np.eye(4) + puck_w[:2, 3] = puck_in[:2] + puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix() + + puck_r = np.linalg.inv(robot_frame) @ puck_w + puck_out = np.concatenate([puck_r[:2, 3], + R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]]) + + if type == 'vel': + rot_mat = robot_frame[:3, :3] + + vel_lin = np.array([*puck_in[:2], 0]) + vel_ang = np.array([0., 0., puck_in[2]]) + + vel_lin_r = rot_mat.T @ vel_lin + vel_ang_r = rot_mat.T @ vel_ang + + puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]]) + return puck_out + + def get_puck(self, obs): + """ + Getting the puck properties from the observations + Args: + obs: The current observation + + Returns: + ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel]) + + """ + puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"), + self.obs_helper.get_from_obs(obs, "puck_y_pos"), + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")]) + puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"), + self.obs_helper.get_from_obs(obs, "puck_y_vel"), + self.obs_helper.get_from_obs(obs, "puck_yaw_vel")]) + return puck_pos, puck_vel + + def get_ee(self): + raise NotImplementedError + + def get_joints(self, obs): + raise NotImplementedError diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py new file mode 100644 index 0000000..54a5252 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py @@ -0,0 +1,171 @@ +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase +from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics + +class AirHockeyDouble(AirHockeyBase): + """ + Base class for two agents air hockey tasks. + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params) + + self._compute_init_state() + + self.filter_ratio = 0.274 + self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"]) + self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"]) + + def _compute_init_state(self): + init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.]) + + success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'], + self.env_info['robot']['robot_data'], + np.array([0.65, 0., 0.1645]), + R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(), + initial_q=init_state) + + assert success is True + + def get_ee(self, robot=1): + """ + Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics + on the current joint position, this function exists to avoid redundant computations. + Args: + robot: ID of robot, either 1 or 2 + + Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z]) + """ + ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos") + + ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel") + + return ee_pos, ee_vel + + def get_joints(self, obs, agent=None): + """ + Get joint position and velocity of the robots + Can choose the robot with agent = 1 / 2. If agent is None both are returned + """ + if agent: + q_pos = np.zeros(7) + q_vel = np.zeros(7) + for i in range(7): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0] + else: + q_pos = np.zeros(14) + q_vel = np.zeros(14) + for i in range(7): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0] + + q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0] + q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0] + + return q_pos, q_vel + + def _create_observation(self, obs): + # Filter the joint velocity + q_pos, q_vel = self.get_joints(obs) + q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev + self.q_pos_prev = q_pos + self.q_vel_prev = q_vel_filter + + for i in range(7): + self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i] + self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7] + + # Wrap puck's rotation angle to [-pi, pi) + yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos") + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi + return obs + + def _modify_observation(self, obs): + new_obs = obs.copy() + + puck_pos, puck_vel = self.get_puck(new_obs) + + puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0]) + puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel') + + self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2] + + self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2] + + opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos') + self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \ + (np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3] + + puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1]) + puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel') + + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2] + + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2] + + opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos') + self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \ + (np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3] + + return new_obs + + def setup(self, obs): + for i in range(7): + self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i] + self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i] + + self.q_pos_prev[i] = self.init_state[i] + self.q_pos_prev[i + 7] = self.init_state[i] + self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0] + self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0] + + self.universal_joint_plugin.reset() + + super().setup(obs) + # Update body positions, needed for _compute_universal_joint + mujoco.mj_fwdPosition(self._model, self._data) + + def reward(self, state, action, next_state, absorbing): + return 0 + + +def main(): + env = AirHockeyDouble(viewer_params={'start_paused': True}) + env.reset() + env.render() + R = 0. + J = 0. + gamma = 1. + steps = 0 + while True: + action = np.zeros(14) + observation, reward, done, info = env.step(action) + env.render() + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() + + +if __name__ == '__main__': + main() diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py new file mode 100644 index 0000000..accfc83 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py @@ -0,0 +1,104 @@ +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase +from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics + +class AirHockeySingle(AirHockeyBase): + """ + Base class for single agent air hockey tasks. + + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params) + + self._compute_init_state() + + self.filter_ratio = 0.274 + self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"]) + self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"]) + + def _compute_init_state(self): + init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.]) + + success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'], + self.env_info['robot']['robot_data'], + np.array([0.65, 0., 0.1645]), + R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(), + initial_q=init_state) + + assert success is True + + def get_ee(self): + """ + Getting the ee properties from the current internal state. Can also be obtained via forward kinematics + on the current joint position, this function exists to avoid redundant computations. + + Returns: + ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z]) + """ + ee_pos = self._read_data("robot_1/ee_pos") + + ee_vel = self._read_data("robot_1/ee_vel") + + return ee_pos, ee_vel + + def get_joints(self, obs): + """ + Get joint position and velocity of the robot + """ + q_pos = np.zeros(7) + q_vel = np.zeros(7) + for i in range(7): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0] + + return q_pos, q_vel + + def _create_observation(self, obs): + # Filter the joint velocity + q_pos, q_vel = self.get_joints(obs) + q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev + self.q_pos_prev = q_pos + self.q_vel_prev = q_vel_filter + + for i in range(7): + self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i] + + yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos") + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi + return obs + + def _modify_observation(self, obs): + new_obs = obs.copy() + puck_pos, puck_vel = self.get_puck(new_obs) + + puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0]) + + puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel') + + self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2] + + self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2] + + return new_obs + + def setup(self, obs): + for i in range(7): + self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i] + self.q_pos_prev[i] = self.init_state[i] + self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0] + + self.universal_joint_plugin.reset() + + super().setup(obs) + + # Update body positions, needed for _compute_universal_joint + mujoco.mj_fwdPosition(self._model, self._data) diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py new file mode 100644 index 0000000..59386d5 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py @@ -0,0 +1,74 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle + + +class AirHockeyHit(AirHockeySingle): + """ + Class for the air hockey hitting task. + """ + def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}): + """ + Constructor + Args: + opponent_agent(Agent, None): Agent which controls the opponent + moving_init(bool, False): If true, initialize the puck with inital velocity. + """ + self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]]) # Table Frame + self.init_velocity_range = (0, 0.5) # Table Frame + + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + self.moving_init = moving_init + hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ + self.env_info['mallet']['radius'] * 2 + self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame + self.init_velocity_range = (0, 0.5) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + + + def setup(self, obs): + # Initial position of the puck + puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + + if self.moving_init: + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1) + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-2, 2) + + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super(AirHockeyHit, self).setup(obs) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + # Stop if the puck bounces back on the opponents wall + if puck_pos[0] > 0 and puck_vel[0] < 0: + return True + return super(AirHockeyHit, self).is_absorbing(obs) + + +if __name__ == '__main__': + env = AirHockeyHit(moving_init=True) + env.reset() + + steps = 0 + while True: + action = np.zeros(7) + + observation, reward, done, info = env.step(action) + env.render() + if done or steps > env.info.horizon: + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py new file mode 100644 index 0000000..49713a0 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py @@ -0,0 +1,111 @@ +import mujoco +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble + + +class AirHockeyTournament(AirHockeyDouble): + """ + Class for the air hockey tournament. Consists of 2 robots which should play against each other. + When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty. + If a player accumulates 3 penalties his score is reduced by 1. + """ + def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"): + self.agent_name = agent_name + self.opponent_name = opponent_name + + self.score = [0, 0] + self.faults = [0, 0] + self.start_side = None + + self.timer = 0 + + def custom_render_callback(viewport, context): + names = f"Agents \nScores \nFaults " + data = f"{self.agent_name} - {self.opponent_name}\n " + data += f"{self.score[0]} - {self.score[1]}\n " + data += f"{self.faults[0]} - {self.faults[1]}" + mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context) + + viewer_params["custom_render_callback"] = custom_render_callback + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ + self.env_info['mallet']['radius'] * 2 + + self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame + + def setup(self, obs): + if self.start_side == None: + self.start_side = np.random.choice([1, -1]) + self.prev_side = self.start_side + + # Initial position of the puck + puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] + + self._write_data("puck_x_pos", puck_pos[0] * self.start_side) + self._write_data("puck_y_pos", puck_pos[1]) + + self.prev_side = self.start_side + self.timer = 0 + + super(AirHockeyTournament, self).setup(obs) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + + # Puck stuck on one side for more than 15s + if np.sign(puck_pos[0]) == self.prev_side: + self.timer += self.dt + else: + self.prev_side *= -1 + self.timer = 0 + + if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15: + if self.prev_side == -1: + self.faults[0] += 1 + self.start_side = -1 + if self.faults[0] % 3 == 0: + self.score[1] += 1 + else: + self.faults[1] += 1 + self.start_side = 1 + if self.faults[1] % 3 == 0: + self.score[0] += 1 + + return True + + # Puck in Goal + if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0: + if puck_pos[0] > self.env_info['table']['length'] / 2: + self.score[0] += 1 + self.start_side = -1 + return True + + if puck_pos[0] < -self.env_info['table']['length'] / 2: + self.score[1] += 1 + self.start_side = 1 + return True + + # Puck stuck in the middle + if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025: + return True + return super(AirHockeyTournament, self).is_absorbing(obs) + + +if __name__ == '__main__': + env = AirHockeyTournament() + env.reset() + + steps = 0 + while True: + action = np.zeros(14) + steps += 1 + observation, reward, done, info = env.step(action) + env.render() + if done or steps > env.info.horizon: + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py new file mode 100644 index 0000000..db285f1 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py @@ -0,0 +1,3 @@ +from .env_base import AirHockeyBase +from .defend import AirHockeyDefend +from .hit import AirHockeyHit \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py new file mode 100644 index 0000000..1f1224f --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py @@ -0,0 +1,76 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle + + +class AirHockeyDefend(AirHockeySingle): + """ + Class for the air hockey defending task. + The agent should stop the puck at the line x=-0.6. + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + self.init_velocity_range = (1, 3) + + self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + def setup(self, state=None): + # Set initial puck parameters + puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0] + + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-0.5, 0.5) + + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-10, 10) + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super(AirHockeyDefend, self).setup(state) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, state): + puck_pos, puck_vel = self.get_puck(state) + # If puck is over the middle line and moving towards opponent + if puck_pos[0] > 0 and puck_vel[0] > 0: + return True + if np.linalg.norm(puck_vel[:2]) < 0.1: + return True + return super().is_absorbing(state) + + +if __name__ == '__main__': + env = AirHockeyDefend() + + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() + while True: + action = np.zeros(3) + observation, reward, done, info = env.step(action) + env.render() + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py new file mode 100644 index 0000000..d9747f3 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py @@ -0,0 +1,224 @@ +import os + +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path +from mushroom_rl.environments.mujoco import MuJoCo, ObservationType +from mushroom_rl.utils.spaces import Box + + +class AirHockeyBase(MuJoCo): + """ + Abstract class for all AirHockey Environments. + + """ + + def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1, + n_agents=1, viewer_params={}): + """ + Constructor. + + Args: + n_agents (int, 1): number of agent to be used in the environment (one or two) + """ + + self.n_agents = n_agents + + action_spec = [] + + observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + collision_spec = [("puck", ["puck"]), + ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]), + ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])] + + if 1 <= self.n_agents <= 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml") + + action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"] + + observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS), + ("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS), + ("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS), + ("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL), + ("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL), + ("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)] + + additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS), + ("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)] + + collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])] + + if self.n_agents == 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml") + + observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)] + + action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"] + # Add puck pos/vel again to transform into second agents frame + observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL), + ("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS), + ("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS), + ("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS), + ("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL), + ("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL), + ("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)] + + observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)] + + additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS), + ("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)] + + collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])] + else: + raise ValueError('n_agents should be 1 or 2') + + self.env_info = dict() + self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25} + self.env_info['puck'] = {"radius": 0.03165} + self.env_info['mallet'] = {"radius": 0.04815} + self.env_info['n_agents'] = self.n_agents + self.env_info['robot'] = { + "n_joints": 3, + "ee_desired_height": 0.1, + "joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3], + [np.pi / 2, np.pi / 2, np.pi * 2 / 3]]), + + "joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi], + [2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]), + "base_frame": [], + "control_frequency": 50, + } + + self.env_info['puck_pos_ids'] = [0, 1, 2] + self.env_info['puck_vel_ids'] = [3, 4, 5] + self.env_info['joint_pos_ids'] = [6, 7, 8] + self.env_info['joint_vel_ids'] = [9, 10, 11] + if self.n_agents == 2: + self.env_info['opponent_ee_ids'] = [13, 14, 15] + else: + self.env_info['opponent_ee_ids'] = [] + + max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents + + super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps, + n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params) + + # Construct the mujoco model at origin + robot_model = mujoco.MjModel.from_xml_path( + os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml")) + robot_model.body('planar_robot_1/base').pos = np.zeros(3) + robot_data = mujoco.MjData(robot_model) + + # Add env_info that requires mujoco models + self.env_info['dt'] = self.dt + self.env_info["robot"]["joint_pos_limit"] = np.array( + [self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T + self.env_info["robot"]["robot_model"] = robot_model + self.env_info["robot"]["robot_data"] = robot_data + self.env_info["rl_info"] = self.info + + frame_T = np.eye(4) + temp = np.zeros((9, 1)) + mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + if self.n_agents == 2: + mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + # Ids of the joint, which are controller by the action space + self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec] + + def _modify_mdp_info(self, mdp_info): + obs_low = np.array([0, -1, -np.pi, -20., -20., -100, + *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][0]]) + obs_high = np.array([3.02, 1, np.pi, 20., 20., 100, + *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][1]]) + if self.n_agents == 2: + obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]]) + obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]]) + mdp_info.observation_space = Box(obs_low, obs_high) + return mdp_info + + def is_absorbing(self, obs): + boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2 + puck_pos, puck_vel = self.get_puck(obs) + + if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100: + return True + return False + + @staticmethod + def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'): + if type == 'pose': + puck_w = np.eye(4) + puck_w[:2, 3] = puck_in[:2] + puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix() + + puck_r = np.linalg.inv(robot_frame) @ puck_w + puck_out = np.concatenate([puck_r[:2, 3], + R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]]) + + if type == 'vel': + rot_mat = robot_frame[:3, :3] + + vel_lin = np.array([*puck_in[:2], 0]) + vel_ang = np.array([0., 0., puck_in[2]]) + + vel_lin_r = rot_mat.T @ vel_lin + vel_ang_r = rot_mat.T @ vel_ang + + puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]]) + return puck_out + + def get_puck(self, obs): + """ + Getting the puck properties from the observations + Args: + obs: The current observation + + Returns: + ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel]) + + """ + puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"), + self.obs_helper.get_from_obs(obs, "puck_y_pos"), + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")]) + puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"), + self.obs_helper.get_from_obs(obs, "puck_y_vel"), + self.obs_helper.get_from_obs(obs, "puck_yaw_vel")]) + return puck_pos, puck_vel + + def get_ee(self): + raise NotImplementedError + + def get_joints(self, obs): + raise NotImplementedError \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py new file mode 100644 index 0000000..4dfa117 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py @@ -0,0 +1,91 @@ +import mujoco +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase + + +class AirHockeySingle(AirHockeyBase): + """ + Base class for single agent air hockey tasks. + + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + """ + Constructor. + + """ + self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414]) + super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params) + + self.filter_ratio = 0.274 + self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"]) + self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"]) + + def get_ee(self): + """ + Getting the ee properties from the current internal state. Can also be obtained via forward kinematics + on the current joint position, this function exists to avoid redundant computations. + + Returns: + ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z]) + """ + ee_pos = self._read_data("robot_1/ee_pos") + + ee_vel = self._read_data("robot_1/ee_vel") + + return ee_pos, ee_vel + + def get_joints(self, obs): + """ + Get joint position and velocity of the robot + """ + q_pos = np.zeros(3) + q_vel = np.zeros(3) + for i in range(3): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0] + + return q_pos, q_vel + + def _modify_observation(self, obs): + new_obs = obs.copy() + puck_pos, puck_vel = self.get_puck(obs) + + puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0]) + + puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel') + + self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2] + + self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2] + + return new_obs + + def setup(self, state=None): + for i in range(3): + self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i] + self.q_pos_prev[i] = self.init_state[i] + self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0] + + mujoco.mj_fwdPosition(self._model, self._data) + super().setup(state) + + def _create_observation(self, obs): + # Filter the joint velocity + q_pos, q_vel = self.get_joints(obs) + q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev + self.q_pos_prev = q_pos + self.q_vel_prev = q_vel_filter + + for i in range(3): + self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i] + + yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos") + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi + return obs \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py new file mode 100644 index 0000000..6124fba --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py @@ -0,0 +1,84 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle + +class AirHockeyHit(AirHockeySingle): + """ + Class for the air hockey hitting task. + """ + + def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}): + """ + Constructor + Args: + moving_init(bool, False): If true, initialize the puck with inital velocity. + """ + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + self.moving_init = moving_init + hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ + self.env_info['mallet']['radius'] * 2 + self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame + self.init_velocity_range = (0, 0.5) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + + def setup(self, state=None): + # Initial position of the puck + puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] + + # self.init_state = np.array([-0.9273, 0.9273, np.pi / 2]) + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + + if self.moving_init: + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1) + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-2, 2, 1) + + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super(AirHockeyHit, self).setup(state) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + # Stop if the puck bounces back on the opponents wall + if puck_pos[0] > 0 and puck_vel[0] < 0: + return True + return super(AirHockeyHit, self).is_absorbing(obs) + + +if __name__ == '__main__': + env = AirHockeyHit(moving_init=False) + + env.reset() + env.render() + R = 0. + J = 0. + gamma = 1. + steps = 0 + while True: + action = np.zeros(3) + + observation, reward, done, info = env.step(action) + env.render() + + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py new file mode 100644 index 0000000..64c297a --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py @@ -0,0 +1,2 @@ +from .kinematics import inverse_kinematics, forward_kinematics, jacobian +from .transformations import robot_to_world, world_to_robot diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py new file mode 100644 index 0000000..2156d48 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py @@ -0,0 +1,248 @@ +import mujoco +import numpy as np + + +def forward_kinematics(mj_model, mj_data, q, link="ee"): + """ + Compute the forward kinematics of the robots. + + IMPORTANT: + For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet + parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations + where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the + case this function will return the wrong values. + + Coordinate System: + All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the + base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed + coordinate system + + Args: + mj_model (mujoco.MjModel): + mujoco MjModel of the robot-only model + mj_data (mujoco.MjData): + mujoco MjData object generated from the model + q (np.array): + joint configuration for which the forward kinematics are computed + link (string, "ee"): + Link for which the forward kinematics is calculated. When using the iiwas the choices are + ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"] + + Returns + ------- + position: numpy.ndarray, (3,) + Position of the link in robot's base frame + orientation: numpy.ndarray, (3, 3) + Orientation of the link in robot's base frame + """ + + return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data) + + +def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"): + """ + Compute the inverse kinematics of the robots. + + IMPORTANT: + For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet + parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations + where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is + the case this function will return the wrong values. + + Coordinate System: + All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the + base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed + coordinate system + + Args: + mj_model (mujoco.MjModel): + mujoco MjModel of the robot-only model + mj_data (mujoco.MjData): + mujoco MjData object generated from the model + desired_position (numpy.ndarray, (3,)): + The desired position of the selected link. + desired_rotation (optional, numpy.array, (3,3)): + The desired rotation of the selected link. + initial_q (numpy.ndarray, None): + The initial configuration of the algorithm, if set to None it will take the initial configuration of the + mj_data. + link (str, "ee"): + Link for which the inverse kinematics is calculated. When using the iiwas the choices are + ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"] + """ + q_init = np.zeros(mj_model.nq) + if initial_q is None: + q_init = mj_data.qpos + else: + q_init[:initial_q.size] = initial_q + + q_l = mj_model.jnt_range[:, 0] + q_h = mj_model.jnt_range[:, 1] + lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2 + upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2 + + desired_quat = None + if desired_rotation is not None: + desired_quat = np.zeros(4) + mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1)) + + return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model, + mj_data, lower_limit, upper_limit) + + +def jacobian(mj_model, mj_data, q, link="ee"): + """ + Compute the Jacobian of the robots. + + IMPORTANT: + For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet + parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations + where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the + case this function will return the wrong values. + + Coordinate System: + All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the + base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed + coordinate system + + Args: + mj_model (mujoco.MjModel): + mujoco MjModel of the robot-only model + mj_data (mujoco.MjData): + mujoco MjData object generated from the model + q (numpy.ndarray): + joint configuration for which the forward kinematics are computed + link (string, "ee"): + Link for which the forward kinematics is calculated. When using the iiwas the choices are + ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"] + + Returns + ------- + numpy.ndarray, (6, num_joints): + The Jacobian matrix for the robot kinematics. + """ + return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data) + + +def link_to_xml_name(mj_model, link): + try: + mj_model.body('iiwa_1/base') + link_to_frame_idx = { + "1": "iiwa_1/link_1", + "2": "iiwa_1/link_2", + "3": "iiwa_1/link_3", + "4": "iiwa_1/link_4", + "5": "iiwa_1/link_5", + "6": "iiwa_1/link_6", + "7": "iiwa_1/link_7", + "ee": "iiwa_1/striker_joint_link", + } + except: + link_to_frame_idx = { + "1": "planar_robot_1/body_1", + "2": "planar_robot_1/body_2", + "3": "planar_robot_1/body_3", + "ee": "planar_robot_1/body_ee", + } + return link_to_frame_idx[link] + + +def _mujoco_fk(q, name, model, data): + data.qpos[:len(q)] = q + mujoco.mj_fwdPosition(model, data) + return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy() + + +def _mujoco_jac(q, name, model, data): + data.qpos[:len(q)] = q + dtype = data.qpos.dtype + jac = np.empty((6, model.nv), dtype=dtype) + jac_pos, jac_rot = jac[:3], jac[3:] + mujoco.mj_fwdPosition(model, data) + mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id) + return jac + + +def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit): + IT_MAX = 1000 + eps = 1e-4 + damp = 1e-3 + progress_thresh = 20.0 + max_update_norm = 0.1 + rot_weight = 1 + i = 0 + + dtype = data.qpos.dtype + + data.qpos = initial_q + + neg_x_quat = np.empty(4, dtype=dtype) + error_x_quat = np.empty(4, dtype=dtype) + + if desired_pos is not None and desired_quat is not None: + jac = np.empty((6, model.nv), dtype=dtype) + err = np.empty(6, dtype=dtype) + jac_pos, jac_rot = jac[:3], jac[3:] + err_pos, err_rot = err[:3], err[3:] + else: + jac = np.empty((3, model.nv), dtype=dtype) + err = np.empty(3, dtype=dtype) + if desired_pos is not None: + jac_pos, jac_rot = jac, None + err_pos, err_rot = err, None + elif desired_quat is not None: + jac_pos, jac_rot = None, jac + err_pos, err_rot = None, err + else: + raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics") + + while True: + # forward kinematics + mujoco.mj_fwdPosition(model, data) + + x_pos = data.body(name).xpos + x_quat = data.body(name).xquat + + error_norm = 0 + if desired_pos is not None: + err_pos[:] = desired_pos - x_pos + error_norm += np.linalg.norm(err_pos) + + if desired_quat is not None: + mujoco.mju_negQuat(neg_x_quat, x_quat) + mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat) + mujoco.mju_quat2Vel(err_rot, error_x_quat, 1) + error_norm += np.linalg.norm(err_rot) * rot_weight + + if error_norm < eps: + success = True + break + if i >= IT_MAX: + success = False + break + + mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id) + + hess_approx = jac.T.dot(jac) + joint_delta = jac.T.dot(err) + + hess_approx += np.eye(hess_approx.shape[0]) * damp + update_joints = np.linalg.solve(hess_approx, joint_delta) + + update_norm = np.linalg.norm(update_joints) + + # Check whether we are still making enough progress, and halt if not. + progress_criterion = error_norm / update_norm + if progress_criterion > progress_thresh: + success = False + break + + if update_norm > max_update_norm: + update_joints *= max_update_norm / update_norm + + mujoco.mj_integratePos(model, data.qpos, update_joints, 1) + data.qpos = np.clip(data.qpos, lower_limit, upper_limit) + i += 1 + q_cur = data.qpos.copy() + + return success, q_cur diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py new file mode 100644 index 0000000..6a6ee84 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py @@ -0,0 +1,65 @@ +import numpy as np + + +def robot_to_world(base_frame, translation, rotation=None): + """ + Transform position or rotation optional from the robot base frame to the world frame + + Args + ---- + base_frame: numpy.ndarray, (4,4) + The transformation matrix from the world to robot base frame + translation: ndarray, (3,) + The 3D position to be transformed + rotation: optional, ndarray, (3, 3) + The rotation in the matrix form to be transformed + + Returns + ------- + position: ndarray (3,) + The transformed 3D position + rotation: ndarray (3, 3) + The transformed rotation in the matrix form + + """ + + target = np.eye(4) + target[:len(translation), 3] = translation + if rotation is not None: + target[:3, :3] = rotation + + target_frame = base_frame @ target + + return target_frame[:len(translation), 3], target_frame[:3, :3] + + +def world_to_robot(base_frame, translation, rotation=None): + """ + Transfrom position and rotation (optional) from the world frame to the robot's base frame + + Args + ---- + base_frame: ndarray, (4,4) + The transformation matrix from the world to robot base frame + translation: ndarray, (3,) + The 3D position to be transformed + rotation: optional, ndarray, (3, 3) + The rotation in the matrix form to be tranformed + + Returns + ------- + position: ndarray, (3,) + The transformed 3D position + rotation: ndarray, (3, 3) + The transformed rotation in the matrix form + + """ + + target = np.eye(4) + target[:len(translation), 3] = translation + if rotation is not None: + target[:3, :3] = rotation + + target_frame = np.linalg.inv(base_frame) @ target + + return target_frame[:len(translation), 3], target_frame[:3, :3] diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py new file mode 100644 index 0000000..20b43ac --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py @@ -0,0 +1,120 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics + + +class UniversalJointPlugin: + def __init__(self, env_model, env_data, env_info): + self.env_info = env_info + self.env_model = env_model + self.env_data = env_data + self.Kp = 20 + self.Kd = 0.31 + + self.universal_joint_ids = [] + self.universal_joint_ctrl_ids = [] + self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id, + env_model.joint("iiwa_1/striker_joint_2").id] + self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id, + env_model.actuator("iiwa_1/striker_joint_2").id] + action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5", + "iiwa_1/joint_6", "iiwa_1/joint_7"] + + if self.env_info['n_agents'] >= 2: + self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id, + env_model.joint("iiwa_2/striker_joint_2").id] + self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id, + env_model.actuator("iiwa_2/striker_joint_2").id] + action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5", + "iiwa_2/joint_6", "iiwa_2/joint_7"] + + self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec] + + self.filter_ratio = 0.273 + self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents']) + self.u_joint_pos_prev = None + self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents']) + + def reset(self): + self.u_joint_pos_prev = None + self._control_universal_joint() + for i in range(self.env_info['n_agents']): + self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids] + + self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des + + def update(self): + self._control_universal_joint() + + def _control_universal_joint(self): + self._compute_universal_joint() + self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids] + self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \ + (1 - self.filter_ratio) * self.u_joint_vel_prev + + Kp = 4 + Kd = 0.31 + torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev + self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque + + def _compute_universal_joint(self): + for i in range(self.env_info['n_agents']): + q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]] + # Have to exclude the puck joints + pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'], + self.env_info['robot']['robot_data'], q) + + v_x = rot_mat[:, 0] + v_y = rot_mat[:, 1] + + # The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T + # and the current y-axis. (0, 0, 1).T x v_y + x_desired = np.array([-v_y[1], v_y[0], 0]) + + # Find the signed angle from the current to the desired x-axis around the y-axis + # https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane + q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired) + if self.u_joint_pos_prev is not None: + if q1 - self.u_joint_pos_prev[0] > np.pi: + q1 -= np.pi * 2 + elif q1 - self.u_joint_pos_prev[0] < -np.pi: + q1 += np.pi * 2 + + # Rotate the X axis by the calculated amount + w = np.array([[0, -v_y[2], v_y[1]], + [v_y[2], 0, -v_y[0]], + [-v_y[1], v_y[0], 0]]) + + r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1)) + v_x_rotated = r @ v_x + + # The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current + # x-axis, which is already rotated around y. The negative is there because the x-axis is flipped. + # -((0, 0, 1).T x v_x)) + y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0]) + + # Find the signed angle from the current to the desired y-axis around the new rotated x-axis + q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired) + + if self.u_joint_pos_prev is not None: + if q2 - self.u_joint_pos_prev[1] > np.pi: + q2 -= np.pi * 2 + elif q2 - self.u_joint_pos_prev[1] < -np.pi: + q2 += np.pi * 2 + + alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95) + alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95) + + if self.u_joint_pos_prev is None: + self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x]) + else: + self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum( + 10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]), + -np.pi * 0.01), np.pi * 0.01) + + self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x]) + + return self.u_joint_pos_des + + def _cross_3d(self, a, b): + return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]]) diff --git a/setup.py b/setup.py index 1daa568..53ee8f1 100644 --- a/setup.py +++ b/setup.py @@ -7,11 +7,12 @@ from setuptools import setup, find_packages # Environment-specific dependencies for dmc and metaworld extras = { 'dmc': ['shimmy[dm-control]', 'Shimmy==1.0.0'], - 'metaworld': ['metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'], + 'metaworld': ['mujoco==2.3.3', 'metaworld @ git+https://github.com/Farama-Foundation/Metaworld.git@d155d0051630bb365ea6a824e02c66c068947439#egg=metaworld'], 'box2d': ['gymnasium[box2d]>=0.26.0'], 'mujoco': ['mujoco==2.3.3', 'gymnasium[mujoco]>0.26.0'], 'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'], 'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"], + 'mushroom-rl': ['mushroom-rl'], } # All dependencies diff --git a/test/test_black_box.py b/test/test_black_box.py index 8cdc543..c1a760a 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -41,7 +41,7 @@ class ToyEnv(gym.Env): obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {} return obs, reward, terminated, truncated, info - def render(self, mode="human"): + def render(self): pass diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py index aad076b..849b36f 100644 --- a/test/test_fancy_registry.py +++ b/test/test_fancy_registry.py @@ -33,7 +33,7 @@ class ToyEnv(gym.Env): obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {} return obs, reward, terminated, truncated, info - def render(self, mode="human"): + def render(self): pass diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index c2edf42..db463c8 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -37,7 +37,7 @@ class ToyEnv(gym.Env): obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {} return obs, reward, terminated, truncated, info - def render(self, mode="human"): + def render(self): pass