diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 3747e30..8eebbee 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -2,8 +2,6 @@ name: Publish Python package to PyPI on: push: - branches: - - master tags: - '*' @@ -16,6 +14,17 @@ jobs: steps: - name: Check out code uses: actions/checkout@v4 + with: + fetch-depth: 0 # This fetches all history for all branches and tags + + - name: Verify tag is on master branch + run: | + TAG_IS_ON_MASTER=$(git branch -r --contains ${{ github.ref }} | grep 'origin/master') + if [ -z "$TAG_IS_ON_MASTER" ]; then + echo "Tag is not on the master branch. Cancelling the workflow." + exit 1 + fi + echo "Tag is on the master branch. Proceeding with the workflow." - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml index 8b8d003..64c1888 100644 --- a/.github/workflows/publish-to-test-pypi.yml +++ b/.github/workflows/publish-to-test-pypi.yml @@ -2,8 +2,6 @@ name: Publish Python package to TestPyPI on: push: - branches: - - master tags: - '*' @@ -16,6 +14,17 @@ jobs: steps: - name: Check out code uses: actions/checkout@v4 + with: + fetch-depth: 0 # This fetches all history for all branches and tags + + - name: Verify tag is on master branch + run: | + TAG_IS_ON_MASTER=$(git branch -r --contains ${{ github.ref }} | grep 'origin/master') + if [ -z "$TAG_IS_ON_MASTER" ]; then + echo "Tag is not on the master branch. Cancelling the workflow." + exit 1 + fi + echo "Tag is on the master branch. Proceeding with the workflow." - name: Set up Python uses: actions/setup-python@v4 diff --git a/README.md b/README.md index 1cedaba..085f501 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ While the overarching objective of MP environments remains the learning of an op ## Installation -We recommend installing `fancy_gym` into a virtual environments like [venv](https://docs.python.org/3/library/venv.html). 3rd party alternatives to venv like [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/) can also be used. +We recommend installing `fancy_gym` into a virtual environment as provided by [venv](https://docs.python.org/3/library/venv.html). 3rd party alternatives to venv like [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/) can also be used. ### Installation from PyPI (recommended) @@ -105,17 +105,16 @@ Regular step based environments added by Fancy Gym are added into the `fancy/` n import gymnasium as gym import fancy_gym -env = gym.make('fancy/Reacher5d-v0') -# or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS -# or env = gym.make('dm_control/ball_in_cup-catch-v0') -# or env = gym.make('Reacher-v2') +env = gym.make('fancy/Reacher5d-v0', render_mode='human') +# or env = gym.make('metaworld/reach-v2', render_mode='human') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS +# or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human') +# or env = gym.make('Reacher-v2', render_mode='human') observation = env.reset(seed=1) +env.render() for i in range(1000): action = env.action_space.sample() observation, reward, terminated, truncated, info = env.step(action) - if i % 5 == 0: - env.render() if terminated or truncated: observation, info = env.reset() @@ -149,17 +148,14 @@ Just keep in mind, calling `step()` executes a full trajectory. import gymnasium as gym import fancy_gym -env = gym.make('fancy_ProMP/Reacher5d-v0') -# or env = gym.make('metaworld_ProDMP/reach-v2') -# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0') -# or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_ NS - -# render() can be called once in the beginning with all necessary arguments. -# To turn it of again just call render() without any arguments. -env.render(mode='human') +env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human") +# or env = gym.make('metaworld_ProDMP/reach-v2', render_mode="human") +# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0', render_mode="human") +# or env = gym.make('gym_ProMP/Reacher-v2', render_mode="human") # mp versions of envs added directly by gymnasium are in the gym_ NS # This returns the context information, not the full state observation observation, info = env.reset(seed=1) +env.render() for i in range(5): action = env.action_space.sample() diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index a40c81f..62ed981 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -26,7 +26,7 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \ BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \ - MAX_EPISODE_STEPS_TABLE_TENNIS + MAX_EPISODE_STEPS_TABLE_TENNIS, MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper as MPWrapper_TableTennis from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper_Replan as MPWrapper_TableTennis_Replan from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper as MPWrapper_TableTennis_VelObs @@ -135,6 +135,19 @@ register( } ) +register( + id='fancy/HopperJumpMarkov-v0', + entry_point='fancy_gym.envs.mujoco:HopperJumpMarkovRew', + mp_wrapper=mujoco.hopper_jump.MPWrapper, + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "sparse": False, + "healthy_reward": 1.0, + "contact_weight": 0.0, + "height_weight": 3.0, + } +) + # TODO: Add [MPs] later when finished (old TODO I moved here during refactor) register( id='fancy/AntJump-v0', @@ -289,3 +302,53 @@ register( 'goal_switching_step': 99 } ) + +register( + id='TableTennisRndRobot-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisRandomInit', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, + kwargs={ + 'random_pos_scale': 0.1, + 'random_vel_scale': 0.0, + } +) + +register( + id='TableTennisMarkovian-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisMarkovian', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER, + kwargs={ + } +) + +register( + id='TableTennisRndRobotMarkovian-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisMarkovian', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER, + kwargs={ + 'random_pos_scale': 0.1, + 'random_vel_scale': 0.0, + } +) + +# Air Hockey environments +for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]: + register( + id=f'fancy/AirHockey-{env_mode}-v0', + entry_point='fancy_gym.envs.mujoco:AirHockeyEnv', + max_episode_steps=500, + add_mp_types=[], + kwargs={ + 'env_mode': env_mode + } + ) + +register( + id=f'fancy/AirHockey-tournament-v0', + entry_point='fancy_gym.envs.mujoco:AirHockeyEnv', + max_episode_steps=15000, + add_mp_types=[], + kwargs={ + 'env_mode': 'tournament' + } +) diff --git a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py index 4e5caaf..70b9bfd 100644 --- a/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py +++ b/fancy_gym/envs/classic_control/hole_reacher/hole_reacher.py @@ -178,9 +178,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): return False - def render(self, mode=None): - if mode==None: - mode = self.render_mode + def render(self): if self.fig is None: # Create base figure once on the beginning. Afterwards only update plt.ion() @@ -199,7 +197,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): self.fig.gca().set_title( f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") - if mode == "human": + if self.render_mode == "human": # arm self.line.set_data(self._joints[:, 0], self._joints[:, 1]) @@ -207,7 +205,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): self.fig.canvas.draw() self.fig.canvas.flush_events() - elif mode == "partial": + elif self.render_mode == "partial": if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: # Arm plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index 40a8153..9264b39 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -98,9 +98,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): def _check_collisions(self) -> bool: return self._check_self_collision() - def render(self, mode=None): # pragma: no cover - if mode==None: - mode = self.render_mode + def render(self): # pragma: no cover if self.fig is None: # Create base figure once on the beginning. Afterwards only update plt.ion() diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py index 932f50a..4003319 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -123,9 +123,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): def _check_collisions(self) -> bool: return self._check_self_collision() - def render(self, mode=None): - if mode==None: - mode = self.render_mode + def render(self): goal_pos = self._goal.T via_pos = self._via_point.T @@ -148,7 +146,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}") - if mode == "human": + if self.render_mode == "human": # goal if self._steps == 1: self.goal_point_plot.set_data(goal_pos[0], goal_pos[1]) @@ -160,7 +158,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): self.fig.canvas.draw() self.fig.canvas.flush_events() - elif mode == "partial": + elif self.render_mode == "partial": if self._steps == 1: # fig, ax = plt.subplots() # Add the patch to the Axes @@ -180,7 +178,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): plt.ylim([-1.1, lim]) plt.pause(0.01) - elif mode == "final": + elif self.render_mode == "final": if self._steps == 199 or self._is_collided: # fig, ax = plt.subplots() diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py index ff51711..4afdcce 100644 --- a/fancy_gym/envs/mujoco/__init__.py +++ b/fancy_gym/envs/mujoco/__init__.py @@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching + +try: + from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv +except ModuleNotFoundError: + print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)") \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/LICENSE b/fancy_gym/envs/mujoco/air_hockey/LICENSE new file mode 100644 index 0000000..27c6bbb --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/LICENSE @@ -0,0 +1,26 @@ +MIT License + +Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--- + +This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge). +The changes are mostly focused on adapting the provided environments to fancy_gym. diff --git a/fancy_gym/envs/mujoco/air_hockey/__init__.py b/fancy_gym/envs/mujoco/air_hockey/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py new file mode 100644 index 0000000..64972d8 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py @@ -0,0 +1,202 @@ +from copy import deepcopy +import numpy as np +from gymnasium import spaces + +import fancy_gym.envs.mujoco.air_hockey.constraints as constraints +from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position +from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world +from mushroom_rl.core import Environment + +class AirHockeyEnv(Environment): + metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50} + + def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, width=1920, height=1080, **kwargs): + """ + Environment Constructor + + Args: + env [string]: + The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament]. + interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are + polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity. + Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory + of position, velocity and acceleration of the shape (20, 3, n_joints) + """ + + env_dict = { + "tournament": position.IiwaPositionTournament, + + "7dof-hit": position.IiwaPositionHit, + "7dof-defend": position.IiwaPositionDefend, + + "3dof-hit": position.PlanarPositionHit, + "3dof-defend": position.PlanarPositionDefend, + + "7dof-hit-airhockit2023": position.IiwaPositionHitAirhocKIT2023, + "7dof-defend-airhockit2023": position.IiwaPositionDefendAirhocKIT2023, + } + + if env_mode not in env_dict: + raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!") + + if env_mode == "tournament" and type(interpolation_order) != tuple: + interpolation_order = (interpolation_order, interpolation_order) + + self.render_mode = render_mode + self.render_human_active = False + + # Determine headless mode based on render_mode + headless = self.render_mode == 'rgb_array' + + # Prepare viewer_params + viewer_params = kwargs.get('viewer_params', {}) + viewer_params.update({'headless': headless, 'width': width, 'height': height}) + kwargs['viewer_params'] = viewer_params + + self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs) + self.env_name = env_mode + self.env_info = self.base_env.env_info + + if hasattr(self.base_env, "wrapper_obs_space") and hasattr(self.base_env, "wrapper_act_space"): + self.observation_space = self.base_env.wrapper_obs_space + self.action_space = self.base_env.wrapper_act_space + else: + single_robot_obs_size = len(self.base_env.info.observation_space.low) + if env_mode == "tournament": + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64) + else: + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64) + robot_info = self.env_info["robot"] + + if env_mode != "tournament": + if interpolation_order in [1, 2]: + self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) + if interpolation_order in [3, 4, -1]: + self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) + if interpolation_order in [5]: + self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) + else: + acts = [None, None] + for i in range(2): + if interpolation_order[i] in [1, 2]: + acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) + if interpolation_order[i] in [3, 4, -1]: + acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) + if interpolation_order[i] in [5]: + acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]), + high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) + self.action_space = spaces.Tuple((acts[0], acts[1])) + + constraint_list = constraints.ConstraintList() + constraint_list.add(constraints.JointPositionConstraint(self.env_info)) + constraint_list.add(constraints.JointVelocityConstraint(self.env_info)) + constraint_list.add(constraints.EndEffectorConstraint(self.env_info)) + if "7dof" in self.env_name or self.env_name == "tournament": + constraint_list.add(constraints.LinkConstraint(self.env_info)) + + self.env_info['constraints'] = constraint_list + self.env_info['env_name'] = self.env_name + + super().__init__(self.base_env.info) + + def step(self, action): + obs, reward, done, info = self.base_env.step(action) + + if "tournament" in self.env_name: + info["constraints_value"] = list() + info["jerk"] = list() + for i in range(2): + obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)] + info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun( + obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']]))) + info["jerk"].append( + self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][ + 'n_joints']]) + + info["score"] = self.base_env.score + info["faults"] = self.base_env.faults + + else: + info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']], + obs[self.env_info['joint_vel_ids']])) + info["jerk"] = self.base_env.jerk + info["success"] = self.check_success(obs) + + if self.env_info['env_name'] == "tournament": + obs = np.array(np.split(obs, 2)) + + if self.render_human_active: + self.base_env.render() + + return obs, reward, done, False, info + + def render(self): + if self.render_mode == 'rgb_array': + return self.base_env.render(record = True) + elif self.render_mode == 'human': + self.render_human_active = True + self.base_env.render() + else: + raise ValueError(f"Unsupported render mode: '{self.render_mode}'") + + def reset(self, seed=None, options={}): + self.base_env.seed(seed) + obs = self.base_env.reset() + if self.env_info['env_name'] == "tournament": + obs = np.array(np.split(obs, 2)) + return obs, {} + + def check_success(self, obs): + puck_pos, puck_vel = self.base_env.get_puck(obs) + + puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos) + success = 0 + + if "hit" in self.env_name: + if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \ + np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0: + success = 1 + + elif "defend" in self.env_name: + if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1: + success = 1 + + elif "prepare" in self.env_name: + if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1: + success = 1 + return success + + @property + def unwrapped(self): + return self + + def close(self): + self.base_env.stop() + + +if __name__ == "__main__": + env = AirHockeyEnv(env_mode="7dof-hit") + env.reset() + + R = 0. + J = 0. + gamma = 1. + steps = 0 + while True: + action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3 + observation, reward, done, info = env.step(action) + env.render() + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py new file mode 100644 index 0000000..4486dce --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py @@ -0,0 +1 @@ +from .constraints import * diff --git a/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py new file mode 100644 index 0000000..399b35b --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py @@ -0,0 +1,212 @@ +import copy + +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian + + +class Constraint: + def __init__(self, env_info, output_dim, **kwargs): + """ + Constructor + + Args + ---- + env_info: dict + A dictionary contains information about the environment; + output_dim: int + The output dimension of the constraints. + **kwargs: dict + A dictionary contains agent related information. + """ + self._env_info = env_info + self._name = None + + self.output_dim = output_dim + + self._fun_value = np.zeros(self.output_dim) + self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"])) + self._q_prev = None + self._dq_prev = None + + @property + def name(self): + """ + The name of the constraints + + """ + return self._name + + def fun(self, q, dq): + """ + The function of the constraint. + + Args + ---- + q: numpy.ndarray, (num_joints,) + The joint position of the robot + dq: numpy.ndarray, (num_joints,) + The joint velocity of the robot + + Returns + ------- + numpy.ndarray, (out_dim,): + The value computed by the constraints function. + """ + if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev): + return self._fun_value + else: + self._jacobian(q, dq) + return self._fun(q, dq) + + def jacobian(self, q, dq): + """ + Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity. + + Args + ---- + q: ndarray, (num_joints,) + The joint position of the robot + dq: ndarray, (num_joints,) + The joint velocity of the robot + + Returns + ------- + numpy.ndarray, (dim_output, num_joints * 2): + The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot] + + """ + if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev): + return self._fun_value + else: + self._fun(q, dq) + return self._jacobian(q, dq) + + def _fun(self, q, dq): + raise NotImplementedError + + def _jacobian(self, q, dq): + raise NotImplementedError + + +class ConstraintList: + def __init__(self): + self.constraints = dict() + + def keys(self): + return self.constraints.keys() + + def get(self, key): + return self.constraints.get(key) + + def add(self, c): + self.constraints.update({c.name: c}) + + def delete(self, name): + del self.constraints[name] + + def fun(self, q, dq): + return {key: self.constraints[key].fun(q, dq) for key in self.constraints} + + def jacobian(self, q, dq): + return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints} + + +class JointPositionConstraint(Constraint): + def __init__(self, env_info, **kwargs): + super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs) + self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95 + self._name = 'joint_pos_constr' + + def _fun(self, q, dq): + self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1] + self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q + return self._fun_value + + def _jacobian(self, q, dq): + self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye( + self._env_info['robot']['n_joints']) + self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye( + self._env_info['robot']['n_joints']) + return self._jac_value + + +class JointVelocityConstraint(Constraint): + def __init__(self, env_info, **kwargs): + super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs) + self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95 + self._name = 'joint_vel_constr' + + def _fun(self, q, dq): + self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1] + self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq + return self._fun_value + + def _jacobian(self, q, dq): + self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye( + self._env_info['robot']['n_joints']) + self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye( + self._env_info['robot']['n_joints']) + return self._jac_value + + +class EndEffectorConstraint(Constraint): + def __init__(self, env_info, **kwargs): + # 1 Dimension on x direction: x > x_lb + # 2 Dimension on y direction: y > y_lb, y < y_ub + # 2 Dimension on z direction: z > z_lb, z < z_ub + super().__init__(env_info, output_dim=5, **kwargs) + self._name = "ee_constr" + tolerance = 0.02 + + self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model']) + self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data']) + + self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - ( + self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius']) + self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius']) + self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius']) + self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance + self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance + + def _fun(self, q, dq): + ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q) + self._fun_value = np.array([-ee_pos[0] + self.x_lb, + -ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub, + -ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub]) + return self._fun_value + + def _jacobian(self, q, dq): + jac = jacobian(self.robot_model, self.robot_data, q) + dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]]) + self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']] + return self._jac_value + + +class LinkConstraint(Constraint): + def __init__(self, env_info, **kwargs): + # 1 Dimension: wrist_z > minimum_height + # 2 Dimension: elbow_z > minimum_height + super().__init__(env_info, output_dim=2, **kwargs) + self._name = "link_constr" + + self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model']) + self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data']) + + self.z_lb = 0.25 + + def _fun(self, q, dq): + wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7") + elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4") + self._fun_value = np.array([-wrist_pos[2] + self.z_lb, + -elbow_pos[2] + self.z_lb]) + return self._fun_value + + def _jacobian(self, q, dq): + jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7") + jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4") + self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([ + -jac_wrist[2, :self._env_info['robot']['n_joints']], + -jac_elbow[2, :self._env_info['robot']['n_joints']], + ]) + return self._jac_value diff --git a/fancy_gym/envs/mujoco/air_hockey/data/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl new file mode 100644 index 0000000..b2dd80e Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl new file mode 100644 index 0000000..28ad13f Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_mallet_foam.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl new file mode 100644 index 0000000..4eaa051 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl new file mode 100644 index 0000000..2eb3709 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl new file mode 100644 index 0000000..2892530 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl new file mode 100644 index 0000000..dda08fe Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl new file mode 100644 index 0000000..6095451 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl new file mode 100644 index 0000000..6224ddb Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl new file mode 100644 index 0000000..359cb48 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml new file mode 100644 index 0000000..12f4b3d --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml new file mode 100644 index 0000000..044aae5 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml new file mode 100644 index 0000000..450ef8f --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml new file mode 100644 index 0000000..6d9ca5f --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_0.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl new file mode 100644 index 0000000..2deabe0 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_1.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl new file mode 100644 index 0000000..3187389 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_2.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl new file mode 100644 index 0000000..6b172ff Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_3.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl new file mode 100644 index 0000000..f1d761f Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_4.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl new file mode 100644 index 0000000..f2d1ec7 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_5.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl new file mode 100644 index 0000000..df26d8f Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_6.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl new file mode 100644 index 0000000..5e0eba9 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl new file mode 100644 index 0000000..ea2d9d2 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/collision/link_7_old.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl new file mode 100644 index 0000000..062ceb7 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_arm_collision.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl new file mode 100644 index 0000000..12af33a Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_collision.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl new file mode 100644 index 0000000..a9f30b7 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/collision/EE_mallet_short_collision.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl new file mode 100644 index 0000000..b2dd80e Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_arm.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl new file mode 100644 index 0000000..7595fb0 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl new file mode 100644 index 0000000..c03f7b2 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/striker/visual/EE_mallet_short.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_0.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl new file mode 100644 index 0000000..4eaa051 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_1.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl new file mode 100644 index 0000000..2eb3709 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_2.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl new file mode 100644 index 0000000..2892530 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_3.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl new file mode 100644 index 0000000..dda08fe Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_4.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl new file mode 100644 index 0000000..6095451 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_5.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl new file mode 100644 index 0000000..6224ddb Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_6.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl new file mode 100644 index 0000000..359cb48 Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl new file mode 100644 index 0000000..3d1760b Binary files /dev/null and b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/meshes/visual/link_7_old.stl differ diff --git a/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml new file mode 100644 index 0000000..9eb57a8 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py b/fancy_gym/envs/mujoco/air_hockey/data/planar/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml new file mode 100644 index 0000000..73fdfb4 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml new file mode 100644 index 0000000..a8d057c --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_1.xml @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml new file mode 100644 index 0000000..c14d75a --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/planar_robot_2.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml new file mode 100644 index 0000000..1f63668 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/data/table.xml b/fancy_gym/envs/mujoco/air_hockey/data/table.xml new file mode 100644 index 0000000..578af15 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/data/table.xml @@ -0,0 +1,89 @@ + + + diff --git a/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py new file mode 100644 index 0000000..4e069b6 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py @@ -0,0 +1,274 @@ +from collections import deque + +import mujoco +import numpy as np +import scipy + +from fancy_gym.envs.mujoco.air_hockey import seven_dof +from fancy_gym.envs.mujoco.air_hockey import three_dof + + +class PositionControl: + def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs): + """ + Mixin that adds position controller to mujoco environments. + + Args: + p_gain (float): Proportional controller gain + d_gain (float): Differential controller gain + i_gain (float): Integral controller gain + interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are + polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity. + Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory + of position, velocity and acceleration of the shape (20, 3, n_joints) + In the case of 2 agents it is a tuple, which describes the interpolation order for each agent + debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the + entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk. + """ + + self.debug = debug + + super(PositionControl, self).__init__(*args, **kwargs) + + self.robot_model = self.env_info['robot']['robot_model'] + self.robot_data = self.env_info['robot']['robot_data'] + + self.p_gain = np.array(p_gain * self.n_agents) + self.d_gain = np.array(d_gain * self.n_agents) + self.i_gain = np.array(i_gain * self.n_agents) + + self.prev_pos = np.zeros(len(self.actuator_joint_ids)) + self.prev_vel = np.zeros(len(self.actuator_joint_ids)) + self.prev_acc = np.zeros(len(self.actuator_joint_ids)) + self.i_error = np.zeros(len(self.actuator_joint_ids)) + self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids)) + + self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,) + + self._num_env_joints = len(self.actuator_joint_ids) + self.n_robot_joints = self.env_info['robot']["n_joints"] + + self.action_shape = [None] * self.n_agents + + for i in range(self.n_agents): + if self.interp_order[i] is None: + self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints) + elif self.interp_order[i] in [1, 2]: + self.action_shape[i] = (self.n_robot_joints,) + elif self.interp_order[i] in [3, 4, -1]: + self.action_shape[i] = (2, self.n_robot_joints) + elif self.interp_order[i] == 5: + self.action_shape[i] = (3, self.n_robot_joints) + + self.traj = None + + self.jerk = np.zeros(self._num_env_joints) + + if self.debug: + self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps) + + def _enforce_safety_limits(self, desired_pos, desired_vel): + # ROS safe controller + pos = self.prev_controller_cmd_pos + k = 20 + + joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents)) + joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents)) + + min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1]) + + max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1]) + + clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel) + + min_pos = pos + min_vel * self._timestep + max_pos = pos + max_vel * self._timestep + + clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos) + self.prev_controller_cmd_pos = clipped_pos.copy() + + return clipped_pos, clipped_vel + + def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel): + clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel) + + error = (clipped_pos - current_pos) + + self.i_error += self.i_gain * error * self._timestep + torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error + + # Acceleration FeedForward + tau_ff = np.zeros(self.robot_model.nv) + for i in range(self.n_agents): + robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i + self.robot_data.qpos = current_pos[robot_joint_ids] + self.robot_data.qvel = current_vel[robot_joint_ids] + acc_ff = desired_acc[robot_joint_ids] + mujoco.mj_forward(self.robot_model, self.robot_data) + + mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff) + torque[robot_joint_ids] += tau_ff + + # Gravity Compensation and Coriolis and Centrifugal force + torque[robot_joint_ids] += self.robot_data.qfrc_bias + + torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids], + self.robot_model.actuator_ctrlrange[:, 0]), + self.robot_model.actuator_ctrlrange[:, 1]) + + if self.debug: + self.controller_record.append( + np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk])) + + return torque + + def _interpolate_trajectory(self, interp_order, action, i=0): + tf = self.dt + prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] + prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] + prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] + if interp_order == 1 and action.ndim == 1: + coef = np.array([[1, 0], [1, tf]]) + results = np.vstack([prev_pos, action]) + elif interp_order == 2 and action.ndim == 1: + coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]]) + if np.linalg.norm(action - prev_pos) < 1e-3: + prev_vel = np.zeros_like(prev_vel) + results = np.vstack([prev_pos, action, prev_vel]) + elif interp_order == 3 and action.shape[0] == 2: + coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]]) + results = np.vstack([prev_pos, action[0], prev_vel, action[1]]) + elif interp_order == 4 and action.shape[0] == 2: + coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4], + [0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3], + [0, 0, 2, 0, 0]]) + results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc]) + elif interp_order == 5 and action.shape[0] == 3: + coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5], + [0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4], + [0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]]) + results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]]) + elif interp_order == -1: + # Interpolate position and velocity linearly + pass + else: + raise ValueError("Undefined interpolator order or the action dimension does not match!") + + if interp_order > 0: + A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints) + y = results.reshape(-2, order='F') + weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1) + weights_d = np.polynomial.polynomial.polyder(weights, axis=1) + weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1) + elif interp_order == -1: + weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T + weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T + weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1) + + if interp_order in [3, 4, 5]: + self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep + else: + self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf + + self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T) + self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T) + self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T) + + for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps): + q = np.polynomial.polynomial.polyval(t, weights.T) + qd = np.polynomial.polynomial.polyval(t, weights_d.T) + qdd = np.polynomial.polynomial.polyval(t, weights_dd.T) + yield q, qd, qdd + + def reset(self, obs=None): + obs = super(PositionControl, self).reset(obs) + self.prev_pos = self._data.qpos[self.actuator_joint_ids] + self.prev_vel = self._data.qvel[self.actuator_joint_ids] + self.prev_acc = np.zeros(len(self.actuator_joint_ids)) + self.i_error = np.zeros(len(self.actuator_joint_ids)) + self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids] + + if self.debug: + self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps) + return obs + + def _step_init(self, obs, action): + super(PositionControl, self)._step_init(obs, action) + + if self.n_agents == 1: + self.traj = self._create_traj(self.interp_order[0], action) + else: + def _traj(): + traj_1 = self._create_traj(self.interp_order[0], action[0], 0) + traj_2 = self._create_traj(self.interp_order[1], action[1], 1) + + for a1, a2 in zip(traj_1, traj_2): + yield np.hstack([a1, a2]) + + self.traj = _traj() + + def _create_traj(self, interp_order, action, i=0): + if interp_order is None: + return iter(action) + return self._interpolate_trajectory(interp_order, action, i) + + def _compute_action(self, obs, action): + cur_pos, cur_vel = self.get_joints(obs) + + desired_pos, desired_vel, desired_acc = next(self.traj) + + return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel) + + def _preprocess_action(self, action): + action = super(PositionControl, self)._preprocess_action(action) + + if self.n_agents == 1: + assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \ + f" {action.shape}" + else: + for i in range(self.n_agents): + assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \ + f" {action[i].shape}" + + return action + + +class PositionControlIIWA(PositionControl): + def __init__(self, *args, **kwargs): + p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.] + d_gain = [60, 80, 60, 30, 10, 1, 0.5] + i_gain = [0, 0, 0, 0, 0, 0, 0] + + super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs) + + +class PositionControlPlanar(PositionControl): + def __init__(self, *args, **kwargs): + p_gain = [960, 480, 240] + d_gain = [60, 20, 4] + i_gain = [0, 0, 0] + super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs) + + +class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit): + pass + + +class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend): + pass + + +class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit): + pass + +class IiwaPositionHitAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyHitAirhocKIT2023): + pass + +class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend): + pass + +class IiwaPositionDefendAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyDefendAirhocKIT2023): + pass + +class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament): + pass diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py new file mode 100644 index 0000000..1bcca25 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py @@ -0,0 +1,4 @@ +from .env_base import AirHockeyBase +from .tournament import AirHockeyTournament +from .hit import AirHockeyHit, AirHockeyHitAirhocKIT2023 +from .defend import AirHockeyDefend, AirHockeyDefendAirhocKIT2023 \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py new file mode 100644 index 0000000..18f7f41 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py @@ -0,0 +1,114 @@ +import numpy as np +from gymnasium import spaces + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle +from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian + +class AirhocKIT2023BaseEnv(AirHockeySingle): + def __init__(self, noise=False, **kwargs): + super().__init__(**kwargs) + obs_low = np.hstack([[-np.inf] * 37]) + obs_high = np.hstack([[np.inf] * 37]) + self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64) + self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6)) + self.noise = noise + + # We don't need puck yaw observations + def filter_obs(self, obs): + obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]]) + return obs + + def add_noise(self, obs): + if not self.noise: + return + obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3) + obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3) + + def reset(self): + self.last_acceleration = np.repeat(0., 6) + obs = super().reset() + self.add_noise(obs) + self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1] + self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1] + + self.last_planned_world_pos = self._fk(self.interp_pos) + obs = np.hstack([ + obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos + ]) + return self.filter_obs(obs) + + def step(self, action): + action /= 10 + + new_vel = self.interp_vel + action + + jerk = 2 * (new_vel - self.interp_vel - self.last_acceleration * 0.02) / (0.02 ** 2) + new_pos = self.interp_pos + self.interp_vel * 0.02 + (1/2) * self.last_acceleration * (0.02 ** 2) + (1/6) * jerk * (0.02 ** 3) + abs_action = np.vstack([np.hstack([new_pos, 0]), np.hstack([new_vel, 0])]) + + self.interp_pos = new_pos + self.interp_vel = new_vel + self.last_acceleration += jerk * 0.02 + + obs, rew, done, info = super().step(abs_action) + self.add_noise(obs) + self.last_planned_world_pos = self._fk(self.interp_pos) + obs = np.hstack([ + obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos + ]) + + fatal_rew = self.check_fatal(obs) + if fatal_rew != 0: + return self.filter_obs(obs), fatal_rew, True, info + + return self.filter_obs(obs), rew, done, info + + def check_constraints(self, constraint_values): + fatal_rew = 0 + + j_pos_constr = constraint_values["joint_pos_constr"] + if j_pos_constr.max() > 0: + fatal_rew += j_pos_constr.max() + + j_vel_constr = constraint_values["joint_vel_constr"] + if j_vel_constr.max() > 0: + fatal_rew += j_vel_constr.max() + + ee_constr = constraint_values["ee_constr"] + if ee_constr.max() > 0: + fatal_rew += ee_constr.max() + + link_constr = constraint_values["link_constr"] + if link_constr.max() > 0: + fatal_rew += link_constr.max() + + return -fatal_rew + + def check_fatal(self, obs): + fatal_rew = 0 + + q = obs[self.env_info["joint_pos_ids"]] + qd = obs[self.env_info["joint_vel_ids"]] + constraint_values_obs = self.env_info["constraints"].fun(q, qd) + fatal_rew += self.check_constraints(constraint_values_obs) + + return -fatal_rew + + def _fk(self, pos): + res, _ = forward_kinematics(self.env_info["robot"]["robot_model"], + self.env_info["robot"]["robot_data"], pos) + return res.astype(np.float32) + + def _ik(self, world_pos, init_q=None): + success, pos = inverse_kinematics(self.env_info["robot"]["robot_model"], + self.env_info["robot"]["robot_data"], + world_pos, + initial_q=init_q) + pos = pos.astype(np.float32) + assert success + return pos + + def _jacobian(self, pos): + return jacobian(self.env_info["robot"]["robot_model"], + self.env_info["robot"]["robot_data"], + pos).astype(np.float32) diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py new file mode 100644 index 0000000..4df9f60 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py @@ -0,0 +1,166 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle +from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv + + +class AirHockeyDefend(AirHockeySingle): + """ + Class for the air hockey defending task. + The agent should stop the puck at the line x=-0.6. + """ + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + self.init_velocity_range = (1, 3) + self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + def setup(self, obs): + puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0] + + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-0.5, 0.5) + + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-10, 10) + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super().setup(obs) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, state): + puck_pos, puck_vel = self.get_puck(state) + # If puck is over the middle line and moving towards opponent + if puck_pos[0] > 0 and puck_vel[0] > 0: + return True + if np.linalg.norm(puck_vel[:2]) < 0.1: + return True + return super().is_absorbing(state) + +class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv): + def __init__(self, gamma=0.99, horizon=200, viewer_params={}, **kwargs): + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs) + self.init_velocity_range = (1, 3) + self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]]) # Table Frame + self._setup_metrics() + + def setup(self, obs): + self._setup_metrics() + puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0] + + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-0.5, 0.5) + + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-10, 10) + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super().setup(obs) + + def reset(self, *args): + obs = super().reset() + self.hit_step_flag = False + self.hit_step = False + self.received_hit_reward = False + self.give_reward_next = False + return obs + + def _setup_metrics(self): + self.episode_steps = 0 + self.has_hit = False + + def _simulation_post_step(self): + if not self.has_hit: + self.has_hit = self._check_collision("puck", "robot_1/ee") + + super()._simulation_post_step() + + def _step_finalize(self): + self.episode_steps += 1 + return super()._step_finalize() + + def reward(self, state, action, next_state, absorbing): + puck_pos, puck_vel = self.get_puck(next_state) + ee_pos, _ = self.get_ee() + rew = 0.01 + if -0.7 < puck_pos[0] <= -0.2 and np.linalg.norm(puck_vel[:2]) < 0.1: + assert absorbing + rew += 70 + + if self.has_hit and not self.hit_step_flag: + self.hit_step_flag = True + self.hit_step = True + else: + self.hit_step = False + + f = lambda puck_vel: 30 + 100 * (100 ** (-0.25 * np.linalg.norm(puck_vel[:2]))) + if not self.give_reward_next and not self.received_hit_reward and self.hit_step and ee_pos[0] < puck_pos[0]: + self.hit_this_step = True + if np.linalg.norm(puck_vel[:2]) < 0.1: + return rew + f(puck_vel) + self.give_reward_next = True + return rew + + if not self.received_hit_reward and self.give_reward_next: + self.received_hit_reward = True + if puck_vel[0] >= -0.2: + return rew + f(puck_vel) + return rew + else: + return rew + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + # If puck is over the middle line and moving towards opponent + if puck_pos[0] > 0 and puck_vel[0] > 0: + return True + + if self.episode_steps == self._mdp_info.horizon: + return True + + if np.linalg.norm(puck_vel[:2]) < 0.1: + return True + return super().is_absorbing(obs) + + +if __name__ == '__main__': + env = AirHockeyDefend() + + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() + env.render() + while True: + # action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8 + action = np.zeros(7) + observation, reward, done, info = env.step(action) + env.render() + print(observation) + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py new file mode 100644 index 0000000..fef5394 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py @@ -0,0 +1,258 @@ +import os + +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path +from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin +from mushroom_rl.environments.mujoco import MuJoCo, ObservationType +from mushroom_rl.utils.spaces import Box + + +""" + Abstract class for all AirHockey Environments. + +""" +class AirHockeyBase(MuJoCo): + def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1, + n_agents=1, viewer_params={}): + + """ + Constructor. + + Args: + n_agents (int, 1): number of agent to be used in the environment (one or two) + """ + + self.n_agents = n_agents + + action_spec = [] + observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + collision_spec = [("puck", ["puck"]), + ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]), + ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])] + + if 1 <= self.n_agents <= 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml") + + action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5", + "iiwa_1/joint_6", "iiwa_1/joint_7"] + + observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS), + ("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS), + ("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS), + ("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS), + ("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS), + ("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS), + ("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS), + ("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL), + ("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL), + ("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL), + ("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL), + ("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL), + ("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL), + ("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)] + + additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS), + ("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS), + ("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL), + ("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL), + ("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS), + ("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL), + ("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)] + + collision_spec += [("robot_1/ee", ["iiwa_1/ee"])] + + if self.n_agents == 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml") + + observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)] + + action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", + "iiwa_2/joint_5", + "iiwa_2/joint_6", "iiwa_2/joint_7"] + + observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL), + ("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS), + ("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS), + ("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS), + ("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS), + ("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS), + ("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS), + ("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS), + ("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL), + ("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL), + ("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL), + ("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL), + ("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL), + ("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL), + ("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)] + + observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)] + + additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS), + ("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS), + ("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL), + ("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL), + ("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS), + ("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL), + ("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)] + + collision_spec += [("robot_2/ee", ["iiwa_2/ee"])] + else: + raise ValueError('n_agents should be 1 or 2') + + self.env_info = dict() + self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25} + self.env_info['puck'] = {"radius": 0.03165} + self.env_info['mallet'] = {"radius": 0.04815} + self.env_info['n_agents'] = self.n_agents + self.env_info['robot'] = { + "n_joints": 7, + "ee_desired_height": 0.1645, + "joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135], + [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi, + "joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135], + [85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10, + "base_frame": [], + "universal_height": 0.0645, + "control_frequency": 50, + } + + self.env_info['puck_pos_ids'] = [0, 1, 2] + self.env_info['puck_vel_ids'] = [3, 4, 5] + self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12] + self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19] + if self.n_agents == 2: + self.env_info['opponent_ee_ids'] = [20, 21, 22] + else: + self.env_info['opponent_ee_ids'] = [] + + max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents + + super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps, + n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params) + + # Construct the mujoco model at origin + robot_model = mujoco.MjModel.from_xml_path( + os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml")) + robot_model.body('iiwa_1/base').pos = np.zeros(3) + robot_data = mujoco.MjData(robot_model) + + # Add env_info that requires mujoco models + self.env_info['dt'] = self.dt + self.env_info["robot"]["joint_pos_limit"] = np.array( + [self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T + self.env_info["robot"]["robot_model"] = robot_model + self.env_info["robot"]["robot_data"] = robot_data + self.env_info["rl_info"] = self.info + + frame_T = np.eye(4) + temp = np.zeros((9, 1)) + mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("iiwa_1/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + if self.n_agents == 2: + mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("iiwa_2/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + # Ids of the joint, which are controller by the action space + self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec] + + self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info) + + def _modify_mdp_info(self, mdp_info): + obs_low = np.array([0, -1, -np.pi, -20., -20., -100, + *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][0]]) + obs_high = np.array([3.02, 1, np.pi, 20., 20., 100, + *np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][1]]) + if self.n_agents == 2: + obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]]) + obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]]) + mdp_info.observation_space = Box(obs_low, obs_high) + return mdp_info + + def _simulation_pre_step(self): + self.universal_joint_plugin.update() + + def is_absorbing(self, obs): + boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2 + puck_pos, puck_vel = self.get_puck(obs) + + if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100: + return True + return False + + @staticmethod + def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'): + if type == 'pose': + puck_w = np.eye(4) + puck_w[:2, 3] = puck_in[:2] + puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix() + + puck_r = np.linalg.inv(robot_frame) @ puck_w + puck_out = np.concatenate([puck_r[:2, 3], + R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]]) + + if type == 'vel': + rot_mat = robot_frame[:3, :3] + + vel_lin = np.array([*puck_in[:2], 0]) + vel_ang = np.array([0., 0., puck_in[2]]) + + vel_lin_r = rot_mat.T @ vel_lin + vel_ang_r = rot_mat.T @ vel_ang + + puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]]) + return puck_out + + def get_puck(self, obs): + """ + Getting the puck properties from the observations + Args: + obs: The current observation + + Returns: + ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel]) + + """ + puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"), + self.obs_helper.get_from_obs(obs, "puck_y_pos"), + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")]) + puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"), + self.obs_helper.get_from_obs(obs, "puck_y_vel"), + self.obs_helper.get_from_obs(obs, "puck_yaw_vel")]) + return puck_pos, puck_vel + + def get_ee(self): + raise NotImplementedError + + def get_joints(self, obs): + raise NotImplementedError diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py new file mode 100644 index 0000000..54a5252 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py @@ -0,0 +1,171 @@ +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase +from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics + +class AirHockeyDouble(AirHockeyBase): + """ + Base class for two agents air hockey tasks. + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params) + + self._compute_init_state() + + self.filter_ratio = 0.274 + self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"]) + self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"]) + + def _compute_init_state(self): + init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.]) + + success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'], + self.env_info['robot']['robot_data'], + np.array([0.65, 0., 0.1645]), + R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(), + initial_q=init_state) + + assert success is True + + def get_ee(self, robot=1): + """ + Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics + on the current joint position, this function exists to avoid redundant computations. + Args: + robot: ID of robot, either 1 or 2 + + Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z]) + """ + ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos") + + ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel") + + return ee_pos, ee_vel + + def get_joints(self, obs, agent=None): + """ + Get joint position and velocity of the robots + Can choose the robot with agent = 1 / 2. If agent is None both are returned + """ + if agent: + q_pos = np.zeros(7) + q_vel = np.zeros(7) + for i in range(7): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0] + else: + q_pos = np.zeros(14) + q_vel = np.zeros(14) + for i in range(7): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0] + + q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0] + q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0] + + return q_pos, q_vel + + def _create_observation(self, obs): + # Filter the joint velocity + q_pos, q_vel = self.get_joints(obs) + q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev + self.q_pos_prev = q_pos + self.q_vel_prev = q_vel_filter + + for i in range(7): + self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i] + self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7] + + # Wrap puck's rotation angle to [-pi, pi) + yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos") + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi + return obs + + def _modify_observation(self, obs): + new_obs = obs.copy() + + puck_pos, puck_vel = self.get_puck(new_obs) + + puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0]) + puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel') + + self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2] + + self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2] + + opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos') + self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \ + (np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3] + + puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1]) + puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel') + + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2] + + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1] + self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2] + + opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos') + self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \ + (np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3] + + return new_obs + + def setup(self, obs): + for i in range(7): + self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i] + self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i] + + self.q_pos_prev[i] = self.init_state[i] + self.q_pos_prev[i + 7] = self.init_state[i] + self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0] + self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0] + + self.universal_joint_plugin.reset() + + super().setup(obs) + # Update body positions, needed for _compute_universal_joint + mujoco.mj_fwdPosition(self._model, self._data) + + def reward(self, state, action, next_state, absorbing): + return 0 + + +def main(): + env = AirHockeyDouble(viewer_params={'start_paused': True}) + env.reset() + env.render() + R = 0. + J = 0. + gamma = 1. + steps = 0 + while True: + action = np.zeros(14) + observation, reward, done, info = env.step(action) + env.render() + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() + + +if __name__ == '__main__': + main() diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py new file mode 100644 index 0000000..accfc83 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py @@ -0,0 +1,104 @@ +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase +from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics + +class AirHockeySingle(AirHockeyBase): + """ + Base class for single agent air hockey tasks. + + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params) + + self._compute_init_state() + + self.filter_ratio = 0.274 + self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"]) + self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"]) + + def _compute_init_state(self): + init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.]) + + success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'], + self.env_info['robot']['robot_data'], + np.array([0.65, 0., 0.1645]), + R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(), + initial_q=init_state) + + assert success is True + + def get_ee(self): + """ + Getting the ee properties from the current internal state. Can also be obtained via forward kinematics + on the current joint position, this function exists to avoid redundant computations. + + Returns: + ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z]) + """ + ee_pos = self._read_data("robot_1/ee_pos") + + ee_vel = self._read_data("robot_1/ee_vel") + + return ee_pos, ee_vel + + def get_joints(self, obs): + """ + Get joint position and velocity of the robot + """ + q_pos = np.zeros(7) + q_vel = np.zeros(7) + for i in range(7): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0] + + return q_pos, q_vel + + def _create_observation(self, obs): + # Filter the joint velocity + q_pos, q_vel = self.get_joints(obs) + q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev + self.q_pos_prev = q_pos + self.q_vel_prev = q_vel_filter + + for i in range(7): + self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i] + + yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos") + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi + return obs + + def _modify_observation(self, obs): + new_obs = obs.copy() + puck_pos, puck_vel = self.get_puck(new_obs) + + puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0]) + + puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel') + + self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2] + + self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2] + + return new_obs + + def setup(self, obs): + for i in range(7): + self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i] + self.q_pos_prev[i] = self.init_state[i] + self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0] + + self.universal_joint_plugin.reset() + + super().setup(obs) + + # Update body positions, needed for _compute_universal_joint + mujoco.mj_fwdPosition(self._model, self._data) diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py new file mode 100644 index 0000000..87fc826 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py @@ -0,0 +1,159 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle +from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv + + +class AirHockeyHit(AirHockeySingle): + """ + Class for the air hockey hitting task. + """ + def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}): + """ + Constructor + Args: + opponent_agent(Agent, None): Agent which controls the opponent + moving_init(bool, False): If true, initialize the puck with inital velocity. + """ + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + self.moving_init = moving_init + hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ + self.env_info['mallet']['radius'] * 2 + self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame + self.init_velocity_range = (0, 0.5) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + + + def setup(self, obs): + # Initial position of the puck + puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + + if self.moving_init: + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1) + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-2, 2) + + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super(AirHockeyHit, self).setup(obs) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + # Stop if the puck bounces back on the opponents wall + if puck_pos[0] > 0 and puck_vel[0] < 0: + return True + return super(AirHockeyHit, self).is_absorbing(obs) + +class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv): + def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}, **kwargs): + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs) + + self.moving_init = moving_init + hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ + self.env_info['mallet']['radius'] * 2 + self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame + self.init_velocity_range = (0, 0.5) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + self._setup_metrics() + + def reset(self, *args): + obs = super().reset() + self.last_ee_pos = self.last_planned_world_pos.copy() + self.last_ee_pos[0] -= 1.51 + return obs + + def setup(self, obs): + self._setup_metrics() + puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + + if self.moving_init: + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1) + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-2, 2) + + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super().setup(obs) + + def _setup_metrics(self): + self.episode_steps = 0 + self.has_scored = False + + def _step_finalize(self): + cur_obs = self._create_observation(self.obs_helper._build_obs(self._data)) + puck_pos, _ = self.get_puck(cur_obs) # world frame [x, y, z] and [x', y', z'] + + if not self.has_scored: + boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2 + self.has_scored = np.any(np.abs(puck_pos[:2]) > boundary) and puck_pos[0] > 0 + + self.episode_steps += 1 + return super()._step_finalize() + + def reward(self, state, action, next_state, absorbing): + rew = 0 + puck_pos, puck_vel = self.get_puck(next_state) + ee_pos, _ = self.get_ee() + ee_vel = (ee_pos - self.last_ee_pos) / 0.02 + self.last_ee_pos = ee_pos + + if puck_vel[0] < 0.25 and puck_pos[0] < 0: + ee_puck_dir = (puck_pos - ee_pos)[:2] + ee_puck_dir = ee_puck_dir / np.linalg.norm(ee_puck_dir) + rew += 1 * max(0, np.dot(ee_puck_dir, ee_vel[:2])) + else: + rew += 10 * np.linalg.norm(puck_vel[:2]) + + if self.has_scored: + rew += 2000 + 5000 * np.linalg.norm(puck_vel[:2]) + + return rew + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + # Stop if the puck bounces back on the opponents wall + if puck_pos[0] > 0 and puck_vel[0] < 0: + return True + + if self.has_scored: + return True + + if self.episode_steps == self._mdp_info.horizon: + return True + + return super().is_absorbing(obs) + + +if __name__ == '__main__': + env = AirHockeyHit(moving_init=True) + env.reset() + + steps = 0 + while True: + action = np.zeros(7) + + observation, reward, done, info = env.step(action) + env.render() + if done or steps > env.info.horizon: + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py new file mode 100644 index 0000000..49713a0 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py @@ -0,0 +1,111 @@ +import mujoco +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble + + +class AirHockeyTournament(AirHockeyDouble): + """ + Class for the air hockey tournament. Consists of 2 robots which should play against each other. + When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty. + If a player accumulates 3 penalties his score is reduced by 1. + """ + def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"): + self.agent_name = agent_name + self.opponent_name = opponent_name + + self.score = [0, 0] + self.faults = [0, 0] + self.start_side = None + + self.timer = 0 + + def custom_render_callback(viewport, context): + names = f"Agents \nScores \nFaults " + data = f"{self.agent_name} - {self.opponent_name}\n " + data += f"{self.score[0]} - {self.score[1]}\n " + data += f"{self.faults[0]} - {self.faults[1]}" + mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context) + + viewer_params["custom_render_callback"] = custom_render_callback + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ + self.env_info['mallet']['radius'] * 2 + + self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame + + def setup(self, obs): + if self.start_side == None: + self.start_side = np.random.choice([1, -1]) + self.prev_side = self.start_side + + # Initial position of the puck + puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] + + self._write_data("puck_x_pos", puck_pos[0] * self.start_side) + self._write_data("puck_y_pos", puck_pos[1]) + + self.prev_side = self.start_side + self.timer = 0 + + super(AirHockeyTournament, self).setup(obs) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + + # Puck stuck on one side for more than 15s + if np.sign(puck_pos[0]) == self.prev_side: + self.timer += self.dt + else: + self.prev_side *= -1 + self.timer = 0 + + if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15: + if self.prev_side == -1: + self.faults[0] += 1 + self.start_side = -1 + if self.faults[0] % 3 == 0: + self.score[1] += 1 + else: + self.faults[1] += 1 + self.start_side = 1 + if self.faults[1] % 3 == 0: + self.score[0] += 1 + + return True + + # Puck in Goal + if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0: + if puck_pos[0] > self.env_info['table']['length'] / 2: + self.score[0] += 1 + self.start_side = -1 + return True + + if puck_pos[0] < -self.env_info['table']['length'] / 2: + self.score[1] += 1 + self.start_side = 1 + return True + + # Puck stuck in the middle + if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025: + return True + return super(AirHockeyTournament, self).is_absorbing(obs) + + +if __name__ == '__main__': + env = AirHockeyTournament() + env.reset() + + steps = 0 + while True: + action = np.zeros(14) + steps += 1 + observation, reward, done, info = env.step(action) + env.render() + if done or steps > env.info.horizon: + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py new file mode 100644 index 0000000..db285f1 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py @@ -0,0 +1,3 @@ +from .env_base import AirHockeyBase +from .defend import AirHockeyDefend +from .hit import AirHockeyHit \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py new file mode 100644 index 0000000..1f1224f --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py @@ -0,0 +1,76 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle + + +class AirHockeyDefend(AirHockeySingle): + """ + Class for the air hockey defending task. + The agent should stop the puck at the line x=-0.6. + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + self.init_velocity_range = (1, 3) + + self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + def setup(self, state=None): + # Set initial puck parameters + puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0] + + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-0.5, 0.5) + + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-10, 10) + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super(AirHockeyDefend, self).setup(state) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, state): + puck_pos, puck_vel = self.get_puck(state) + # If puck is over the middle line and moving towards opponent + if puck_pos[0] > 0 and puck_vel[0] > 0: + return True + if np.linalg.norm(puck_vel[:2]) < 0.1: + return True + return super().is_absorbing(state) + + +if __name__ == '__main__': + env = AirHockeyDefend() + + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() + while True: + action = np.zeros(3) + observation, reward, done, info = env.step(action) + env.render() + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py new file mode 100644 index 0000000..d9747f3 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py @@ -0,0 +1,224 @@ +import os + +import mujoco +import numpy as np +from scipy.spatial.transform import Rotation as R + +from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path +from mushroom_rl.environments.mujoco import MuJoCo, ObservationType +from mushroom_rl.utils.spaces import Box + + +class AirHockeyBase(MuJoCo): + """ + Abstract class for all AirHockey Environments. + + """ + + def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1, + n_agents=1, viewer_params={}): + """ + Constructor. + + Args: + n_agents (int, 1): number of agent to be used in the environment (one or two) + """ + + self.n_agents = n_agents + + action_spec = [] + + observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)] + + collision_spec = [("puck", ["puck"]), + ("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]), + ("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])] + + if 1 <= self.n_agents <= 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml") + + action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"] + + observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS), + ("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS), + ("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS), + ("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL), + ("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL), + ("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)] + + additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS), + ("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)] + + collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])] + + if self.n_agents == 2: + scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml") + + observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)] + + action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"] + # Add puck pos/vel again to transform into second agents frame + observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS), + ("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS), + ("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS), + ("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL), + ("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL), + ("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL), + ("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS), + ("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS), + ("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS), + ("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL), + ("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL), + ("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)] + + observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)] + + additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS), + ("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)] + + collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])] + else: + raise ValueError('n_agents should be 1 or 2') + + self.env_info = dict() + self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25} + self.env_info['puck'] = {"radius": 0.03165} + self.env_info['mallet'] = {"radius": 0.04815} + self.env_info['n_agents'] = self.n_agents + self.env_info['robot'] = { + "n_joints": 3, + "ee_desired_height": 0.1, + "joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3], + [np.pi / 2, np.pi / 2, np.pi * 2 / 3]]), + + "joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi], + [2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]), + "base_frame": [], + "control_frequency": 50, + } + + self.env_info['puck_pos_ids'] = [0, 1, 2] + self.env_info['puck_vel_ids'] = [3, 4, 5] + self.env_info['joint_pos_ids'] = [6, 7, 8] + self.env_info['joint_vel_ids'] = [9, 10, 11] + if self.n_agents == 2: + self.env_info['opponent_ee_ids'] = [13, 14, 15] + else: + self.env_info['opponent_ee_ids'] = [] + + max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents + + super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps, + n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params) + + # Construct the mujoco model at origin + robot_model = mujoco.MjModel.from_xml_path( + os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml")) + robot_model.body('planar_robot_1/base').pos = np.zeros(3) + robot_data = mujoco.MjData(robot_model) + + # Add env_info that requires mujoco models + self.env_info['dt'] = self.dt + self.env_info["robot"]["joint_pos_limit"] = np.array( + [self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T + self.env_info["robot"]["robot_model"] = robot_model + self.env_info["robot"]["robot_data"] = robot_data + self.env_info["rl_info"] = self.info + + frame_T = np.eye(4) + temp = np.zeros((9, 1)) + mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + if self.n_agents == 2: + mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat) + frame_T[:3, :3] = temp.reshape(3, 3) + frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos + self.env_info['robot']['base_frame'].append(frame_T.copy()) + + # Ids of the joint, which are controller by the action space + self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec] + + def _modify_mdp_info(self, mdp_info): + obs_low = np.array([0, -1, -np.pi, -20., -20., -100, + *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][0]]) + obs_high = np.array([3.02, 1, np.pi, 20., 20., 100, + *np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1] + for i in range(self.env_info['robot']['n_joints'])]), + *self.env_info['robot']['joint_vel_limit'][1]]) + if self.n_agents == 2: + obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]]) + obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]]) + mdp_info.observation_space = Box(obs_low, obs_high) + return mdp_info + + def is_absorbing(self, obs): + boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2 + puck_pos, puck_vel = self.get_puck(obs) + + if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100: + return True + return False + + @staticmethod + def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'): + if type == 'pose': + puck_w = np.eye(4) + puck_w[:2, 3] = puck_in[:2] + puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix() + + puck_r = np.linalg.inv(robot_frame) @ puck_w + puck_out = np.concatenate([puck_r[:2, 3], + R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]]) + + if type == 'vel': + rot_mat = robot_frame[:3, :3] + + vel_lin = np.array([*puck_in[:2], 0]) + vel_ang = np.array([0., 0., puck_in[2]]) + + vel_lin_r = rot_mat.T @ vel_lin + vel_ang_r = rot_mat.T @ vel_ang + + puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]]) + return puck_out + + def get_puck(self, obs): + """ + Getting the puck properties from the observations + Args: + obs: The current observation + + Returns: + ([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel]) + + """ + puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"), + self.obs_helper.get_from_obs(obs, "puck_y_pos"), + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")]) + puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"), + self.obs_helper.get_from_obs(obs, "puck_y_vel"), + self.obs_helper.get_from_obs(obs, "puck_yaw_vel")]) + return puck_pos, puck_vel + + def get_ee(self): + raise NotImplementedError + + def get_joints(self, obs): + raise NotImplementedError \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py new file mode 100644 index 0000000..4dfa117 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py @@ -0,0 +1,91 @@ +import mujoco +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase + + +class AirHockeySingle(AirHockeyBase): + """ + Base class for single agent air hockey tasks. + + """ + + def __init__(self, gamma=0.99, horizon=500, viewer_params={}): + + """ + Constructor. + + """ + self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414]) + super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params) + + self.filter_ratio = 0.274 + self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"]) + self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"]) + + def get_ee(self): + """ + Getting the ee properties from the current internal state. Can also be obtained via forward kinematics + on the current joint position, this function exists to avoid redundant computations. + + Returns: + ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z]) + """ + ee_pos = self._read_data("robot_1/ee_pos") + + ee_vel = self._read_data("robot_1/ee_vel") + + return ee_pos, ee_vel + + def get_joints(self, obs): + """ + Get joint position and velocity of the robot + """ + q_pos = np.zeros(3) + q_vel = np.zeros(3) + for i in range(3): + q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0] + q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0] + + return q_pos, q_vel + + def _modify_observation(self, obs): + new_obs = obs.copy() + puck_pos, puck_vel = self.get_puck(obs) + + puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0]) + + puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel') + + self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2] + + self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0] + self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1] + self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2] + + return new_obs + + def setup(self, state=None): + for i in range(3): + self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i] + self.q_pos_prev[i] = self.init_state[i] + self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0] + + mujoco.mj_fwdPosition(self._model, self._data) + super().setup(state) + + def _create_observation(self, obs): + # Filter the joint velocity + q_pos, q_vel = self.get_joints(obs) + q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev + self.q_pos_prev = q_pos + self.q_vel_prev = q_vel_filter + + for i in range(3): + self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i] + + yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos") + self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi + return obs \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py new file mode 100644 index 0000000..6124fba --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py @@ -0,0 +1,84 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle + +class AirHockeyHit(AirHockeySingle): + """ + Class for the air hockey hitting task. + """ + + def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}): + """ + Constructor + Args: + moving_init(bool, False): If true, initialize the puck with inital velocity. + """ + super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) + + self.moving_init = moving_init + hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ + self.env_info['mallet']['radius'] * 2 + self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame + self.init_velocity_range = (0, 0.5) # Table Frame + self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame + + def setup(self, state=None): + # Initial position of the puck + puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] + + # self.init_state = np.array([-0.9273, 0.9273, np.pi / 2]) + + self._write_data("puck_x_pos", puck_pos[0]) + self._write_data("puck_y_pos", puck_pos[1]) + + if self.moving_init: + lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) + angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1) + puck_vel = np.zeros(3) + puck_vel[0] = -np.cos(angle) * lin_vel + puck_vel[1] = np.sin(angle) * lin_vel + puck_vel[2] = np.random.uniform(-2, 2, 1) + + self._write_data("puck_x_vel", puck_vel[0]) + self._write_data("puck_y_vel", puck_vel[1]) + self._write_data("puck_yaw_vel", puck_vel[2]) + + super(AirHockeyHit, self).setup(state) + + def reward(self, state, action, next_state, absorbing): + return 0 + + def is_absorbing(self, obs): + puck_pos, puck_vel = self.get_puck(obs) + # Stop if the puck bounces back on the opponents wall + if puck_pos[0] > 0 and puck_vel[0] < 0: + return True + return super(AirHockeyHit, self).is_absorbing(obs) + + +if __name__ == '__main__': + env = AirHockeyHit(moving_init=False) + + env.reset() + env.render() + R = 0. + J = 0. + gamma = 1. + steps = 0 + while True: + action = np.zeros(3) + + observation, reward, done, info = env.step(action) + env.render() + + gamma *= env.info.gamma + J += gamma * reward + R += reward + steps += 1 + if done or steps > env.info.horizon: + print("J: ", J, " R: ", R) + R = 0. + J = 0. + gamma = 1. + steps = 0 + env.reset() diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py new file mode 100644 index 0000000..64c297a --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/__init__.py @@ -0,0 +1,2 @@ +from .kinematics import inverse_kinematics, forward_kinematics, jacobian +from .transformations import robot_to_world, world_to_robot diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py new file mode 100644 index 0000000..2156d48 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py @@ -0,0 +1,248 @@ +import mujoco +import numpy as np + + +def forward_kinematics(mj_model, mj_data, q, link="ee"): + """ + Compute the forward kinematics of the robots. + + IMPORTANT: + For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet + parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations + where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the + case this function will return the wrong values. + + Coordinate System: + All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the + base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed + coordinate system + + Args: + mj_model (mujoco.MjModel): + mujoco MjModel of the robot-only model + mj_data (mujoco.MjData): + mujoco MjData object generated from the model + q (np.array): + joint configuration for which the forward kinematics are computed + link (string, "ee"): + Link for which the forward kinematics is calculated. When using the iiwas the choices are + ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"] + + Returns + ------- + position: numpy.ndarray, (3,) + Position of the link in robot's base frame + orientation: numpy.ndarray, (3, 3) + Orientation of the link in robot's base frame + """ + + return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data) + + +def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"): + """ + Compute the inverse kinematics of the robots. + + IMPORTANT: + For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet + parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations + where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is + the case this function will return the wrong values. + + Coordinate System: + All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the + base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed + coordinate system + + Args: + mj_model (mujoco.MjModel): + mujoco MjModel of the robot-only model + mj_data (mujoco.MjData): + mujoco MjData object generated from the model + desired_position (numpy.ndarray, (3,)): + The desired position of the selected link. + desired_rotation (optional, numpy.array, (3,3)): + The desired rotation of the selected link. + initial_q (numpy.ndarray, None): + The initial configuration of the algorithm, if set to None it will take the initial configuration of the + mj_data. + link (str, "ee"): + Link for which the inverse kinematics is calculated. When using the iiwas the choices are + ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"] + """ + q_init = np.zeros(mj_model.nq) + if initial_q is None: + q_init = mj_data.qpos + else: + q_init[:initial_q.size] = initial_q + + q_l = mj_model.jnt_range[:, 0] + q_h = mj_model.jnt_range[:, 1] + lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2 + upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2 + + desired_quat = None + if desired_rotation is not None: + desired_quat = np.zeros(4) + mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1)) + + return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model, + mj_data, lower_limit, upper_limit) + + +def jacobian(mj_model, mj_data, q, link="ee"): + """ + Compute the Jacobian of the robots. + + IMPORTANT: + For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet + parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations + where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the + case this function will return the wrong values. + + Coordinate System: + All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the + base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed + coordinate system + + Args: + mj_model (mujoco.MjModel): + mujoco MjModel of the robot-only model + mj_data (mujoco.MjData): + mujoco MjData object generated from the model + q (numpy.ndarray): + joint configuration for which the forward kinematics are computed + link (string, "ee"): + Link for which the forward kinematics is calculated. When using the iiwas the choices are + ["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"] + + Returns + ------- + numpy.ndarray, (6, num_joints): + The Jacobian matrix for the robot kinematics. + """ + return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data) + + +def link_to_xml_name(mj_model, link): + try: + mj_model.body('iiwa_1/base') + link_to_frame_idx = { + "1": "iiwa_1/link_1", + "2": "iiwa_1/link_2", + "3": "iiwa_1/link_3", + "4": "iiwa_1/link_4", + "5": "iiwa_1/link_5", + "6": "iiwa_1/link_6", + "7": "iiwa_1/link_7", + "ee": "iiwa_1/striker_joint_link", + } + except: + link_to_frame_idx = { + "1": "planar_robot_1/body_1", + "2": "planar_robot_1/body_2", + "3": "planar_robot_1/body_3", + "ee": "planar_robot_1/body_ee", + } + return link_to_frame_idx[link] + + +def _mujoco_fk(q, name, model, data): + data.qpos[:len(q)] = q + mujoco.mj_fwdPosition(model, data) + return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy() + + +def _mujoco_jac(q, name, model, data): + data.qpos[:len(q)] = q + dtype = data.qpos.dtype + jac = np.empty((6, model.nv), dtype=dtype) + jac_pos, jac_rot = jac[:3], jac[3:] + mujoco.mj_fwdPosition(model, data) + mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id) + return jac + + +def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit): + IT_MAX = 1000 + eps = 1e-4 + damp = 1e-3 + progress_thresh = 20.0 + max_update_norm = 0.1 + rot_weight = 1 + i = 0 + + dtype = data.qpos.dtype + + data.qpos = initial_q + + neg_x_quat = np.empty(4, dtype=dtype) + error_x_quat = np.empty(4, dtype=dtype) + + if desired_pos is not None and desired_quat is not None: + jac = np.empty((6, model.nv), dtype=dtype) + err = np.empty(6, dtype=dtype) + jac_pos, jac_rot = jac[:3], jac[3:] + err_pos, err_rot = err[:3], err[3:] + else: + jac = np.empty((3, model.nv), dtype=dtype) + err = np.empty(3, dtype=dtype) + if desired_pos is not None: + jac_pos, jac_rot = jac, None + err_pos, err_rot = err, None + elif desired_quat is not None: + jac_pos, jac_rot = None, jac + err_pos, err_rot = None, err + else: + raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics") + + while True: + # forward kinematics + mujoco.mj_fwdPosition(model, data) + + x_pos = data.body(name).xpos + x_quat = data.body(name).xquat + + error_norm = 0 + if desired_pos is not None: + err_pos[:] = desired_pos - x_pos + error_norm += np.linalg.norm(err_pos) + + if desired_quat is not None: + mujoco.mju_negQuat(neg_x_quat, x_quat) + mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat) + mujoco.mju_quat2Vel(err_rot, error_x_quat, 1) + error_norm += np.linalg.norm(err_rot) * rot_weight + + if error_norm < eps: + success = True + break + if i >= IT_MAX: + success = False + break + + mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id) + + hess_approx = jac.T.dot(jac) + joint_delta = jac.T.dot(err) + + hess_approx += np.eye(hess_approx.shape[0]) * damp + update_joints = np.linalg.solve(hess_approx, joint_delta) + + update_norm = np.linalg.norm(update_joints) + + # Check whether we are still making enough progress, and halt if not. + progress_criterion = error_norm / update_norm + if progress_criterion > progress_thresh: + success = False + break + + if update_norm > max_update_norm: + update_joints *= max_update_norm / update_norm + + mujoco.mj_integratePos(model, data.qpos, update_joints, 1) + data.qpos = np.clip(data.qpos, lower_limit, upper_limit) + i += 1 + q_cur = data.qpos.copy() + + return success, q_cur diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py new file mode 100644 index 0000000..6a6ee84 --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/transformations.py @@ -0,0 +1,65 @@ +import numpy as np + + +def robot_to_world(base_frame, translation, rotation=None): + """ + Transform position or rotation optional from the robot base frame to the world frame + + Args + ---- + base_frame: numpy.ndarray, (4,4) + The transformation matrix from the world to robot base frame + translation: ndarray, (3,) + The 3D position to be transformed + rotation: optional, ndarray, (3, 3) + The rotation in the matrix form to be transformed + + Returns + ------- + position: ndarray (3,) + The transformed 3D position + rotation: ndarray (3, 3) + The transformed rotation in the matrix form + + """ + + target = np.eye(4) + target[:len(translation), 3] = translation + if rotation is not None: + target[:3, :3] = rotation + + target_frame = base_frame @ target + + return target_frame[:len(translation), 3], target_frame[:3, :3] + + +def world_to_robot(base_frame, translation, rotation=None): + """ + Transfrom position and rotation (optional) from the world frame to the robot's base frame + + Args + ---- + base_frame: ndarray, (4,4) + The transformation matrix from the world to robot base frame + translation: ndarray, (3,) + The 3D position to be transformed + rotation: optional, ndarray, (3, 3) + The rotation in the matrix form to be tranformed + + Returns + ------- + position: ndarray, (3,) + The transformed 3D position + rotation: ndarray, (3, 3) + The transformed rotation in the matrix form + + """ + + target = np.eye(4) + target[:len(translation), 3] = translation + if rotation is not None: + target[:3, :3] = rotation + + target_frame = np.linalg.inv(base_frame) @ target + + return target_frame[:len(translation), 3], target_frame[:3, :3] diff --git a/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py new file mode 100644 index 0000000..20b43ac --- /dev/null +++ b/fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py @@ -0,0 +1,120 @@ +import numpy as np + +from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics + + +class UniversalJointPlugin: + def __init__(self, env_model, env_data, env_info): + self.env_info = env_info + self.env_model = env_model + self.env_data = env_data + self.Kp = 20 + self.Kd = 0.31 + + self.universal_joint_ids = [] + self.universal_joint_ctrl_ids = [] + self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id, + env_model.joint("iiwa_1/striker_joint_2").id] + self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id, + env_model.actuator("iiwa_1/striker_joint_2").id] + action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5", + "iiwa_1/joint_6", "iiwa_1/joint_7"] + + if self.env_info['n_agents'] >= 2: + self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id, + env_model.joint("iiwa_2/striker_joint_2").id] + self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id, + env_model.actuator("iiwa_2/striker_joint_2").id] + action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5", + "iiwa_2/joint_6", "iiwa_2/joint_7"] + + self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec] + + self.filter_ratio = 0.273 + self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents']) + self.u_joint_pos_prev = None + self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents']) + + def reset(self): + self.u_joint_pos_prev = None + self._control_universal_joint() + for i in range(self.env_info['n_agents']): + self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids] + + self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des + + def update(self): + self._control_universal_joint() + + def _control_universal_joint(self): + self._compute_universal_joint() + self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids] + self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \ + (1 - self.filter_ratio) * self.u_joint_vel_prev + + Kp = 4 + Kd = 0.31 + torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev + self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque + + def _compute_universal_joint(self): + for i in range(self.env_info['n_agents']): + q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]] + # Have to exclude the puck joints + pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'], + self.env_info['robot']['robot_data'], q) + + v_x = rot_mat[:, 0] + v_y = rot_mat[:, 1] + + # The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T + # and the current y-axis. (0, 0, 1).T x v_y + x_desired = np.array([-v_y[1], v_y[0], 0]) + + # Find the signed angle from the current to the desired x-axis around the y-axis + # https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane + q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired) + if self.u_joint_pos_prev is not None: + if q1 - self.u_joint_pos_prev[0] > np.pi: + q1 -= np.pi * 2 + elif q1 - self.u_joint_pos_prev[0] < -np.pi: + q1 += np.pi * 2 + + # Rotate the X axis by the calculated amount + w = np.array([[0, -v_y[2], v_y[1]], + [v_y[2], 0, -v_y[0]], + [-v_y[1], v_y[0], 0]]) + + r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1)) + v_x_rotated = r @ v_x + + # The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current + # x-axis, which is already rotated around y. The negative is there because the x-axis is flipped. + # -((0, 0, 1).T x v_x)) + y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0]) + + # Find the signed angle from the current to the desired y-axis around the new rotated x-axis + q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired) + + if self.u_joint_pos_prev is not None: + if q2 - self.u_joint_pos_prev[1] > np.pi: + q2 -= np.pi * 2 + elif q2 - self.u_joint_pos_prev[1] < -np.pi: + q2 += np.pi * 2 + + alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95) + alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95) + + if self.u_joint_pos_prev is None: + self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x]) + else: + self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum( + 10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]), + -np.pi * 0.01), np.pi * 0.01) + + self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x]) + + return self.u_joint_pos_des + + def _cross_3d(self, a, b): + return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]]) diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 9b512a8..9ac03dd 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -6,6 +6,7 @@ from gymnasium.envs.mujoco import MujocoEnv from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat +from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import calculate_jerk_profile, calculate_mean_squared_jerk, calculate_dimensionless_jerk, calculate_maximum_jerk import mujoco @@ -110,6 +111,26 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): return obs, reward, terminated, truncated, infos + def calculate_smoothness_metrics(self, velocity_profile, dt): + """ + Calculates the smoothness metrics for the given velocity profile. + param velocity_profile: np.array + The array containing the movement velocity profile. + param dt: float + The sampling time interval of the data. + return mean_squared_jerk: float + The mean squared jerk estimate of the given movement's smoothness. + return maximum_jerk: float + The maximum jerk estimate of the given movement's smoothness. + return dimensionless_jerk: float + The dimensionless jerk estimate of the given movement's smoothness. + """ + jerk_profile = calculate_jerk_profile(velocity_profile, dt) + mean_squared_jerk = calculate_mean_squared_jerk(jerk_profile) + maximum_jerk = calculate_maximum_jerk(jerk_profile) + dimensionless_jerk = calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt) + return mean_squared_jerk, maximum_jerk, dimensionless_jerk + def reset_model(self): # rest box to initial position self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing) diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py index 0b1919e..d880421 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py @@ -51,3 +51,19 @@ def rot_to_quat(theta, axis): quant[0] = np.sin(theta / 2.) quant[1:] = np.cos(theta / 2.) * axis return quant + +def calculate_jerk_profile(velocity_profile, dt): + jerk = np.diff(velocity_profile, 2, 0) / pow(dt, 2) + return jerk + +def calculate_mean_squared_jerk(jerk_profile): + return np.mean(pow(jerk_profile, 2)) + +def calculate_maximum_jerk(jerk_profile): + return np.max(abs(jerk_profile)) + +def calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt): + sum_squared_jerk = np.sum(pow(jerk_profile, 2), 0) + duration = len(velocity_profile) * dt + peak_velocity = np.max(abs(velocity_profile), 0) + return np.mean(sum_squared_jerk * pow(duration, 3) / pow(peak_velocity, 2)) \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py index ae431ab..96dd3a3 100644 --- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py +++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py @@ -262,76 +262,100 @@ class HopperJumpEnv(HopperEnvCustomXML): return True return False -# # TODO is that needed? if so test it -# class HopperJumpStepEnv(HopperJumpEnv): -# -# def __init__(self, -# xml_file='hopper_jump.xml', -# forward_reward_weight=1.0, -# ctrl_cost_weight=1e-3, -# healthy_reward=1.0, -# height_weight=3, -# dist_weight=3, -# terminate_when_unhealthy=False, -# healthy_state_range=(-100.0, 100.0), -# healthy_z_range=(0.5, float('inf')), -# healthy_angle_range=(-float('inf'), float('inf')), -# reset_noise_scale=5e-3, -# exclude_current_positions_from_observation=False -# ): -# -# self._height_weight = height_weight -# self._dist_weight = dist_weight -# super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy, -# healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale, -# exclude_current_positions_from_observation) -# -# def step(self, action): -# self._steps += 1 -# -# self.do_simulation(action, self.frame_skip) -# -# height_after = self.get_body_com("torso")[2] -# site_pos_after = self.data.site('foot_site').xpos.copy() -# self.max_height = max(height_after, self.max_height) -# -# ctrl_cost = self.control_cost(action) -# healthy_reward = self.healthy_reward -# height_reward = self._height_weight * height_after -# goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0])) -# goal_dist_reward = -self._dist_weight * goal_dist -# dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward) -# -# rewards = dist_reward + healthy_reward -# costs = ctrl_cost -# done = False -# -# # This is only for logging the distance to goal when first having the contact -# has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False -# -# if not self.init_floor_contact: -# self.init_floor_contact = has_floor_contact -# if self.init_floor_contact and not self.has_left_floor: -# self.has_left_floor = not has_floor_contact -# if not self.contact_with_floor and self.has_left_floor: -# self.contact_with_floor = has_floor_contact -# -# if self.contact_dist is None and self.contact_with_floor: -# self.contact_dist = goal_dist -# -# ############################################################## -# -# observation = self._get_obs() -# reward = rewards - costs -# info = { -# 'height': height_after, -# 'x_pos': site_pos_after, -# 'max_height': copy.copy(self.max_height), -# 'goal': copy.copy(self.goal), -# 'goal_dist': goal_dist, -# 'height_rew': height_reward, -# 'healthy_reward': healthy_reward, -# 'healthy': copy.copy(self.is_healthy), -# 'contact_dist': copy.copy(self.contact_dist) or 0 -# } -# return observation, reward, done, info +class HopperJumpMarkovRew(HopperJumpEnv): + def step(self, action): + self._steps += 1 + + self.do_simulation(action, self.frame_skip) + + height_after = self.get_body_com("torso")[2] + # site_pos_after = self.data.get_site_xpos('foot_site') + site_pos_after = self.data.site('foot_site').xpos + self.max_height = max(height_after, self.max_height) + + has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False + + if not self.init_floor_contact: + self.init_floor_contact = has_floor_contact + if self.init_floor_contact and not self.has_left_floor: + self.has_left_floor = not has_floor_contact + if not self.contact_with_floor and self.has_left_floor: + self.contact_with_floor = has_floor_contact + + ctrl_cost = self.control_cost(action) + costs = ctrl_cost + terminated = False + truncated = False + + goal_dist = np.linalg.norm(site_pos_after - self.goal) + if self.contact_dist is None and self.contact_with_floor: + self.contact_dist = goal_dist + + rewards = 0 + if not self.sparse or (self.sparse and self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP): + healthy_reward = self.healthy_reward + distance_reward = -goal_dist * self._dist_weight + height_reward = (self.max_height if self.sparse else height_after) * self._height_weight + contact_reward = -(self.contact_dist or 5) * self._contact_weight + rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward) + + observation = self._get_obs() + + # While loop to simulate the process after jump to make the task Markovian + if self.sparse and self.has_left_floor: + while self._steps < MAX_EPISODE_STEPS_HOPPERJUMP: + # Simulate to the end of the episode + self._steps += 1 + + try: + self.do_simulation(np.zeros_like(action), self.frame_skip) + except Exception as e: + print(e) + + height_after = self.get_body_com("torso")[2] + #site_pos_after = self.data.get_site_xpos('foot_site') + site_pos_after = self.data.site('foot_site').xpos + self.max_height = max(height_after, self.max_height) + + has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False + + if not self.init_floor_contact: + self.init_floor_contact = has_floor_contact + if self.init_floor_contact and not self.has_left_floor: + self.has_left_floor = not has_floor_contact + if not self.contact_with_floor and self.has_left_floor: + self.contact_with_floor = has_floor_contact + + ctrl_cost = self.control_cost(action) + costs = ctrl_cost + done = False + + goal_dist = np.linalg.norm(site_pos_after - self.goal) + if self.contact_dist is None and self.contact_with_floor: + self.contact_dist = goal_dist + + rewards = 0 + + # Task has reached the end, compute the sparse reward + done = True + healthy_reward = self.healthy_reward + distance_reward = -goal_dist * self._dist_weight + height_reward = (self.max_height if self.sparse else height_after) * self._height_weight + contact_reward = -(self.contact_dist or 5) * self._contact_weight + rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward) + + reward = rewards - costs + info = dict( + height=height_after, + x_pos=site_pos_after, + max_height=self.max_height, + goal=self.goal[:1], + goal_dist=goal_dist, + height_rew=self.max_height, + healthy_reward=self.healthy_reward, + healthy=self.is_healthy, + contact_dist=self.contact_dist or 0, + num_steps=self._steps, + has_left_floor=self.has_left_floor + ) + return observation, reward, terminated, truncated, info diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 216ca1f..e085c61 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -5,11 +5,12 @@ from gymnasium import utils, spaces from gymnasium.envs.mujoco import MujocoEnv from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force -from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high +from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, jnt_vel_low, jnt_vel_high import mujoco MAX_EPISODE_STEPS_TABLE_TENNIS = 350 +MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER = 300 CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]]) CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65], @@ -18,6 +19,9 @@ CONTEXT_BOUNDS_SWICHING = np.array([[-1.0, -0.65, -1.0, 0.], [-0.2, 0.65, -0.2, 0.65]]) +DEFAULT_ROBOT_INIT_POS = np.array([0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 1.5]) +DEFAULT_ROBOT_INIT_VEL = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) + class TableTennisEnv(MujocoEnv, utils.EzPickle): """ 7 DoF table tennis environment @@ -34,7 +38,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, goal_switching_step: int = None, - enable_artificial_wind: bool = False, **kwargs): + enable_artificial_wind: bool = False, + random_pos_scale: float = 0.0, + random_vel_scale: float = 0.0, + ): utils.EzPickle.__init__(**locals()) self._steps = 0 @@ -48,6 +55,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self._id_set = False + # initial robot state + self._random_pos_scale = random_pos_scale + self._random_vel_scale = random_vel_scale + # reward calculation self.ball_landing_pos = None self._goal_pos = np.zeros(2) @@ -156,7 +167,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): "num_steps": self._steps, } - terminated, truncated = self._terminated, False + terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS return self._get_obs(), reward, terminated, truncated, info @@ -183,8 +194,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) - self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5]) - self.data.qvel[:7] = np.zeros(7) + robot_init_pos, robot_init_vel = self.get_initial_robot_state() + + self.data.qpos[:7] = robot_init_pos + self.data.qvel[:7] = robot_init_vel mujoco.mj_forward(self.model, self.data) @@ -257,7 +270,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs, tau_bound, delay_bound): obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj penalty = self._get_traj_invalid_penalty(action, pos_traj, tau_bound, delay_bound) - return obs, penalty, True, False, { + return obs, penalty, False, True, { "hit_ball": [False], "ball_returned_success": [False], "land_dist_error": [10.], @@ -274,6 +287,179 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): return False, pos_traj, vel_traj return True, pos_traj, vel_traj +class TableTennisMarkovian(TableTennisEnv): + def _get_reward2(self, hit_now, land_now): + + # Phase 1 not hit ball + if not self._hit_ball: + # Not hit ball + min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1)) + return 0.005 * (1 - np.tanh(min_r_b_dist**2)) + + # Phase 2 hit ball now + elif self._hit_ball and hit_now: + return 2 + + # Phase 3 hit ball already and not land yet + elif self._hit_ball and self._ball_landing_pos is None: + min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1)) + return 0.02 * (1 - np.tanh(min_b_des_b_dist**2)) + + # Phase 4 hit ball already and land now + elif self._hit_ball and land_now: + over_net_bonus = int(self._ball_landing_pos[0] < 0) + min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2]) + return 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus + + # Phase 5 hit ball already and land already + elif self._hit_ball and not land_now and self._ball_landing_pos is not None: + return 0 + + else: + raise NotImplementedError + + def _get_reward(self, terminated): + # if not terminated: + # return 0 + + min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1)) + if not self._hit_ball: + # Not hit ball + return 0.2 * (1 - np.tanh(min_r_b_dist**2)) + elif self._ball_landing_pos is None: + # Hit ball but not landing pos + min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1)) + return 2 + (1 - np.tanh(min_b_des_b_dist**2)) + else: + # Hit ball and land + min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2]) + over_net_bonus = int(self._ball_landing_pos[0] < 0) + return 2 + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus + + + def _get_traj_invalid_penalty(self, action, pos_traj, tau_bound, delay_bound): + tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) + delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) + violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) + violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0)) + invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ + violate_high_bound_error + violate_low_bound_error + return -invalid_penalty + + def get_invalid_traj_step_penalty(self, pos_traj): + violate_high_bound_error = ( + np.maximum(pos_traj - jnt_pos_high, 0).mean()) + violate_low_bound_error = ( + np.maximum(jnt_pos_low - pos_traj, 0).mean()) + invalid_penalty = violate_high_bound_error + violate_low_bound_error + + + def _update_game_state(self, action): + for _ in range(self.frame_skip): + if self._enable_artificial_wind: + self.data.qfrc_applied[-2] = self._artificial_force + try: + self.do_simulation(action, 1) + except Exception as e: + print("Simulation get unstable return with MujocoException: ", e) + unstable_simulation = True + self._terminated = True + break + + # Update game state + if not self._terminated: + if not self._hit_ball: + self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \ + self._contact_checker(self._ball_contact_id, self._bat_back_id) + if not self._hit_ball: + ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id) + if ball_land_on_floor_no_hit: + self._ball_landing_pos = self.data.body("target_ball").xpos.copy() + self._terminated = True + if self._hit_ball and not self._ball_contact_after_hit: + if self._contact_checker(self._ball_contact_id, self._floor_contact_id): # first check contact with floor + self._ball_contact_after_hit = True + self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy() + self._terminated = True + elif self._contact_checker(self._ball_contact_id, self._table_contact_id): # second check contact with table + self._ball_contact_after_hit = True + self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy() + if self._ball_landing_pos[0] < 0.: # ball lands on the opponent side + self._ball_return_success = True + self._terminated = True + + # update ball trajectory & racket trajectory + self._ball_traj.append(self.data.body("target_ball").xpos.copy()) + self._racket_traj.append(self.data.geom("bat").xpos.copy()) + + def ball_racket_contact(self): + return self._contact_checker(self._ball_contact_id, self._bat_front_id) or \ + self._contact_checker(self._ball_contact_id, self._bat_back_id) + + def step(self, action): + if not self._id_set: + self._set_ids() + + unstable_simulation = False + hit_already = self._hit_ball + if self._steps == self._goal_switching_step and self.np_random.uniform() < 0.5: + new_goal_pos = self._generate_goal_pos(random=True) + new_goal_pos[1] = -new_goal_pos[1] + self._goal_pos = new_goal_pos + self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) + mujoco.mj_forward(self.model, self.data) + + self._update_game_state(action) + self._steps += 1 + + obs = self._get_obs() + + # Compute reward + if unstable_simulation: + reward = -25 + else: + # reward = self._get_reward(self._terminated) + # hit_now = not hit_already and self._hit_ball + hit_finish = self._hit_ball and not self.ball_racket_contact() + + if hit_finish: + # Clean the ball and racket traj before hit + self._ball_traj = [] + self._racket_traj = [] + + # Simulate the rest of the traj + reward = self._get_reward2(True, False) + while self._steps < MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER: + land_already = self._ball_landing_pos is not None + self._update_game_state(np.zeros_like(action)) + self._steps += 1 + + land_now = (not land_already + and self._ball_landing_pos is not None) + temp_reward = self._get_reward2(False, land_now) + # print(temp_reward) + reward += temp_reward + + # Uncomment the line below to visualize the sim after hit + # self.render(mode="human") + else: + reward = self._get_reward2(False, False) + + # Update ball landing error + land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \ + if self._ball_landing_pos is not None else 10. + + info = { + "hit_ball": self._hit_ball, + "ball_returned_success": self._ball_return_success, + "land_dist_error": land_dist_err, + "is_success": self._ball_return_success and land_dist_err < 0.2, + "num_steps": self._steps, + } + + terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER + + return obs, reward, terminated, truncated, info class TableTennisWind(TableTennisEnv): def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, **kwargs): @@ -296,7 +482,16 @@ class TableTennisWind(TableTennisEnv): ]) return obs - class TableTennisGoalSwitching(TableTennisEnv): def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99, **kwargs): super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step, **kwargs) + + +class TableTennisRandomInit(TableTennisEnv): + def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, + random_pos_scale: float = 1.0, + random_vel_scale: float = 0.0): + super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, + random_pos_scale=random_pos_scale, + random_vel_scale=random_vel_scale) + diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py index 4d9a2d2..c548736 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py @@ -2,8 +2,9 @@ import numpy as np jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]) jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]) -delay_bound = [0.05, 0.15] -tau_bound = [0.5, 1.5] + +jnt_vel_low = np.ones(7) * -7 +jnt_vel_high = np.ones(7) * 7 net_height = 0.1 table_height = 0.77 @@ -48,4 +49,4 @@ def magnus_force(top_spin=0.0, side_spin=0.0, v_ball=np.zeros(3), v_wind=np.zero C_l = 4.68 * 10e-4 - 2.0984 * 10e-5 * (np.linalg.norm(v_ball) - 50) # Lift force coeffient or simply 1.23 w = np.array([0.0, top_spin, side_spin]) # Angular velocity of ball f_m = 0.5 * rho * A * C_l * np.linalg.norm(v_ball-v_wind) * np.cross(w, v_ball-v_wind) - return f_m + return f_m \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index c1be41e..acefa88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fancy_gym" -version = "0.1.0" +version = "0.1.4" description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches." readme = "README.md" authors = [ diff --git a/setup.py b/setup.py index 62b950c..2bd077d 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ extras = { 'box2d': ['gymnasium[box2d]>=0.26.0'], 'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'], 'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"], + 'mushroom-rl': ['mushroom-rl'], } # All dependencies diff --git a/test/test_black_box.py b/test/test_black_box.py index 419691f..fc4f0bf 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -41,7 +41,7 @@ class ToyEnv(gym.Env): obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {} return obs, reward, terminated, truncated, info - def render(self, mode="human"): + def render(self): pass diff --git a/test/test_fancy_registry.py b/test/test_fancy_registry.py index aad076b..849b36f 100644 --- a/test/test_fancy_registry.py +++ b/test/test_fancy_registry.py @@ -33,7 +33,7 @@ class ToyEnv(gym.Env): obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {} return obs, reward, terminated, truncated, info - def render(self, mode="human"): + def render(self): pass diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index c2edf42..db463c8 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -37,7 +37,7 @@ class ToyEnv(gym.Env): obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {} return obs, reward, terminated, truncated, info - def render(self, mode="human"): + def render(self): pass