Merge branch 'bruce_port_envs' into future
This commit is contained in:
commit
fbbe312bce
13
.github/workflows/publish-to-pypi.yml
vendored
13
.github/workflows/publish-to-pypi.yml
vendored
@ -2,8 +2,6 @@ name: Publish Python package to PyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
@ -16,6 +14,17 @@ jobs:
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # This fetches all history for all branches and tags
|
||||
|
||||
- name: Verify tag is on master branch
|
||||
run: |
|
||||
TAG_IS_ON_MASTER=$(git branch -r --contains ${{ github.ref }} | grep 'origin/master')
|
||||
if [ -z "$TAG_IS_ON_MASTER" ]; then
|
||||
echo "Tag is not on the master branch. Cancelling the workflow."
|
||||
exit 1
|
||||
fi
|
||||
echo "Tag is on the master branch. Proceeding with the workflow."
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
|
13
.github/workflows/publish-to-test-pypi.yml
vendored
13
.github/workflows/publish-to-test-pypi.yml
vendored
@ -2,8 +2,6 @@ name: Publish Python package to TestPyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
@ -16,6 +14,17 @@ jobs:
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # This fetches all history for all branches and tags
|
||||
|
||||
- name: Verify tag is on master branch
|
||||
run: |
|
||||
TAG_IS_ON_MASTER=$(git branch -r --contains ${{ github.ref }} | grep 'origin/master')
|
||||
if [ -z "$TAG_IS_ON_MASTER" ]; then
|
||||
echo "Tag is not on the master branch. Cancelling the workflow."
|
||||
exit 1
|
||||
fi
|
||||
echo "Tag is on the master branch. Proceeding with the workflow."
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
|
26
README.md
26
README.md
@ -33,7 +33,7 @@ While the overarching objective of MP environments remains the learning of an op
|
||||
|
||||
## Installation
|
||||
|
||||
We recommend installing `fancy_gym` into a virtual environments like [venv](https://docs.python.org/3/library/venv.html). 3rd party alternatives to venv like [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/) can also be used.
|
||||
We recommend installing `fancy_gym` into a virtual environment as provided by [venv](https://docs.python.org/3/library/venv.html). 3rd party alternatives to venv like [Poetry](https://python-poetry.org/) or [Conda](https://docs.conda.io/en/latest/) can also be used.
|
||||
|
||||
### Installation from PyPI (recommended)
|
||||
|
||||
@ -105,17 +105,16 @@ Regular step based environments added by Fancy Gym are added into the `fancy/` n
|
||||
import gymnasium as gym
|
||||
import fancy_gym
|
||||
|
||||
env = gym.make('fancy/Reacher5d-v0')
|
||||
# or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
|
||||
# or env = gym.make('dm_control/ball_in_cup-catch-v0')
|
||||
# or env = gym.make('Reacher-v2')
|
||||
env = gym.make('fancy/Reacher5d-v0', render_mode='human')
|
||||
# or env = gym.make('metaworld/reach-v2', render_mode='human') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS
|
||||
# or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human')
|
||||
# or env = gym.make('Reacher-v2', render_mode='human')
|
||||
observation = env.reset(seed=1)
|
||||
env.render()
|
||||
|
||||
for i in range(1000):
|
||||
action = env.action_space.sample()
|
||||
observation, reward, terminated, truncated, info = env.step(action)
|
||||
if i % 5 == 0:
|
||||
env.render()
|
||||
|
||||
if terminated or truncated:
|
||||
observation, info = env.reset()
|
||||
@ -149,17 +148,14 @@ Just keep in mind, calling `step()` executes a full trajectory.
|
||||
import gymnasium as gym
|
||||
import fancy_gym
|
||||
|
||||
env = gym.make('fancy_ProMP/Reacher5d-v0')
|
||||
# or env = gym.make('metaworld_ProDMP/reach-v2')
|
||||
# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0')
|
||||
# or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS
|
||||
|
||||
# render() can be called once in the beginning with all necessary arguments.
|
||||
# To turn it of again just call render() without any arguments.
|
||||
env.render(mode='human')
|
||||
env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human")
|
||||
# or env = gym.make('metaworld_ProDMP/reach-v2', render_mode="human")
|
||||
# or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0', render_mode="human")
|
||||
# or env = gym.make('gym_ProMP/Reacher-v2', render_mode="human") # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS
|
||||
|
||||
# This returns the context information, not the full state observation
|
||||
observation, info = env.reset(seed=1)
|
||||
env.render()
|
||||
|
||||
for i in range(5):
|
||||
action = env.action_space.sample()
|
||||
|
@ -26,7 +26,7 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
|
||||
from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \
|
||||
BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING
|
||||
from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \
|
||||
MAX_EPISODE_STEPS_TABLE_TENNIS
|
||||
MAX_EPISODE_STEPS_TABLE_TENNIS, MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER
|
||||
from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper as MPWrapper_TableTennis
|
||||
from .mujoco.table_tennis.mp_wrapper import TT_MPWrapper_Replan as MPWrapper_TableTennis_Replan
|
||||
from .mujoco.table_tennis.mp_wrapper import TTVelObs_MPWrapper as MPWrapper_TableTennis_VelObs
|
||||
@ -135,6 +135,19 @@ register(
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id='fancy/HopperJumpMarkov-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperJumpMarkovRew',
|
||||
mp_wrapper=mujoco.hopper_jump.MPWrapper,
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||
kwargs={
|
||||
"sparse": False,
|
||||
"healthy_reward": 1.0,
|
||||
"contact_weight": 0.0,
|
||||
"height_weight": 3.0,
|
||||
}
|
||||
)
|
||||
|
||||
# TODO: Add [MPs] later when finished (old TODO I moved here during refactor)
|
||||
register(
|
||||
id='fancy/AntJump-v0',
|
||||
@ -289,3 +302,53 @@ register(
|
||||
'goal_switching_step': 99
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id='TableTennisRndRobot-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:TableTennisRandomInit',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS,
|
||||
kwargs={
|
||||
'random_pos_scale': 0.1,
|
||||
'random_vel_scale': 0.0,
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id='TableTennisMarkovian-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:TableTennisMarkovian',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER,
|
||||
kwargs={
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id='TableTennisRndRobotMarkovian-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:TableTennisMarkovian',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER,
|
||||
kwargs={
|
||||
'random_pos_scale': 0.1,
|
||||
'random_vel_scale': 0.0,
|
||||
}
|
||||
)
|
||||
|
||||
# Air Hockey environments
|
||||
for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]:
|
||||
register(
|
||||
id=f'fancy/AirHockey-{env_mode}-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
|
||||
max_episode_steps=500,
|
||||
add_mp_types=[],
|
||||
kwargs={
|
||||
'env_mode': env_mode
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id=f'fancy/AirHockey-tournament-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
|
||||
max_episode_steps=15000,
|
||||
add_mp_types=[],
|
||||
kwargs={
|
||||
'env_mode': 'tournament'
|
||||
}
|
||||
)
|
||||
|
@ -178,9 +178,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
return False
|
||||
|
||||
def render(self, mode=None):
|
||||
if mode==None:
|
||||
mode = self.render_mode
|
||||
def render(self):
|
||||
if self.fig is None:
|
||||
# Create base figure once on the beginning. Afterwards only update
|
||||
plt.ion()
|
||||
@ -199,7 +197,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
self.fig.gca().set_title(
|
||||
f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}")
|
||||
|
||||
if mode == "human":
|
||||
if self.render_mode == "human":
|
||||
|
||||
# arm
|
||||
self.line.set_data(self._joints[:, 0], self._joints[:, 1])
|
||||
@ -207,7 +205,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
self.fig.canvas.draw()
|
||||
self.fig.canvas.flush_events()
|
||||
|
||||
elif mode == "partial":
|
||||
elif self.render_mode == "partial":
|
||||
if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
|
||||
# Arm
|
||||
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
|
||||
|
@ -98,9 +98,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
|
||||
def _check_collisions(self) -> bool:
|
||||
return self._check_self_collision()
|
||||
|
||||
def render(self, mode=None): # pragma: no cover
|
||||
if mode==None:
|
||||
mode = self.render_mode
|
||||
def render(self): # pragma: no cover
|
||||
if self.fig is None:
|
||||
# Create base figure once on the beginning. Afterwards only update
|
||||
plt.ion()
|
||||
|
@ -123,9 +123,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
def _check_collisions(self) -> bool:
|
||||
return self._check_self_collision()
|
||||
|
||||
def render(self, mode=None):
|
||||
if mode==None:
|
||||
mode = self.render_mode
|
||||
def render(self):
|
||||
goal_pos = self._goal.T
|
||||
via_pos = self._via_point.T
|
||||
|
||||
@ -148,7 +146,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
|
||||
|
||||
if mode == "human":
|
||||
if self.render_mode == "human":
|
||||
# goal
|
||||
if self._steps == 1:
|
||||
self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
|
||||
@ -160,7 +158,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
self.fig.canvas.draw()
|
||||
self.fig.canvas.flush_events()
|
||||
|
||||
elif mode == "partial":
|
||||
elif self.render_mode == "partial":
|
||||
if self._steps == 1:
|
||||
# fig, ax = plt.subplots()
|
||||
# Add the patch to the Axes
|
||||
@ -180,7 +178,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
plt.ylim([-1.1, lim])
|
||||
plt.pause(0.01)
|
||||
|
||||
elif mode == "final":
|
||||
elif self.render_mode == "final":
|
||||
if self._steps == 199 or self._is_collided:
|
||||
# fig, ax = plt.subplots()
|
||||
|
||||
|
@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv
|
||||
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
|
||||
from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
|
||||
from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching
|
||||
|
||||
try:
|
||||
from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv
|
||||
except ModuleNotFoundError:
|
||||
print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)")
|
26
fancy_gym/envs/mujoco/air_hockey/LICENSE
Normal file
26
fancy_gym/envs/mujoco/air_hockey/LICENSE
Normal file
@ -0,0 +1,26 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
---
|
||||
|
||||
This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge).
|
||||
The changes are mostly focused on adapting the provided environments to fancy_gym.
|
0
fancy_gym/envs/mujoco/air_hockey/__init__.py
Normal file
0
fancy_gym/envs/mujoco/air_hockey/__init__.py
Normal file
202
fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
Normal file
202
fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
Normal file
@ -0,0 +1,202 @@
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
from gymnasium import spaces
|
||||
|
||||
import fancy_gym.envs.mujoco.air_hockey.constraints as constraints
|
||||
from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
|
||||
from mushroom_rl.core import Environment
|
||||
|
||||
class AirHockeyEnv(Environment):
|
||||
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
|
||||
|
||||
def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, width=1920, height=1080, **kwargs):
|
||||
"""
|
||||
Environment Constructor
|
||||
|
||||
Args:
|
||||
env [string]:
|
||||
The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament].
|
||||
interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
|
||||
polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
|
||||
Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
|
||||
of position, velocity and acceleration of the shape (20, 3, n_joints)
|
||||
"""
|
||||
|
||||
env_dict = {
|
||||
"tournament": position.IiwaPositionTournament,
|
||||
|
||||
"7dof-hit": position.IiwaPositionHit,
|
||||
"7dof-defend": position.IiwaPositionDefend,
|
||||
|
||||
"3dof-hit": position.PlanarPositionHit,
|
||||
"3dof-defend": position.PlanarPositionDefend,
|
||||
|
||||
"7dof-hit-airhockit2023": position.IiwaPositionHitAirhocKIT2023,
|
||||
"7dof-defend-airhockit2023": position.IiwaPositionDefendAirhocKIT2023,
|
||||
}
|
||||
|
||||
if env_mode not in env_dict:
|
||||
raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!")
|
||||
|
||||
if env_mode == "tournament" and type(interpolation_order) != tuple:
|
||||
interpolation_order = (interpolation_order, interpolation_order)
|
||||
|
||||
self.render_mode = render_mode
|
||||
self.render_human_active = False
|
||||
|
||||
# Determine headless mode based on render_mode
|
||||
headless = self.render_mode == 'rgb_array'
|
||||
|
||||
# Prepare viewer_params
|
||||
viewer_params = kwargs.get('viewer_params', {})
|
||||
viewer_params.update({'headless': headless, 'width': width, 'height': height})
|
||||
kwargs['viewer_params'] = viewer_params
|
||||
|
||||
self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
|
||||
self.env_name = env_mode
|
||||
self.env_info = self.base_env.env_info
|
||||
|
||||
if hasattr(self.base_env, "wrapper_obs_space") and hasattr(self.base_env, "wrapper_act_space"):
|
||||
self.observation_space = self.base_env.wrapper_obs_space
|
||||
self.action_space = self.base_env.wrapper_act_space
|
||||
else:
|
||||
single_robot_obs_size = len(self.base_env.info.observation_space.low)
|
||||
if env_mode == "tournament":
|
||||
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
|
||||
else:
|
||||
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
|
||||
robot_info = self.env_info["robot"]
|
||||
|
||||
if env_mode != "tournament":
|
||||
if interpolation_order in [1, 2]:
|
||||
self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
||||
if interpolation_order in [3, 4, -1]:
|
||||
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
||||
if interpolation_order in [5]:
|
||||
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
||||
else:
|
||||
acts = [None, None]
|
||||
for i in range(2):
|
||||
if interpolation_order[i] in [1, 2]:
|
||||
acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
||||
if interpolation_order[i] in [3, 4, -1]:
|
||||
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
||||
if interpolation_order[i] in [5]:
|
||||
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
||||
self.action_space = spaces.Tuple((acts[0], acts[1]))
|
||||
|
||||
constraint_list = constraints.ConstraintList()
|
||||
constraint_list.add(constraints.JointPositionConstraint(self.env_info))
|
||||
constraint_list.add(constraints.JointVelocityConstraint(self.env_info))
|
||||
constraint_list.add(constraints.EndEffectorConstraint(self.env_info))
|
||||
if "7dof" in self.env_name or self.env_name == "tournament":
|
||||
constraint_list.add(constraints.LinkConstraint(self.env_info))
|
||||
|
||||
self.env_info['constraints'] = constraint_list
|
||||
self.env_info['env_name'] = self.env_name
|
||||
|
||||
super().__init__(self.base_env.info)
|
||||
|
||||
def step(self, action):
|
||||
obs, reward, done, info = self.base_env.step(action)
|
||||
|
||||
if "tournament" in self.env_name:
|
||||
info["constraints_value"] = list()
|
||||
info["jerk"] = list()
|
||||
for i in range(2):
|
||||
obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)]
|
||||
info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun(
|
||||
obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']])))
|
||||
info["jerk"].append(
|
||||
self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][
|
||||
'n_joints']])
|
||||
|
||||
info["score"] = self.base_env.score
|
||||
info["faults"] = self.base_env.faults
|
||||
|
||||
else:
|
||||
info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']],
|
||||
obs[self.env_info['joint_vel_ids']]))
|
||||
info["jerk"] = self.base_env.jerk
|
||||
info["success"] = self.check_success(obs)
|
||||
|
||||
if self.env_info['env_name'] == "tournament":
|
||||
obs = np.array(np.split(obs, 2))
|
||||
|
||||
if self.render_human_active:
|
||||
self.base_env.render()
|
||||
|
||||
return obs, reward, done, False, info
|
||||
|
||||
def render(self):
|
||||
if self.render_mode == 'rgb_array':
|
||||
return self.base_env.render(record = True)
|
||||
elif self.render_mode == 'human':
|
||||
self.render_human_active = True
|
||||
self.base_env.render()
|
||||
else:
|
||||
raise ValueError(f"Unsupported render mode: '{self.render_mode}'")
|
||||
|
||||
def reset(self, seed=None, options={}):
|
||||
self.base_env.seed(seed)
|
||||
obs = self.base_env.reset()
|
||||
if self.env_info['env_name'] == "tournament":
|
||||
obs = np.array(np.split(obs, 2))
|
||||
return obs, {}
|
||||
|
||||
def check_success(self, obs):
|
||||
puck_pos, puck_vel = self.base_env.get_puck(obs)
|
||||
|
||||
puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos)
|
||||
success = 0
|
||||
|
||||
if "hit" in self.env_name:
|
||||
if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \
|
||||
np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0:
|
||||
success = 1
|
||||
|
||||
elif "defend" in self.env_name:
|
||||
if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1:
|
||||
success = 1
|
||||
|
||||
elif "prepare" in self.env_name:
|
||||
if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1:
|
||||
success = 1
|
||||
return success
|
||||
|
||||
@property
|
||||
def unwrapped(self):
|
||||
return self
|
||||
|
||||
def close(self):
|
||||
self.base_env.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
env = AirHockeyEnv(env_mode="7dof-hit")
|
||||
env.reset()
|
||||
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
1
fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
Normal file
1
fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from .constraints import *
|
212
fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
Normal file
212
fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
Normal file
@ -0,0 +1,212 @@
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian
|
||||
|
||||
|
||||
class Constraint:
|
||||
def __init__(self, env_info, output_dim, **kwargs):
|
||||
"""
|
||||
Constructor
|
||||
|
||||
Args
|
||||
----
|
||||
env_info: dict
|
||||
A dictionary contains information about the environment;
|
||||
output_dim: int
|
||||
The output dimension of the constraints.
|
||||
**kwargs: dict
|
||||
A dictionary contains agent related information.
|
||||
"""
|
||||
self._env_info = env_info
|
||||
self._name = None
|
||||
|
||||
self.output_dim = output_dim
|
||||
|
||||
self._fun_value = np.zeros(self.output_dim)
|
||||
self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"]))
|
||||
self._q_prev = None
|
||||
self._dq_prev = None
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
"""
|
||||
The name of the constraints
|
||||
|
||||
"""
|
||||
return self._name
|
||||
|
||||
def fun(self, q, dq):
|
||||
"""
|
||||
The function of the constraint.
|
||||
|
||||
Args
|
||||
----
|
||||
q: numpy.ndarray, (num_joints,)
|
||||
The joint position of the robot
|
||||
dq: numpy.ndarray, (num_joints,)
|
||||
The joint velocity of the robot
|
||||
|
||||
Returns
|
||||
-------
|
||||
numpy.ndarray, (out_dim,):
|
||||
The value computed by the constraints function.
|
||||
"""
|
||||
if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
|
||||
return self._fun_value
|
||||
else:
|
||||
self._jacobian(q, dq)
|
||||
return self._fun(q, dq)
|
||||
|
||||
def jacobian(self, q, dq):
|
||||
"""
|
||||
Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity.
|
||||
|
||||
Args
|
||||
----
|
||||
q: ndarray, (num_joints,)
|
||||
The joint position of the robot
|
||||
dq: ndarray, (num_joints,)
|
||||
The joint velocity of the robot
|
||||
|
||||
Returns
|
||||
-------
|
||||
numpy.ndarray, (dim_output, num_joints * 2):
|
||||
The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot]
|
||||
|
||||
"""
|
||||
if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
|
||||
return self._fun_value
|
||||
else:
|
||||
self._fun(q, dq)
|
||||
return self._jacobian(q, dq)
|
||||
|
||||
def _fun(self, q, dq):
|
||||
raise NotImplementedError
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class ConstraintList:
|
||||
def __init__(self):
|
||||
self.constraints = dict()
|
||||
|
||||
def keys(self):
|
||||
return self.constraints.keys()
|
||||
|
||||
def get(self, key):
|
||||
return self.constraints.get(key)
|
||||
|
||||
def add(self, c):
|
||||
self.constraints.update({c.name: c})
|
||||
|
||||
def delete(self, name):
|
||||
del self.constraints[name]
|
||||
|
||||
def fun(self, q, dq):
|
||||
return {key: self.constraints[key].fun(q, dq) for key in self.constraints}
|
||||
|
||||
def jacobian(self, q, dq):
|
||||
return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints}
|
||||
|
||||
|
||||
class JointPositionConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
|
||||
self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95
|
||||
self._name = 'joint_pos_constr'
|
||||
|
||||
def _fun(self, q, dq):
|
||||
self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1]
|
||||
self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
return self._jac_value
|
||||
|
||||
|
||||
class JointVelocityConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
|
||||
self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95
|
||||
self._name = 'joint_vel_constr'
|
||||
|
||||
def _fun(self, q, dq):
|
||||
self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1]
|
||||
self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
return self._jac_value
|
||||
|
||||
|
||||
class EndEffectorConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
# 1 Dimension on x direction: x > x_lb
|
||||
# 2 Dimension on y direction: y > y_lb, y < y_ub
|
||||
# 2 Dimension on z direction: z > z_lb, z < z_ub
|
||||
super().__init__(env_info, output_dim=5, **kwargs)
|
||||
self._name = "ee_constr"
|
||||
tolerance = 0.02
|
||||
|
||||
self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
|
||||
self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
|
||||
|
||||
self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - (
|
||||
self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius'])
|
||||
self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
|
||||
self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
|
||||
self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance
|
||||
self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance
|
||||
|
||||
def _fun(self, q, dq):
|
||||
ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q)
|
||||
self._fun_value = np.array([-ee_pos[0] + self.x_lb,
|
||||
-ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub,
|
||||
-ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub])
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
jac = jacobian(self.robot_model, self.robot_data, q)
|
||||
dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]])
|
||||
self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']]
|
||||
return self._jac_value
|
||||
|
||||
|
||||
class LinkConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
# 1 Dimension: wrist_z > minimum_height
|
||||
# 2 Dimension: elbow_z > minimum_height
|
||||
super().__init__(env_info, output_dim=2, **kwargs)
|
||||
self._name = "link_constr"
|
||||
|
||||
self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
|
||||
self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
|
||||
|
||||
self.z_lb = 0.25
|
||||
|
||||
def _fun(self, q, dq):
|
||||
wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7")
|
||||
elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4")
|
||||
self._fun_value = np.array([-wrist_pos[2] + self.z_lb,
|
||||
-elbow_pos[2] + self.z_lb])
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7")
|
||||
jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4")
|
||||
self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([
|
||||
-jac_wrist[2, :self._env_info['robot']['n_joints']],
|
||||
-jac_elbow[2, :self._env_info['robot']['n_joints']],
|
||||
])
|
||||
return self._jac_value
|
0
fancy_gym/envs/mujoco/air_hockey/data/__init__.py
Normal file
0
fancy_gym/envs/mujoco/air_hockey/data/__init__.py
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
Normal file
Binary file not shown.
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
Normal file
Binary file not shown.
58
fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
Normal file
58
fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
Normal file
@ -0,0 +1,58 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
<include file="iiwa1.xml"/>
|
||||
|
||||
<include file="iiwa2.xml"/>
|
||||
|
||||
<include file="../table.xml"/>
|
||||
|
||||
<contact>
|
||||
<exclude body1="table_surface" body2="iiwa_1/base"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="rim" body2="iiwa_1/base"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="world" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="table_surface" body2="iiwa_2/base"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_1"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_2"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_3"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_4"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_5"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_6"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_7"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/striker_joint_link"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/striker_mallet"/>
|
||||
|
||||
<exclude body1="rim" body2="iiwa_2/base"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_1"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_2"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_3"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_4"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_5"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_6"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_7"/>
|
||||
<exclude body1="rim" body2="iiwa_2/striker_joint_link"/>
|
||||
<exclude body1="rim" body2="iiwa_2/striker_mallet"/>
|
||||
|
||||
<exclude body1="world" body2="iiwa_2/striker_mallet"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
108
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
Normal file
108
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
Normal file
@ -0,0 +1,108 @@
|
||||
<mujoco model="iiwa_1">
|
||||
<compiler angle="radian" autolimits="true" meshdir="assets"/>
|
||||
|
||||
<default>
|
||||
<default class="vis">
|
||||
<geom contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
<default class="robot">
|
||||
<geom condim="4" solref="0.02 0.3" priority="2"/>
|
||||
</default>
|
||||
</default>
|
||||
<asset>
|
||||
<mesh name="link_0" file="link_0.stl"/>
|
||||
<mesh name="link_1" file="link_1.stl"/>
|
||||
<mesh name="link_2" file="link_2.stl"/>
|
||||
<mesh name="link_3" file="link_3.stl"/>
|
||||
<mesh name="link_4" file="link_4.stl"/>
|
||||
<mesh name="link_5" file="link_5.stl"/>
|
||||
<mesh name="link_6" file="link_6.stl"/>
|
||||
<mesh name="link_7" file="link_7.stl"/>
|
||||
<mesh name="EE_arm" file="EE_arm.stl"/>
|
||||
<mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<body name="iiwa_1/base" pos="-1.51 0 -0.1">
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
|
||||
<body name="iiwa_1/link_1" pos="0 0 0.1575">
|
||||
<inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
|
||||
fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
|
||||
<joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
|
||||
frictionloss="0.384477"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
|
||||
<body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
|
||||
fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
|
||||
<joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
|
||||
frictionloss="0.496333"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
|
||||
<body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
|
||||
fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
|
||||
<joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
|
||||
frictionloss="0.173951"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
|
||||
<body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
|
||||
fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
|
||||
<joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.219041" frictionloss="0.3751"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
|
||||
<body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
|
||||
fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
|
||||
<joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
|
||||
damping="0.185923" frictionloss="0.481099"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
|
||||
<body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
|
||||
fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
|
||||
<joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.1" frictionloss="0.196149"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
|
||||
<body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
|
||||
fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
|
||||
<joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
|
||||
damping="0.1" frictionloss="0.299238" armature="0.01"/>
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
|
||||
<geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
|
||||
class="vis"/>
|
||||
<body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
|
||||
<body name="iiwa_1/striker_mallet" pos="0 0 0">
|
||||
<inertial pos="0 0 0.0682827" mass="0.283"
|
||||
diaginertia="0.005 0.005 0.005"/>
|
||||
<joint name="iiwa_1/striker_joint_1" pos="0 0 0" axis="0 1 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<joint name="iiwa_1/striker_joint_2" pos="0 0 0" axis="1 0 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
|
||||
class="vis"/>
|
||||
<geom name="iiwa_1/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
|
||||
size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
|
||||
<actuator>
|
||||
<motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
|
||||
<motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_1/striker_joint_1" joint="iiwa_1/striker_joint_1" ctrlrange="-10 10"/>
|
||||
<motor name="iiwa_1/striker_joint_2" joint="iiwa_1/striker_joint_2" ctrlrange="-10 10"/>
|
||||
</actuator>
|
||||
</mujoco>
|
89
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
Normal file
89
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
Normal file
@ -0,0 +1,89 @@
|
||||
<mujoco model="iiwa_2">
|
||||
<compiler angle="radian" autolimits="true" meshdir="assets"/>
|
||||
|
||||
<worldbody>
|
||||
<body name="iiwa_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
|
||||
<body name="iiwa_2/link_1" pos="0 0 0.1575">
|
||||
<inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
|
||||
fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
|
||||
<joint name="iiwa_2/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
|
||||
frictionloss="0.384477"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
|
||||
<body name="iiwa_2/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
|
||||
fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
|
||||
<joint name="iiwa_2/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
|
||||
frictionloss="0.496333"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
|
||||
<body name="iiwa_2/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
|
||||
fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
|
||||
<joint name="iiwa_2/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
|
||||
frictionloss="0.173951"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
|
||||
<body name="iiwa_2/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
|
||||
fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
|
||||
<joint name="iiwa_2/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.219041" frictionloss="0.3751"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
|
||||
<body name="iiwa_2/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
|
||||
fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
|
||||
<joint name="iiwa_2/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
|
||||
damping="0.185923" frictionloss="0.481099"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
|
||||
<body name="iiwa_2/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
|
||||
fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
|
||||
<joint name="iiwa_2/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.1" frictionloss="0.196149"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
|
||||
<body name="iiwa_2/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
|
||||
fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
|
||||
<joint name="iiwa_2/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
|
||||
damping="0.1" frictionloss="0.299238" armature="0.01"/>
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
|
||||
<geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
|
||||
class="vis"/>
|
||||
<body name="iiwa_2/striker_joint_link" pos="0 0 0.585">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
|
||||
<body name="iiwa_2/striker_mallet" pos="0 0 0">
|
||||
<inertial pos="0 0 0.0682827" mass="0.283"
|
||||
diaginertia="0.005 0.005 0.005"/>
|
||||
<joint name="iiwa_2/striker_joint_1" pos="0 0 0" axis="0 1 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<joint name="iiwa_2/striker_joint_2" pos="0 0 0" axis="1 0 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
|
||||
class="vis"/>
|
||||
<geom name="iiwa_2/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
|
||||
size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
|
||||
<actuator>
|
||||
<motor name="iiwa_2/joint_1" joint="iiwa_2/joint_1" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_2/joint_2" joint="iiwa_2/joint_2" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_2/joint_3" joint="iiwa_2/joint_3" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_2/joint_4" joint="iiwa_2/joint_4" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_2/joint_5" joint="iiwa_2/joint_5" ctrlrange="-110 110"/>
|
||||
<motor name="iiwa_2/joint_6" joint="iiwa_2/joint_6" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_2/joint_7" joint="iiwa_2/joint_7" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_2/striker_joint_1" joint="iiwa_2/striker_joint_1" ctrlrange="-10 10"/>
|
||||
<motor name="iiwa_2/striker_joint_2" joint="iiwa_2/striker_joint_2" ctrlrange="-10 10"/>
|
||||
</actuator>
|
||||
|
||||
</mujoco>
|
94
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
Normal file
94
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
Normal file
@ -0,0 +1,94 @@
|
||||
<mujoco model="iiwa_1">
|
||||
<compiler angle="radian" autolimits="true" meshdir="assets"/>
|
||||
|
||||
<default>
|
||||
<default class="vis">
|
||||
<geom contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
<default class="robot">
|
||||
<geom condim="4" solref="0.02 0.3" priority="2"/>
|
||||
</default>
|
||||
</default>
|
||||
<asset>
|
||||
<mesh name="link_0" file="link_0.stl"/>
|
||||
<mesh name="link_1" file="link_1.stl"/>
|
||||
<mesh name="link_2" file="link_2.stl"/>
|
||||
<mesh name="link_3" file="link_3.stl"/>
|
||||
<mesh name="link_4" file="link_4.stl"/>
|
||||
<mesh name="link_5" file="link_5.stl"/>
|
||||
<mesh name="link_6" file="link_6.stl"/>
|
||||
<mesh name="link_7" file="link_7.stl"/>
|
||||
<mesh name="EE_arm" file="EE_arm.stl"/>
|
||||
<mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<body name="iiwa_1/base" pos="0.0 0 0.0">
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
|
||||
<body name="iiwa_1/link_1" pos="0 0 0.1575">
|
||||
<inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
|
||||
fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
|
||||
<joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
|
||||
frictionloss="0.384477"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
|
||||
<body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
|
||||
fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
|
||||
<joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
|
||||
frictionloss="0.496333"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
|
||||
<body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
|
||||
fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
|
||||
<joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
|
||||
frictionloss="0.173951"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
|
||||
<body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
|
||||
fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
|
||||
<joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.219041" frictionloss="0.3751"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
|
||||
<body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
|
||||
fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
|
||||
<joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
|
||||
damping="0.185923" frictionloss="0.481099"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
|
||||
<body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
|
||||
fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
|
||||
<joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.1" frictionloss="0.196149"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
|
||||
<body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
|
||||
fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
|
||||
<joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
|
||||
damping="0.1" frictionloss="0.299238" armature="0.01"/>
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
|
||||
<geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
|
||||
class="vis"/>
|
||||
<body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
|
||||
<actuator>
|
||||
<motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
|
||||
<motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
|
||||
</actuator>
|
||||
</mujoco>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
32
fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
Normal file
32
fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
Normal file
@ -0,0 +1,32 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
|
||||
<include file="iiwa1.xml"/>
|
||||
<include file="../table.xml"/>
|
||||
|
||||
<contact>
|
||||
<exclude body1="table_surface" body2="iiwa_1/base"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="rim" body2="iiwa_1/base"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="world" body2="iiwa_1/striker_mallet"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
37
fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
Normal file
37
fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
Normal file
@ -0,0 +1,37 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
|
||||
<include file="planar_robot_1.xml"/>
|
||||
|
||||
<include file="planar_robot_2.xml"/>
|
||||
|
||||
<include file="../table.xml"/>
|
||||
|
||||
<contact>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/base" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/base" body2="rim"/>
|
||||
|
||||
<exclude body1="planar_robot_2/body_ee" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_hand" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_3" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_2" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_1" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/base" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_ee" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_hand" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_3" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_2" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_1" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/base" body2="rim"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
@ -0,0 +1,61 @@
|
||||
<mujoco model="planar_robot_1">
|
||||
<compiler autolimits="true" angle="radian"/>
|
||||
<asset>
|
||||
<material name="dark_red" rgba="0.58 0.03 0.25 1"/>
|
||||
<material name="black" rgba="0.1 0.1 0.11 1"/>
|
||||
</asset>
|
||||
|
||||
<default>
|
||||
<default class="visual">
|
||||
<geom contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
|
||||
<default class="robot">
|
||||
<geom condim="4" solref="0.02 0.3" priority="2"/>
|
||||
</default>
|
||||
</default>
|
||||
|
||||
<worldbody>
|
||||
<body name="planar_robot_1/base" pos="-1.51 0 -0.1">
|
||||
<geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
|
||||
<inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
|
||||
<body name="planar_robot_1/body_1" pos="0 0 0.25">
|
||||
<joint name="planar_robot_1/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
|
||||
<inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
|
||||
<geom class="robot" type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0"
|
||||
pos="0.275 0 0"/>
|
||||
<body name="planar_robot_1/body_2" pos="0.55 0 0">
|
||||
<joint name="planar_robot_1/joint_2" axis="0 0 1" range="-1.8 1.8"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
|
||||
pos="0.22 0 0"/>
|
||||
<geom class="robot" type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_1/body_3" pos="0.44 0 0">
|
||||
<joint name="planar_robot_1/joint_3" axis="0 0 1"
|
||||
range="-2.0943951023931953 2.0943951023931953"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22"
|
||||
euler="0 1.57079632679 0" pos="0.22 0 0"/>
|
||||
<geom class="robot" type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_1/body_hand" pos="0.44 0 0">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom class="robot" type="sphere" material="dark_red" size="0.05"/>
|
||||
<geom class="robot" type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
|
||||
<body name="planar_robot_1/body_ee" pos="0 0 -0.15">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom class="robot" name="planar_robot_1/ee" type="cylinder" material="black"
|
||||
size="0.04815 0.01" pos="0 0 0.01" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
<actuator>
|
||||
<motor name="planar_robot_1/joint_1" joint="planar_robot_1/joint_1" ctrlrange="-100 100"/>
|
||||
<motor name="planar_robot_1/joint_2" joint="planar_robot_1/joint_2" ctrlrange="-50 50"/>
|
||||
<motor name="planar_robot_1/joint_3" joint="planar_robot_1/joint_3" ctrlrange="-30 30"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,46 @@
|
||||
<mujoco model="planar_robot_2">
|
||||
<compiler autolimits="true" angle="radian"/>
|
||||
|
||||
<worldbody>
|
||||
<body name="planar_robot_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
|
||||
<geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
|
||||
<inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
|
||||
<body name="planar_robot_2/body_1" pos="0 0 0.25">
|
||||
<joint name="planar_robot_2/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
|
||||
<inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
|
||||
<geom type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0" pos="0.275 0 0"/>
|
||||
<body name="planar_robot_2/body_2" pos="0.55 0 0">
|
||||
<joint name="planar_robot_2/joint_2" axis="0 0 1" range="-1.8 1.8"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
|
||||
pos="0.22 0 0"/>
|
||||
<geom type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_2/body_3" pos="0.44 0 0">
|
||||
<joint name="planar_robot_2/joint_3" axis="0 0 1"
|
||||
range="-2.0943951023931953 2.0943951023931953"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
|
||||
pos="0.22 0 0"/>
|
||||
<geom type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_2/body_hand" pos="0.44 0 0">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom type="sphere" material="dark_red" size="0.05"/>
|
||||
<geom type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
|
||||
<body name="planar_robot_2/body_ee" pos="0 0 -0.15">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom name="planar_robot_2/ee" type="cylinder" material="black" size="0.04815 0.01"
|
||||
pos="0 0 0.01" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
<actuator>
|
||||
<motor name="planar_robot_2/joint_1" joint="planar_robot_2/joint_1" ctrlrange="-100 100"/>
|
||||
<motor name="planar_robot_2/joint_2" joint="planar_robot_2/joint_2" ctrlrange="-50 50"/>
|
||||
<motor name="planar_robot_2/joint_3" joint="planar_robot_2/joint_3" ctrlrange="-30 30"/>
|
||||
</actuator>
|
||||
</mujoco>
|
24
fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
Normal file
24
fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
|
||||
|
||||
<include file="planar_robot_1.xml"/>
|
||||
|
||||
<include file="../table.xml"/>
|
||||
|
||||
|
||||
<contact>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/base" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/base" body2="rim"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
89
fancy_gym/envs/mujoco/air_hockey/data/table.xml
Normal file
89
fancy_gym/envs/mujoco/air_hockey/data/table.xml
Normal file
@ -0,0 +1,89 @@
|
||||
<mujoco model="table">
|
||||
|
||||
<option timestep="0.001" cone="elliptic" impratio="1"/>
|
||||
|
||||
<asset>
|
||||
<material name="grey" specular="0.5" shininess="0.25" rgba="0.8 0.8 0.8 1"/>
|
||||
<material name="white" specular="0.5" shininess="0.25" rgba="1.0 1.0 1.0 1"/>
|
||||
<material name="red" specular="0.5" shininess="0.25" rgba="1.0 0.0 0.0 1"/>
|
||||
<material name="blue" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 1"/>
|
||||
<material name="transparent" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 0"/>
|
||||
|
||||
<texture type="skybox" builtin="gradient" rgb1="0.3 0.5 0.7" rgb2="0 0 0" width="512"
|
||||
height="3072"/>
|
||||
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.2 0.3 0.4"
|
||||
rgb2="0.1 0.2 0.3" markrgb="0.8 0.8 0.8" width="300" height="300"/>
|
||||
<material name="groundplane" texture="groundplane" texuniform="true" texrepeat="5 5"
|
||||
reflectance="0.2"/>
|
||||
</asset>
|
||||
|
||||
<default>
|
||||
<geom condim="4" solref="0.02 0.3"/>
|
||||
<default class="rim">
|
||||
<geom type="box" material="grey" condim="6" friction="10000 0.0 0.0" priority="1"
|
||||
solref="-2000000 -250" solimp="0.99 0.999 0.001 0.5 2"/>
|
||||
</default>
|
||||
</default>
|
||||
|
||||
<worldbody>
|
||||
<body name="table">
|
||||
<body name="table_surface">
|
||||
<geom name="surface" type="box" material="white" size="1.064 0.609 0.0505" pos="0 0 -0.0505"
|
||||
condim="4" friction="0.08 0.08 0.0" priority="1"
|
||||
solref="0.0125 0.5" solimp="0.9 0.999 0.001 0.5 2"/>
|
||||
</body>
|
||||
<body name="rim">
|
||||
<!-- <geom name="rim_home_l" class="rim" size="0.045 0.197 0.005" pos="-1.019 0.322 0.005"/>-->
|
||||
<!-- <geom name="rim_home_r" class="rim" size="0.045 0.197 0.005" pos="-1.019 -0.322 0.005"/>-->
|
||||
|
||||
<geom name="rim_home_l" class="rim" size="0.045 0.1945 0.005" pos="-1.019 0.3245 0.005"/>
|
||||
<geom name="rim_home_r" class="rim" size="0.045 0.1945 0.005" pos="-1.019 -0.3245 0.005"/>
|
||||
<geom name="rim_home_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 0.13 0.005"/>
|
||||
<geom name="rim_home_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 0.13 0.005"/>
|
||||
<geom name="rim_home_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 -0.13 0.005"/>
|
||||
<geom name="rim_home_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 -0.13 0.005"/>
|
||||
|
||||
<geom name="rim_home_top" class="rim" size="0.045 0.519 0.01" pos="-1.019 0 0.02"/>
|
||||
|
||||
<geom name="rim_left" class="rim" size="1.064 0.045 0.015" pos="0 0.564 0.015"/>
|
||||
<geom name="rim_right" class="rim" size="1.064 0.045 0.015" pos="0 -0.564 0.015"/>
|
||||
|
||||
<!-- <geom name="rim_away_l" class="rim" size="0.045 0.197 0.005" pos="1.019 0.322 0.005"/>-->
|
||||
<!-- <geom name="rim_away_r" class="rim" size="0.045 0.197 0.005" pos="1.019 -0.322 0.005"/>-->
|
||||
|
||||
<geom name="rim_away_l" class="rim" size="0.045 0.1945 0.005" pos="1.019 0.3245 0.005"/>
|
||||
<geom name="rim_away_r" class="rim" size="0.045 0.1945 0.005" pos="1.019 -0.3245 0.005"/>
|
||||
<geom name="rim_away_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 0.13 0.005"/>
|
||||
<geom name="rim_away_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 0.13 0.005"/>
|
||||
<geom name="rim_away_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 -0.13 0.005"/>
|
||||
<geom name="rim_away_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 -0.13 0.005"/>
|
||||
<geom name="rim_away_top" class="rim" size="0.045 0.519 0.01" pos="1.019 0 0.02"/>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<body name="base" pos="0 0 0">
|
||||
<joint name="puck_x" type="slide" axis="1 0 0" damping="0.005" limited="false"/>
|
||||
<joint name="puck_y" type="slide" axis="0 1 0" damping="0.005" limited="false"/>
|
||||
<joint name="puck_yaw" type="hinge" axis="0 0 1" damping="2e-6" limited="false"/>
|
||||
<body name="puck">
|
||||
<geom pos="0 0 0" name="puck" type="cylinder" material="red" size="0.03165 0.003"
|
||||
condim="4" priority="0"/>
|
||||
<geom pos="0.02 0 0" type="cylinder" material="blue" size="0.01 0.0031"
|
||||
condim="4" contype="0" conaffinity="0"/>
|
||||
<inertial pos="0 0 0" mass="0.01" diaginertia="2.5e-6 2.5e-6 5e-6"/>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<site name="puck_vis" type="ellipsoid" size="0.03165 0.03165 0.004" pos="0 0 -0.05"/>
|
||||
<site name="puck_vis_rot" type="cylinder" size="0.0045 0.006" rgba="1 0 0 1" pos="0 0 -0.05"/>
|
||||
</worldbody>
|
||||
|
||||
<contact>
|
||||
<exclude body1="puck" body2="table_surface"/>
|
||||
</contact>
|
||||
|
||||
<worldbody>
|
||||
<light pos="0 0 3" dir="0 0 -1" directional="true"/>
|
||||
<geom pos="0 0 -0.1" name="floor" size="0 0 0.05" type="plane" material="groundplane"/>
|
||||
</worldbody>
|
||||
</mujoco>
|
274
fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
Normal file
274
fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
Normal file
@ -0,0 +1,274 @@
|
||||
from collections import deque
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
import scipy
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey import seven_dof
|
||||
from fancy_gym.envs.mujoco.air_hockey import three_dof
|
||||
|
||||
|
||||
class PositionControl:
|
||||
def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs):
|
||||
"""
|
||||
Mixin that adds position controller to mujoco environments.
|
||||
|
||||
Args:
|
||||
p_gain (float): Proportional controller gain
|
||||
d_gain (float): Differential controller gain
|
||||
i_gain (float): Integral controller gain
|
||||
interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
|
||||
polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
|
||||
Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
|
||||
of position, velocity and acceleration of the shape (20, 3, n_joints)
|
||||
In the case of 2 agents it is a tuple, which describes the interpolation order for each agent
|
||||
debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the
|
||||
entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk.
|
||||
"""
|
||||
|
||||
self.debug = debug
|
||||
|
||||
super(PositionControl, self).__init__(*args, **kwargs)
|
||||
|
||||
self.robot_model = self.env_info['robot']['robot_model']
|
||||
self.robot_data = self.env_info['robot']['robot_data']
|
||||
|
||||
self.p_gain = np.array(p_gain * self.n_agents)
|
||||
self.d_gain = np.array(d_gain * self.n_agents)
|
||||
self.i_gain = np.array(i_gain * self.n_agents)
|
||||
|
||||
self.prev_pos = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_vel = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_acc = np.zeros(len(self.actuator_joint_ids))
|
||||
self.i_error = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids))
|
||||
|
||||
self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,)
|
||||
|
||||
self._num_env_joints = len(self.actuator_joint_ids)
|
||||
self.n_robot_joints = self.env_info['robot']["n_joints"]
|
||||
|
||||
self.action_shape = [None] * self.n_agents
|
||||
|
||||
for i in range(self.n_agents):
|
||||
if self.interp_order[i] is None:
|
||||
self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints)
|
||||
elif self.interp_order[i] in [1, 2]:
|
||||
self.action_shape[i] = (self.n_robot_joints,)
|
||||
elif self.interp_order[i] in [3, 4, -1]:
|
||||
self.action_shape[i] = (2, self.n_robot_joints)
|
||||
elif self.interp_order[i] == 5:
|
||||
self.action_shape[i] = (3, self.n_robot_joints)
|
||||
|
||||
self.traj = None
|
||||
|
||||
self.jerk = np.zeros(self._num_env_joints)
|
||||
|
||||
if self.debug:
|
||||
self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
|
||||
|
||||
def _enforce_safety_limits(self, desired_pos, desired_vel):
|
||||
# ROS safe controller
|
||||
pos = self.prev_controller_cmd_pos
|
||||
k = 20
|
||||
|
||||
joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents))
|
||||
joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents))
|
||||
|
||||
min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1])
|
||||
|
||||
max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1])
|
||||
|
||||
clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel)
|
||||
|
||||
min_pos = pos + min_vel * self._timestep
|
||||
max_pos = pos + max_vel * self._timestep
|
||||
|
||||
clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos)
|
||||
self.prev_controller_cmd_pos = clipped_pos.copy()
|
||||
|
||||
return clipped_pos, clipped_vel
|
||||
|
||||
def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel):
|
||||
clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel)
|
||||
|
||||
error = (clipped_pos - current_pos)
|
||||
|
||||
self.i_error += self.i_gain * error * self._timestep
|
||||
torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error
|
||||
|
||||
# Acceleration FeedForward
|
||||
tau_ff = np.zeros(self.robot_model.nv)
|
||||
for i in range(self.n_agents):
|
||||
robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i
|
||||
self.robot_data.qpos = current_pos[robot_joint_ids]
|
||||
self.robot_data.qvel = current_vel[robot_joint_ids]
|
||||
acc_ff = desired_acc[robot_joint_ids]
|
||||
mujoco.mj_forward(self.robot_model, self.robot_data)
|
||||
|
||||
mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff)
|
||||
torque[robot_joint_ids] += tau_ff
|
||||
|
||||
# Gravity Compensation and Coriolis and Centrifugal force
|
||||
torque[robot_joint_ids] += self.robot_data.qfrc_bias
|
||||
|
||||
torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids],
|
||||
self.robot_model.actuator_ctrlrange[:, 0]),
|
||||
self.robot_model.actuator_ctrlrange[:, 1])
|
||||
|
||||
if self.debug:
|
||||
self.controller_record.append(
|
||||
np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk]))
|
||||
|
||||
return torque
|
||||
|
||||
def _interpolate_trajectory(self, interp_order, action, i=0):
|
||||
tf = self.dt
|
||||
prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
|
||||
prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
|
||||
prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
|
||||
if interp_order == 1 and action.ndim == 1:
|
||||
coef = np.array([[1, 0], [1, tf]])
|
||||
results = np.vstack([prev_pos, action])
|
||||
elif interp_order == 2 and action.ndim == 1:
|
||||
coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]])
|
||||
if np.linalg.norm(action - prev_pos) < 1e-3:
|
||||
prev_vel = np.zeros_like(prev_vel)
|
||||
results = np.vstack([prev_pos, action, prev_vel])
|
||||
elif interp_order == 3 and action.shape[0] == 2:
|
||||
coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]])
|
||||
results = np.vstack([prev_pos, action[0], prev_vel, action[1]])
|
||||
elif interp_order == 4 and action.shape[0] == 2:
|
||||
coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4],
|
||||
[0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3],
|
||||
[0, 0, 2, 0, 0]])
|
||||
results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc])
|
||||
elif interp_order == 5 and action.shape[0] == 3:
|
||||
coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5],
|
||||
[0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4],
|
||||
[0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]])
|
||||
results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]])
|
||||
elif interp_order == -1:
|
||||
# Interpolate position and velocity linearly
|
||||
pass
|
||||
else:
|
||||
raise ValueError("Undefined interpolator order or the action dimension does not match!")
|
||||
|
||||
if interp_order > 0:
|
||||
A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints)
|
||||
y = results.reshape(-2, order='F')
|
||||
weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1)
|
||||
weights_d = np.polynomial.polynomial.polyder(weights, axis=1)
|
||||
weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
|
||||
elif interp_order == -1:
|
||||
weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T
|
||||
weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T
|
||||
weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
|
||||
|
||||
if interp_order in [3, 4, 5]:
|
||||
self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep
|
||||
else:
|
||||
self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf
|
||||
|
||||
self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T)
|
||||
self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T)
|
||||
self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T)
|
||||
|
||||
for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps):
|
||||
q = np.polynomial.polynomial.polyval(t, weights.T)
|
||||
qd = np.polynomial.polynomial.polyval(t, weights_d.T)
|
||||
qdd = np.polynomial.polynomial.polyval(t, weights_dd.T)
|
||||
yield q, qd, qdd
|
||||
|
||||
def reset(self, obs=None):
|
||||
obs = super(PositionControl, self).reset(obs)
|
||||
self.prev_pos = self._data.qpos[self.actuator_joint_ids]
|
||||
self.prev_vel = self._data.qvel[self.actuator_joint_ids]
|
||||
self.prev_acc = np.zeros(len(self.actuator_joint_ids))
|
||||
self.i_error = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids]
|
||||
|
||||
if self.debug:
|
||||
self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
|
||||
return obs
|
||||
|
||||
def _step_init(self, obs, action):
|
||||
super(PositionControl, self)._step_init(obs, action)
|
||||
|
||||
if self.n_agents == 1:
|
||||
self.traj = self._create_traj(self.interp_order[0], action)
|
||||
else:
|
||||
def _traj():
|
||||
traj_1 = self._create_traj(self.interp_order[0], action[0], 0)
|
||||
traj_2 = self._create_traj(self.interp_order[1], action[1], 1)
|
||||
|
||||
for a1, a2 in zip(traj_1, traj_2):
|
||||
yield np.hstack([a1, a2])
|
||||
|
||||
self.traj = _traj()
|
||||
|
||||
def _create_traj(self, interp_order, action, i=0):
|
||||
if interp_order is None:
|
||||
return iter(action)
|
||||
return self._interpolate_trajectory(interp_order, action, i)
|
||||
|
||||
def _compute_action(self, obs, action):
|
||||
cur_pos, cur_vel = self.get_joints(obs)
|
||||
|
||||
desired_pos, desired_vel, desired_acc = next(self.traj)
|
||||
|
||||
return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel)
|
||||
|
||||
def _preprocess_action(self, action):
|
||||
action = super(PositionControl, self)._preprocess_action(action)
|
||||
|
||||
if self.n_agents == 1:
|
||||
assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \
|
||||
f" {action.shape}"
|
||||
else:
|
||||
for i in range(self.n_agents):
|
||||
assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \
|
||||
f" {action[i].shape}"
|
||||
|
||||
return action
|
||||
|
||||
|
||||
class PositionControlIIWA(PositionControl):
|
||||
def __init__(self, *args, **kwargs):
|
||||
p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.]
|
||||
d_gain = [60, 80, 60, 30, 10, 1, 0.5]
|
||||
i_gain = [0, 0, 0, 0, 0, 0, 0]
|
||||
|
||||
super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
|
||||
|
||||
|
||||
class PositionControlPlanar(PositionControl):
|
||||
def __init__(self, *args, **kwargs):
|
||||
p_gain = [960, 480, 240]
|
||||
d_gain = [60, 20, 4]
|
||||
i_gain = [0, 0, 0]
|
||||
super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
|
||||
|
||||
|
||||
class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit):
|
||||
pass
|
||||
|
||||
|
||||
class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
|
||||
pass
|
||||
|
||||
|
||||
class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
|
||||
pass
|
||||
|
||||
class IiwaPositionHitAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyHitAirhocKIT2023):
|
||||
pass
|
||||
|
||||
class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
|
||||
pass
|
||||
|
||||
class IiwaPositionDefendAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyDefendAirhocKIT2023):
|
||||
pass
|
||||
|
||||
class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
|
||||
pass
|
4
fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
Normal file
4
fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
from .env_base import AirHockeyBase
|
||||
from .tournament import AirHockeyTournament
|
||||
from .hit import AirHockeyHit, AirHockeyHitAirhocKIT2023
|
||||
from .defend import AirHockeyDefend, AirHockeyDefendAirhocKIT2023
|
114
fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
Normal file
114
fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
Normal file
@ -0,0 +1,114 @@
|
||||
import numpy as np
|
||||
from gymnasium import spaces
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian
|
||||
|
||||
class AirhocKIT2023BaseEnv(AirHockeySingle):
|
||||
def __init__(self, noise=False, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
obs_low = np.hstack([[-np.inf] * 37])
|
||||
obs_high = np.hstack([[np.inf] * 37])
|
||||
self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64)
|
||||
self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6))
|
||||
self.noise = noise
|
||||
|
||||
# We don't need puck yaw observations
|
||||
def filter_obs(self, obs):
|
||||
obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]])
|
||||
return obs
|
||||
|
||||
def add_noise(self, obs):
|
||||
if not self.noise:
|
||||
return
|
||||
obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3)
|
||||
obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3)
|
||||
|
||||
def reset(self):
|
||||
self.last_acceleration = np.repeat(0., 6)
|
||||
obs = super().reset()
|
||||
self.add_noise(obs)
|
||||
self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1]
|
||||
self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1]
|
||||
|
||||
self.last_planned_world_pos = self._fk(self.interp_pos)
|
||||
obs = np.hstack([
|
||||
obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
|
||||
])
|
||||
return self.filter_obs(obs)
|
||||
|
||||
def step(self, action):
|
||||
action /= 10
|
||||
|
||||
new_vel = self.interp_vel + action
|
||||
|
||||
jerk = 2 * (new_vel - self.interp_vel - self.last_acceleration * 0.02) / (0.02 ** 2)
|
||||
new_pos = self.interp_pos + self.interp_vel * 0.02 + (1/2) * self.last_acceleration * (0.02 ** 2) + (1/6) * jerk * (0.02 ** 3)
|
||||
abs_action = np.vstack([np.hstack([new_pos, 0]), np.hstack([new_vel, 0])])
|
||||
|
||||
self.interp_pos = new_pos
|
||||
self.interp_vel = new_vel
|
||||
self.last_acceleration += jerk * 0.02
|
||||
|
||||
obs, rew, done, info = super().step(abs_action)
|
||||
self.add_noise(obs)
|
||||
self.last_planned_world_pos = self._fk(self.interp_pos)
|
||||
obs = np.hstack([
|
||||
obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos
|
||||
])
|
||||
|
||||
fatal_rew = self.check_fatal(obs)
|
||||
if fatal_rew != 0:
|
||||
return self.filter_obs(obs), fatal_rew, True, info
|
||||
|
||||
return self.filter_obs(obs), rew, done, info
|
||||
|
||||
def check_constraints(self, constraint_values):
|
||||
fatal_rew = 0
|
||||
|
||||
j_pos_constr = constraint_values["joint_pos_constr"]
|
||||
if j_pos_constr.max() > 0:
|
||||
fatal_rew += j_pos_constr.max()
|
||||
|
||||
j_vel_constr = constraint_values["joint_vel_constr"]
|
||||
if j_vel_constr.max() > 0:
|
||||
fatal_rew += j_vel_constr.max()
|
||||
|
||||
ee_constr = constraint_values["ee_constr"]
|
||||
if ee_constr.max() > 0:
|
||||
fatal_rew += ee_constr.max()
|
||||
|
||||
link_constr = constraint_values["link_constr"]
|
||||
if link_constr.max() > 0:
|
||||
fatal_rew += link_constr.max()
|
||||
|
||||
return -fatal_rew
|
||||
|
||||
def check_fatal(self, obs):
|
||||
fatal_rew = 0
|
||||
|
||||
q = obs[self.env_info["joint_pos_ids"]]
|
||||
qd = obs[self.env_info["joint_vel_ids"]]
|
||||
constraint_values_obs = self.env_info["constraints"].fun(q, qd)
|
||||
fatal_rew += self.check_constraints(constraint_values_obs)
|
||||
|
||||
return -fatal_rew
|
||||
|
||||
def _fk(self, pos):
|
||||
res, _ = forward_kinematics(self.env_info["robot"]["robot_model"],
|
||||
self.env_info["robot"]["robot_data"], pos)
|
||||
return res.astype(np.float32)
|
||||
|
||||
def _ik(self, world_pos, init_q=None):
|
||||
success, pos = inverse_kinematics(self.env_info["robot"]["robot_model"],
|
||||
self.env_info["robot"]["robot_data"],
|
||||
world_pos,
|
||||
initial_q=init_q)
|
||||
pos = pos.astype(np.float32)
|
||||
assert success
|
||||
return pos
|
||||
|
||||
def _jacobian(self, pos):
|
||||
return jacobian(self.env_info["robot"]["robot_model"],
|
||||
self.env_info["robot"]["robot_data"],
|
||||
pos).astype(np.float32)
|
166
fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
Normal file
166
fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
Normal file
@ -0,0 +1,166 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv
|
||||
|
||||
|
||||
class AirHockeyDefend(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey defending task.
|
||||
The agent should stop the puck at the line x=-0.6.
|
||||
"""
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
self.init_velocity_range = (1, 3)
|
||||
self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
def setup(self, obs):
|
||||
puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
|
||||
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-0.5, 0.5)
|
||||
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-10, 10)
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super().setup(obs)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, state):
|
||||
puck_pos, puck_vel = self.get_puck(state)
|
||||
# If puck is over the middle line and moving towards opponent
|
||||
if puck_pos[0] > 0 and puck_vel[0] > 0:
|
||||
return True
|
||||
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||
return True
|
||||
return super().is_absorbing(state)
|
||||
|
||||
class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv):
|
||||
def __init__(self, gamma=0.99, horizon=200, viewer_params={}, **kwargs):
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
|
||||
self.init_velocity_range = (1, 3)
|
||||
self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]]) # Table Frame
|
||||
self._setup_metrics()
|
||||
|
||||
def setup(self, obs):
|
||||
self._setup_metrics()
|
||||
puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
|
||||
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-0.5, 0.5)
|
||||
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-10, 10)
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super().setup(obs)
|
||||
|
||||
def reset(self, *args):
|
||||
obs = super().reset()
|
||||
self.hit_step_flag = False
|
||||
self.hit_step = False
|
||||
self.received_hit_reward = False
|
||||
self.give_reward_next = False
|
||||
return obs
|
||||
|
||||
def _setup_metrics(self):
|
||||
self.episode_steps = 0
|
||||
self.has_hit = False
|
||||
|
||||
def _simulation_post_step(self):
|
||||
if not self.has_hit:
|
||||
self.has_hit = self._check_collision("puck", "robot_1/ee")
|
||||
|
||||
super()._simulation_post_step()
|
||||
|
||||
def _step_finalize(self):
|
||||
self.episode_steps += 1
|
||||
return super()._step_finalize()
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
puck_pos, puck_vel = self.get_puck(next_state)
|
||||
ee_pos, _ = self.get_ee()
|
||||
rew = 0.01
|
||||
if -0.7 < puck_pos[0] <= -0.2 and np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||
assert absorbing
|
||||
rew += 70
|
||||
|
||||
if self.has_hit and not self.hit_step_flag:
|
||||
self.hit_step_flag = True
|
||||
self.hit_step = True
|
||||
else:
|
||||
self.hit_step = False
|
||||
|
||||
f = lambda puck_vel: 30 + 100 * (100 ** (-0.25 * np.linalg.norm(puck_vel[:2])))
|
||||
if not self.give_reward_next and not self.received_hit_reward and self.hit_step and ee_pos[0] < puck_pos[0]:
|
||||
self.hit_this_step = True
|
||||
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||
return rew + f(puck_vel)
|
||||
self.give_reward_next = True
|
||||
return rew
|
||||
|
||||
if not self.received_hit_reward and self.give_reward_next:
|
||||
self.received_hit_reward = True
|
||||
if puck_vel[0] >= -0.2:
|
||||
return rew + f(puck_vel)
|
||||
return rew
|
||||
else:
|
||||
return rew
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
# If puck is over the middle line and moving towards opponent
|
||||
if puck_pos[0] > 0 and puck_vel[0] > 0:
|
||||
return True
|
||||
|
||||
if self.episode_steps == self._mdp_info.horizon:
|
||||
return True
|
||||
|
||||
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||
return True
|
||||
return super().is_absorbing(obs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyDefend()
|
||||
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
||||
env.render()
|
||||
while True:
|
||||
# action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8
|
||||
action = np.zeros(7)
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
print(observation)
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
258
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
Normal file
258
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
Normal file
@ -0,0 +1,258 @@
|
||||
import os
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin
|
||||
from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
|
||||
from mushroom_rl.utils.spaces import Box
|
||||
|
||||
|
||||
"""
|
||||
Abstract class for all AirHockey Environments.
|
||||
|
||||
"""
|
||||
class AirHockeyBase(MuJoCo):
|
||||
def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
|
||||
n_agents=1, viewer_params={}):
|
||||
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
Args:
|
||||
n_agents (int, 1): number of agent to be used in the environment (one or two)
|
||||
"""
|
||||
|
||||
self.n_agents = n_agents
|
||||
|
||||
action_spec = []
|
||||
observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
collision_spec = [("puck", ["puck"]),
|
||||
("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
|
||||
("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
|
||||
|
||||
if 1 <= self.n_agents <= 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
|
||||
|
||||
action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
|
||||
"iiwa_1/joint_6", "iiwa_1/joint_7"]
|
||||
|
||||
observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS),
|
||||
("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL),
|
||||
("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)]
|
||||
|
||||
collision_spec += [("robot_1/ee", ["iiwa_1/ee"])]
|
||||
|
||||
if self.n_agents == 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
|
||||
|
||||
observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)]
|
||||
|
||||
action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4",
|
||||
"iiwa_2/joint_5",
|
||||
"iiwa_2/joint_6", "iiwa_2/joint_7"]
|
||||
|
||||
observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)]
|
||||
|
||||
observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)]
|
||||
|
||||
additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS),
|
||||
("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL),
|
||||
("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)]
|
||||
|
||||
collision_spec += [("robot_2/ee", ["iiwa_2/ee"])]
|
||||
else:
|
||||
raise ValueError('n_agents should be 1 or 2')
|
||||
|
||||
self.env_info = dict()
|
||||
self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
|
||||
self.env_info['puck'] = {"radius": 0.03165}
|
||||
self.env_info['mallet'] = {"radius": 0.04815}
|
||||
self.env_info['n_agents'] = self.n_agents
|
||||
self.env_info['robot'] = {
|
||||
"n_joints": 7,
|
||||
"ee_desired_height": 0.1645,
|
||||
"joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
|
||||
[85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi,
|
||||
"joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
|
||||
[85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10,
|
||||
"base_frame": [],
|
||||
"universal_height": 0.0645,
|
||||
"control_frequency": 50,
|
||||
}
|
||||
|
||||
self.env_info['puck_pos_ids'] = [0, 1, 2]
|
||||
self.env_info['puck_vel_ids'] = [3, 4, 5]
|
||||
self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12]
|
||||
self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19]
|
||||
if self.n_agents == 2:
|
||||
self.env_info['opponent_ee_ids'] = [20, 21, 22]
|
||||
else:
|
||||
self.env_info['opponent_ee_ids'] = []
|
||||
|
||||
max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents
|
||||
|
||||
super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
|
||||
n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
|
||||
|
||||
# Construct the mujoco model at origin
|
||||
robot_model = mujoco.MjModel.from_xml_path(
|
||||
os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml"))
|
||||
robot_model.body('iiwa_1/base').pos = np.zeros(3)
|
||||
robot_data = mujoco.MjData(robot_model)
|
||||
|
||||
# Add env_info that requires mujoco models
|
||||
self.env_info['dt'] = self.dt
|
||||
self.env_info["robot"]["joint_pos_limit"] = np.array(
|
||||
[self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T
|
||||
self.env_info["robot"]["robot_model"] = robot_model
|
||||
self.env_info["robot"]["robot_data"] = robot_data
|
||||
self.env_info["rl_info"] = self.info
|
||||
|
||||
frame_T = np.eye(4)
|
||||
temp = np.zeros((9, 1))
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("iiwa_1/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
if self.n_agents == 2:
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("iiwa_2/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
# Ids of the joint, which are controller by the action space
|
||||
self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
|
||||
|
||||
self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info)
|
||||
|
||||
def _modify_mdp_info(self, mdp_info):
|
||||
obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
|
||||
*np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][0]])
|
||||
obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
|
||||
*np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][1]])
|
||||
if self.n_agents == 2:
|
||||
obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
|
||||
obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
|
||||
mdp_info.observation_space = Box(obs_low, obs_high)
|
||||
return mdp_info
|
||||
|
||||
def _simulation_pre_step(self):
|
||||
self.universal_joint_plugin.update()
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
|
||||
if type == 'pose':
|
||||
puck_w = np.eye(4)
|
||||
puck_w[:2, 3] = puck_in[:2]
|
||||
puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
|
||||
|
||||
puck_r = np.linalg.inv(robot_frame) @ puck_w
|
||||
puck_out = np.concatenate([puck_r[:2, 3],
|
||||
R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
|
||||
|
||||
if type == 'vel':
|
||||
rot_mat = robot_frame[:3, :3]
|
||||
|
||||
vel_lin = np.array([*puck_in[:2], 0])
|
||||
vel_ang = np.array([0., 0., puck_in[2]])
|
||||
|
||||
vel_lin_r = rot_mat.T @ vel_lin
|
||||
vel_ang_r = rot_mat.T @ vel_ang
|
||||
|
||||
puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
|
||||
return puck_out
|
||||
|
||||
def get_puck(self, obs):
|
||||
"""
|
||||
Getting the puck properties from the observations
|
||||
Args:
|
||||
obs: The current observation
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
|
||||
|
||||
"""
|
||||
puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
|
||||
puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
|
||||
return puck_pos, puck_vel
|
||||
|
||||
def get_ee(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_joints(self, obs):
|
||||
raise NotImplementedError
|
171
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
Normal file
171
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
Normal file
@ -0,0 +1,171 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
|
||||
|
||||
class AirHockeyDouble(AirHockeyBase):
|
||||
"""
|
||||
Base class for two agents air hockey tasks.
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params)
|
||||
|
||||
self._compute_init_state()
|
||||
|
||||
self.filter_ratio = 0.274
|
||||
self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
|
||||
self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
|
||||
|
||||
def _compute_init_state(self):
|
||||
init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
|
||||
|
||||
success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
|
||||
self.env_info['robot']['robot_data'],
|
||||
np.array([0.65, 0., 0.1645]),
|
||||
R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
|
||||
initial_q=init_state)
|
||||
|
||||
assert success is True
|
||||
|
||||
def get_ee(self, robot=1):
|
||||
"""
|
||||
Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics
|
||||
on the current joint position, this function exists to avoid redundant computations.
|
||||
Args:
|
||||
robot: ID of robot, either 1 or 2
|
||||
|
||||
Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
|
||||
"""
|
||||
ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos")
|
||||
|
||||
ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel")
|
||||
|
||||
return ee_pos, ee_vel
|
||||
|
||||
def get_joints(self, obs, agent=None):
|
||||
"""
|
||||
Get joint position and velocity of the robots
|
||||
Can choose the robot with agent = 1 / 2. If agent is None both are returned
|
||||
"""
|
||||
if agent:
|
||||
q_pos = np.zeros(7)
|
||||
q_vel = np.zeros(7)
|
||||
for i in range(7):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0]
|
||||
else:
|
||||
q_pos = np.zeros(14)
|
||||
q_vel = np.zeros(14)
|
||||
for i in range(7):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
return q_pos, q_vel
|
||||
|
||||
def _create_observation(self, obs):
|
||||
# Filter the joint velocity
|
||||
q_pos, q_vel = self.get_joints(obs)
|
||||
q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
|
||||
self.q_pos_prev = q_pos
|
||||
self.q_vel_prev = q_vel_filter
|
||||
|
||||
for i in range(7):
|
||||
self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
|
||||
self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7]
|
||||
|
||||
# Wrap puck's rotation angle to [-pi, pi)
|
||||
yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
|
||||
return obs
|
||||
|
||||
def _modify_observation(self, obs):
|
||||
new_obs = obs.copy()
|
||||
|
||||
puck_pos, puck_vel = self.get_puck(new_obs)
|
||||
|
||||
puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
|
||||
puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2]
|
||||
|
||||
opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')
|
||||
self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \
|
||||
(np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3]
|
||||
|
||||
puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1])
|
||||
puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2]
|
||||
|
||||
opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')
|
||||
self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \
|
||||
(np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3]
|
||||
|
||||
return new_obs
|
||||
|
||||
def setup(self, obs):
|
||||
for i in range(7):
|
||||
self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
|
||||
self.q_pos_prev[i] = self.init_state[i]
|
||||
self.q_pos_prev[i + 7] = self.init_state[i]
|
||||
self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
|
||||
self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0]
|
||||
|
||||
self.universal_joint_plugin.reset()
|
||||
|
||||
super().setup(obs)
|
||||
# Update body positions, needed for _compute_universal_joint
|
||||
mujoco.mj_fwdPosition(self._model, self._data)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
|
||||
def main():
|
||||
env = AirHockeyDouble(viewer_params={'start_paused': True})
|
||||
env.reset()
|
||||
env.render()
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(14)
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
104
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
Normal file
104
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
Normal file
@ -0,0 +1,104 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
|
||||
|
||||
class AirHockeySingle(AirHockeyBase):
|
||||
"""
|
||||
Base class for single agent air hockey tasks.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
|
||||
|
||||
self._compute_init_state()
|
||||
|
||||
self.filter_ratio = 0.274
|
||||
self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
|
||||
def _compute_init_state(self):
|
||||
init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
|
||||
|
||||
success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
|
||||
self.env_info['robot']['robot_data'],
|
||||
np.array([0.65, 0., 0.1645]),
|
||||
R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
|
||||
initial_q=init_state)
|
||||
|
||||
assert success is True
|
||||
|
||||
def get_ee(self):
|
||||
"""
|
||||
Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
|
||||
on the current joint position, this function exists to avoid redundant computations.
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
|
||||
"""
|
||||
ee_pos = self._read_data("robot_1/ee_pos")
|
||||
|
||||
ee_vel = self._read_data("robot_1/ee_vel")
|
||||
|
||||
return ee_pos, ee_vel
|
||||
|
||||
def get_joints(self, obs):
|
||||
"""
|
||||
Get joint position and velocity of the robot
|
||||
"""
|
||||
q_pos = np.zeros(7)
|
||||
q_vel = np.zeros(7)
|
||||
for i in range(7):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
return q_pos, q_vel
|
||||
|
||||
def _create_observation(self, obs):
|
||||
# Filter the joint velocity
|
||||
q_pos, q_vel = self.get_joints(obs)
|
||||
q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
|
||||
self.q_pos_prev = q_pos
|
||||
self.q_vel_prev = q_vel_filter
|
||||
|
||||
for i in range(7):
|
||||
self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
|
||||
|
||||
yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
|
||||
return obs
|
||||
|
||||
def _modify_observation(self, obs):
|
||||
new_obs = obs.copy()
|
||||
puck_pos, puck_vel = self.get_puck(new_obs)
|
||||
|
||||
puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
|
||||
|
||||
puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
|
||||
|
||||
return new_obs
|
||||
|
||||
def setup(self, obs):
|
||||
for i in range(7):
|
||||
self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
self.q_pos_prev[i] = self.init_state[i]
|
||||
self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
|
||||
|
||||
self.universal_joint_plugin.reset()
|
||||
|
||||
super().setup(obs)
|
||||
|
||||
# Update body positions, needed for _compute_universal_joint
|
||||
mujoco.mj_fwdPosition(self._model, self._data)
|
159
fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
Normal file
159
fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
Normal file
@ -0,0 +1,159 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv
|
||||
|
||||
|
||||
class AirHockeyHit(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey hitting task.
|
||||
"""
|
||||
def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
|
||||
"""
|
||||
Constructor
|
||||
Args:
|
||||
opponent_agent(Agent, None): Agent which controls the opponent
|
||||
moving_init(bool, False): If true, initialize the puck with inital velocity.
|
||||
"""
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
self.moving_init = moving_init
|
||||
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||
self.env_info['mallet']['radius'] * 2
|
||||
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||
self.init_velocity_range = (0, 0.5) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
|
||||
|
||||
def setup(self, obs):
|
||||
# Initial position of the puck
|
||||
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
|
||||
if self.moving_init:
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-2, 2)
|
||||
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super(AirHockeyHit, self).setup(obs)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
# Stop if the puck bounces back on the opponents wall
|
||||
if puck_pos[0] > 0 and puck_vel[0] < 0:
|
||||
return True
|
||||
return super(AirHockeyHit, self).is_absorbing(obs)
|
||||
|
||||
class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv):
|
||||
def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}, **kwargs):
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs)
|
||||
|
||||
self.moving_init = moving_init
|
||||
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||
self.env_info['mallet']['radius'] * 2
|
||||
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||
self.init_velocity_range = (0, 0.5) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
self._setup_metrics()
|
||||
|
||||
def reset(self, *args):
|
||||
obs = super().reset()
|
||||
self.last_ee_pos = self.last_planned_world_pos.copy()
|
||||
self.last_ee_pos[0] -= 1.51
|
||||
return obs
|
||||
|
||||
def setup(self, obs):
|
||||
self._setup_metrics()
|
||||
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
|
||||
if self.moving_init:
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-2, 2)
|
||||
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super().setup(obs)
|
||||
|
||||
def _setup_metrics(self):
|
||||
self.episode_steps = 0
|
||||
self.has_scored = False
|
||||
|
||||
def _step_finalize(self):
|
||||
cur_obs = self._create_observation(self.obs_helper._build_obs(self._data))
|
||||
puck_pos, _ = self.get_puck(cur_obs) # world frame [x, y, z] and [x', y', z']
|
||||
|
||||
if not self.has_scored:
|
||||
boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
|
||||
self.has_scored = np.any(np.abs(puck_pos[:2]) > boundary) and puck_pos[0] > 0
|
||||
|
||||
self.episode_steps += 1
|
||||
return super()._step_finalize()
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
rew = 0
|
||||
puck_pos, puck_vel = self.get_puck(next_state)
|
||||
ee_pos, _ = self.get_ee()
|
||||
ee_vel = (ee_pos - self.last_ee_pos) / 0.02
|
||||
self.last_ee_pos = ee_pos
|
||||
|
||||
if puck_vel[0] < 0.25 and puck_pos[0] < 0:
|
||||
ee_puck_dir = (puck_pos - ee_pos)[:2]
|
||||
ee_puck_dir = ee_puck_dir / np.linalg.norm(ee_puck_dir)
|
||||
rew += 1 * max(0, np.dot(ee_puck_dir, ee_vel[:2]))
|
||||
else:
|
||||
rew += 10 * np.linalg.norm(puck_vel[:2])
|
||||
|
||||
if self.has_scored:
|
||||
rew += 2000 + 5000 * np.linalg.norm(puck_vel[:2])
|
||||
|
||||
return rew
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
# Stop if the puck bounces back on the opponents wall
|
||||
if puck_pos[0] > 0 and puck_vel[0] < 0:
|
||||
return True
|
||||
|
||||
if self.has_scored:
|
||||
return True
|
||||
|
||||
if self.episode_steps == self._mdp_info.horizon:
|
||||
return True
|
||||
|
||||
return super().is_absorbing(obs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyHit(moving_init=True)
|
||||
env.reset()
|
||||
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(7)
|
||||
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
if done or steps > env.info.horizon:
|
||||
steps = 0
|
||||
env.reset()
|
111
fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
Normal file
111
fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
Normal file
@ -0,0 +1,111 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble
|
||||
|
||||
|
||||
class AirHockeyTournament(AirHockeyDouble):
|
||||
"""
|
||||
Class for the air hockey tournament. Consists of 2 robots which should play against each other.
|
||||
When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty.
|
||||
If a player accumulates 3 penalties his score is reduced by 1.
|
||||
"""
|
||||
def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"):
|
||||
self.agent_name = agent_name
|
||||
self.opponent_name = opponent_name
|
||||
|
||||
self.score = [0, 0]
|
||||
self.faults = [0, 0]
|
||||
self.start_side = None
|
||||
|
||||
self.timer = 0
|
||||
|
||||
def custom_render_callback(viewport, context):
|
||||
names = f"Agents \nScores \nFaults "
|
||||
data = f"{self.agent_name} - {self.opponent_name}\n "
|
||||
data += f"{self.score[0]} - {self.score[1]}\n "
|
||||
data += f"{self.faults[0]} - {self.faults[1]}"
|
||||
mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context)
|
||||
|
||||
viewer_params["custom_render_callback"] = custom_render_callback
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||
self.env_info['mallet']['radius'] * 2
|
||||
|
||||
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||
|
||||
def setup(self, obs):
|
||||
if self.start_side == None:
|
||||
self.start_side = np.random.choice([1, -1])
|
||||
self.prev_side = self.start_side
|
||||
|
||||
# Initial position of the puck
|
||||
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0] * self.start_side)
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
|
||||
self.prev_side = self.start_side
|
||||
self.timer = 0
|
||||
|
||||
super(AirHockeyTournament, self).setup(obs)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
# Puck stuck on one side for more than 15s
|
||||
if np.sign(puck_pos[0]) == self.prev_side:
|
||||
self.timer += self.dt
|
||||
else:
|
||||
self.prev_side *= -1
|
||||
self.timer = 0
|
||||
|
||||
if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15:
|
||||
if self.prev_side == -1:
|
||||
self.faults[0] += 1
|
||||
self.start_side = -1
|
||||
if self.faults[0] % 3 == 0:
|
||||
self.score[1] += 1
|
||||
else:
|
||||
self.faults[1] += 1
|
||||
self.start_side = 1
|
||||
if self.faults[1] % 3 == 0:
|
||||
self.score[0] += 1
|
||||
|
||||
return True
|
||||
|
||||
# Puck in Goal
|
||||
if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0:
|
||||
if puck_pos[0] > self.env_info['table']['length'] / 2:
|
||||
self.score[0] += 1
|
||||
self.start_side = -1
|
||||
return True
|
||||
|
||||
if puck_pos[0] < -self.env_info['table']['length'] / 2:
|
||||
self.score[1] += 1
|
||||
self.start_side = 1
|
||||
return True
|
||||
|
||||
# Puck stuck in the middle
|
||||
if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025:
|
||||
return True
|
||||
return super(AirHockeyTournament, self).is_absorbing(obs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyTournament()
|
||||
env.reset()
|
||||
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(14)
|
||||
steps += 1
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
if done or steps > env.info.horizon:
|
||||
steps = 0
|
||||
env.reset()
|
3
fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
Normal file
3
fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .env_base import AirHockeyBase
|
||||
from .defend import AirHockeyDefend
|
||||
from .hit import AirHockeyHit
|
76
fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
Normal file
76
fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
Normal file
@ -0,0 +1,76 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
|
||||
|
||||
|
||||
class AirHockeyDefend(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey defending task.
|
||||
The agent should stop the puck at the line x=-0.6.
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
self.init_velocity_range = (1, 3)
|
||||
|
||||
self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
def setup(self, state=None):
|
||||
# Set initial puck parameters
|
||||
puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
|
||||
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-0.5, 0.5)
|
||||
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-10, 10)
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super(AirHockeyDefend, self).setup(state)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, state):
|
||||
puck_pos, puck_vel = self.get_puck(state)
|
||||
# If puck is over the middle line and moving towards opponent
|
||||
if puck_pos[0] > 0 and puck_vel[0] > 0:
|
||||
return True
|
||||
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||
return True
|
||||
return super().is_absorbing(state)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyDefend()
|
||||
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
||||
while True:
|
||||
action = np.zeros(3)
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
224
fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
Normal file
224
fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
Normal file
@ -0,0 +1,224 @@
|
||||
import os
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path
|
||||
from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
|
||||
from mushroom_rl.utils.spaces import Box
|
||||
|
||||
|
||||
class AirHockeyBase(MuJoCo):
|
||||
"""
|
||||
Abstract class for all AirHockey Environments.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
|
||||
n_agents=1, viewer_params={}):
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
Args:
|
||||
n_agents (int, 1): number of agent to be used in the environment (one or two)
|
||||
"""
|
||||
|
||||
self.n_agents = n_agents
|
||||
|
||||
action_spec = []
|
||||
|
||||
observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
collision_spec = [("puck", ["puck"]),
|
||||
("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
|
||||
("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
|
||||
|
||||
if 1 <= self.n_agents <= 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
|
||||
|
||||
action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"]
|
||||
|
||||
observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS),
|
||||
("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)]
|
||||
|
||||
collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])]
|
||||
|
||||
if self.n_agents == 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
|
||||
|
||||
observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)]
|
||||
|
||||
action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"]
|
||||
# Add puck pos/vel again to transform into second agents frame
|
||||
observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)]
|
||||
|
||||
observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)]
|
||||
|
||||
additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS),
|
||||
("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)]
|
||||
|
||||
collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])]
|
||||
else:
|
||||
raise ValueError('n_agents should be 1 or 2')
|
||||
|
||||
self.env_info = dict()
|
||||
self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
|
||||
self.env_info['puck'] = {"radius": 0.03165}
|
||||
self.env_info['mallet'] = {"radius": 0.04815}
|
||||
self.env_info['n_agents'] = self.n_agents
|
||||
self.env_info['robot'] = {
|
||||
"n_joints": 3,
|
||||
"ee_desired_height": 0.1,
|
||||
"joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3],
|
||||
[np.pi / 2, np.pi / 2, np.pi * 2 / 3]]),
|
||||
|
||||
"joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi],
|
||||
[2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]),
|
||||
"base_frame": [],
|
||||
"control_frequency": 50,
|
||||
}
|
||||
|
||||
self.env_info['puck_pos_ids'] = [0, 1, 2]
|
||||
self.env_info['puck_vel_ids'] = [3, 4, 5]
|
||||
self.env_info['joint_pos_ids'] = [6, 7, 8]
|
||||
self.env_info['joint_vel_ids'] = [9, 10, 11]
|
||||
if self.n_agents == 2:
|
||||
self.env_info['opponent_ee_ids'] = [13, 14, 15]
|
||||
else:
|
||||
self.env_info['opponent_ee_ids'] = []
|
||||
|
||||
max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents
|
||||
|
||||
super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
|
||||
n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
|
||||
|
||||
# Construct the mujoco model at origin
|
||||
robot_model = mujoco.MjModel.from_xml_path(
|
||||
os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml"))
|
||||
robot_model.body('planar_robot_1/base').pos = np.zeros(3)
|
||||
robot_data = mujoco.MjData(robot_model)
|
||||
|
||||
# Add env_info that requires mujoco models
|
||||
self.env_info['dt'] = self.dt
|
||||
self.env_info["robot"]["joint_pos_limit"] = np.array(
|
||||
[self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T
|
||||
self.env_info["robot"]["robot_model"] = robot_model
|
||||
self.env_info["robot"]["robot_data"] = robot_data
|
||||
self.env_info["rl_info"] = self.info
|
||||
|
||||
frame_T = np.eye(4)
|
||||
temp = np.zeros((9, 1))
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
if self.n_agents == 2:
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
# Ids of the joint, which are controller by the action space
|
||||
self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
|
||||
|
||||
def _modify_mdp_info(self, mdp_info):
|
||||
obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
|
||||
*np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][0]])
|
||||
obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
|
||||
*np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][1]])
|
||||
if self.n_agents == 2:
|
||||
obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
|
||||
obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
|
||||
mdp_info.observation_space = Box(obs_low, obs_high)
|
||||
return mdp_info
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
|
||||
if type == 'pose':
|
||||
puck_w = np.eye(4)
|
||||
puck_w[:2, 3] = puck_in[:2]
|
||||
puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
|
||||
|
||||
puck_r = np.linalg.inv(robot_frame) @ puck_w
|
||||
puck_out = np.concatenate([puck_r[:2, 3],
|
||||
R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
|
||||
|
||||
if type == 'vel':
|
||||
rot_mat = robot_frame[:3, :3]
|
||||
|
||||
vel_lin = np.array([*puck_in[:2], 0])
|
||||
vel_ang = np.array([0., 0., puck_in[2]])
|
||||
|
||||
vel_lin_r = rot_mat.T @ vel_lin
|
||||
vel_ang_r = rot_mat.T @ vel_ang
|
||||
|
||||
puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
|
||||
return puck_out
|
||||
|
||||
def get_puck(self, obs):
|
||||
"""
|
||||
Getting the puck properties from the observations
|
||||
Args:
|
||||
obs: The current observation
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
|
||||
|
||||
"""
|
||||
puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
|
||||
puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
|
||||
return puck_pos, puck_vel
|
||||
|
||||
def get_ee(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_joints(self, obs):
|
||||
raise NotImplementedError
|
91
fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
Normal file
91
fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
Normal file
@ -0,0 +1,91 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase
|
||||
|
||||
|
||||
class AirHockeySingle(AirHockeyBase):
|
||||
"""
|
||||
Base class for single agent air hockey tasks.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
"""
|
||||
self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414])
|
||||
super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
|
||||
|
||||
self.filter_ratio = 0.274
|
||||
self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
|
||||
def get_ee(self):
|
||||
"""
|
||||
Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
|
||||
on the current joint position, this function exists to avoid redundant computations.
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
|
||||
"""
|
||||
ee_pos = self._read_data("robot_1/ee_pos")
|
||||
|
||||
ee_vel = self._read_data("robot_1/ee_vel")
|
||||
|
||||
return ee_pos, ee_vel
|
||||
|
||||
def get_joints(self, obs):
|
||||
"""
|
||||
Get joint position and velocity of the robot
|
||||
"""
|
||||
q_pos = np.zeros(3)
|
||||
q_vel = np.zeros(3)
|
||||
for i in range(3):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
return q_pos, q_vel
|
||||
|
||||
def _modify_observation(self, obs):
|
||||
new_obs = obs.copy()
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
|
||||
|
||||
puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
|
||||
|
||||
return new_obs
|
||||
|
||||
def setup(self, state=None):
|
||||
for i in range(3):
|
||||
self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
self.q_pos_prev[i] = self.init_state[i]
|
||||
self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0]
|
||||
|
||||
mujoco.mj_fwdPosition(self._model, self._data)
|
||||
super().setup(state)
|
||||
|
||||
def _create_observation(self, obs):
|
||||
# Filter the joint velocity
|
||||
q_pos, q_vel = self.get_joints(obs)
|
||||
q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
|
||||
self.q_pos_prev = q_pos
|
||||
self.q_vel_prev = q_vel_filter
|
||||
|
||||
for i in range(3):
|
||||
self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
|
||||
|
||||
yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
|
||||
return obs
|
84
fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
Normal file
84
fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
Normal file
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
|
||||
|
||||
class AirHockeyHit(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey hitting task.
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}):
|
||||
"""
|
||||
Constructor
|
||||
Args:
|
||||
moving_init(bool, False): If true, initialize the puck with inital velocity.
|
||||
"""
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
self.moving_init = moving_init
|
||||
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||
self.env_info['mallet']['radius'] * 2
|
||||
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||
self.init_velocity_range = (0, 0.5) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
|
||||
def setup(self, state=None):
|
||||
# Initial position of the puck
|
||||
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||
|
||||
# self.init_state = np.array([-0.9273, 0.9273, np.pi / 2])
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
|
||||
if self.moving_init:
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-2, 2, 1)
|
||||
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super(AirHockeyHit, self).setup(state)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
# Stop if the puck bounces back on the opponents wall
|
||||
if puck_pos[0] > 0 and puck_vel[0] < 0:
|
||||
return True
|
||||
return super(AirHockeyHit, self).is_absorbing(obs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyHit(moving_init=False)
|
||||
|
||||
env.reset()
|
||||
env.render()
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(3)
|
||||
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
2
fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
Normal file
2
fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
from .kinematics import inverse_kinematics, forward_kinematics, jacobian
|
||||
from .transformations import robot_to_world, world_to_robot
|
248
fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
Normal file
248
fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
Normal file
@ -0,0 +1,248 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
|
||||
|
||||
def forward_kinematics(mj_model, mj_data, q, link="ee"):
|
||||
"""
|
||||
Compute the forward kinematics of the robots.
|
||||
|
||||
IMPORTANT:
|
||||
For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
|
||||
parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
|
||||
where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
|
||||
case this function will return the wrong values.
|
||||
|
||||
Coordinate System:
|
||||
All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
|
||||
base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
|
||||
coordinate system
|
||||
|
||||
Args:
|
||||
mj_model (mujoco.MjModel):
|
||||
mujoco MjModel of the robot-only model
|
||||
mj_data (mujoco.MjData):
|
||||
mujoco MjData object generated from the model
|
||||
q (np.array):
|
||||
joint configuration for which the forward kinematics are computed
|
||||
link (string, "ee"):
|
||||
Link for which the forward kinematics is calculated. When using the iiwas the choices are
|
||||
["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
|
||||
|
||||
Returns
|
||||
-------
|
||||
position: numpy.ndarray, (3,)
|
||||
Position of the link in robot's base frame
|
||||
orientation: numpy.ndarray, (3, 3)
|
||||
Orientation of the link in robot's base frame
|
||||
"""
|
||||
|
||||
return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
|
||||
|
||||
|
||||
def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"):
|
||||
"""
|
||||
Compute the inverse kinematics of the robots.
|
||||
|
||||
IMPORTANT:
|
||||
For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
|
||||
parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
|
||||
where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is
|
||||
the case this function will return the wrong values.
|
||||
|
||||
Coordinate System:
|
||||
All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
|
||||
base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
|
||||
coordinate system
|
||||
|
||||
Args:
|
||||
mj_model (mujoco.MjModel):
|
||||
mujoco MjModel of the robot-only model
|
||||
mj_data (mujoco.MjData):
|
||||
mujoco MjData object generated from the model
|
||||
desired_position (numpy.ndarray, (3,)):
|
||||
The desired position of the selected link.
|
||||
desired_rotation (optional, numpy.array, (3,3)):
|
||||
The desired rotation of the selected link.
|
||||
initial_q (numpy.ndarray, None):
|
||||
The initial configuration of the algorithm, if set to None it will take the initial configuration of the
|
||||
mj_data.
|
||||
link (str, "ee"):
|
||||
Link for which the inverse kinematics is calculated. When using the iiwas the choices are
|
||||
["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
|
||||
"""
|
||||
q_init = np.zeros(mj_model.nq)
|
||||
if initial_q is None:
|
||||
q_init = mj_data.qpos
|
||||
else:
|
||||
q_init[:initial_q.size] = initial_q
|
||||
|
||||
q_l = mj_model.jnt_range[:, 0]
|
||||
q_h = mj_model.jnt_range[:, 1]
|
||||
lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2
|
||||
upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2
|
||||
|
||||
desired_quat = None
|
||||
if desired_rotation is not None:
|
||||
desired_quat = np.zeros(4)
|
||||
mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1))
|
||||
|
||||
return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model,
|
||||
mj_data, lower_limit, upper_limit)
|
||||
|
||||
|
||||
def jacobian(mj_model, mj_data, q, link="ee"):
|
||||
"""
|
||||
Compute the Jacobian of the robots.
|
||||
|
||||
IMPORTANT:
|
||||
For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
|
||||
parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
|
||||
where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
|
||||
case this function will return the wrong values.
|
||||
|
||||
Coordinate System:
|
||||
All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
|
||||
base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
|
||||
coordinate system
|
||||
|
||||
Args:
|
||||
mj_model (mujoco.MjModel):
|
||||
mujoco MjModel of the robot-only model
|
||||
mj_data (mujoco.MjData):
|
||||
mujoco MjData object generated from the model
|
||||
q (numpy.ndarray):
|
||||
joint configuration for which the forward kinematics are computed
|
||||
link (string, "ee"):
|
||||
Link for which the forward kinematics is calculated. When using the iiwas the choices are
|
||||
["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
|
||||
|
||||
Returns
|
||||
-------
|
||||
numpy.ndarray, (6, num_joints):
|
||||
The Jacobian matrix for the robot kinematics.
|
||||
"""
|
||||
return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
|
||||
|
||||
|
||||
def link_to_xml_name(mj_model, link):
|
||||
try:
|
||||
mj_model.body('iiwa_1/base')
|
||||
link_to_frame_idx = {
|
||||
"1": "iiwa_1/link_1",
|
||||
"2": "iiwa_1/link_2",
|
||||
"3": "iiwa_1/link_3",
|
||||
"4": "iiwa_1/link_4",
|
||||
"5": "iiwa_1/link_5",
|
||||
"6": "iiwa_1/link_6",
|
||||
"7": "iiwa_1/link_7",
|
||||
"ee": "iiwa_1/striker_joint_link",
|
||||
}
|
||||
except:
|
||||
link_to_frame_idx = {
|
||||
"1": "planar_robot_1/body_1",
|
||||
"2": "planar_robot_1/body_2",
|
||||
"3": "planar_robot_1/body_3",
|
||||
"ee": "planar_robot_1/body_ee",
|
||||
}
|
||||
return link_to_frame_idx[link]
|
||||
|
||||
|
||||
def _mujoco_fk(q, name, model, data):
|
||||
data.qpos[:len(q)] = q
|
||||
mujoco.mj_fwdPosition(model, data)
|
||||
return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy()
|
||||
|
||||
|
||||
def _mujoco_jac(q, name, model, data):
|
||||
data.qpos[:len(q)] = q
|
||||
dtype = data.qpos.dtype
|
||||
jac = np.empty((6, model.nv), dtype=dtype)
|
||||
jac_pos, jac_rot = jac[:3], jac[3:]
|
||||
mujoco.mj_fwdPosition(model, data)
|
||||
mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
|
||||
return jac
|
||||
|
||||
|
||||
def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit):
|
||||
IT_MAX = 1000
|
||||
eps = 1e-4
|
||||
damp = 1e-3
|
||||
progress_thresh = 20.0
|
||||
max_update_norm = 0.1
|
||||
rot_weight = 1
|
||||
i = 0
|
||||
|
||||
dtype = data.qpos.dtype
|
||||
|
||||
data.qpos = initial_q
|
||||
|
||||
neg_x_quat = np.empty(4, dtype=dtype)
|
||||
error_x_quat = np.empty(4, dtype=dtype)
|
||||
|
||||
if desired_pos is not None and desired_quat is not None:
|
||||
jac = np.empty((6, model.nv), dtype=dtype)
|
||||
err = np.empty(6, dtype=dtype)
|
||||
jac_pos, jac_rot = jac[:3], jac[3:]
|
||||
err_pos, err_rot = err[:3], err[3:]
|
||||
else:
|
||||
jac = np.empty((3, model.nv), dtype=dtype)
|
||||
err = np.empty(3, dtype=dtype)
|
||||
if desired_pos is not None:
|
||||
jac_pos, jac_rot = jac, None
|
||||
err_pos, err_rot = err, None
|
||||
elif desired_quat is not None:
|
||||
jac_pos, jac_rot = None, jac
|
||||
err_pos, err_rot = None, err
|
||||
else:
|
||||
raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics")
|
||||
|
||||
while True:
|
||||
# forward kinematics
|
||||
mujoco.mj_fwdPosition(model, data)
|
||||
|
||||
x_pos = data.body(name).xpos
|
||||
x_quat = data.body(name).xquat
|
||||
|
||||
error_norm = 0
|
||||
if desired_pos is not None:
|
||||
err_pos[:] = desired_pos - x_pos
|
||||
error_norm += np.linalg.norm(err_pos)
|
||||
|
||||
if desired_quat is not None:
|
||||
mujoco.mju_negQuat(neg_x_quat, x_quat)
|
||||
mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat)
|
||||
mujoco.mju_quat2Vel(err_rot, error_x_quat, 1)
|
||||
error_norm += np.linalg.norm(err_rot) * rot_weight
|
||||
|
||||
if error_norm < eps:
|
||||
success = True
|
||||
break
|
||||
if i >= IT_MAX:
|
||||
success = False
|
||||
break
|
||||
|
||||
mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
|
||||
|
||||
hess_approx = jac.T.dot(jac)
|
||||
joint_delta = jac.T.dot(err)
|
||||
|
||||
hess_approx += np.eye(hess_approx.shape[0]) * damp
|
||||
update_joints = np.linalg.solve(hess_approx, joint_delta)
|
||||
|
||||
update_norm = np.linalg.norm(update_joints)
|
||||
|
||||
# Check whether we are still making enough progress, and halt if not.
|
||||
progress_criterion = error_norm / update_norm
|
||||
if progress_criterion > progress_thresh:
|
||||
success = False
|
||||
break
|
||||
|
||||
if update_norm > max_update_norm:
|
||||
update_joints *= max_update_norm / update_norm
|
||||
|
||||
mujoco.mj_integratePos(model, data.qpos, update_joints, 1)
|
||||
data.qpos = np.clip(data.qpos, lower_limit, upper_limit)
|
||||
i += 1
|
||||
q_cur = data.qpos.copy()
|
||||
|
||||
return success, q_cur
|
65
fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
Normal file
65
fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
Normal file
@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def robot_to_world(base_frame, translation, rotation=None):
|
||||
"""
|
||||
Transform position or rotation optional from the robot base frame to the world frame
|
||||
|
||||
Args
|
||||
----
|
||||
base_frame: numpy.ndarray, (4,4)
|
||||
The transformation matrix from the world to robot base frame
|
||||
translation: ndarray, (3,)
|
||||
The 3D position to be transformed
|
||||
rotation: optional, ndarray, (3, 3)
|
||||
The rotation in the matrix form to be transformed
|
||||
|
||||
Returns
|
||||
-------
|
||||
position: ndarray (3,)
|
||||
The transformed 3D position
|
||||
rotation: ndarray (3, 3)
|
||||
The transformed rotation in the matrix form
|
||||
|
||||
"""
|
||||
|
||||
target = np.eye(4)
|
||||
target[:len(translation), 3] = translation
|
||||
if rotation is not None:
|
||||
target[:3, :3] = rotation
|
||||
|
||||
target_frame = base_frame @ target
|
||||
|
||||
return target_frame[:len(translation), 3], target_frame[:3, :3]
|
||||
|
||||
|
||||
def world_to_robot(base_frame, translation, rotation=None):
|
||||
"""
|
||||
Transfrom position and rotation (optional) from the world frame to the robot's base frame
|
||||
|
||||
Args
|
||||
----
|
||||
base_frame: ndarray, (4,4)
|
||||
The transformation matrix from the world to robot base frame
|
||||
translation: ndarray, (3,)
|
||||
The 3D position to be transformed
|
||||
rotation: optional, ndarray, (3, 3)
|
||||
The rotation in the matrix form to be tranformed
|
||||
|
||||
Returns
|
||||
-------
|
||||
position: ndarray, (3,)
|
||||
The transformed 3D position
|
||||
rotation: ndarray, (3, 3)
|
||||
The transformed rotation in the matrix form
|
||||
|
||||
"""
|
||||
|
||||
target = np.eye(4)
|
||||
target[:len(translation), 3] = translation
|
||||
if rotation is not None:
|
||||
target[:3, :3] = rotation
|
||||
|
||||
target_frame = np.linalg.inv(base_frame) @ target
|
||||
|
||||
return target_frame[:len(translation), 3], target_frame[:3, :3]
|
120
fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
Normal file
120
fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
Normal file
@ -0,0 +1,120 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics
|
||||
|
||||
|
||||
class UniversalJointPlugin:
|
||||
def __init__(self, env_model, env_data, env_info):
|
||||
self.env_info = env_info
|
||||
self.env_model = env_model
|
||||
self.env_data = env_data
|
||||
self.Kp = 20
|
||||
self.Kd = 0.31
|
||||
|
||||
self.universal_joint_ids = []
|
||||
self.universal_joint_ctrl_ids = []
|
||||
self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id,
|
||||
env_model.joint("iiwa_1/striker_joint_2").id]
|
||||
self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id,
|
||||
env_model.actuator("iiwa_1/striker_joint_2").id]
|
||||
action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
|
||||
"iiwa_1/joint_6", "iiwa_1/joint_7"]
|
||||
|
||||
if self.env_info['n_agents'] >= 2:
|
||||
self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id,
|
||||
env_model.joint("iiwa_2/striker_joint_2").id]
|
||||
self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id,
|
||||
env_model.actuator("iiwa_2/striker_joint_2").id]
|
||||
action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5",
|
||||
"iiwa_2/joint_6", "iiwa_2/joint_7"]
|
||||
|
||||
self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec]
|
||||
|
||||
self.filter_ratio = 0.273
|
||||
self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents'])
|
||||
self.u_joint_pos_prev = None
|
||||
self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents'])
|
||||
|
||||
def reset(self):
|
||||
self.u_joint_pos_prev = None
|
||||
self._control_universal_joint()
|
||||
for i in range(self.env_info['n_agents']):
|
||||
self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids]
|
||||
|
||||
self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des
|
||||
|
||||
def update(self):
|
||||
self._control_universal_joint()
|
||||
|
||||
def _control_universal_joint(self):
|
||||
self._compute_universal_joint()
|
||||
self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids]
|
||||
self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \
|
||||
(1 - self.filter_ratio) * self.u_joint_vel_prev
|
||||
|
||||
Kp = 4
|
||||
Kd = 0.31
|
||||
torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev
|
||||
self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque
|
||||
|
||||
def _compute_universal_joint(self):
|
||||
for i in range(self.env_info['n_agents']):
|
||||
q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]]
|
||||
# Have to exclude the puck joints
|
||||
pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'],
|
||||
self.env_info['robot']['robot_data'], q)
|
||||
|
||||
v_x = rot_mat[:, 0]
|
||||
v_y = rot_mat[:, 1]
|
||||
|
||||
# The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T
|
||||
# and the current y-axis. (0, 0, 1).T x v_y
|
||||
x_desired = np.array([-v_y[1], v_y[0], 0])
|
||||
|
||||
# Find the signed angle from the current to the desired x-axis around the y-axis
|
||||
# https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane
|
||||
q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired)
|
||||
if self.u_joint_pos_prev is not None:
|
||||
if q1 - self.u_joint_pos_prev[0] > np.pi:
|
||||
q1 -= np.pi * 2
|
||||
elif q1 - self.u_joint_pos_prev[0] < -np.pi:
|
||||
q1 += np.pi * 2
|
||||
|
||||
# Rotate the X axis by the calculated amount
|
||||
w = np.array([[0, -v_y[2], v_y[1]],
|
||||
[v_y[2], 0, -v_y[0]],
|
||||
[-v_y[1], v_y[0], 0]])
|
||||
|
||||
r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1))
|
||||
v_x_rotated = r @ v_x
|
||||
|
||||
# The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current
|
||||
# x-axis, which is already rotated around y. The negative is there because the x-axis is flipped.
|
||||
# -((0, 0, 1).T x v_x))
|
||||
y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0])
|
||||
|
||||
# Find the signed angle from the current to the desired y-axis around the new rotated x-axis
|
||||
q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired)
|
||||
|
||||
if self.u_joint_pos_prev is not None:
|
||||
if q2 - self.u_joint_pos_prev[1] > np.pi:
|
||||
q2 -= np.pi * 2
|
||||
elif q2 - self.u_joint_pos_prev[1] < -np.pi:
|
||||
q2 += np.pi * 2
|
||||
|
||||
alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
|
||||
alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
|
||||
|
||||
if self.u_joint_pos_prev is None:
|
||||
self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
|
||||
else:
|
||||
self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum(
|
||||
10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]),
|
||||
-np.pi * 0.01), np.pi * 0.01)
|
||||
|
||||
self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
|
||||
|
||||
return self.u_joint_pos_des
|
||||
|
||||
def _cross_3d(self, a, b):
|
||||
return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]])
|
@ -6,6 +6,7 @@ from gymnasium.envs.mujoco import MujocoEnv
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import desired_rod_quat
|
||||
from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import calculate_jerk_profile, calculate_mean_squared_jerk, calculate_dimensionless_jerk, calculate_maximum_jerk
|
||||
|
||||
import mujoco
|
||||
|
||||
@ -110,6 +111,26 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
|
||||
|
||||
return obs, reward, terminated, truncated, infos
|
||||
|
||||
def calculate_smoothness_metrics(self, velocity_profile, dt):
|
||||
"""
|
||||
Calculates the smoothness metrics for the given velocity profile.
|
||||
param velocity_profile: np.array
|
||||
The array containing the movement velocity profile.
|
||||
param dt: float
|
||||
The sampling time interval of the data.
|
||||
return mean_squared_jerk: float
|
||||
The mean squared jerk estimate of the given movement's smoothness.
|
||||
return maximum_jerk: float
|
||||
The maximum jerk estimate of the given movement's smoothness.
|
||||
return dimensionless_jerk: float
|
||||
The dimensionless jerk estimate of the given movement's smoothness.
|
||||
"""
|
||||
jerk_profile = calculate_jerk_profile(velocity_profile, dt)
|
||||
mean_squared_jerk = calculate_mean_squared_jerk(jerk_profile)
|
||||
maximum_jerk = calculate_maximum_jerk(jerk_profile)
|
||||
dimensionless_jerk = calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt)
|
||||
return mean_squared_jerk, maximum_jerk, dimensionless_jerk
|
||||
|
||||
def reset_model(self):
|
||||
# rest box to initial position
|
||||
self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing)
|
||||
|
@ -51,3 +51,19 @@ def rot_to_quat(theta, axis):
|
||||
quant[0] = np.sin(theta / 2.)
|
||||
quant[1:] = np.cos(theta / 2.) * axis
|
||||
return quant
|
||||
|
||||
def calculate_jerk_profile(velocity_profile, dt):
|
||||
jerk = np.diff(velocity_profile, 2, 0) / pow(dt, 2)
|
||||
return jerk
|
||||
|
||||
def calculate_mean_squared_jerk(jerk_profile):
|
||||
return np.mean(pow(jerk_profile, 2))
|
||||
|
||||
def calculate_maximum_jerk(jerk_profile):
|
||||
return np.max(abs(jerk_profile))
|
||||
|
||||
def calculate_dimensionless_jerk(jerk_profile, velocity_profile, dt):
|
||||
sum_squared_jerk = np.sum(pow(jerk_profile, 2), 0)
|
||||
duration = len(velocity_profile) * dt
|
||||
peak_velocity = np.max(abs(velocity_profile), 0)
|
||||
return np.mean(sum_squared_jerk * pow(duration, 3) / pow(peak_velocity, 2))
|
@ -262,76 +262,100 @@ class HopperJumpEnv(HopperEnvCustomXML):
|
||||
return True
|
||||
return False
|
||||
|
||||
# # TODO is that needed? if so test it
|
||||
# class HopperJumpStepEnv(HopperJumpEnv):
|
||||
#
|
||||
# def __init__(self,
|
||||
# xml_file='hopper_jump.xml',
|
||||
# forward_reward_weight=1.0,
|
||||
# ctrl_cost_weight=1e-3,
|
||||
# healthy_reward=1.0,
|
||||
# height_weight=3,
|
||||
# dist_weight=3,
|
||||
# terminate_when_unhealthy=False,
|
||||
# healthy_state_range=(-100.0, 100.0),
|
||||
# healthy_z_range=(0.5, float('inf')),
|
||||
# healthy_angle_range=(-float('inf'), float('inf')),
|
||||
# reset_noise_scale=5e-3,
|
||||
# exclude_current_positions_from_observation=False
|
||||
# ):
|
||||
#
|
||||
# self._height_weight = height_weight
|
||||
# self._dist_weight = dist_weight
|
||||
# super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||
# healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||
# exclude_current_positions_from_observation)
|
||||
#
|
||||
# def step(self, action):
|
||||
# self._steps += 1
|
||||
#
|
||||
# self.do_simulation(action, self.frame_skip)
|
||||
#
|
||||
# height_after = self.get_body_com("torso")[2]
|
||||
# site_pos_after = self.data.site('foot_site').xpos.copy()
|
||||
# self.max_height = max(height_after, self.max_height)
|
||||
#
|
||||
# ctrl_cost = self.control_cost(action)
|
||||
# healthy_reward = self.healthy_reward
|
||||
# height_reward = self._height_weight * height_after
|
||||
# goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
|
||||
# goal_dist_reward = -self._dist_weight * goal_dist
|
||||
# dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
|
||||
#
|
||||
# rewards = dist_reward + healthy_reward
|
||||
# costs = ctrl_cost
|
||||
# done = False
|
||||
#
|
||||
# # This is only for logging the distance to goal when first having the contact
|
||||
# has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
|
||||
#
|
||||
# if not self.init_floor_contact:
|
||||
# self.init_floor_contact = has_floor_contact
|
||||
# if self.init_floor_contact and not self.has_left_floor:
|
||||
# self.has_left_floor = not has_floor_contact
|
||||
# if not self.contact_with_floor and self.has_left_floor:
|
||||
# self.contact_with_floor = has_floor_contact
|
||||
#
|
||||
# if self.contact_dist is None and self.contact_with_floor:
|
||||
# self.contact_dist = goal_dist
|
||||
#
|
||||
# ##############################################################
|
||||
#
|
||||
# observation = self._get_obs()
|
||||
# reward = rewards - costs
|
||||
# info = {
|
||||
# 'height': height_after,
|
||||
# 'x_pos': site_pos_after,
|
||||
# 'max_height': copy.copy(self.max_height),
|
||||
# 'goal': copy.copy(self.goal),
|
||||
# 'goal_dist': goal_dist,
|
||||
# 'height_rew': height_reward,
|
||||
# 'healthy_reward': healthy_reward,
|
||||
# 'healthy': copy.copy(self.is_healthy),
|
||||
# 'contact_dist': copy.copy(self.contact_dist) or 0
|
||||
# }
|
||||
# return observation, reward, done, info
|
||||
class HopperJumpMarkovRew(HopperJumpEnv):
|
||||
def step(self, action):
|
||||
self._steps += 1
|
||||
|
||||
self.do_simulation(action, self.frame_skip)
|
||||
|
||||
height_after = self.get_body_com("torso")[2]
|
||||
# site_pos_after = self.data.get_site_xpos('foot_site')
|
||||
site_pos_after = self.data.site('foot_site').xpos
|
||||
self.max_height = max(height_after, self.max_height)
|
||||
|
||||
has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
|
||||
|
||||
if not self.init_floor_contact:
|
||||
self.init_floor_contact = has_floor_contact
|
||||
if self.init_floor_contact and not self.has_left_floor:
|
||||
self.has_left_floor = not has_floor_contact
|
||||
if not self.contact_with_floor and self.has_left_floor:
|
||||
self.contact_with_floor = has_floor_contact
|
||||
|
||||
ctrl_cost = self.control_cost(action)
|
||||
costs = ctrl_cost
|
||||
terminated = False
|
||||
truncated = False
|
||||
|
||||
goal_dist = np.linalg.norm(site_pos_after - self.goal)
|
||||
if self.contact_dist is None and self.contact_with_floor:
|
||||
self.contact_dist = goal_dist
|
||||
|
||||
rewards = 0
|
||||
if not self.sparse or (self.sparse and self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP):
|
||||
healthy_reward = self.healthy_reward
|
||||
distance_reward = -goal_dist * self._dist_weight
|
||||
height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
|
||||
contact_reward = -(self.contact_dist or 5) * self._contact_weight
|
||||
rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
|
||||
|
||||
observation = self._get_obs()
|
||||
|
||||
# While loop to simulate the process after jump to make the task Markovian
|
||||
if self.sparse and self.has_left_floor:
|
||||
while self._steps < MAX_EPISODE_STEPS_HOPPERJUMP:
|
||||
# Simulate to the end of the episode
|
||||
self._steps += 1
|
||||
|
||||
try:
|
||||
self.do_simulation(np.zeros_like(action), self.frame_skip)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
height_after = self.get_body_com("torso")[2]
|
||||
#site_pos_after = self.data.get_site_xpos('foot_site')
|
||||
site_pos_after = self.data.site('foot_site').xpos
|
||||
self.max_height = max(height_after, self.max_height)
|
||||
|
||||
has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
|
||||
|
||||
if not self.init_floor_contact:
|
||||
self.init_floor_contact = has_floor_contact
|
||||
if self.init_floor_contact and not self.has_left_floor:
|
||||
self.has_left_floor = not has_floor_contact
|
||||
if not self.contact_with_floor and self.has_left_floor:
|
||||
self.contact_with_floor = has_floor_contact
|
||||
|
||||
ctrl_cost = self.control_cost(action)
|
||||
costs = ctrl_cost
|
||||
done = False
|
||||
|
||||
goal_dist = np.linalg.norm(site_pos_after - self.goal)
|
||||
if self.contact_dist is None and self.contact_with_floor:
|
||||
self.contact_dist = goal_dist
|
||||
|
||||
rewards = 0
|
||||
|
||||
# Task has reached the end, compute the sparse reward
|
||||
done = True
|
||||
healthy_reward = self.healthy_reward
|
||||
distance_reward = -goal_dist * self._dist_weight
|
||||
height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
|
||||
contact_reward = -(self.contact_dist or 5) * self._contact_weight
|
||||
rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
|
||||
|
||||
reward = rewards - costs
|
||||
info = dict(
|
||||
height=height_after,
|
||||
x_pos=site_pos_after,
|
||||
max_height=self.max_height,
|
||||
goal=self.goal[:1],
|
||||
goal_dist=goal_dist,
|
||||
height_rew=self.max_height,
|
||||
healthy_reward=self.healthy_reward,
|
||||
healthy=self.is_healthy,
|
||||
contact_dist=self.contact_dist or 0,
|
||||
num_steps=self._steps,
|
||||
has_left_floor=self.has_left_floor
|
||||
)
|
||||
return observation, reward, terminated, truncated, info
|
||||
|
@ -5,11 +5,12 @@ from gymnasium import utils, spaces
|
||||
from gymnasium.envs.mujoco import MujocoEnv
|
||||
|
||||
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force
|
||||
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high
|
||||
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, jnt_vel_low, jnt_vel_high
|
||||
|
||||
import mujoco
|
||||
|
||||
MAX_EPISODE_STEPS_TABLE_TENNIS = 350
|
||||
MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER = 300
|
||||
|
||||
CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]])
|
||||
CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65],
|
||||
@ -18,6 +19,9 @@ CONTEXT_BOUNDS_SWICHING = np.array([[-1.0, -0.65, -1.0, 0.],
|
||||
[-0.2, 0.65, -0.2, 0.65]])
|
||||
|
||||
|
||||
DEFAULT_ROBOT_INIT_POS = np.array([0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 1.5])
|
||||
DEFAULT_ROBOT_INIT_VEL = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
|
||||
class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
"""
|
||||
7 DoF table tennis environment
|
||||
@ -34,7 +38,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
|
||||
goal_switching_step: int = None,
|
||||
enable_artificial_wind: bool = False, **kwargs):
|
||||
enable_artificial_wind: bool = False,
|
||||
random_pos_scale: float = 0.0,
|
||||
random_vel_scale: float = 0.0,
|
||||
):
|
||||
utils.EzPickle.__init__(**locals())
|
||||
self._steps = 0
|
||||
|
||||
@ -48,6 +55,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
self._id_set = False
|
||||
|
||||
# initial robot state
|
||||
self._random_pos_scale = random_pos_scale
|
||||
self._random_vel_scale = random_vel_scale
|
||||
|
||||
# reward calculation
|
||||
self.ball_landing_pos = None
|
||||
self._goal_pos = np.zeros(2)
|
||||
@ -156,7 +167,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
"num_steps": self._steps,
|
||||
}
|
||||
|
||||
terminated, truncated = self._terminated, False
|
||||
terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS
|
||||
|
||||
return self._get_obs(), reward, terminated, truncated, info
|
||||
|
||||
@ -183,8 +194,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
|
||||
|
||||
self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5])
|
||||
self.data.qvel[:7] = np.zeros(7)
|
||||
robot_init_pos, robot_init_vel = self.get_initial_robot_state()
|
||||
|
||||
self.data.qpos[:7] = robot_init_pos
|
||||
self.data.qvel[:7] = robot_init_vel
|
||||
|
||||
mujoco.mj_forward(self.model, self.data)
|
||||
|
||||
@ -257,7 +270,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs, tau_bound, delay_bound):
|
||||
obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj
|
||||
penalty = self._get_traj_invalid_penalty(action, pos_traj, tau_bound, delay_bound)
|
||||
return obs, penalty, True, False, {
|
||||
return obs, penalty, False, True, {
|
||||
"hit_ball": [False],
|
||||
"ball_returned_success": [False],
|
||||
"land_dist_error": [10.],
|
||||
@ -274,6 +287,179 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
return False, pos_traj, vel_traj
|
||||
return True, pos_traj, vel_traj
|
||||
|
||||
class TableTennisMarkovian(TableTennisEnv):
|
||||
def _get_reward2(self, hit_now, land_now):
|
||||
|
||||
# Phase 1 not hit ball
|
||||
if not self._hit_ball:
|
||||
# Not hit ball
|
||||
min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
|
||||
return 0.005 * (1 - np.tanh(min_r_b_dist**2))
|
||||
|
||||
# Phase 2 hit ball now
|
||||
elif self._hit_ball and hit_now:
|
||||
return 2
|
||||
|
||||
# Phase 3 hit ball already and not land yet
|
||||
elif self._hit_ball and self._ball_landing_pos is None:
|
||||
min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1))
|
||||
return 0.02 * (1 - np.tanh(min_b_des_b_dist**2))
|
||||
|
||||
# Phase 4 hit ball already and land now
|
||||
elif self._hit_ball and land_now:
|
||||
over_net_bonus = int(self._ball_landing_pos[0] < 0)
|
||||
min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
|
||||
return 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
|
||||
|
||||
# Phase 5 hit ball already and land already
|
||||
elif self._hit_ball and not land_now and self._ball_landing_pos is not None:
|
||||
return 0
|
||||
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
def _get_reward(self, terminated):
|
||||
# if not terminated:
|
||||
# return 0
|
||||
|
||||
min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
|
||||
if not self._hit_ball:
|
||||
# Not hit ball
|
||||
return 0.2 * (1 - np.tanh(min_r_b_dist**2))
|
||||
elif self._ball_landing_pos is None:
|
||||
# Hit ball but not landing pos
|
||||
min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1))
|
||||
return 2 + (1 - np.tanh(min_b_des_b_dist**2))
|
||||
else:
|
||||
# Hit ball and land
|
||||
min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
|
||||
over_net_bonus = int(self._ball_landing_pos[0] < 0)
|
||||
return 2 + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
|
||||
|
||||
|
||||
def _get_traj_invalid_penalty(self, action, pos_traj, tau_bound, delay_bound):
|
||||
tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
|
||||
delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
|
||||
violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0))
|
||||
violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0))
|
||||
invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \
|
||||
violate_high_bound_error + violate_low_bound_error
|
||||
return -invalid_penalty
|
||||
|
||||
def get_invalid_traj_step_penalty(self, pos_traj):
|
||||
violate_high_bound_error = (
|
||||
np.maximum(pos_traj - jnt_pos_high, 0).mean())
|
||||
violate_low_bound_error = (
|
||||
np.maximum(jnt_pos_low - pos_traj, 0).mean())
|
||||
invalid_penalty = violate_high_bound_error + violate_low_bound_error
|
||||
|
||||
|
||||
def _update_game_state(self, action):
|
||||
for _ in range(self.frame_skip):
|
||||
if self._enable_artificial_wind:
|
||||
self.data.qfrc_applied[-2] = self._artificial_force
|
||||
try:
|
||||
self.do_simulation(action, 1)
|
||||
except Exception as e:
|
||||
print("Simulation get unstable return with MujocoException: ", e)
|
||||
unstable_simulation = True
|
||||
self._terminated = True
|
||||
break
|
||||
|
||||
# Update game state
|
||||
if not self._terminated:
|
||||
if not self._hit_ball:
|
||||
self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
|
||||
self._contact_checker(self._ball_contact_id, self._bat_back_id)
|
||||
if not self._hit_ball:
|
||||
ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id)
|
||||
if ball_land_on_floor_no_hit:
|
||||
self._ball_landing_pos = self.data.body("target_ball").xpos.copy()
|
||||
self._terminated = True
|
||||
if self._hit_ball and not self._ball_contact_after_hit:
|
||||
if self._contact_checker(self._ball_contact_id, self._floor_contact_id): # first check contact with floor
|
||||
self._ball_contact_after_hit = True
|
||||
self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
|
||||
self._terminated = True
|
||||
elif self._contact_checker(self._ball_contact_id, self._table_contact_id): # second check contact with table
|
||||
self._ball_contact_after_hit = True
|
||||
self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
|
||||
if self._ball_landing_pos[0] < 0.: # ball lands on the opponent side
|
||||
self._ball_return_success = True
|
||||
self._terminated = True
|
||||
|
||||
# update ball trajectory & racket trajectory
|
||||
self._ball_traj.append(self.data.body("target_ball").xpos.copy())
|
||||
self._racket_traj.append(self.data.geom("bat").xpos.copy())
|
||||
|
||||
def ball_racket_contact(self):
|
||||
return self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
|
||||
self._contact_checker(self._ball_contact_id, self._bat_back_id)
|
||||
|
||||
def step(self, action):
|
||||
if not self._id_set:
|
||||
self._set_ids()
|
||||
|
||||
unstable_simulation = False
|
||||
hit_already = self._hit_ball
|
||||
if self._steps == self._goal_switching_step and self.np_random.uniform() < 0.5:
|
||||
new_goal_pos = self._generate_goal_pos(random=True)
|
||||
new_goal_pos[1] = -new_goal_pos[1]
|
||||
self._goal_pos = new_goal_pos
|
||||
self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
|
||||
mujoco.mj_forward(self.model, self.data)
|
||||
|
||||
self._update_game_state(action)
|
||||
self._steps += 1
|
||||
|
||||
obs = self._get_obs()
|
||||
|
||||
# Compute reward
|
||||
if unstable_simulation:
|
||||
reward = -25
|
||||
else:
|
||||
# reward = self._get_reward(self._terminated)
|
||||
# hit_now = not hit_already and self._hit_ball
|
||||
hit_finish = self._hit_ball and not self.ball_racket_contact()
|
||||
|
||||
if hit_finish:
|
||||
# Clean the ball and racket traj before hit
|
||||
self._ball_traj = []
|
||||
self._racket_traj = []
|
||||
|
||||
# Simulate the rest of the traj
|
||||
reward = self._get_reward2(True, False)
|
||||
while self._steps < MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER:
|
||||
land_already = self._ball_landing_pos is not None
|
||||
self._update_game_state(np.zeros_like(action))
|
||||
self._steps += 1
|
||||
|
||||
land_now = (not land_already
|
||||
and self._ball_landing_pos is not None)
|
||||
temp_reward = self._get_reward2(False, land_now)
|
||||
# print(temp_reward)
|
||||
reward += temp_reward
|
||||
|
||||
# Uncomment the line below to visualize the sim after hit
|
||||
# self.render(mode="human")
|
||||
else:
|
||||
reward = self._get_reward2(False, False)
|
||||
|
||||
# Update ball landing error
|
||||
land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \
|
||||
if self._ball_landing_pos is not None else 10.
|
||||
|
||||
info = {
|
||||
"hit_ball": self._hit_ball,
|
||||
"ball_returned_success": self._ball_return_success,
|
||||
"land_dist_error": land_dist_err,
|
||||
"is_success": self._ball_return_success and land_dist_err < 0.2,
|
||||
"num_steps": self._steps,
|
||||
}
|
||||
|
||||
terminated, truncated = self._terminated, self._steps == MAX_EPISODE_STEPS_TABLE_TENNIS_MARKOV_VER
|
||||
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
class TableTennisWind(TableTennisEnv):
|
||||
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, **kwargs):
|
||||
@ -296,7 +482,16 @@ class TableTennisWind(TableTennisEnv):
|
||||
])
|
||||
return obs
|
||||
|
||||
|
||||
class TableTennisGoalSwitching(TableTennisEnv):
|
||||
def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99, **kwargs):
|
||||
super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step, **kwargs)
|
||||
|
||||
|
||||
class TableTennisRandomInit(TableTennisEnv):
|
||||
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
|
||||
random_pos_scale: float = 1.0,
|
||||
random_vel_scale: float = 0.0):
|
||||
super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip,
|
||||
random_pos_scale=random_pos_scale,
|
||||
random_vel_scale=random_vel_scale)
|
||||
|
||||
|
@ -2,8 +2,9 @@ import numpy as np
|
||||
|
||||
jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
|
||||
jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
|
||||
delay_bound = [0.05, 0.15]
|
||||
tau_bound = [0.5, 1.5]
|
||||
|
||||
jnt_vel_low = np.ones(7) * -7
|
||||
jnt_vel_high = np.ones(7) * 7
|
||||
|
||||
net_height = 0.1
|
||||
table_height = 0.77
|
||||
|
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "fancy_gym"
|
||||
version = "0.1.0"
|
||||
version = "0.1.4"
|
||||
description = "Fancy Gym: Unifying interface for various RL benchmarks with support for Black Box approaches."
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
|
1
setup.py
1
setup.py
@ -14,6 +14,7 @@ extras = {
|
||||
'box2d': ['gymnasium[box2d]>=0.26.0'],
|
||||
'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'],
|
||||
'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"],
|
||||
'mushroom-rl': ['mushroom-rl'],
|
||||
}
|
||||
|
||||
# All dependencies
|
||||
|
@ -41,7 +41,7 @@ class ToyEnv(gym.Env):
|
||||
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
def render(self, mode="human"):
|
||||
def render(self):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -33,7 +33,7 @@ class ToyEnv(gym.Env):
|
||||
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
def render(self, mode="human"):
|
||||
def render(self):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@ class ToyEnv(gym.Env):
|
||||
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
def render(self, mode="human"):
|
||||
def render(self):
|
||||
pass
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user