Merge branch '81_fix_docs_mp' into pypi-package
This commit is contained in:
commit
13e6a3b53a
@ -75,7 +75,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
||||
self.observation_space = self._get_observation_space()
|
||||
|
||||
# rendering
|
||||
self.render_kwargs = {}
|
||||
self.do_render = False
|
||||
self.verbose = verbose
|
||||
|
||||
# condition value
|
||||
@ -164,7 +164,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
||||
dtype=self.env.observation_space.dtype)
|
||||
|
||||
infos = dict()
|
||||
done = False
|
||||
terminated, truncated = False, False
|
||||
|
||||
if not traj_is_valid:
|
||||
obs, trajectory_return, terminated, truncated, infos = self.env.invalid_traj_callback(action, position, velocity,
|
||||
@ -190,8 +190,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
||||
elems[t] = v
|
||||
infos[k] = elems
|
||||
|
||||
if self.render_kwargs:
|
||||
self.env.render(**self.render_kwargs)
|
||||
if self.do_render:
|
||||
self.env.render()
|
||||
|
||||
|
||||
if terminated or truncated or (self.replanning_schedule(self.env.get_wrapper_attr('current_pos'), self.env.get_wrapper_attr('current_vel'), obs, c_action, t + 1 + self.current_traj_steps) and self.plan_steps < self.max_planning_times):
|
||||
|
||||
@ -215,10 +216,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
||||
trajectory_return = self.reward_aggregation(rewards[:t + 1])
|
||||
return self.observation(obs), trajectory_return, terminated, truncated, infos
|
||||
|
||||
def render(self, **kwargs):
|
||||
"""Only set render options here, such that they can be used during the rollout.
|
||||
This only needs to be called once"""
|
||||
self.render_kwargs = kwargs
|
||||
def render(self):
|
||||
self.do_render = True
|
||||
|
||||
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \
|
||||
-> Tuple[ObsType, Dict[str, Any]]:
|
||||
|
@ -289,3 +289,25 @@ register(
|
||||
'goal_switching_step': 99
|
||||
}
|
||||
)
|
||||
|
||||
# Air Hockey environments
|
||||
for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]:
|
||||
register(
|
||||
id=f'fancy/AirHockey-{env_mode}-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
|
||||
max_episode_steps=500,
|
||||
add_mp_types=[],
|
||||
kwargs={
|
||||
'env_mode': env_mode
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id=f'fancy/AirHockey-tournament-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:AirHockeyEnv',
|
||||
max_episode_steps=15000,
|
||||
add_mp_types=[],
|
||||
kwargs={
|
||||
'env_mode': 'tournament'
|
||||
}
|
||||
)
|
||||
|
@ -14,12 +14,14 @@ class BaseReacherEnv(gym.Env):
|
||||
Base class for all reaching environments.
|
||||
"""
|
||||
|
||||
def __init__(self, n_links: int, random_start: bool = True, allow_self_collision: bool = False):
|
||||
def __init__(self, n_links: int, random_start: bool = True, allow_self_collision: bool = False, render_mode: str = None):
|
||||
super().__init__()
|
||||
self.link_lengths = np.ones(n_links)
|
||||
self.n_links = n_links
|
||||
self._dt = 0.01
|
||||
|
||||
self.render_mode = render_mode
|
||||
|
||||
self.random_start = random_start
|
||||
|
||||
self.allow_self_collision = allow_self_collision
|
||||
|
@ -10,8 +10,8 @@ class BaseReacherDirectEnv(BaseReacherEnv):
|
||||
"""
|
||||
|
||||
def __init__(self, n_links: int, random_start: bool = True,
|
||||
allow_self_collision: bool = False):
|
||||
super().__init__(n_links, random_start, allow_self_collision)
|
||||
allow_self_collision: bool = False, **kwargs):
|
||||
super().__init__(n_links, random_start, allow_self_collision, **kwargs)
|
||||
|
||||
self.max_vel = 2 * np.pi
|
||||
action_bound = np.ones((self.n_links,)) * self.max_vel
|
||||
|
@ -10,8 +10,8 @@ class BaseReacherTorqueEnv(BaseReacherEnv):
|
||||
"""
|
||||
|
||||
def __init__(self, n_links: int, random_start: bool = True,
|
||||
allow_self_collision: bool = False):
|
||||
super().__init__(n_links, random_start, allow_self_collision)
|
||||
allow_self_collision: bool = False, **kwargs):
|
||||
super().__init__(n_links, random_start, allow_self_collision, **kwargs)
|
||||
|
||||
self.max_torque = 1000
|
||||
action_bound = np.ones((self.n_links,)) * self.max_torque
|
||||
|
@ -17,9 +17,9 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
|
||||
hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
|
||||
allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"):
|
||||
allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple", **kwargs):
|
||||
|
||||
super().__init__(n_links, random_start, allow_self_collision)
|
||||
super().__init__(n_links, random_start, allow_self_collision, **kwargs)
|
||||
|
||||
# provided initial parameters
|
||||
self.initial_x = hole_x # x-position of center of hole
|
||||
@ -178,7 +178,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
return False
|
||||
|
||||
def render(self, mode='human'):
|
||||
def render(self):
|
||||
if self.fig is None:
|
||||
# Create base figure once on the beginning. Afterwards only update
|
||||
plt.ion()
|
||||
@ -197,7 +197,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
self.fig.gca().set_title(
|
||||
f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}")
|
||||
|
||||
if mode == "human":
|
||||
if self.render_mode == "human":
|
||||
|
||||
# arm
|
||||
self.line.set_data(self._joints[:, 0], self._joints[:, 1])
|
||||
@ -205,7 +205,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
self.fig.canvas.draw()
|
||||
self.fig.canvas.flush_events()
|
||||
|
||||
elif mode == "partial":
|
||||
elif self.render_mode == "partial":
|
||||
if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided:
|
||||
# Arm
|
||||
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k',
|
||||
|
@ -17,8 +17,8 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
|
||||
"""
|
||||
|
||||
def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True,
|
||||
allow_self_collision: bool = False, ):
|
||||
super().__init__(n_links, random_start, allow_self_collision)
|
||||
allow_self_collision: bool = False, **kwargs):
|
||||
super().__init__(n_links, random_start, allow_self_collision, **kwargs)
|
||||
|
||||
# provided initial parameters
|
||||
self.inital_target = target
|
||||
@ -98,7 +98,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
|
||||
def _check_collisions(self) -> bool:
|
||||
return self._check_self_collision()
|
||||
|
||||
def render(self, mode='human'): # pragma: no cover
|
||||
def render(self): # pragma: no cover
|
||||
if self.fig is None:
|
||||
# Create base figure once on the beginning. Afterwards only update
|
||||
plt.ion()
|
||||
|
@ -13,9 +13,9 @@ from . import MPWrapper
|
||||
class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None,
|
||||
target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):
|
||||
target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000, **kwargs):
|
||||
|
||||
super().__init__(n_links, random_start, allow_self_collision)
|
||||
super().__init__(n_links, random_start, allow_self_collision, **kwargs)
|
||||
|
||||
# provided initial parameters
|
||||
self.intitial_target = target # provided target value
|
||||
@ -123,7 +123,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
def _check_collisions(self) -> bool:
|
||||
return self._check_self_collision()
|
||||
|
||||
def render(self, mode='human'):
|
||||
def render(self):
|
||||
goal_pos = self._goal.T
|
||||
via_pos = self._via_point.T
|
||||
|
||||
@ -146,7 +146,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
self.fig.gca().set_title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}")
|
||||
|
||||
if mode == "human":
|
||||
if self.render_mode == "human":
|
||||
# goal
|
||||
if self._steps == 1:
|
||||
self.goal_point_plot.set_data(goal_pos[0], goal_pos[1])
|
||||
@ -158,7 +158,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
self.fig.canvas.draw()
|
||||
self.fig.canvas.flush_events()
|
||||
|
||||
elif mode == "partial":
|
||||
elif self.render_mode == "partial":
|
||||
if self._steps == 1:
|
||||
# fig, ax = plt.subplots()
|
||||
# Add the patch to the Axes
|
||||
@ -178,7 +178,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
plt.ylim([-1.1, lim])
|
||||
plt.pause(0.01)
|
||||
|
||||
elif mode == "final":
|
||||
elif self.render_mode == "final":
|
||||
if self._steps == 199 or self._is_collided:
|
||||
# fig, ax = plt.subplots()
|
||||
|
||||
|
@ -9,3 +9,8 @@ from .reacher.reacher import ReacherEnv
|
||||
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
|
||||
from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
|
||||
from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching
|
||||
|
||||
try:
|
||||
from .air_hockey.air_hockey_env_wrapper import AirHockeyEnv
|
||||
except ModuleNotFoundError:
|
||||
print("[FANCY GYM] Air Hockey not available (depends on mushroom-rl, dmc, mujoco)")
|
26
fancy_gym/envs/mujoco/air_hockey/LICENSE
Normal file
26
fancy_gym/envs/mujoco/air_hockey/LICENSE
Normal file
@ -0,0 +1,26 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Puze Liu, Jonas Guenster, Davide Tateo.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
---
|
||||
|
||||
This project is a derivative of [AirHockeyChallenge](https://github.com/AirHockeyChallenge/air_hockey_challenge).
|
||||
The changes are mostly focused on adapting the provided environments to fancy_gym.
|
0
fancy_gym/envs/mujoco/air_hockey/__init__.py
Normal file
0
fancy_gym/envs/mujoco/air_hockey/__init__.py
Normal file
180
fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
Normal file
180
fancy_gym/envs/mujoco/air_hockey/air_hockey_env_wrapper.py
Normal file
@ -0,0 +1,180 @@
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
from gymnasium import spaces
|
||||
|
||||
import fancy_gym.envs.mujoco.air_hockey.constraints as constraints
|
||||
from fancy_gym.envs.mujoco.air_hockey import position_control_wrapper as position
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world
|
||||
from mushroom_rl.core import Environment
|
||||
|
||||
class AirHockeyEnv(Environment):
|
||||
metadata = {"render_modes": ["human"], "render_fps": 50}
|
||||
|
||||
def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs):
|
||||
"""
|
||||
Environment Constructor
|
||||
|
||||
Args:
|
||||
env [string]:
|
||||
The string to specify the running environments. Available environments: [3dof-hit, 3dof-defend, 7dof-hit, 7dof-defend, tournament].
|
||||
interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
|
||||
polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
|
||||
Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
|
||||
of position, velocity and acceleration of the shape (20, 3, n_joints)
|
||||
"""
|
||||
|
||||
env_dict = {
|
||||
"tournament": position.IiwaPositionTournament,
|
||||
|
||||
"7dof-hit": position.IiwaPositionHit,
|
||||
"7dof-defend": position.IiwaPositionDefend,
|
||||
|
||||
"3dof-hit": position.PlanarPositionHit,
|
||||
"3dof-defend": position.PlanarPositionDefend
|
||||
}
|
||||
|
||||
if env_mode not in env_dict:
|
||||
raise Exception(f"Please specify one of the environments in {list(env_dict.keys())} for env_mode parameter!")
|
||||
|
||||
if env_mode == "tournament" and type(interpolation_order) != tuple:
|
||||
interpolation_order = (interpolation_order, interpolation_order)
|
||||
|
||||
self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs)
|
||||
self.env_name = env_mode
|
||||
self.env_info = self.base_env.env_info
|
||||
single_robot_obs_size = len(self.base_env.info.observation_space.low)
|
||||
if env_mode == "tournament":
|
||||
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64)
|
||||
else:
|
||||
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64)
|
||||
robot_info = self.env_info["robot"]
|
||||
|
||||
if env_mode != "tournament":
|
||||
if interpolation_order in [1, 2]:
|
||||
self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
||||
if interpolation_order in [3, 4, -1]:
|
||||
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
||||
if interpolation_order in [5]:
|
||||
self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
||||
else:
|
||||
acts = [None, None]
|
||||
for i in range(2):
|
||||
if interpolation_order[i] in [1, 2]:
|
||||
acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1])
|
||||
if interpolation_order[i] in [3, 4, -1]:
|
||||
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]]))
|
||||
if interpolation_order[i] in [5]:
|
||||
acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),
|
||||
high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]]))
|
||||
self.action_space = spaces.Tuple((acts[0], acts[1]))
|
||||
|
||||
constraint_list = constraints.ConstraintList()
|
||||
constraint_list.add(constraints.JointPositionConstraint(self.env_info))
|
||||
constraint_list.add(constraints.JointVelocityConstraint(self.env_info))
|
||||
constraint_list.add(constraints.EndEffectorConstraint(self.env_info))
|
||||
if "7dof" in self.env_name or self.env_name == "tournament":
|
||||
constraint_list.add(constraints.LinkConstraint(self.env_info))
|
||||
|
||||
self.env_info['constraints'] = constraint_list
|
||||
self.env_info['env_name'] = self.env_name
|
||||
|
||||
self.render_mode = render_mode
|
||||
self.render_human_active = False
|
||||
|
||||
super().__init__(self.base_env.info)
|
||||
|
||||
def step(self, action):
|
||||
obs, reward, done, info = self.base_env.step(action)
|
||||
|
||||
if "tournament" in self.env_name:
|
||||
info["constraints_value"] = list()
|
||||
info["jerk"] = list()
|
||||
for i in range(2):
|
||||
obs_agent = obs[i * int(len(obs) / 2): (i + 1) * int(len(obs) / 2)]
|
||||
info["constraints_value"].append(deepcopy(self.env_info['constraints'].fun(
|
||||
obs_agent[self.env_info['joint_pos_ids']], obs_agent[self.env_info['joint_vel_ids']])))
|
||||
info["jerk"].append(
|
||||
self.base_env.jerk[i * self.env_info['robot']['n_joints']:(i + 1) * self.env_info['robot'][
|
||||
'n_joints']])
|
||||
|
||||
info["score"] = self.base_env.score
|
||||
info["faults"] = self.base_env.faults
|
||||
|
||||
else:
|
||||
info["constraints_value"] = deepcopy(self.env_info['constraints'].fun(obs[self.env_info['joint_pos_ids']],
|
||||
obs[self.env_info['joint_vel_ids']]))
|
||||
info["jerk"] = self.base_env.jerk
|
||||
info["success"] = self.check_success(obs)
|
||||
|
||||
if self.env_info['env_name'] == "tournament":
|
||||
obs = np.array(np.split(obs, 2))
|
||||
|
||||
if self.render_human_active:
|
||||
self.base_env.render()
|
||||
|
||||
return obs, reward, done, False, info
|
||||
|
||||
def render(self):
|
||||
self.render_human_active = True
|
||||
|
||||
def reset(self, seed=None, options={}):
|
||||
self.base_env.seed(seed)
|
||||
obs = self.base_env.reset()
|
||||
if self.env_info['env_name'] == "tournament":
|
||||
obs = np.array(np.split(obs, 2))
|
||||
return obs, {}
|
||||
|
||||
def check_success(self, obs):
|
||||
puck_pos, puck_vel = self.base_env.get_puck(obs)
|
||||
|
||||
puck_pos, _ = robot_to_world(self.base_env.env_info["robot"]["base_frame"][0], translation=puck_pos)
|
||||
success = 0
|
||||
|
||||
if "hit" in self.env_name:
|
||||
if puck_pos[0] - self.base_env.env_info['table']['length'] / 2 > 0 and \
|
||||
np.abs(puck_pos[1]) - self.base_env.env_info['table']['goal_width'] / 2 < 0:
|
||||
success = 1
|
||||
|
||||
elif "defend" in self.env_name:
|
||||
if -0.8 < puck_pos[0] <= -0.2 and puck_vel[0] < 0.1:
|
||||
success = 1
|
||||
|
||||
elif "prepare" in self.env_name:
|
||||
if -0.8 < puck_pos[0] <= -0.2 and np.abs(puck_pos[1]) < 0.39105 and puck_vel[0] < 0.1:
|
||||
success = 1
|
||||
return success
|
||||
|
||||
@property
|
||||
def unwrapped(self):
|
||||
return self
|
||||
|
||||
def close(self):
|
||||
self.base_env.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
env = AirHockeyEnv(env_mode="7dof-hit")
|
||||
env.reset()
|
||||
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.random.uniform(-1, 1, (2, env.env_info['robot']['n_joints'])) * 3
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
1
fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
Normal file
1
fancy_gym/envs/mujoco/air_hockey/constraints/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from .constraints import *
|
212
fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
Normal file
212
fancy_gym/envs/mujoco/air_hockey/constraints/constraints.py
Normal file
@ -0,0 +1,212 @@
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics, jacobian
|
||||
|
||||
|
||||
class Constraint:
|
||||
def __init__(self, env_info, output_dim, **kwargs):
|
||||
"""
|
||||
Constructor
|
||||
|
||||
Args
|
||||
----
|
||||
env_info: dict
|
||||
A dictionary contains information about the environment;
|
||||
output_dim: int
|
||||
The output dimension of the constraints.
|
||||
**kwargs: dict
|
||||
A dictionary contains agent related information.
|
||||
"""
|
||||
self._env_info = env_info
|
||||
self._name = None
|
||||
|
||||
self.output_dim = output_dim
|
||||
|
||||
self._fun_value = np.zeros(self.output_dim)
|
||||
self._jac_value = np.zeros((self.output_dim, 2 * env_info["robot"]["n_joints"]))
|
||||
self._q_prev = None
|
||||
self._dq_prev = None
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
"""
|
||||
The name of the constraints
|
||||
|
||||
"""
|
||||
return self._name
|
||||
|
||||
def fun(self, q, dq):
|
||||
"""
|
||||
The function of the constraint.
|
||||
|
||||
Args
|
||||
----
|
||||
q: numpy.ndarray, (num_joints,)
|
||||
The joint position of the robot
|
||||
dq: numpy.ndarray, (num_joints,)
|
||||
The joint velocity of the robot
|
||||
|
||||
Returns
|
||||
-------
|
||||
numpy.ndarray, (out_dim,):
|
||||
The value computed by the constraints function.
|
||||
"""
|
||||
if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
|
||||
return self._fun_value
|
||||
else:
|
||||
self._jacobian(q, dq)
|
||||
return self._fun(q, dq)
|
||||
|
||||
def jacobian(self, q, dq):
|
||||
"""
|
||||
Jacobian is the derivative of the constraint function w.r.t the robot joint position and velocity.
|
||||
|
||||
Args
|
||||
----
|
||||
q: ndarray, (num_joints,)
|
||||
The joint position of the robot
|
||||
dq: ndarray, (num_joints,)
|
||||
The joint velocity of the robot
|
||||
|
||||
Returns
|
||||
-------
|
||||
numpy.ndarray, (dim_output, num_joints * 2):
|
||||
The flattened jacobian of the constraint function J = [dc / dq, dc / dq_dot]
|
||||
|
||||
"""
|
||||
if np.equal(q, self._q_prev).all() and np.equal(dq, self._dq_prev):
|
||||
return self._fun_value
|
||||
else:
|
||||
self._fun(q, dq)
|
||||
return self._jacobian(q, dq)
|
||||
|
||||
def _fun(self, q, dq):
|
||||
raise NotImplementedError
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class ConstraintList:
|
||||
def __init__(self):
|
||||
self.constraints = dict()
|
||||
|
||||
def keys(self):
|
||||
return self.constraints.keys()
|
||||
|
||||
def get(self, key):
|
||||
return self.constraints.get(key)
|
||||
|
||||
def add(self, c):
|
||||
self.constraints.update({c.name: c})
|
||||
|
||||
def delete(self, name):
|
||||
del self.constraints[name]
|
||||
|
||||
def fun(self, q, dq):
|
||||
return {key: self.constraints[key].fun(q, dq) for key in self.constraints}
|
||||
|
||||
def jacobian(self, q, dq):
|
||||
return {key: self.constraints[key].jacobian(q, dq) for key in self.constraints}
|
||||
|
||||
|
||||
class JointPositionConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
|
||||
self.joint_limits = self._env_info['robot']['joint_pos_limit'] * 0.95
|
||||
self._name = 'joint_pos_constr'
|
||||
|
||||
def _fun(self, q, dq):
|
||||
self._fun_value[:int(self.output_dim / 2)] = q - self.joint_limits[1]
|
||||
self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - q
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
self._jac_value[:int(self.output_dim / 2), :int(self.output_dim / 2)] = np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
self._jac_value[int(self.output_dim / 2):, :int(self.output_dim / 2)] = -np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
return self._jac_value
|
||||
|
||||
|
||||
class JointVelocityConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
super().__init__(env_info, output_dim=2 * env_info["robot"]["n_joints"], **kwargs)
|
||||
self.joint_limits = self._env_info['robot']['joint_vel_limit'] * 0.95
|
||||
self._name = 'joint_vel_constr'
|
||||
|
||||
def _fun(self, q, dq):
|
||||
self._fun_value[:int(self.output_dim / 2)] = dq - self.joint_limits[1]
|
||||
self._fun_value[int(self.output_dim / 2):] = self.joint_limits[0] - dq
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
self._jac_value[:int(self.output_dim / 2), int(self.output_dim / 2):] = np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
self._jac_value[int(self.output_dim / 2):, int(self.output_dim / 2):] = -np.eye(
|
||||
self._env_info['robot']['n_joints'])
|
||||
return self._jac_value
|
||||
|
||||
|
||||
class EndEffectorConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
# 1 Dimension on x direction: x > x_lb
|
||||
# 2 Dimension on y direction: y > y_lb, y < y_ub
|
||||
# 2 Dimension on z direction: z > z_lb, z < z_ub
|
||||
super().__init__(env_info, output_dim=5, **kwargs)
|
||||
self._name = "ee_constr"
|
||||
tolerance = 0.02
|
||||
|
||||
self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
|
||||
self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
|
||||
|
||||
self.x_lb = - self._env_info['robot']['base_frame'][0][0, 3] - (
|
||||
self._env_info['table']['length'] / 2 - self._env_info['mallet']['radius'])
|
||||
self.y_lb = - (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
|
||||
self.y_ub = (self._env_info['table']['width'] / 2 - self._env_info['mallet']['radius'])
|
||||
self.z_lb = self._env_info['robot']['ee_desired_height'] - tolerance
|
||||
self.z_ub = self._env_info['robot']['ee_desired_height'] + tolerance
|
||||
|
||||
def _fun(self, q, dq):
|
||||
ee_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q)
|
||||
self._fun_value = np.array([-ee_pos[0] + self.x_lb,
|
||||
-ee_pos[1] + self.y_lb, ee_pos[1] - self.y_ub,
|
||||
-ee_pos[2] + self.z_lb, ee_pos[2] - self.z_ub])
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
jac = jacobian(self.robot_model, self.robot_data, q)
|
||||
dc_dx = np.array([[-1, 0., 0.], [0., -1., 0.], [0., 1., 0.], [0., 0., -1.], [0., 0., 1.]])
|
||||
self._jac_value[:, :self._env_info['robot']['n_joints']] = dc_dx @ jac[:3, :self._env_info['robot']['n_joints']]
|
||||
return self._jac_value
|
||||
|
||||
|
||||
class LinkConstraint(Constraint):
|
||||
def __init__(self, env_info, **kwargs):
|
||||
# 1 Dimension: wrist_z > minimum_height
|
||||
# 2 Dimension: elbow_z > minimum_height
|
||||
super().__init__(env_info, output_dim=2, **kwargs)
|
||||
self._name = "link_constr"
|
||||
|
||||
self.robot_model = copy.deepcopy(self._env_info['robot']['robot_model'])
|
||||
self.robot_data = copy.deepcopy(self._env_info['robot']['robot_data'])
|
||||
|
||||
self.z_lb = 0.25
|
||||
|
||||
def _fun(self, q, dq):
|
||||
wrist_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="7")
|
||||
elbow_pos, _ = forward_kinematics(self.robot_model, self.robot_data, q, link="4")
|
||||
self._fun_value = np.array([-wrist_pos[2] + self.z_lb,
|
||||
-elbow_pos[2] + self.z_lb])
|
||||
return self._fun_value
|
||||
|
||||
def _jacobian(self, q, dq):
|
||||
jac_wrist = jacobian(self.robot_model, self.robot_data, q, link="7")
|
||||
jac_elbow = jacobian(self.robot_model, self.robot_data, q, link="4")
|
||||
self._jac_value[:, :self._env_info['robot']['n_joints']] = np.vstack([
|
||||
-jac_wrist[2, :self._env_info['robot']['n_joints']],
|
||||
-jac_elbow[2, :self._env_info['robot']['n_joints']],
|
||||
])
|
||||
return self._jac_value
|
0
fancy_gym/envs/mujoco/air_hockey/data/__init__.py
Normal file
0
fancy_gym/envs/mujoco/air_hockey/data/__init__.py
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/EE_arm.stl
Normal file
Binary file not shown.
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_0.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_1.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_2.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_3.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_4.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_5.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_6.stl
Normal file
Binary file not shown.
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
Normal file
BIN
fancy_gym/envs/mujoco/air_hockey/data/iiwas/assets/link_7.stl
Normal file
Binary file not shown.
58
fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
Normal file
58
fancy_gym/envs/mujoco/air_hockey/data/iiwas/double.xml
Normal file
@ -0,0 +1,58 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
<include file="iiwa1.xml"/>
|
||||
|
||||
<include file="iiwa2.xml"/>
|
||||
|
||||
<include file="../table.xml"/>
|
||||
|
||||
<contact>
|
||||
<exclude body1="table_surface" body2="iiwa_1/base"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="rim" body2="iiwa_1/base"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="world" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="table_surface" body2="iiwa_2/base"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_1"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_2"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_3"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_4"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_5"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_6"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/link_7"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/striker_joint_link"/>
|
||||
<exclude body1="table_surface" body2="iiwa_2/striker_mallet"/>
|
||||
|
||||
<exclude body1="rim" body2="iiwa_2/base"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_1"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_2"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_3"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_4"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_5"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_6"/>
|
||||
<exclude body1="rim" body2="iiwa_2/link_7"/>
|
||||
<exclude body1="rim" body2="iiwa_2/striker_joint_link"/>
|
||||
<exclude body1="rim" body2="iiwa_2/striker_mallet"/>
|
||||
|
||||
<exclude body1="world" body2="iiwa_2/striker_mallet"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
108
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
Normal file
108
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa1.xml
Normal file
@ -0,0 +1,108 @@
|
||||
<mujoco model="iiwa_1">
|
||||
<compiler angle="radian" autolimits="true" meshdir="assets"/>
|
||||
|
||||
<default>
|
||||
<default class="vis">
|
||||
<geom contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
<default class="robot">
|
||||
<geom condim="4" solref="0.02 0.3" priority="2"/>
|
||||
</default>
|
||||
</default>
|
||||
<asset>
|
||||
<mesh name="link_0" file="link_0.stl"/>
|
||||
<mesh name="link_1" file="link_1.stl"/>
|
||||
<mesh name="link_2" file="link_2.stl"/>
|
||||
<mesh name="link_3" file="link_3.stl"/>
|
||||
<mesh name="link_4" file="link_4.stl"/>
|
||||
<mesh name="link_5" file="link_5.stl"/>
|
||||
<mesh name="link_6" file="link_6.stl"/>
|
||||
<mesh name="link_7" file="link_7.stl"/>
|
||||
<mesh name="EE_arm" file="EE_arm.stl"/>
|
||||
<mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<body name="iiwa_1/base" pos="-1.51 0 -0.1">
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
|
||||
<body name="iiwa_1/link_1" pos="0 0 0.1575">
|
||||
<inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
|
||||
fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
|
||||
<joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
|
||||
frictionloss="0.384477"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
|
||||
<body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
|
||||
fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
|
||||
<joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
|
||||
frictionloss="0.496333"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
|
||||
<body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
|
||||
fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
|
||||
<joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
|
||||
frictionloss="0.173951"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
|
||||
<body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
|
||||
fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
|
||||
<joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.219041" frictionloss="0.3751"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
|
||||
<body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
|
||||
fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
|
||||
<joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
|
||||
damping="0.185923" frictionloss="0.481099"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
|
||||
<body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
|
||||
fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
|
||||
<joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.1" frictionloss="0.196149"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
|
||||
<body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
|
||||
fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
|
||||
<joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
|
||||
damping="0.1" frictionloss="0.299238" armature="0.01"/>
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
|
||||
<geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
|
||||
class="vis"/>
|
||||
<body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
|
||||
<body name="iiwa_1/striker_mallet" pos="0 0 0">
|
||||
<inertial pos="0 0 0.0682827" mass="0.283"
|
||||
diaginertia="0.005 0.005 0.005"/>
|
||||
<joint name="iiwa_1/striker_joint_1" pos="0 0 0" axis="0 1 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<joint name="iiwa_1/striker_joint_2" pos="0 0 0" axis="1 0 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
|
||||
class="vis"/>
|
||||
<geom name="iiwa_1/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
|
||||
size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
|
||||
<actuator>
|
||||
<motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
|
||||
<motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_1/striker_joint_1" joint="iiwa_1/striker_joint_1" ctrlrange="-10 10"/>
|
||||
<motor name="iiwa_1/striker_joint_2" joint="iiwa_1/striker_joint_2" ctrlrange="-10 10"/>
|
||||
</actuator>
|
||||
</mujoco>
|
89
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
Normal file
89
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa2.xml
Normal file
@ -0,0 +1,89 @@
|
||||
<mujoco model="iiwa_2">
|
||||
<compiler angle="radian" autolimits="true" meshdir="assets"/>
|
||||
|
||||
<worldbody>
|
||||
<body name="iiwa_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
|
||||
<body name="iiwa_2/link_1" pos="0 0 0.1575">
|
||||
<inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
|
||||
fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
|
||||
<joint name="iiwa_2/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
|
||||
frictionloss="0.384477"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
|
||||
<body name="iiwa_2/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
|
||||
fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
|
||||
<joint name="iiwa_2/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
|
||||
frictionloss="0.496333"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
|
||||
<body name="iiwa_2/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
|
||||
fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
|
||||
<joint name="iiwa_2/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
|
||||
frictionloss="0.173951"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
|
||||
<body name="iiwa_2/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
|
||||
fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
|
||||
<joint name="iiwa_2/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.219041" frictionloss="0.3751"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
|
||||
<body name="iiwa_2/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
|
||||
fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
|
||||
<joint name="iiwa_2/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
|
||||
damping="0.185923" frictionloss="0.481099"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
|
||||
<body name="iiwa_2/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
|
||||
fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
|
||||
<joint name="iiwa_2/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.1" frictionloss="0.196149"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
|
||||
<body name="iiwa_2/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
|
||||
fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
|
||||
<joint name="iiwa_2/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
|
||||
damping="0.1" frictionloss="0.299238" armature="0.01"/>
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
|
||||
<geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
|
||||
class="vis"/>
|
||||
<body name="iiwa_2/striker_joint_link" pos="0 0 0.585">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
|
||||
<body name="iiwa_2/striker_mallet" pos="0 0 0">
|
||||
<inertial pos="0 0 0.0682827" mass="0.283"
|
||||
diaginertia="0.005 0.005 0.005"/>
|
||||
<joint name="iiwa_2/striker_joint_1" pos="0 0 0" axis="0 1 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<joint name="iiwa_2/striker_joint_2" pos="0 0 0" axis="1 0 0"
|
||||
range="-1.5708 1.5708" damping="0.0"/>
|
||||
<geom type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_mallet_foam"
|
||||
class="vis"/>
|
||||
<geom name="iiwa_2/ee" type="cylinder" rgba="0.3 0.3 0.3 0.1"
|
||||
size="0.04815 0.03" pos="0 0 0.0505" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
|
||||
<actuator>
|
||||
<motor name="iiwa_2/joint_1" joint="iiwa_2/joint_1" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_2/joint_2" joint="iiwa_2/joint_2" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_2/joint_3" joint="iiwa_2/joint_3" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_2/joint_4" joint="iiwa_2/joint_4" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_2/joint_5" joint="iiwa_2/joint_5" ctrlrange="-110 110"/>
|
||||
<motor name="iiwa_2/joint_6" joint="iiwa_2/joint_6" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_2/joint_7" joint="iiwa_2/joint_7" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_2/striker_joint_1" joint="iiwa_2/striker_joint_1" ctrlrange="-10 10"/>
|
||||
<motor name="iiwa_2/striker_joint_2" joint="iiwa_2/striker_joint_2" ctrlrange="-10 10"/>
|
||||
</actuator>
|
||||
|
||||
</mujoco>
|
94
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
Normal file
94
fancy_gym/envs/mujoco/air_hockey/data/iiwas/iiwa_only.xml
Normal file
@ -0,0 +1,94 @@
|
||||
<mujoco model="iiwa_1">
|
||||
<compiler angle="radian" autolimits="true" meshdir="assets"/>
|
||||
|
||||
<default>
|
||||
<default class="vis">
|
||||
<geom contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
<default class="robot">
|
||||
<geom condim="4" solref="0.02 0.3" priority="2"/>
|
||||
</default>
|
||||
</default>
|
||||
<asset>
|
||||
<mesh name="link_0" file="link_0.stl"/>
|
||||
<mesh name="link_1" file="link_1.stl"/>
|
||||
<mesh name="link_2" file="link_2.stl"/>
|
||||
<mesh name="link_3" file="link_3.stl"/>
|
||||
<mesh name="link_4" file="link_4.stl"/>
|
||||
<mesh name="link_5" file="link_5.stl"/>
|
||||
<mesh name="link_6" file="link_6.stl"/>
|
||||
<mesh name="link_7" file="link_7.stl"/>
|
||||
<mesh name="EE_arm" file="EE_arm.stl"/>
|
||||
<mesh name="EE_mallet_foam" file="EE_mallet_foam.stl"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<body name="iiwa_1/base" pos="0.0 0 0.0">
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_0" class="vis"/>
|
||||
<body name="iiwa_1/link_1" pos="0 0 0.1575">
|
||||
<inertial pos="4.007709e-06 -0.033936 0.122467" mass="8.240527"
|
||||
fullinertia="0.021981 0.022182 0.008234 -2.897243e-07 6.3165236e-07 0.003285"/>
|
||||
<joint name="iiwa_1/joint_1" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.33032"
|
||||
frictionloss="0.384477"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_1" class="vis"/>
|
||||
<body name="iiwa_1/link_2" pos="0 0 0.2025" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.003402 0.034792 0.046725" mass="6.357896"
|
||||
fullinertia="0.015565 0.005180 0.015484 -4.147301e-06 1.192255e-05 0.002538"/>
|
||||
<joint name="iiwa_1/joint_2" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944" damping="0.21216"
|
||||
frictionloss="0.496333"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_2" class="vis"/>
|
||||
<body name="iiwa_1/link_3" pos="0 0.2045 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="-0.001452 0.031526 0.133584" mass="4.042756"
|
||||
fullinertia="0.010914 0.010381 0.003139 -3.540575e-06 -9.059062e-06 -0.002128"/>
|
||||
<joint name="iiwa_1/joint_3" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706" damping="0.1"
|
||||
frictionloss="0.173951"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_3" class="vis"/>
|
||||
<body name="iiwa_1/link_4" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.002527 0.053508 0.037205" mass="3.642249"
|
||||
fullinertia="0.007536 0.002538 0.007206 -5.707028e-06 2.781894e-06 0.001256"/>
|
||||
<joint name="iiwa_1/joint_4" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.219041" frictionloss="0.3751"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_4" class="vis"/>
|
||||
<body name="iiwa_1/link_5" pos="0 0.1845 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.001855 0.024573 0.080131" mass="2.580896"
|
||||
fullinertia="0.005201 0.004488 0.002242 1.089316e-07 9.035623e-07 -0.001613"/>
|
||||
<joint name="iiwa_1/joint_5" pos="0 0 0" axis="0 0 1" range="-2.96706 2.96706"
|
||||
damping="0.185923" frictionloss="0.481099"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_5" class="vis"/>
|
||||
<body name="iiwa_1/link_6" pos="0 0 0.2155" quat="0.707107 0.707107 0 0">
|
||||
<inertial pos="-0.001739 -0.001973 -0.002502" mass="2.760564"
|
||||
fullinertia="0.002534 0.001821 0.002393 -1.311766e-06 9.508242e-07 0.000134"/>
|
||||
<joint name="iiwa_1/joint_6" pos="0 0 0" axis="0 0 1" range="-2.0944 2.0944"
|
||||
damping="0.1" frictionloss="0.196149"/>
|
||||
<geom type="mesh" rgba="1 0.423529 0.0392157 1" mesh="link_6" class="vis"/>
|
||||
<body name="iiwa_1/link_7" pos="0 0.081 0" quat="0 0 0.707107 0.707107">
|
||||
<inertial pos="0.000735 0.000387 0.026460" mass="1.285417"
|
||||
fullinertia="0.000151 0.000150 0.000187 -7.223100e-08 2.038333e-06 -3.396830e-07"/>
|
||||
<joint name="iiwa_1/joint_7" pos="0 0 0" axis="0 0 1" range="-3.05433 3.05433"
|
||||
damping="0.1" frictionloss="0.299238" armature="0.01"/>
|
||||
<geom type="mesh" rgba="0.4 0.4 0.4 1" mesh="link_7" class="vis"/>
|
||||
<geom pos="0 0 0.07" type="mesh" rgba="0.3 0.3 0.3 1" mesh="EE_arm"
|
||||
class="vis"/>
|
||||
<body name="iiwa_1/striker_joint_link" pos="0 0 0.585">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.001 0.001 0.001"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
|
||||
<actuator>
|
||||
<motor name="iiwa_1/joint_1" joint="iiwa_1/joint_1" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_2" joint="iiwa_1/joint_2" ctrlrange="-320 320"/>
|
||||
<motor name="iiwa_1/joint_3" joint="iiwa_1/joint_3" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_4" joint="iiwa_1/joint_4" ctrlrange="-176 176"/>
|
||||
<motor name="iiwa_1/joint_5" joint="iiwa_1/joint_5" ctrlrange="-110 110"/>
|
||||
<motor name="iiwa_1/joint_6" joint="iiwa_1/joint_6" ctrlrange="-40 40"/>
|
||||
<motor name="iiwa_1/joint_7" joint="iiwa_1/joint_7" ctrlrange="-40 40"/>
|
||||
</actuator>
|
||||
</mujoco>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
32
fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
Normal file
32
fancy_gym/envs/mujoco/air_hockey/data/iiwas/single.xml
Normal file
@ -0,0 +1,32 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
|
||||
<include file="iiwa1.xml"/>
|
||||
<include file="../table.xml"/>
|
||||
|
||||
<contact>
|
||||
<exclude body1="table_surface" body2="iiwa_1/base"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="table_surface" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="rim" body2="iiwa_1/base"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_1"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_2"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_3"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_4"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_5"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_6"/>
|
||||
<exclude body1="rim" body2="iiwa_1/link_7"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_joint_link"/>
|
||||
<exclude body1="rim" body2="iiwa_1/striker_mallet"/>
|
||||
|
||||
<exclude body1="world" body2="iiwa_1/striker_mallet"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
37
fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
Normal file
37
fancy_gym/envs/mujoco/air_hockey/data/planar/double.xml
Normal file
@ -0,0 +1,37 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
|
||||
<include file="planar_robot_1.xml"/>
|
||||
|
||||
<include file="planar_robot_2.xml"/>
|
||||
|
||||
<include file="../table.xml"/>
|
||||
|
||||
<contact>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/base" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/base" body2="rim"/>
|
||||
|
||||
<exclude body1="planar_robot_2/body_ee" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_hand" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_3" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_2" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_1" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/base" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_2/body_ee" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_hand" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_3" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_2" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/body_1" body2="rim"/>
|
||||
<exclude body1="planar_robot_2/base" body2="rim"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
@ -0,0 +1,61 @@
|
||||
<mujoco model="planar_robot_1">
|
||||
<compiler autolimits="true" angle="radian"/>
|
||||
<asset>
|
||||
<material name="dark_red" rgba="0.58 0.03 0.25 1"/>
|
||||
<material name="black" rgba="0.1 0.1 0.11 1"/>
|
||||
</asset>
|
||||
|
||||
<default>
|
||||
<default class="visual">
|
||||
<geom contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
|
||||
<default class="robot">
|
||||
<geom condim="4" solref="0.02 0.3" priority="2"/>
|
||||
</default>
|
||||
</default>
|
||||
|
||||
<worldbody>
|
||||
<body name="planar_robot_1/base" pos="-1.51 0 -0.1">
|
||||
<geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
|
||||
<inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
|
||||
<body name="planar_robot_1/body_1" pos="0 0 0.25">
|
||||
<joint name="planar_robot_1/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
|
||||
<inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
|
||||
<geom class="robot" type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0"
|
||||
pos="0.275 0 0"/>
|
||||
<body name="planar_robot_1/body_2" pos="0.55 0 0">
|
||||
<joint name="planar_robot_1/joint_2" axis="0 0 1" range="-1.8 1.8"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
|
||||
pos="0.22 0 0"/>
|
||||
<geom class="robot" type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_1/body_3" pos="0.44 0 0">
|
||||
<joint name="planar_robot_1/joint_3" axis="0 0 1"
|
||||
range="-2.0943951023931953 2.0943951023931953"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom class="robot" type="cylinder" material="dark_red" size="0.04 0.22"
|
||||
euler="0 1.57079632679 0" pos="0.22 0 0"/>
|
||||
<geom class="robot" type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_1/body_hand" pos="0.44 0 0">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom class="robot" type="sphere" material="dark_red" size="0.05"/>
|
||||
<geom class="robot" type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
|
||||
<body name="planar_robot_1/body_ee" pos="0 0 -0.15">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom class="robot" name="planar_robot_1/ee" type="cylinder" material="black"
|
||||
size="0.04815 0.01" pos="0 0 0.01" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
<actuator>
|
||||
<motor name="planar_robot_1/joint_1" joint="planar_robot_1/joint_1" ctrlrange="-100 100"/>
|
||||
<motor name="planar_robot_1/joint_2" joint="planar_robot_1/joint_2" ctrlrange="-50 50"/>
|
||||
<motor name="planar_robot_1/joint_3" joint="planar_robot_1/joint_3" ctrlrange="-30 30"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,46 @@
|
||||
<mujoco model="planar_robot_2">
|
||||
<compiler autolimits="true" angle="radian"/>
|
||||
|
||||
<worldbody>
|
||||
<body name="planar_robot_2/base" pos="1.51 0 -0.1" quat="0 0 0 1">
|
||||
<geom type="cylinder" material="black" size="0.075 0.2" pos="0 0 0.2" class="visual"/>
|
||||
<inertial pos="0 0 0" mass="1" diaginertia="0.05 0.018 0.044"/>
|
||||
<body name="planar_robot_2/body_1" pos="0 0 0.25">
|
||||
<joint name="planar_robot_2/joint_1" axis="0 0 1" range="-2.9670597283903604 2.9670597283903604"/>
|
||||
<inertial pos="0.25 0 0" mass="3" diaginertia="0.064375 0.064375 0.00375"/>
|
||||
<geom type="cylinder" material="dark_red" size="0.04 0.275" euler="0 1.57079632679 0" pos="0.275 0 0"/>
|
||||
<body name="planar_robot_2/body_2" pos="0.55 0 0">
|
||||
<joint name="planar_robot_2/joint_2" axis="0 0 1" range="-1.8 1.8"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
|
||||
pos="0.22 0 0"/>
|
||||
<geom type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_2/body_3" pos="0.44 0 0">
|
||||
<joint name="planar_robot_2/joint_3" axis="0 0 1"
|
||||
range="-2.0943951023931953 2.0943951023931953"/>
|
||||
<inertial pos="0.2 0 0" mass="2" diaginertia="0.0335 0.0335 0.003"/>
|
||||
<geom type="cylinder" material="dark_red" size="0.04 0.22" euler="0 1.57079632679 0"
|
||||
pos="0.22 0 0"/>
|
||||
<geom type="sphere" material="dark_red" size="0.05"/>
|
||||
<body name="planar_robot_2/body_hand" pos="0.44 0 0">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom type="sphere" material="dark_red" size="0.05"/>
|
||||
<geom type="cylinder" material="black" size="0.01 0.075" pos="0 0 -0.075"/>
|
||||
<body name="planar_robot_2/body_ee" pos="0 0 -0.15">
|
||||
<inertial pos="0 0 0" mass="0.1" diaginertia="0.0008 0.0023 0.0023"/>
|
||||
<geom name="planar_robot_2/ee" type="cylinder" material="black" size="0.04815 0.01"
|
||||
pos="0 0 0.01" friction="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
<actuator>
|
||||
<motor name="planar_robot_2/joint_1" joint="planar_robot_2/joint_1" ctrlrange="-100 100"/>
|
||||
<motor name="planar_robot_2/joint_2" joint="planar_robot_2/joint_2" ctrlrange="-50 50"/>
|
||||
<motor name="planar_robot_2/joint_3" joint="planar_robot_2/joint_3" ctrlrange="-30 30"/>
|
||||
</actuator>
|
||||
</mujoco>
|
24
fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
Normal file
24
fancy_gym/envs/mujoco/air_hockey/data/planar/single.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<mujoco model="AirHockeySingle">
|
||||
|
||||
|
||||
<include file="planar_robot_1.xml"/>
|
||||
|
||||
<include file="../table.xml"/>
|
||||
|
||||
|
||||
<contact>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/base" body2="table_surface"/>
|
||||
<exclude body1="planar_robot_1/body_ee" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_hand" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_3" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_2" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/body_1" body2="rim"/>
|
||||
<exclude body1="planar_robot_1/base" body2="rim"/>
|
||||
</contact>
|
||||
|
||||
</mujoco>
|
89
fancy_gym/envs/mujoco/air_hockey/data/table.xml
Normal file
89
fancy_gym/envs/mujoco/air_hockey/data/table.xml
Normal file
@ -0,0 +1,89 @@
|
||||
<mujoco model="table">
|
||||
|
||||
<option timestep="0.001" cone="elliptic" impratio="1"/>
|
||||
|
||||
<asset>
|
||||
<material name="grey" specular="0.5" shininess="0.25" rgba="0.8 0.8 0.8 1"/>
|
||||
<material name="white" specular="0.5" shininess="0.25" rgba="1.0 1.0 1.0 1"/>
|
||||
<material name="red" specular="0.5" shininess="0.25" rgba="1.0 0.0 0.0 1"/>
|
||||
<material name="blue" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 1"/>
|
||||
<material name="transparent" specular="0.5" shininess="0.25" rgba="0.0 0.0 1.0 0"/>
|
||||
|
||||
<texture type="skybox" builtin="gradient" rgb1="0.3 0.5 0.7" rgb2="0 0 0" width="512"
|
||||
height="3072"/>
|
||||
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.2 0.3 0.4"
|
||||
rgb2="0.1 0.2 0.3" markrgb="0.8 0.8 0.8" width="300" height="300"/>
|
||||
<material name="groundplane" texture="groundplane" texuniform="true" texrepeat="5 5"
|
||||
reflectance="0.2"/>
|
||||
</asset>
|
||||
|
||||
<default>
|
||||
<geom condim="4" solref="0.02 0.3"/>
|
||||
<default class="rim">
|
||||
<geom type="box" material="grey" condim="6" friction="10000 0.0 0.0" priority="1"
|
||||
solref="-2000000 -250" solimp="0.99 0.999 0.001 0.5 2"/>
|
||||
</default>
|
||||
</default>
|
||||
|
||||
<worldbody>
|
||||
<body name="table">
|
||||
<body name="table_surface">
|
||||
<geom name="surface" type="box" material="white" size="1.064 0.609 0.0505" pos="0 0 -0.0505"
|
||||
condim="4" friction="0.08 0.08 0.0" priority="1"
|
||||
solref="0.0125 0.5" solimp="0.9 0.999 0.001 0.5 2"/>
|
||||
</body>
|
||||
<body name="rim">
|
||||
<!-- <geom name="rim_home_l" class="rim" size="0.045 0.197 0.005" pos="-1.019 0.322 0.005"/>-->
|
||||
<!-- <geom name="rim_home_r" class="rim" size="0.045 0.197 0.005" pos="-1.019 -0.322 0.005"/>-->
|
||||
|
||||
<geom name="rim_home_l" class="rim" size="0.045 0.1945 0.005" pos="-1.019 0.3245 0.005"/>
|
||||
<geom name="rim_home_r" class="rim" size="0.045 0.1945 0.005" pos="-1.019 -0.3245 0.005"/>
|
||||
<geom name="rim_home_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 0.13 0.005"/>
|
||||
<geom name="rim_home_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 0.13 0.005"/>
|
||||
<geom name="rim_home_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="-0.979 -0.13 0.005"/>
|
||||
<geom name="rim_home_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="-1.0215 -0.13 0.005"/>
|
||||
|
||||
<geom name="rim_home_top" class="rim" size="0.045 0.519 0.01" pos="-1.019 0 0.02"/>
|
||||
|
||||
<geom name="rim_left" class="rim" size="1.064 0.045 0.015" pos="0 0.564 0.015"/>
|
||||
<geom name="rim_right" class="rim" size="1.064 0.045 0.015" pos="0 -0.564 0.015"/>
|
||||
|
||||
<!-- <geom name="rim_away_l" class="rim" size="0.045 0.197 0.005" pos="1.019 0.322 0.005"/>-->
|
||||
<!-- <geom name="rim_away_r" class="rim" size="0.045 0.197 0.005" pos="1.019 -0.322 0.005"/>-->
|
||||
|
||||
<geom name="rim_away_l" class="rim" size="0.045 0.1945 0.005" pos="1.019 0.3245 0.005"/>
|
||||
<geom name="rim_away_r" class="rim" size="0.045 0.1945 0.005" pos="1.019 -0.3245 0.005"/>
|
||||
<geom name="rim_away_bound_l" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 0.13 0.005"/>
|
||||
<geom name="rim_away_bound_l_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 0.13 0.005"/>
|
||||
<geom name="rim_away_bound_r" class="rim" type="cylinder" size="0.005 0.005" pos="0.979 -0.13 0.005"/>
|
||||
<geom name="rim_away_bound_r_tail" class="rim" size="0.0425 0.005 0.005" pos="1.0215 -0.13 0.005"/>
|
||||
<geom name="rim_away_top" class="rim" size="0.045 0.519 0.01" pos="1.019 0 0.02"/>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<body name="base" pos="0 0 0">
|
||||
<joint name="puck_x" type="slide" axis="1 0 0" damping="0.005" limited="false"/>
|
||||
<joint name="puck_y" type="slide" axis="0 1 0" damping="0.005" limited="false"/>
|
||||
<joint name="puck_yaw" type="hinge" axis="0 0 1" damping="2e-6" limited="false"/>
|
||||
<body name="puck">
|
||||
<geom pos="0 0 0" name="puck" type="cylinder" material="red" size="0.03165 0.003"
|
||||
condim="4" priority="0"/>
|
||||
<geom pos="0.02 0 0" type="cylinder" material="blue" size="0.01 0.0031"
|
||||
condim="4" contype="0" conaffinity="0"/>
|
||||
<inertial pos="0 0 0" mass="0.01" diaginertia="2.5e-6 2.5e-6 5e-6"/>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<site name="puck_vis" type="ellipsoid" size="0.03165 0.03165 0.004" pos="0 0 -0.05"/>
|
||||
<site name="puck_vis_rot" type="cylinder" size="0.0045 0.006" rgba="1 0 0 1" pos="0 0 -0.05"/>
|
||||
</worldbody>
|
||||
|
||||
<contact>
|
||||
<exclude body1="puck" body2="table_surface"/>
|
||||
</contact>
|
||||
|
||||
<worldbody>
|
||||
<light pos="0 0 3" dir="0 0 -1" directional="true"/>
|
||||
<geom pos="0 0 -0.1" name="floor" size="0 0 0.05" type="plane" material="groundplane"/>
|
||||
</worldbody>
|
||||
</mujoco>
|
270
fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
Normal file
270
fancy_gym/envs/mujoco/air_hockey/position_control_wrapper.py
Normal file
@ -0,0 +1,270 @@
|
||||
from collections import deque
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
import scipy
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey import seven_dof
|
||||
from fancy_gym.envs.mujoco.air_hockey import three_dof
|
||||
|
||||
|
||||
class PositionControl:
|
||||
def __init__(self, p_gain, d_gain, i_gain, interpolation_order=3, debug=False, *args, **kwargs):
|
||||
"""
|
||||
Mixin that adds position controller to mujoco environments.
|
||||
|
||||
Args:
|
||||
p_gain (float): Proportional controller gain
|
||||
d_gain (float): Differential controller gain
|
||||
i_gain (float): Integral controller gain
|
||||
interpolation_order (int, 3): Type of interpolation used, has to correspond to action shape. Order 1-5 are
|
||||
polynomial interpolation of the degree. Order -1 is linear interpolation of position and velocity.
|
||||
Set Order to None in order to turn off interpolation. In this case the action has to be a trajectory
|
||||
of position, velocity and acceleration of the shape (20, 3, n_joints)
|
||||
In the case of 2 agents it is a tuple, which describes the interpolation order for each agent
|
||||
debug (bool, True): If true it logs the controller performance into controller_record queue. The order of the
|
||||
entries is desired_pos, current_pos, desired_vel, current_vel, desired_acc, jerk.
|
||||
"""
|
||||
|
||||
self.debug = debug
|
||||
|
||||
super(PositionControl, self).__init__(*args, **kwargs)
|
||||
|
||||
self.robot_model = self.env_info['robot']['robot_model']
|
||||
self.robot_data = self.env_info['robot']['robot_data']
|
||||
|
||||
self.p_gain = np.array(p_gain * self.n_agents)
|
||||
self.d_gain = np.array(d_gain * self.n_agents)
|
||||
self.i_gain = np.array(i_gain * self.n_agents)
|
||||
|
||||
self.prev_pos = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_vel = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_acc = np.zeros(len(self.actuator_joint_ids))
|
||||
self.i_error = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_controller_cmd_pos = np.zeros(len(self.actuator_joint_ids))
|
||||
|
||||
self.interp_order = interpolation_order if type(interpolation_order) is tuple else (interpolation_order,)
|
||||
|
||||
self._num_env_joints = len(self.actuator_joint_ids)
|
||||
self.n_robot_joints = self.env_info['robot']["n_joints"]
|
||||
|
||||
self.action_shape = [None] * self.n_agents
|
||||
|
||||
for i in range(self.n_agents):
|
||||
if self.interp_order[i] is None:
|
||||
self.action_shape[i] = (int(self.dt / self._timestep), 3, self.n_robot_joints)
|
||||
elif self.interp_order[i] in [1, 2]:
|
||||
self.action_shape[i] = (self.n_robot_joints,)
|
||||
elif self.interp_order[i] in [3, 4, -1]:
|
||||
self.action_shape[i] = (2, self.n_robot_joints)
|
||||
elif self.interp_order[i] == 5:
|
||||
self.action_shape[i] = (3, self.n_robot_joints)
|
||||
|
||||
self.traj = None
|
||||
|
||||
self.jerk = np.zeros(self._num_env_joints)
|
||||
|
||||
if self.debug:
|
||||
self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
|
||||
|
||||
def _enforce_safety_limits(self, desired_pos, desired_vel):
|
||||
# ROS safe controller
|
||||
pos = self.prev_controller_cmd_pos
|
||||
k = 20
|
||||
|
||||
joint_pos_lim = np.tile(self.env_info['robot']['joint_pos_limit'], (1, self.n_agents))
|
||||
joint_vel_lim = np.tile(self.env_info['robot']['joint_vel_limit'], (1, self.n_agents))
|
||||
|
||||
min_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[0]), joint_vel_lim[0]), joint_vel_lim[1])
|
||||
|
||||
max_vel = np.minimum(np.maximum(-k * (pos - joint_pos_lim[1]), joint_vel_lim[0]), joint_vel_lim[1])
|
||||
|
||||
clipped_vel = np.minimum(np.maximum(desired_vel, min_vel), max_vel)
|
||||
|
||||
min_pos = pos + min_vel * self._timestep
|
||||
max_pos = pos + max_vel * self._timestep
|
||||
|
||||
clipped_pos = np.minimum(np.maximum(desired_pos, min_pos), max_pos)
|
||||
self.prev_controller_cmd_pos = clipped_pos.copy()
|
||||
|
||||
return clipped_pos, clipped_vel
|
||||
|
||||
def _controller(self, desired_pos, desired_vel, desired_acc, current_pos, current_vel):
|
||||
clipped_pos, clipped_vel = self._enforce_safety_limits(desired_pos, desired_vel)
|
||||
|
||||
error = (clipped_pos - current_pos)
|
||||
|
||||
self.i_error += self.i_gain * error * self._timestep
|
||||
torque = self.p_gain * error + self.d_gain * (clipped_vel - current_vel) + self.i_error
|
||||
|
||||
# Acceleration FeedForward
|
||||
tau_ff = np.zeros(self.robot_model.nv)
|
||||
for i in range(self.n_agents):
|
||||
robot_joint_ids = np.arange(self.n_robot_joints) + self.n_robot_joints * i
|
||||
self.robot_data.qpos = current_pos[robot_joint_ids]
|
||||
self.robot_data.qvel = current_vel[robot_joint_ids]
|
||||
acc_ff = desired_acc[robot_joint_ids]
|
||||
mujoco.mj_forward(self.robot_model, self.robot_data)
|
||||
|
||||
mujoco.mj_mulM(self.robot_model, self.robot_data, tau_ff, acc_ff)
|
||||
torque[robot_joint_ids] += tau_ff
|
||||
|
||||
# Gravity Compensation and Coriolis and Centrifugal force
|
||||
torque[robot_joint_ids] += self.robot_data.qfrc_bias
|
||||
|
||||
torque[robot_joint_ids] = np.minimum(np.maximum(torque[robot_joint_ids],
|
||||
self.robot_model.actuator_ctrlrange[:, 0]),
|
||||
self.robot_model.actuator_ctrlrange[:, 1])
|
||||
|
||||
if self.debug:
|
||||
self.controller_record.append(
|
||||
np.concatenate([desired_pos, current_pos, desired_vel, current_vel, desired_acc, self.jerk]))
|
||||
|
||||
return torque
|
||||
|
||||
def _interpolate_trajectory(self, interp_order, action, i=0):
|
||||
tf = self.dt
|
||||
prev_pos = self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
|
||||
prev_vel = self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
|
||||
prev_acc = self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints]
|
||||
if interp_order == 1 and action.ndim == 1:
|
||||
coef = np.array([[1, 0], [1, tf]])
|
||||
results = np.vstack([prev_pos, action])
|
||||
elif interp_order == 2 and action.ndim == 1:
|
||||
coef = np.array([[1, 0, 0], [1, tf, tf ** 2], [0, 1, 0]])
|
||||
if np.linalg.norm(action - prev_pos) < 1e-3:
|
||||
prev_vel = np.zeros_like(prev_vel)
|
||||
results = np.vstack([prev_pos, action, prev_vel])
|
||||
elif interp_order == 3 and action.shape[0] == 2:
|
||||
coef = np.array([[1, 0, 0, 0], [1, tf, tf ** 2, tf ** 3], [0, 1, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2]])
|
||||
results = np.vstack([prev_pos, action[0], prev_vel, action[1]])
|
||||
elif interp_order == 4 and action.shape[0] == 2:
|
||||
coef = np.array([[1, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4],
|
||||
[0, 1, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3],
|
||||
[0, 0, 2, 0, 0]])
|
||||
results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc])
|
||||
elif interp_order == 5 and action.shape[0] == 3:
|
||||
coef = np.array([[1, 0, 0, 0, 0, 0], [1, tf, tf ** 2, tf ** 3, tf ** 4, tf ** 5],
|
||||
[0, 1, 0, 0, 0, 0], [0, 1, 2 * tf, 3 * tf ** 2, 4 * tf ** 3, 5 * tf ** 4],
|
||||
[0, 0, 2, 0, 0, 0], [0, 0, 2, 6 * tf, 12 * tf ** 2, 20 * tf ** 3]])
|
||||
results = np.vstack([prev_pos, action[0], prev_vel, action[1], prev_acc, action[2]])
|
||||
elif interp_order == -1:
|
||||
# Interpolate position and velocity linearly
|
||||
pass
|
||||
else:
|
||||
raise ValueError("Undefined interpolator order or the action dimension does not match!")
|
||||
|
||||
if interp_order > 0:
|
||||
A = scipy.linalg.block_diag(*[coef] * self.n_robot_joints)
|
||||
y = results.reshape(-2, order='F')
|
||||
weights = np.linalg.solve(A, y).reshape(self.n_robot_joints, interp_order + 1)
|
||||
weights_d = np.polynomial.polynomial.polyder(weights, axis=1)
|
||||
weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
|
||||
elif interp_order == -1:
|
||||
weights = np.vstack([prev_pos, (action[0] - prev_pos) / self.dt]).T
|
||||
weights_d = np.vstack([prev_vel, (action[1] - prev_vel) / self.dt]).T
|
||||
weights_dd = np.polynomial.polynomial.polyder(weights_d, axis=1)
|
||||
|
||||
if interp_order in [3, 4, 5]:
|
||||
self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.abs(weights_dd[:, 1]) + np.abs(weights_dd[:, 0] - prev_acc) / self._timestep
|
||||
else:
|
||||
self.jerk[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.ones_like(prev_acc) * np.inf
|
||||
|
||||
self.prev_pos[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights.T)
|
||||
self.prev_vel[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_d.T)
|
||||
self.prev_acc[i*self.n_robot_joints:(i+1)*self.n_robot_joints] = np.polynomial.polynomial.polyval(tf, weights_dd.T)
|
||||
|
||||
for t in np.linspace(self._timestep, self.dt, self._n_intermediate_steps):
|
||||
q = np.polynomial.polynomial.polyval(t, weights.T)
|
||||
qd = np.polynomial.polynomial.polyval(t, weights_d.T)
|
||||
qdd = np.polynomial.polynomial.polyval(t, weights_dd.T)
|
||||
yield q, qd, qdd
|
||||
|
||||
def reset(self, obs=None):
|
||||
obs = super(PositionControl, self).reset(obs)
|
||||
self.prev_pos = self._data.qpos[self.actuator_joint_ids]
|
||||
self.prev_vel = self._data.qvel[self.actuator_joint_ids]
|
||||
self.prev_acc = np.zeros(len(self.actuator_joint_ids))
|
||||
self.i_error = np.zeros(len(self.actuator_joint_ids))
|
||||
self.prev_controller_cmd_pos = self._data.qpos[self.actuator_joint_ids]
|
||||
|
||||
if self.debug:
|
||||
self.controller_record = deque(maxlen=self.info.horizon * self._n_intermediate_steps)
|
||||
return obs
|
||||
|
||||
def _step_init(self, obs, action):
|
||||
super(PositionControl, self)._step_init(obs, action)
|
||||
|
||||
if self.n_agents == 1:
|
||||
self.traj = self._create_traj(self.interp_order[0], action)
|
||||
else:
|
||||
def _traj():
|
||||
traj_1 = self._create_traj(self.interp_order[0], action[0], 0)
|
||||
traj_2 = self._create_traj(self.interp_order[1], action[1], 1)
|
||||
|
||||
for a1, a2 in zip(traj_1, traj_2):
|
||||
yield np.hstack([a1, a2])
|
||||
|
||||
self.traj = _traj()
|
||||
|
||||
def _create_traj(self, interp_order, action, i=0):
|
||||
if interp_order is None:
|
||||
return iter(action)
|
||||
return self._interpolate_trajectory(interp_order, action, i)
|
||||
|
||||
def _compute_action(self, obs, action):
|
||||
cur_pos, cur_vel = self.get_joints(obs)
|
||||
|
||||
desired_pos, desired_vel, desired_acc = next(self.traj)
|
||||
|
||||
return self._controller(desired_pos, desired_vel, desired_acc, cur_pos, cur_vel)
|
||||
|
||||
def _preprocess_action(self, action):
|
||||
action = super(PositionControl, self)._preprocess_action(action)
|
||||
|
||||
if self.n_agents == 1:
|
||||
assert action.shape == self.action_shape[0], f"Unexpected action shape. Expected {self.action_shape[0]} but got" \
|
||||
f" {action.shape}"
|
||||
else:
|
||||
for i in range(self.n_agents):
|
||||
assert action[i].shape == self.action_shape[i], f"Unexpected action shape. Expected {self.action_shape[i]} but got" \
|
||||
f" {action[i].shape}"
|
||||
|
||||
return action
|
||||
|
||||
|
||||
class PositionControlIIWA(PositionControl):
|
||||
def __init__(self, *args, **kwargs):
|
||||
p_gain = [1500., 1500., 1200., 1200., 1000., 1000., 500.]
|
||||
d_gain = [60, 80, 60, 30, 10, 1, 0.5]
|
||||
i_gain = [0, 0, 0, 0, 0, 0, 0]
|
||||
|
||||
super(PositionControlIIWA, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
|
||||
|
||||
|
||||
class PositionControlPlanar(PositionControl):
|
||||
def __init__(self, *args, **kwargs):
|
||||
p_gain = [960, 480, 240]
|
||||
d_gain = [60, 20, 4]
|
||||
i_gain = [0, 0, 0]
|
||||
super(PositionControlPlanar, self).__init__(p_gain=p_gain, d_gain=d_gain, i_gain=i_gain, *args, **kwargs)
|
||||
|
||||
|
||||
class PlanarPositionHit(PositionControlPlanar, three_dof.AirHockeyHit):
|
||||
pass
|
||||
|
||||
|
||||
class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend):
|
||||
pass
|
||||
|
||||
|
||||
class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit):
|
||||
pass
|
||||
|
||||
|
||||
class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend):
|
||||
pass
|
||||
|
||||
|
||||
class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament):
|
||||
pass
|
4
fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
Normal file
4
fancy_gym/envs/mujoco/air_hockey/seven_dof/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
from .env_base import AirHockeyBase
|
||||
from .tournament import AirHockeyTournament
|
||||
from .hit import AirHockeyHit
|
||||
from .defend import AirHockeyDefend
|
75
fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
Normal file
75
fancy_gym/envs/mujoco/air_hockey/seven_dof/defend.py
Normal file
@ -0,0 +1,75 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||
|
||||
|
||||
class AirHockeyDefend(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey defending task.
|
||||
The agent should stop the puck at the line x=-0.6.
|
||||
"""
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
self.init_velocity_range = (1, 3)
|
||||
|
||||
self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
def setup(self, obs):
|
||||
puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
|
||||
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-0.5, 0.5)
|
||||
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-10, 10)
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super(AirHockeyDefend, self).setup(obs)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, state):
|
||||
puck_pos, puck_vel = self.get_puck(state)
|
||||
# If puck is over the middle line and moving towards opponent
|
||||
if puck_pos[0] > 0 and puck_vel[0] > 0:
|
||||
return True
|
||||
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||
return True
|
||||
return super().is_absorbing(state)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyDefend()
|
||||
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
||||
env.render()
|
||||
while True:
|
||||
# action = np.random.uniform(-1, 1, env.info.action_space.low.shape) * 8
|
||||
action = np.zeros(7)
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
print(observation)
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
258
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
Normal file
258
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_base.py
Normal file
@ -0,0 +1,258 @@
|
||||
import os
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.data.iiwas import __file__ as env_path
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils.universal_joint_plugin import UniversalJointPlugin
|
||||
from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
|
||||
from mushroom_rl.utils.spaces import Box
|
||||
|
||||
|
||||
"""
|
||||
Abstract class for all AirHockey Environments.
|
||||
|
||||
"""
|
||||
class AirHockeyBase(MuJoCo):
|
||||
def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
|
||||
n_agents=1, viewer_params={}):
|
||||
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
Args:
|
||||
n_agents (int, 1): number of agent to be used in the environment (one or two)
|
||||
"""
|
||||
|
||||
self.n_agents = n_agents
|
||||
|
||||
action_spec = []
|
||||
observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
collision_spec = [("puck", ["puck"]),
|
||||
("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
|
||||
("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
|
||||
|
||||
if 1 <= self.n_agents <= 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
|
||||
|
||||
action_spec += ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
|
||||
"iiwa_1/joint_6", "iiwa_1/joint_7"]
|
||||
|
||||
observation_spec += [("robot_1/joint_1_pos", "iiwa_1/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_2_pos", "iiwa_1/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_3_pos", "iiwa_1/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_4_pos", "iiwa_1/joint_4", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_5_pos", "iiwa_1/joint_5", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_6_pos", "iiwa_1/joint_6", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_7_pos", "iiwa_1/joint_7", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_1_vel", "iiwa_1/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_2_vel", "iiwa_1/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_3_vel", "iiwa_1/joint_3", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_4_vel", "iiwa_1/joint_4", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_5_vel", "iiwa_1/joint_5", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_6_vel", "iiwa_1/joint_6", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_7_vel", "iiwa_1/joint_7", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data += [("robot_1/joint_8_pos", "iiwa_1/striker_joint_1", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_9_pos", "iiwa_1/striker_joint_2", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_8_vel", "iiwa_1/striker_joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_9_vel", "iiwa_1/striker_joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_1/ee_pos", "iiwa_1/striker_mallet", ObservationType.BODY_POS),
|
||||
("robot_1/ee_vel", "iiwa_1/striker_mallet", ObservationType.BODY_VEL),
|
||||
("robot_1/rod_rot", "iiwa_1/striker_joint_link", ObservationType.BODY_ROT)]
|
||||
|
||||
collision_spec += [("robot_1/ee", ["iiwa_1/ee"])]
|
||||
|
||||
if self.n_agents == 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
|
||||
|
||||
observation_spec += [("robot_1/opponent_ee_pos", "iiwa_2/striker_joint_link", ObservationType.BODY_POS)]
|
||||
|
||||
action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4",
|
||||
"iiwa_2/joint_5",
|
||||
"iiwa_2/joint_6", "iiwa_2/joint_7"]
|
||||
|
||||
observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_1_pos", "iiwa_2/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_2_pos", "iiwa_2/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_3_pos", "iiwa_2/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_4_pos", "iiwa_2/joint_4", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_5_pos", "iiwa_2/joint_5", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_6_pos", "iiwa_2/joint_6", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_7_pos", "iiwa_2/joint_7", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_1_vel", "iiwa_2/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_2_vel", "iiwa_2/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_3_vel", "iiwa_2/joint_3", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_4_vel", "iiwa_2/joint_4", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_5_vel", "iiwa_2/joint_5", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_6_vel", "iiwa_2/joint_6", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_7_vel", "iiwa_2/joint_7", ObservationType.JOINT_VEL)]
|
||||
|
||||
observation_spec += [("robot_2/opponent_ee_pos", "iiwa_1/striker_joint_link", ObservationType.BODY_POS)]
|
||||
|
||||
additional_data += [("robot_2/joint_8_pos", "iiwa_2/striker_joint_1", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_9_pos", "iiwa_2/striker_joint_2", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_8_vel", "iiwa_2/striker_joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_9_vel", "iiwa_2/striker_joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_2/ee_pos", "iiwa_2/striker_mallet", ObservationType.BODY_POS),
|
||||
("robot_2/ee_vel", "iiwa_2/striker_mallet", ObservationType.BODY_VEL),
|
||||
("robot_2/rod_rot", "iiwa_2/striker_joint_link", ObservationType.BODY_ROT)]
|
||||
|
||||
collision_spec += [("robot_2/ee", ["iiwa_2/ee"])]
|
||||
else:
|
||||
raise ValueError('n_agents should be 1 or 2')
|
||||
|
||||
self.env_info = dict()
|
||||
self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
|
||||
self.env_info['puck'] = {"radius": 0.03165}
|
||||
self.env_info['mallet'] = {"radius": 0.04815}
|
||||
self.env_info['n_agents'] = self.n_agents
|
||||
self.env_info['robot'] = {
|
||||
"n_joints": 7,
|
||||
"ee_desired_height": 0.1645,
|
||||
"joint_vel_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
|
||||
[85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi,
|
||||
"joint_acc_limit": np.array([[-85, -85, -100, -75, -130, -135, -135],
|
||||
[85, 85, 100, 75, 130, 135, 135]]) / 180. * np.pi * 10,
|
||||
"base_frame": [],
|
||||
"universal_height": 0.0645,
|
||||
"control_frequency": 50,
|
||||
}
|
||||
|
||||
self.env_info['puck_pos_ids'] = [0, 1, 2]
|
||||
self.env_info['puck_vel_ids'] = [3, 4, 5]
|
||||
self.env_info['joint_pos_ids'] = [6, 7, 8, 9, 10, 11, 12]
|
||||
self.env_info['joint_vel_ids'] = [13, 14, 15, 16, 17, 18, 19]
|
||||
if self.n_agents == 2:
|
||||
self.env_info['opponent_ee_ids'] = [20, 21, 22]
|
||||
else:
|
||||
self.env_info['opponent_ee_ids'] = []
|
||||
|
||||
max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :7])) * self.n_agents
|
||||
|
||||
super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
|
||||
n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
|
||||
|
||||
# Construct the mujoco model at origin
|
||||
robot_model = mujoco.MjModel.from_xml_path(
|
||||
os.path.join(os.path.dirname(os.path.abspath(env_path)), "iiwa_only.xml"))
|
||||
robot_model.body('iiwa_1/base').pos = np.zeros(3)
|
||||
robot_data = mujoco.MjData(robot_model)
|
||||
|
||||
# Add env_info that requires mujoco models
|
||||
self.env_info['dt'] = self.dt
|
||||
self.env_info["robot"]["joint_pos_limit"] = np.array(
|
||||
[self._model.joint(f"iiwa_1/joint_{i + 1}").range for i in range(7)]).T
|
||||
self.env_info["robot"]["robot_model"] = robot_model
|
||||
self.env_info["robot"]["robot_data"] = robot_data
|
||||
self.env_info["rl_info"] = self.info
|
||||
|
||||
frame_T = np.eye(4)
|
||||
temp = np.zeros((9, 1))
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("iiwa_1/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("iiwa_1/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
if self.n_agents == 2:
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("iiwa_2/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("iiwa_2/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
# Ids of the joint, which are controller by the action space
|
||||
self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
|
||||
|
||||
self.universal_joint_plugin = UniversalJointPlugin(self._model, self._data, self.env_info)
|
||||
|
||||
def _modify_mdp_info(self, mdp_info):
|
||||
obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
|
||||
*np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[0]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][0]])
|
||||
obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
|
||||
*np.array([self._model.joint(f"iiwa_1/joint_{i + 1}").range[1]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][1]])
|
||||
if self.n_agents == 2:
|
||||
obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
|
||||
obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
|
||||
mdp_info.observation_space = Box(obs_low, obs_high)
|
||||
return mdp_info
|
||||
|
||||
def _simulation_pre_step(self):
|
||||
self.universal_joint_plugin.update()
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
|
||||
if type == 'pose':
|
||||
puck_w = np.eye(4)
|
||||
puck_w[:2, 3] = puck_in[:2]
|
||||
puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
|
||||
|
||||
puck_r = np.linalg.inv(robot_frame) @ puck_w
|
||||
puck_out = np.concatenate([puck_r[:2, 3],
|
||||
R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
|
||||
|
||||
if type == 'vel':
|
||||
rot_mat = robot_frame[:3, :3]
|
||||
|
||||
vel_lin = np.array([*puck_in[:2], 0])
|
||||
vel_ang = np.array([0., 0., puck_in[2]])
|
||||
|
||||
vel_lin_r = rot_mat.T @ vel_lin
|
||||
vel_ang_r = rot_mat.T @ vel_ang
|
||||
|
||||
puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
|
||||
return puck_out
|
||||
|
||||
def get_puck(self, obs):
|
||||
"""
|
||||
Getting the puck properties from the observations
|
||||
Args:
|
||||
obs: The current observation
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
|
||||
|
||||
"""
|
||||
puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
|
||||
puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
|
||||
return puck_pos, puck_vel
|
||||
|
||||
def get_ee(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_joints(self, obs):
|
||||
raise NotImplementedError
|
171
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
Normal file
171
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_double.py
Normal file
@ -0,0 +1,171 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
|
||||
|
||||
class AirHockeyDouble(AirHockeyBase):
|
||||
"""
|
||||
Base class for two agents air hockey tasks.
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
super().__init__(gamma=gamma, horizon=horizon, n_agents=2, viewer_params=viewer_params)
|
||||
|
||||
self._compute_init_state()
|
||||
|
||||
self.filter_ratio = 0.274
|
||||
self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
|
||||
self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"] * self.env_info["n_agents"])
|
||||
|
||||
def _compute_init_state(self):
|
||||
init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
|
||||
|
||||
success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
|
||||
self.env_info['robot']['robot_data'],
|
||||
np.array([0.65, 0., 0.1645]),
|
||||
R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
|
||||
initial_q=init_state)
|
||||
|
||||
assert success is True
|
||||
|
||||
def get_ee(self, robot=1):
|
||||
"""
|
||||
Getting the ee properties from the current internal state the selected robot. Can also be obtained via forward kinematics
|
||||
on the current joint position, this function exists to avoid redundant computations.
|
||||
Args:
|
||||
robot: ID of robot, either 1 or 2
|
||||
|
||||
Returns: ([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
|
||||
"""
|
||||
ee_pos = self._read_data("robot_" + str(robot) + "/ee_pos")
|
||||
|
||||
ee_vel = self._read_data("robot_" + str(robot) + "/ee_vel")
|
||||
|
||||
return ee_pos, ee_vel
|
||||
|
||||
def get_joints(self, obs, agent=None):
|
||||
"""
|
||||
Get joint position and velocity of the robots
|
||||
Can choose the robot with agent = 1 / 2. If agent is None both are returned
|
||||
"""
|
||||
if agent:
|
||||
q_pos = np.zeros(7)
|
||||
q_vel = np.zeros(7)
|
||||
for i in range(7):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_" + str(agent) + "/joint_" + str(i + 1) + "_vel")[0]
|
||||
else:
|
||||
q_pos = np.zeros(14)
|
||||
q_vel = np.zeros(14)
|
||||
for i in range(7):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
q_pos[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i + 7] = self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
return q_pos, q_vel
|
||||
|
||||
def _create_observation(self, obs):
|
||||
# Filter the joint velocity
|
||||
q_pos, q_vel = self.get_joints(obs)
|
||||
q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
|
||||
self.q_pos_prev = q_pos
|
||||
self.q_vel_prev = q_vel_filter
|
||||
|
||||
for i in range(7):
|
||||
self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
|
||||
self.obs_helper.get_from_obs(obs, "robot_2/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i + 7]
|
||||
|
||||
# Wrap puck's rotation angle to [-pi, pi)
|
||||
yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
|
||||
return obs
|
||||
|
||||
def _modify_observation(self, obs):
|
||||
new_obs = obs.copy()
|
||||
|
||||
puck_pos, puck_vel = self.get_puck(new_obs)
|
||||
|
||||
puck_pos_1 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
|
||||
puck_vel_1 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos_1[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos_1[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos_1[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel_1[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel_1[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel_1[2]
|
||||
|
||||
opponent_pos_1 = self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')
|
||||
self.obs_helper.get_from_obs(new_obs, 'robot_1/opponent_ee_pos')[:] = \
|
||||
(np.linalg.inv(self.env_info['robot']['base_frame'][0]) @ np.concatenate([opponent_pos_1, [1]]))[:3]
|
||||
|
||||
puck_pos_2 = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][1])
|
||||
puck_vel_2 = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][1], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_pos")[:] = puck_pos_2[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_pos")[:] = puck_pos_2[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_pos")[:] = puck_pos_2[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_x_vel")[:] = puck_vel_2[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_y_vel")[:] = puck_vel_2[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "robot_2/puck_yaw_vel")[:] = puck_vel_2[2]
|
||||
|
||||
opponent_pos_2 = self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')
|
||||
self.obs_helper.get_from_obs(new_obs, 'robot_2/opponent_ee_pos')[:] = \
|
||||
(np.linalg.inv(self.env_info['robot']['base_frame'][1]) @ np.concatenate([opponent_pos_2, [1]]))[:3]
|
||||
|
||||
return new_obs
|
||||
|
||||
def setup(self, obs):
|
||||
for i in range(7):
|
||||
self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
self._data.joint("iiwa_2/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
|
||||
self.q_pos_prev[i] = self.init_state[i]
|
||||
self.q_pos_prev[i + 7] = self.init_state[i]
|
||||
self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
|
||||
self.q_vel_prev[i + 7] = self._data.joint("iiwa_2/joint_" + str(i + 1)).qvel[0]
|
||||
|
||||
self.universal_joint_plugin.reset()
|
||||
|
||||
super().setup(obs)
|
||||
# Update body positions, needed for _compute_universal_joint
|
||||
mujoco.mj_fwdPosition(self._model, self._data)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
|
||||
def main():
|
||||
env = AirHockeyDouble(viewer_params={'start_paused': True})
|
||||
env.reset()
|
||||
env.render()
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(14)
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
104
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
Normal file
104
fancy_gym/envs/mujoco/air_hockey/seven_dof/env_single.py
Normal file
@ -0,0 +1,104 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_base import AirHockeyBase
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics
|
||||
|
||||
class AirHockeySingle(AirHockeyBase):
|
||||
"""
|
||||
Base class for single agent air hockey tasks.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
|
||||
|
||||
self._compute_init_state()
|
||||
|
||||
self.filter_ratio = 0.274
|
||||
self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
|
||||
def _compute_init_state(self):
|
||||
init_state = np.array([0., -0.1961, 0., -1.8436, 0., 0.9704, 0.])
|
||||
|
||||
success, self.init_state = inverse_kinematics(self.env_info['robot']['robot_model'],
|
||||
self.env_info['robot']['robot_data'],
|
||||
np.array([0.65, 0., 0.1645]),
|
||||
R.from_euler('xyz', [0, 5 / 6 * np.pi, 0]).as_matrix(),
|
||||
initial_q=init_state)
|
||||
|
||||
assert success is True
|
||||
|
||||
def get_ee(self):
|
||||
"""
|
||||
Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
|
||||
on the current joint position, this function exists to avoid redundant computations.
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
|
||||
"""
|
||||
ee_pos = self._read_data("robot_1/ee_pos")
|
||||
|
||||
ee_vel = self._read_data("robot_1/ee_vel")
|
||||
|
||||
return ee_pos, ee_vel
|
||||
|
||||
def get_joints(self, obs):
|
||||
"""
|
||||
Get joint position and velocity of the robot
|
||||
"""
|
||||
q_pos = np.zeros(7)
|
||||
q_vel = np.zeros(7)
|
||||
for i in range(7):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
return q_pos, q_vel
|
||||
|
||||
def _create_observation(self, obs):
|
||||
# Filter the joint velocity
|
||||
q_pos, q_vel = self.get_joints(obs)
|
||||
q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
|
||||
self.q_pos_prev = q_pos
|
||||
self.q_vel_prev = q_vel_filter
|
||||
|
||||
for i in range(7):
|
||||
self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
|
||||
|
||||
yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
|
||||
return obs
|
||||
|
||||
def _modify_observation(self, obs):
|
||||
new_obs = obs.copy()
|
||||
puck_pos, puck_vel = self.get_puck(new_obs)
|
||||
|
||||
puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
|
||||
|
||||
puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
|
||||
|
||||
return new_obs
|
||||
|
||||
def setup(self, obs):
|
||||
for i in range(7):
|
||||
self._data.joint("iiwa_1/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
self.q_pos_prev[i] = self.init_state[i]
|
||||
self.q_vel_prev[i] = self._data.joint("iiwa_1/joint_" + str(i + 1)).qvel[0]
|
||||
|
||||
self.universal_joint_plugin.reset()
|
||||
|
||||
super().setup(obs)
|
||||
|
||||
# Update body positions, needed for _compute_universal_joint
|
||||
mujoco.mj_fwdPosition(self._model, self._data)
|
74
fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
Normal file
74
fancy_gym/envs/mujoco/air_hockey/seven_dof/hit.py
Normal file
@ -0,0 +1,74 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle
|
||||
|
||||
|
||||
class AirHockeyHit(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey hitting task.
|
||||
"""
|
||||
def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}):
|
||||
"""
|
||||
Constructor
|
||||
Args:
|
||||
opponent_agent(Agent, None): Agent which controls the opponent
|
||||
moving_init(bool, False): If true, initialize the puck with inital velocity.
|
||||
"""
|
||||
self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]]) # Table Frame
|
||||
self.init_velocity_range = (0, 0.5) # Table Frame
|
||||
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
self.moving_init = moving_init
|
||||
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||
self.env_info['mallet']['radius'] * 2
|
||||
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||
self.init_velocity_range = (0, 0.5) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
|
||||
|
||||
def setup(self, obs):
|
||||
# Initial position of the puck
|
||||
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
|
||||
if self.moving_init:
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-2, 2)
|
||||
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super(AirHockeyHit, self).setup(obs)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
# Stop if the puck bounces back on the opponents wall
|
||||
if puck_pos[0] > 0 and puck_vel[0] < 0:
|
||||
return True
|
||||
return super(AirHockeyHit, self).is_absorbing(obs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyHit(moving_init=True)
|
||||
env.reset()
|
||||
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(7)
|
||||
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
if done or steps > env.info.horizon:
|
||||
steps = 0
|
||||
env.reset()
|
111
fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
Normal file
111
fancy_gym/envs/mujoco/air_hockey/seven_dof/tournament.py
Normal file
@ -0,0 +1,111 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_double import AirHockeyDouble
|
||||
|
||||
|
||||
class AirHockeyTournament(AirHockeyDouble):
|
||||
"""
|
||||
Class for the air hockey tournament. Consists of 2 robots which should play against each other.
|
||||
When the puck is on one side for more than 15 seconds the puck is reset and the player gets a penalty.
|
||||
If a player accumulates 3 penalties his score is reduced by 1.
|
||||
"""
|
||||
def __init__(self, gamma=0.99, horizon=15000, viewer_params={}, agent_name="Agent", opponent_name="Opponent"):
|
||||
self.agent_name = agent_name
|
||||
self.opponent_name = opponent_name
|
||||
|
||||
self.score = [0, 0]
|
||||
self.faults = [0, 0]
|
||||
self.start_side = None
|
||||
|
||||
self.timer = 0
|
||||
|
||||
def custom_render_callback(viewport, context):
|
||||
names = f"Agents \nScores \nFaults "
|
||||
data = f"{self.agent_name} - {self.opponent_name}\n "
|
||||
data += f"{self.score[0]} - {self.score[1]}\n "
|
||||
data += f"{self.faults[0]} - {self.faults[1]}"
|
||||
mujoco.mjr_overlay(mujoco.mjtFont.mjFONT_BIG, mujoco.mjtGridPos.mjGRID_TOPLEFT, viewport, names, data, context)
|
||||
|
||||
viewer_params["custom_render_callback"] = custom_render_callback
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||
self.env_info['mallet']['radius'] * 2
|
||||
|
||||
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||
|
||||
def setup(self, obs):
|
||||
if self.start_side == None:
|
||||
self.start_side = np.random.choice([1, -1])
|
||||
self.prev_side = self.start_side
|
||||
|
||||
# Initial position of the puck
|
||||
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0] * self.start_side)
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
|
||||
self.prev_side = self.start_side
|
||||
self.timer = 0
|
||||
|
||||
super(AirHockeyTournament, self).setup(obs)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
# Puck stuck on one side for more than 15s
|
||||
if np.sign(puck_pos[0]) == self.prev_side:
|
||||
self.timer += self.dt
|
||||
else:
|
||||
self.prev_side *= -1
|
||||
self.timer = 0
|
||||
|
||||
if self.timer > 15.0 and np.abs(puck_pos[0]) >= 0.15:
|
||||
if self.prev_side == -1:
|
||||
self.faults[0] += 1
|
||||
self.start_side = -1
|
||||
if self.faults[0] % 3 == 0:
|
||||
self.score[1] += 1
|
||||
else:
|
||||
self.faults[1] += 1
|
||||
self.start_side = 1
|
||||
if self.faults[1] % 3 == 0:
|
||||
self.score[0] += 1
|
||||
|
||||
return True
|
||||
|
||||
# Puck in Goal
|
||||
if (np.abs(puck_pos[1]) - self.env_info['table']['goal_width'] / 2) <= 0:
|
||||
if puck_pos[0] > self.env_info['table']['length'] / 2:
|
||||
self.score[0] += 1
|
||||
self.start_side = -1
|
||||
return True
|
||||
|
||||
if puck_pos[0] < -self.env_info['table']['length'] / 2:
|
||||
self.score[1] += 1
|
||||
self.start_side = 1
|
||||
return True
|
||||
|
||||
# Puck stuck in the middle
|
||||
if np.abs(puck_pos[0]) < 0.15 and np.linalg.norm(puck_vel[0]) < 0.025:
|
||||
return True
|
||||
return super(AirHockeyTournament, self).is_absorbing(obs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyTournament()
|
||||
env.reset()
|
||||
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(14)
|
||||
steps += 1
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
if done or steps > env.info.horizon:
|
||||
steps = 0
|
||||
env.reset()
|
3
fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
Normal file
3
fancy_gym/envs/mujoco/air_hockey/three_dof/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .env_base import AirHockeyBase
|
||||
from .defend import AirHockeyDefend
|
||||
from .hit import AirHockeyHit
|
76
fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
Normal file
76
fancy_gym/envs/mujoco/air_hockey/three_dof/defend.py
Normal file
@ -0,0 +1,76 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
|
||||
|
||||
|
||||
class AirHockeyDefend(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey defending task.
|
||||
The agent should stop the puck at the line x=-0.6.
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
self.init_velocity_range = (1, 3)
|
||||
|
||||
self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]]) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
def setup(self, state=None):
|
||||
# Set initial puck parameters
|
||||
puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0]
|
||||
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-0.5, 0.5)
|
||||
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-10, 10)
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super(AirHockeyDefend, self).setup(state)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, state):
|
||||
puck_pos, puck_vel = self.get_puck(state)
|
||||
# If puck is over the middle line and moving towards opponent
|
||||
if puck_pos[0] > 0 and puck_vel[0] > 0:
|
||||
return True
|
||||
if np.linalg.norm(puck_vel[:2]) < 0.1:
|
||||
return True
|
||||
return super().is_absorbing(state)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyDefend()
|
||||
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
||||
while True:
|
||||
action = np.zeros(3)
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
224
fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
Normal file
224
fancy_gym/envs/mujoco/air_hockey/three_dof/env_base.py
Normal file
@ -0,0 +1,224 @@
|
||||
import os
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.data.planar import __file__ as env_path
|
||||
from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
|
||||
from mushroom_rl.utils.spaces import Box
|
||||
|
||||
|
||||
class AirHockeyBase(MuJoCo):
|
||||
"""
|
||||
Abstract class for all AirHockey Environments.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, timestep=1 / 1000., n_intermediate_steps=20, n_substeps=1,
|
||||
n_agents=1, viewer_params={}):
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
Args:
|
||||
n_agents (int, 1): number of agent to be used in the environment (one or two)
|
||||
"""
|
||||
|
||||
self.n_agents = n_agents
|
||||
|
||||
action_spec = []
|
||||
|
||||
observation_spec = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data = [("puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL)]
|
||||
|
||||
collision_spec = [("puck", ["puck"]),
|
||||
("rim", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r", "rim_left", "rim_right"]),
|
||||
("rim_short_sides", ["rim_home_l", "rim_home_r", "rim_away_l", "rim_away_r"])]
|
||||
|
||||
if 1 <= self.n_agents <= 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "single.xml")
|
||||
|
||||
action_spec += ["planar_robot_1/joint_1", "planar_robot_1/joint_2", "planar_robot_1/joint_3"]
|
||||
|
||||
observation_spec += [("robot_1/joint_1_pos", "planar_robot_1/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_2_pos", "planar_robot_1/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_3_pos", "planar_robot_1/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_1/joint_1_vel", "planar_robot_1/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_2_vel", "planar_robot_1/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_1/joint_3_vel", "planar_robot_1/joint_3", ObservationType.JOINT_VEL)]
|
||||
|
||||
additional_data += [("robot_1/ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS),
|
||||
("robot_1/ee_vel", "planar_robot_1/body_ee", ObservationType.BODY_VEL)]
|
||||
|
||||
collision_spec += [("robot_1/ee", ["planar_robot_1/ee"])]
|
||||
|
||||
if self.n_agents == 2:
|
||||
scene = os.path.join(os.path.dirname(os.path.abspath(env_path)), "double.xml")
|
||||
|
||||
observation_spec += [("robot_1/opponent_ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS)]
|
||||
|
||||
action_spec += ["planar_robot_2/joint_1", "planar_robot_2/joint_2", "planar_robot_2/joint_3"]
|
||||
# Add puck pos/vel again to transform into second agents frame
|
||||
observation_spec += [("robot_2/puck_x_pos", "puck_x", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_y_pos", "puck_y", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_yaw_pos", "puck_yaw", ObservationType.JOINT_POS),
|
||||
("robot_2/puck_x_vel", "puck_x", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_y_vel", "puck_y", ObservationType.JOINT_VEL),
|
||||
("robot_2/puck_yaw_vel", "puck_yaw", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_1_pos", "planar_robot_2/joint_1", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_2_pos", "planar_robot_2/joint_2", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_3_pos", "planar_robot_2/joint_3", ObservationType.JOINT_POS),
|
||||
("robot_2/joint_1_vel", "planar_robot_2/joint_1", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_2_vel", "planar_robot_2/joint_2", ObservationType.JOINT_VEL),
|
||||
("robot_2/joint_3_vel", "planar_robot_2/joint_3", ObservationType.JOINT_VEL)]
|
||||
|
||||
observation_spec += [("robot_2/opponent_ee_pos", "planar_robot_1/body_ee", ObservationType.BODY_POS)]
|
||||
|
||||
additional_data += [("robot_2/ee_pos", "planar_robot_2/body_ee", ObservationType.BODY_POS),
|
||||
("robot_2/ee_vel", "planar_robot_2/body_ee", ObservationType.BODY_VEL)]
|
||||
|
||||
collision_spec += [("robot_2/ee", ["planar_robot_2/ee"])]
|
||||
else:
|
||||
raise ValueError('n_agents should be 1 or 2')
|
||||
|
||||
self.env_info = dict()
|
||||
self.env_info['table'] = {"length": 1.948, "width": 1.038, "goal_width": 0.25}
|
||||
self.env_info['puck'] = {"radius": 0.03165}
|
||||
self.env_info['mallet'] = {"radius": 0.04815}
|
||||
self.env_info['n_agents'] = self.n_agents
|
||||
self.env_info['robot'] = {
|
||||
"n_joints": 3,
|
||||
"ee_desired_height": 0.1,
|
||||
"joint_vel_limit": np.array([[-np.pi / 2, -np.pi / 2, -np.pi * 2 / 3],
|
||||
[np.pi / 2, np.pi / 2, np.pi * 2 / 3]]),
|
||||
|
||||
"joint_acc_limit": np.array([[-2 * np.pi, -2 * np.pi, -2 * 4 / 3 * np.pi],
|
||||
[2 * np.pi, 2 * np.pi, 2 * 4 / 3 * np.pi]]),
|
||||
"base_frame": [],
|
||||
"control_frequency": 50,
|
||||
}
|
||||
|
||||
self.env_info['puck_pos_ids'] = [0, 1, 2]
|
||||
self.env_info['puck_vel_ids'] = [3, 4, 5]
|
||||
self.env_info['joint_pos_ids'] = [6, 7, 8]
|
||||
self.env_info['joint_vel_ids'] = [9, 10, 11]
|
||||
if self.n_agents == 2:
|
||||
self.env_info['opponent_ee_ids'] = [13, 14, 15]
|
||||
else:
|
||||
self.env_info['opponent_ee_ids'] = []
|
||||
|
||||
max_joint_vel = ([np.inf] * 3 + list(self.env_info["robot"]["joint_vel_limit"][1, :3])) * self.n_agents
|
||||
|
||||
super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps,
|
||||
n_intermediate_steps, additional_data, collision_spec, max_joint_vel, **viewer_params)
|
||||
|
||||
# Construct the mujoco model at origin
|
||||
robot_model = mujoco.MjModel.from_xml_path(
|
||||
os.path.join(os.path.dirname(os.path.abspath(env_path)), "planar_robot_1.xml"))
|
||||
robot_model.body('planar_robot_1/base').pos = np.zeros(3)
|
||||
robot_data = mujoco.MjData(robot_model)
|
||||
|
||||
# Add env_info that requires mujoco models
|
||||
self.env_info['dt'] = self.dt
|
||||
self.env_info["robot"]["joint_pos_limit"] = np.array(
|
||||
[self._model.joint(f"planar_robot_1/joint_{i + 1}").range for i in range(3)]).T
|
||||
self.env_info["robot"]["robot_model"] = robot_model
|
||||
self.env_info["robot"]["robot_data"] = robot_data
|
||||
self.env_info["rl_info"] = self.info
|
||||
|
||||
frame_T = np.eye(4)
|
||||
temp = np.zeros((9, 1))
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_1/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("planar_robot_1/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
if self.n_agents == 2:
|
||||
mujoco.mju_quat2Mat(temp, self._model.body("planar_robot_2/base").quat)
|
||||
frame_T[:3, :3] = temp.reshape(3, 3)
|
||||
frame_T[:3, 3] = self._model.body("planar_robot_2/base").pos
|
||||
self.env_info['robot']['base_frame'].append(frame_T.copy())
|
||||
|
||||
# Ids of the joint, which are controller by the action space
|
||||
self.actuator_joint_ids = [self._model.joint(name).id for name in action_spec]
|
||||
|
||||
def _modify_mdp_info(self, mdp_info):
|
||||
obs_low = np.array([0, -1, -np.pi, -20., -20., -100,
|
||||
*np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[0]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][0]])
|
||||
obs_high = np.array([3.02, 1, np.pi, 20., 20., 100,
|
||||
*np.array([self._model.joint(f"planar_robot_1/joint_{i + 1}").range[1]
|
||||
for i in range(self.env_info['robot']['n_joints'])]),
|
||||
*self.env_info['robot']['joint_vel_limit'][1]])
|
||||
if self.n_agents == 2:
|
||||
obs_low = np.concatenate([obs_low, [1.5, -1.5, -1.5]])
|
||||
obs_high = np.concatenate([obs_high, [4.5, 1.5, 1.5]])
|
||||
mdp_info.observation_space = Box(obs_low, obs_high)
|
||||
return mdp_info
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
if np.any(np.abs(puck_pos[:2]) > boundary) or np.linalg.norm(puck_vel) > 100:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _puck_2d_in_robot_frame(puck_in, robot_frame, type='pose'):
|
||||
if type == 'pose':
|
||||
puck_w = np.eye(4)
|
||||
puck_w[:2, 3] = puck_in[:2]
|
||||
puck_w[:3, :3] = R.from_euler("xyz", [0., 0., puck_in[2]]).as_matrix()
|
||||
|
||||
puck_r = np.linalg.inv(robot_frame) @ puck_w
|
||||
puck_out = np.concatenate([puck_r[:2, 3],
|
||||
R.from_matrix(puck_r[:3, :3]).as_euler('xyz')[2:3]])
|
||||
|
||||
if type == 'vel':
|
||||
rot_mat = robot_frame[:3, :3]
|
||||
|
||||
vel_lin = np.array([*puck_in[:2], 0])
|
||||
vel_ang = np.array([0., 0., puck_in[2]])
|
||||
|
||||
vel_lin_r = rot_mat.T @ vel_lin
|
||||
vel_ang_r = rot_mat.T @ vel_ang
|
||||
|
||||
puck_out = np.concatenate([vel_lin_r[:2], vel_ang_r[2:3]])
|
||||
return puck_out
|
||||
|
||||
def get_puck(self, obs):
|
||||
"""
|
||||
Getting the puck properties from the observations
|
||||
Args:
|
||||
obs: The current observation
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, yaw], [lin_vel_x, lin_vel_y, yaw_vel])
|
||||
|
||||
"""
|
||||
puck_pos = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_pos"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")])
|
||||
puck_vel = np.concatenate([self.obs_helper.get_from_obs(obs, "puck_x_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_y_vel"),
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_vel")])
|
||||
return puck_pos, puck_vel
|
||||
|
||||
def get_ee(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_joints(self, obs):
|
||||
raise NotImplementedError
|
91
fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
Normal file
91
fancy_gym/envs/mujoco/air_hockey/three_dof/env_single.py
Normal file
@ -0,0 +1,91 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.three_dof.env_base import AirHockeyBase
|
||||
|
||||
|
||||
class AirHockeySingle(AirHockeyBase):
|
||||
"""
|
||||
Base class for single agent air hockey tasks.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, viewer_params={}):
|
||||
|
||||
"""
|
||||
Constructor.
|
||||
|
||||
"""
|
||||
self.init_state = np.array([-1.15570723, 1.30024401, 1.44280414])
|
||||
super().__init__(gamma=gamma, horizon=horizon, n_agents=1, viewer_params=viewer_params)
|
||||
|
||||
self.filter_ratio = 0.274
|
||||
self.q_pos_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
self.q_vel_prev = np.zeros(self.env_info["robot"]["n_joints"])
|
||||
|
||||
def get_ee(self):
|
||||
"""
|
||||
Getting the ee properties from the current internal state. Can also be obtained via forward kinematics
|
||||
on the current joint position, this function exists to avoid redundant computations.
|
||||
|
||||
Returns:
|
||||
([pos_x, pos_y, pos_z], [ang_vel_x, ang_vel_y, ang_vel_z, lin_vel_x, lin_vel_y, lin_vel_z])
|
||||
"""
|
||||
ee_pos = self._read_data("robot_1/ee_pos")
|
||||
|
||||
ee_vel = self._read_data("robot_1/ee_vel")
|
||||
|
||||
return ee_pos, ee_vel
|
||||
|
||||
def get_joints(self, obs):
|
||||
"""
|
||||
Get joint position and velocity of the robot
|
||||
"""
|
||||
q_pos = np.zeros(3)
|
||||
q_vel = np.zeros(3)
|
||||
for i in range(3):
|
||||
q_pos[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_pos")[0]
|
||||
q_vel[i] = self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[0]
|
||||
|
||||
return q_pos, q_vel
|
||||
|
||||
def _modify_observation(self, obs):
|
||||
new_obs = obs.copy()
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
|
||||
puck_pos = self._puck_2d_in_robot_frame(puck_pos, self.env_info['robot']['base_frame'][0])
|
||||
|
||||
puck_vel = self._puck_2d_in_robot_frame(puck_vel, self.env_info['robot']['base_frame'][0], type='vel')
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_pos")[:] = puck_pos[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_pos")[:] = puck_pos[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_pos")[:] = puck_pos[2]
|
||||
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_x_vel")[:] = puck_vel[0]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_y_vel")[:] = puck_vel[1]
|
||||
self.obs_helper.get_from_obs(new_obs, "puck_yaw_vel")[:] = puck_vel[2]
|
||||
|
||||
return new_obs
|
||||
|
||||
def setup(self, state=None):
|
||||
for i in range(3):
|
||||
self._data.joint("planar_robot_1/joint_" + str(i + 1)).qpos = self.init_state[i]
|
||||
self.q_pos_prev[i] = self.init_state[i]
|
||||
self.q_vel_prev[i] = self._data.joint("planar_robot_1/joint_" + str(i + 1)).qvel[0]
|
||||
|
||||
mujoco.mj_fwdPosition(self._model, self._data)
|
||||
super().setup(state)
|
||||
|
||||
def _create_observation(self, obs):
|
||||
# Filter the joint velocity
|
||||
q_pos, q_vel = self.get_joints(obs)
|
||||
q_vel_filter = self.filter_ratio * q_vel + (1 - self.filter_ratio) * self.q_vel_prev
|
||||
self.q_pos_prev = q_pos
|
||||
self.q_vel_prev = q_vel_filter
|
||||
|
||||
for i in range(3):
|
||||
self.obs_helper.get_from_obs(obs, "robot_1/joint_" + str(i + 1) + "_vel")[:] = q_vel_filter[i]
|
||||
|
||||
yaw_angle = self.obs_helper.get_from_obs(obs, "puck_yaw_pos")
|
||||
self.obs_helper.get_from_obs(obs, "puck_yaw_pos")[:] = (yaw_angle + np.pi) % (2 * np.pi) - np.pi
|
||||
return obs
|
84
fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
Normal file
84
fancy_gym/envs/mujoco/air_hockey/three_dof/hit.py
Normal file
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.three_dof.env_single import AirHockeySingle
|
||||
|
||||
class AirHockeyHit(AirHockeySingle):
|
||||
"""
|
||||
Class for the air hockey hitting task.
|
||||
"""
|
||||
|
||||
def __init__(self, gamma=0.99, horizon=500, moving_init=False, viewer_params={}):
|
||||
"""
|
||||
Constructor
|
||||
Args:
|
||||
moving_init(bool, False): If true, initialize the puck with inital velocity.
|
||||
"""
|
||||
super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params)
|
||||
|
||||
self.moving_init = moving_init
|
||||
hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \
|
||||
self.env_info['mallet']['radius'] * 2
|
||||
self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]]) # Table Frame
|
||||
self.init_velocity_range = (0, 0.5) # Table Frame
|
||||
self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]]) # Robot Frame
|
||||
|
||||
def setup(self, state=None):
|
||||
# Initial position of the puck
|
||||
puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0]
|
||||
|
||||
# self.init_state = np.array([-0.9273, 0.9273, np.pi / 2])
|
||||
|
||||
self._write_data("puck_x_pos", puck_pos[0])
|
||||
self._write_data("puck_y_pos", puck_pos[1])
|
||||
|
||||
if self.moving_init:
|
||||
lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1])
|
||||
angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1)
|
||||
puck_vel = np.zeros(3)
|
||||
puck_vel[0] = -np.cos(angle) * lin_vel
|
||||
puck_vel[1] = np.sin(angle) * lin_vel
|
||||
puck_vel[2] = np.random.uniform(-2, 2, 1)
|
||||
|
||||
self._write_data("puck_x_vel", puck_vel[0])
|
||||
self._write_data("puck_y_vel", puck_vel[1])
|
||||
self._write_data("puck_yaw_vel", puck_vel[2])
|
||||
|
||||
super(AirHockeyHit, self).setup(state)
|
||||
|
||||
def reward(self, state, action, next_state, absorbing):
|
||||
return 0
|
||||
|
||||
def is_absorbing(self, obs):
|
||||
puck_pos, puck_vel = self.get_puck(obs)
|
||||
# Stop if the puck bounces back on the opponents wall
|
||||
if puck_pos[0] > 0 and puck_vel[0] < 0:
|
||||
return True
|
||||
return super(AirHockeyHit, self).is_absorbing(obs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
env = AirHockeyHit(moving_init=False)
|
||||
|
||||
env.reset()
|
||||
env.render()
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
while True:
|
||||
action = np.zeros(3)
|
||||
|
||||
observation, reward, done, info = env.step(action)
|
||||
env.render()
|
||||
|
||||
gamma *= env.info.gamma
|
||||
J += gamma * reward
|
||||
R += reward
|
||||
steps += 1
|
||||
if done or steps > env.info.horizon:
|
||||
print("J: ", J, " R: ", R)
|
||||
R = 0.
|
||||
J = 0.
|
||||
gamma = 1.
|
||||
steps = 0
|
||||
env.reset()
|
2
fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
Normal file
2
fancy_gym/envs/mujoco/air_hockey/utils/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
from .kinematics import inverse_kinematics, forward_kinematics, jacobian
|
||||
from .transformations import robot_to_world, world_to_robot
|
248
fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
Normal file
248
fancy_gym/envs/mujoco/air_hockey/utils/kinematics.py
Normal file
@ -0,0 +1,248 @@
|
||||
import mujoco
|
||||
import numpy as np
|
||||
|
||||
|
||||
def forward_kinematics(mj_model, mj_data, q, link="ee"):
|
||||
"""
|
||||
Compute the forward kinematics of the robots.
|
||||
|
||||
IMPORTANT:
|
||||
For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
|
||||
parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
|
||||
where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
|
||||
case this function will return the wrong values.
|
||||
|
||||
Coordinate System:
|
||||
All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
|
||||
base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
|
||||
coordinate system
|
||||
|
||||
Args:
|
||||
mj_model (mujoco.MjModel):
|
||||
mujoco MjModel of the robot-only model
|
||||
mj_data (mujoco.MjData):
|
||||
mujoco MjData object generated from the model
|
||||
q (np.array):
|
||||
joint configuration for which the forward kinematics are computed
|
||||
link (string, "ee"):
|
||||
Link for which the forward kinematics is calculated. When using the iiwas the choices are
|
||||
["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
|
||||
|
||||
Returns
|
||||
-------
|
||||
position: numpy.ndarray, (3,)
|
||||
Position of the link in robot's base frame
|
||||
orientation: numpy.ndarray, (3, 3)
|
||||
Orientation of the link in robot's base frame
|
||||
"""
|
||||
|
||||
return _mujoco_fk(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
|
||||
|
||||
|
||||
def inverse_kinematics(mj_model, mj_data, desired_position, desired_rotation=None, initial_q=None, link="ee"):
|
||||
"""
|
||||
Compute the inverse kinematics of the robots.
|
||||
|
||||
IMPORTANT:
|
||||
For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
|
||||
parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
|
||||
where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is
|
||||
the case this function will return the wrong values.
|
||||
|
||||
Coordinate System:
|
||||
All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
|
||||
base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
|
||||
coordinate system
|
||||
|
||||
Args:
|
||||
mj_model (mujoco.MjModel):
|
||||
mujoco MjModel of the robot-only model
|
||||
mj_data (mujoco.MjData):
|
||||
mujoco MjData object generated from the model
|
||||
desired_position (numpy.ndarray, (3,)):
|
||||
The desired position of the selected link.
|
||||
desired_rotation (optional, numpy.array, (3,3)):
|
||||
The desired rotation of the selected link.
|
||||
initial_q (numpy.ndarray, None):
|
||||
The initial configuration of the algorithm, if set to None it will take the initial configuration of the
|
||||
mj_data.
|
||||
link (str, "ee"):
|
||||
Link for which the inverse kinematics is calculated. When using the iiwas the choices are
|
||||
["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
|
||||
"""
|
||||
q_init = np.zeros(mj_model.nq)
|
||||
if initial_q is None:
|
||||
q_init = mj_data.qpos
|
||||
else:
|
||||
q_init[:initial_q.size] = initial_q
|
||||
|
||||
q_l = mj_model.jnt_range[:, 0]
|
||||
q_h = mj_model.jnt_range[:, 1]
|
||||
lower_limit = (q_l + q_h) / 2 - 0.95 * (q_h - q_l) / 2
|
||||
upper_limit = (q_l + q_h) / 2 + 0.95 * (q_h - q_l) / 2
|
||||
|
||||
desired_quat = None
|
||||
if desired_rotation is not None:
|
||||
desired_quat = np.zeros(4)
|
||||
mujoco.mju_mat2Quat(desired_quat, desired_rotation.reshape(-1, 1))
|
||||
|
||||
return _mujoco_clik(desired_position, desired_quat, q_init, link_to_xml_name(mj_model, link), mj_model,
|
||||
mj_data, lower_limit, upper_limit)
|
||||
|
||||
|
||||
def jacobian(mj_model, mj_data, q, link="ee"):
|
||||
"""
|
||||
Compute the Jacobian of the robots.
|
||||
|
||||
IMPORTANT:
|
||||
For the iiwa we assume that the universal joint at the end of the end-effector always leaves the mallet
|
||||
parallel to the table and facing down. This assumption only makes sense for a subset of robot configurations
|
||||
where the mallet can be parallel to the table without colliding with the rod it is mounted on. If this is the
|
||||
case this function will return the wrong values.
|
||||
|
||||
Coordinate System:
|
||||
All translations and rotations are in the coordinate frame of the Robot. The zero point is in the center of the
|
||||
base of the Robot. The x-axis points forward, the z-axis points up and the y-axis forms a right-handed
|
||||
coordinate system
|
||||
|
||||
Args:
|
||||
mj_model (mujoco.MjModel):
|
||||
mujoco MjModel of the robot-only model
|
||||
mj_data (mujoco.MjData):
|
||||
mujoco MjData object generated from the model
|
||||
q (numpy.ndarray):
|
||||
joint configuration for which the forward kinematics are computed
|
||||
link (string, "ee"):
|
||||
Link for which the forward kinematics is calculated. When using the iiwas the choices are
|
||||
["1", "2", "3", "4", "5", "6", "7", "ee"]. When using planar the choices are ["1", "2", "3", "ee"]
|
||||
|
||||
Returns
|
||||
-------
|
||||
numpy.ndarray, (6, num_joints):
|
||||
The Jacobian matrix for the robot kinematics.
|
||||
"""
|
||||
return _mujoco_jac(q, link_to_xml_name(mj_model, link), mj_model, mj_data)
|
||||
|
||||
|
||||
def link_to_xml_name(mj_model, link):
|
||||
try:
|
||||
mj_model.body('iiwa_1/base')
|
||||
link_to_frame_idx = {
|
||||
"1": "iiwa_1/link_1",
|
||||
"2": "iiwa_1/link_2",
|
||||
"3": "iiwa_1/link_3",
|
||||
"4": "iiwa_1/link_4",
|
||||
"5": "iiwa_1/link_5",
|
||||
"6": "iiwa_1/link_6",
|
||||
"7": "iiwa_1/link_7",
|
||||
"ee": "iiwa_1/striker_joint_link",
|
||||
}
|
||||
except:
|
||||
link_to_frame_idx = {
|
||||
"1": "planar_robot_1/body_1",
|
||||
"2": "planar_robot_1/body_2",
|
||||
"3": "planar_robot_1/body_3",
|
||||
"ee": "planar_robot_1/body_ee",
|
||||
}
|
||||
return link_to_frame_idx[link]
|
||||
|
||||
|
||||
def _mujoco_fk(q, name, model, data):
|
||||
data.qpos[:len(q)] = q
|
||||
mujoco.mj_fwdPosition(model, data)
|
||||
return data.body(name).xpos.copy(), data.body(name).xmat.reshape(3, 3).copy()
|
||||
|
||||
|
||||
def _mujoco_jac(q, name, model, data):
|
||||
data.qpos[:len(q)] = q
|
||||
dtype = data.qpos.dtype
|
||||
jac = np.empty((6, model.nv), dtype=dtype)
|
||||
jac_pos, jac_rot = jac[:3], jac[3:]
|
||||
mujoco.mj_fwdPosition(model, data)
|
||||
mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
|
||||
return jac
|
||||
|
||||
|
||||
def _mujoco_clik(desired_pos, desired_quat, initial_q, name, model, data, lower_limit, upper_limit):
|
||||
IT_MAX = 1000
|
||||
eps = 1e-4
|
||||
damp = 1e-3
|
||||
progress_thresh = 20.0
|
||||
max_update_norm = 0.1
|
||||
rot_weight = 1
|
||||
i = 0
|
||||
|
||||
dtype = data.qpos.dtype
|
||||
|
||||
data.qpos = initial_q
|
||||
|
||||
neg_x_quat = np.empty(4, dtype=dtype)
|
||||
error_x_quat = np.empty(4, dtype=dtype)
|
||||
|
||||
if desired_pos is not None and desired_quat is not None:
|
||||
jac = np.empty((6, model.nv), dtype=dtype)
|
||||
err = np.empty(6, dtype=dtype)
|
||||
jac_pos, jac_rot = jac[:3], jac[3:]
|
||||
err_pos, err_rot = err[:3], err[3:]
|
||||
else:
|
||||
jac = np.empty((3, model.nv), dtype=dtype)
|
||||
err = np.empty(3, dtype=dtype)
|
||||
if desired_pos is not None:
|
||||
jac_pos, jac_rot = jac, None
|
||||
err_pos, err_rot = err, None
|
||||
elif desired_quat is not None:
|
||||
jac_pos, jac_rot = None, jac
|
||||
err_pos, err_rot = None, err
|
||||
else:
|
||||
raise ValueError("Desired Position and desired rotation is None, cannot compute inverse kinematics")
|
||||
|
||||
while True:
|
||||
# forward kinematics
|
||||
mujoco.mj_fwdPosition(model, data)
|
||||
|
||||
x_pos = data.body(name).xpos
|
||||
x_quat = data.body(name).xquat
|
||||
|
||||
error_norm = 0
|
||||
if desired_pos is not None:
|
||||
err_pos[:] = desired_pos - x_pos
|
||||
error_norm += np.linalg.norm(err_pos)
|
||||
|
||||
if desired_quat is not None:
|
||||
mujoco.mju_negQuat(neg_x_quat, x_quat)
|
||||
mujoco.mju_mulQuat(error_x_quat, desired_quat, neg_x_quat)
|
||||
mujoco.mju_quat2Vel(err_rot, error_x_quat, 1)
|
||||
error_norm += np.linalg.norm(err_rot) * rot_weight
|
||||
|
||||
if error_norm < eps:
|
||||
success = True
|
||||
break
|
||||
if i >= IT_MAX:
|
||||
success = False
|
||||
break
|
||||
|
||||
mujoco.mj_jacBody(model, data, jac_pos, jac_rot, model.body(name).id)
|
||||
|
||||
hess_approx = jac.T.dot(jac)
|
||||
joint_delta = jac.T.dot(err)
|
||||
|
||||
hess_approx += np.eye(hess_approx.shape[0]) * damp
|
||||
update_joints = np.linalg.solve(hess_approx, joint_delta)
|
||||
|
||||
update_norm = np.linalg.norm(update_joints)
|
||||
|
||||
# Check whether we are still making enough progress, and halt if not.
|
||||
progress_criterion = error_norm / update_norm
|
||||
if progress_criterion > progress_thresh:
|
||||
success = False
|
||||
break
|
||||
|
||||
if update_norm > max_update_norm:
|
||||
update_joints *= max_update_norm / update_norm
|
||||
|
||||
mujoco.mj_integratePos(model, data.qpos, update_joints, 1)
|
||||
data.qpos = np.clip(data.qpos, lower_limit, upper_limit)
|
||||
i += 1
|
||||
q_cur = data.qpos.copy()
|
||||
|
||||
return success, q_cur
|
65
fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
Normal file
65
fancy_gym/envs/mujoco/air_hockey/utils/transformations.py
Normal file
@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def robot_to_world(base_frame, translation, rotation=None):
|
||||
"""
|
||||
Transform position or rotation optional from the robot base frame to the world frame
|
||||
|
||||
Args
|
||||
----
|
||||
base_frame: numpy.ndarray, (4,4)
|
||||
The transformation matrix from the world to robot base frame
|
||||
translation: ndarray, (3,)
|
||||
The 3D position to be transformed
|
||||
rotation: optional, ndarray, (3, 3)
|
||||
The rotation in the matrix form to be transformed
|
||||
|
||||
Returns
|
||||
-------
|
||||
position: ndarray (3,)
|
||||
The transformed 3D position
|
||||
rotation: ndarray (3, 3)
|
||||
The transformed rotation in the matrix form
|
||||
|
||||
"""
|
||||
|
||||
target = np.eye(4)
|
||||
target[:len(translation), 3] = translation
|
||||
if rotation is not None:
|
||||
target[:3, :3] = rotation
|
||||
|
||||
target_frame = base_frame @ target
|
||||
|
||||
return target_frame[:len(translation), 3], target_frame[:3, :3]
|
||||
|
||||
|
||||
def world_to_robot(base_frame, translation, rotation=None):
|
||||
"""
|
||||
Transfrom position and rotation (optional) from the world frame to the robot's base frame
|
||||
|
||||
Args
|
||||
----
|
||||
base_frame: ndarray, (4,4)
|
||||
The transformation matrix from the world to robot base frame
|
||||
translation: ndarray, (3,)
|
||||
The 3D position to be transformed
|
||||
rotation: optional, ndarray, (3, 3)
|
||||
The rotation in the matrix form to be tranformed
|
||||
|
||||
Returns
|
||||
-------
|
||||
position: ndarray, (3,)
|
||||
The transformed 3D position
|
||||
rotation: ndarray, (3, 3)
|
||||
The transformed rotation in the matrix form
|
||||
|
||||
"""
|
||||
|
||||
target = np.eye(4)
|
||||
target[:len(translation), 3] = translation
|
||||
if rotation is not None:
|
||||
target[:3, :3] = rotation
|
||||
|
||||
target_frame = np.linalg.inv(base_frame) @ target
|
||||
|
||||
return target_frame[:len(translation), 3], target_frame[:3, :3]
|
120
fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
Normal file
120
fancy_gym/envs/mujoco/air_hockey/utils/universal_joint_plugin.py
Normal file
@ -0,0 +1,120 @@
|
||||
import numpy as np
|
||||
|
||||
from fancy_gym.envs.mujoco.air_hockey.utils.kinematics import forward_kinematics
|
||||
|
||||
|
||||
class UniversalJointPlugin:
|
||||
def __init__(self, env_model, env_data, env_info):
|
||||
self.env_info = env_info
|
||||
self.env_model = env_model
|
||||
self.env_data = env_data
|
||||
self.Kp = 20
|
||||
self.Kd = 0.31
|
||||
|
||||
self.universal_joint_ids = []
|
||||
self.universal_joint_ctrl_ids = []
|
||||
self.universal_joint_ids += [env_model.joint("iiwa_1/striker_joint_1").id,
|
||||
env_model.joint("iiwa_1/striker_joint_2").id]
|
||||
self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_1/striker_joint_1").id,
|
||||
env_model.actuator("iiwa_1/striker_joint_2").id]
|
||||
action_spec = ["iiwa_1/joint_1", "iiwa_1/joint_2", "iiwa_1/joint_3", "iiwa_1/joint_4", "iiwa_1/joint_5",
|
||||
"iiwa_1/joint_6", "iiwa_1/joint_7"]
|
||||
|
||||
if self.env_info['n_agents'] >= 2:
|
||||
self.universal_joint_ids += [env_model.joint("iiwa_2/striker_joint_1").id,
|
||||
env_model.joint("iiwa_2/striker_joint_2").id]
|
||||
self.universal_joint_ctrl_ids += [env_model.actuator("iiwa_2/striker_joint_1").id,
|
||||
env_model.actuator("iiwa_2/striker_joint_2").id]
|
||||
action_spec += ["iiwa_2/joint_1", "iiwa_2/joint_2", "iiwa_2/joint_3", "iiwa_2/joint_4", "iiwa_2/joint_5",
|
||||
"iiwa_2/joint_6", "iiwa_2/joint_7"]
|
||||
|
||||
self.actuator_joint_ids = [self.env_model.joint(name).id for name in action_spec]
|
||||
|
||||
self.filter_ratio = 0.273
|
||||
self.u_joint_pos_des = np.zeros(2 * self.env_info['n_agents'])
|
||||
self.u_joint_pos_prev = None
|
||||
self.u_joint_vel_prev = np.zeros(2 * self.env_info['n_agents'])
|
||||
|
||||
def reset(self):
|
||||
self.u_joint_pos_prev = None
|
||||
self._control_universal_joint()
|
||||
for i in range(self.env_info['n_agents']):
|
||||
self.u_joint_vel_prev = self.env_data.qvel[self.universal_joint_ids]
|
||||
|
||||
self.env_data.qpos[self.universal_joint_ctrl_ids] = self.u_joint_pos_des
|
||||
|
||||
def update(self):
|
||||
self._control_universal_joint()
|
||||
|
||||
def _control_universal_joint(self):
|
||||
self._compute_universal_joint()
|
||||
self.u_joint_pos_prev = self.env_data.qpos[self.universal_joint_ids]
|
||||
self.u_joint_vel_prev = self.filter_ratio * self.env_data.qvel[self.universal_joint_ids] + \
|
||||
(1 - self.filter_ratio) * self.u_joint_vel_prev
|
||||
|
||||
Kp = 4
|
||||
Kd = 0.31
|
||||
torque = Kp * (self.u_joint_pos_des - self.u_joint_pos_prev) - Kd * self.u_joint_vel_prev
|
||||
self.env_data.ctrl[self.universal_joint_ctrl_ids] = torque
|
||||
|
||||
def _compute_universal_joint(self):
|
||||
for i in range(self.env_info['n_agents']):
|
||||
q = self.env_data.qpos[self.actuator_joint_ids[i * 7: (i + 1) * 7]]
|
||||
# Have to exclude the puck joints
|
||||
pos, rot_mat = forward_kinematics(self.env_info['robot']['robot_model'],
|
||||
self.env_info['robot']['robot_data'], q)
|
||||
|
||||
v_x = rot_mat[:, 0]
|
||||
v_y = rot_mat[:, 1]
|
||||
|
||||
# The desired position of the x-axis is the cross product of the desired z (0, 0, 1).T
|
||||
# and the current y-axis. (0, 0, 1).T x v_y
|
||||
x_desired = np.array([-v_y[1], v_y[0], 0])
|
||||
|
||||
# Find the signed angle from the current to the desired x-axis around the y-axis
|
||||
# https://stackoverflow.com/questions/5188561/signed-angle-between-two-3d-vectors-with-same-origin-within-the-same-plane
|
||||
q1 = np.arctan2(self._cross_3d(v_x, x_desired) @ v_y, v_x @ x_desired)
|
||||
if self.u_joint_pos_prev is not None:
|
||||
if q1 - self.u_joint_pos_prev[0] > np.pi:
|
||||
q1 -= np.pi * 2
|
||||
elif q1 - self.u_joint_pos_prev[0] < -np.pi:
|
||||
q1 += np.pi * 2
|
||||
|
||||
# Rotate the X axis by the calculated amount
|
||||
w = np.array([[0, -v_y[2], v_y[1]],
|
||||
[v_y[2], 0, -v_y[0]],
|
||||
[-v_y[1], v_y[0], 0]])
|
||||
|
||||
r = np.eye(3) + w * np.sin(q1) + w ** 2 * (1 - np.cos(q1))
|
||||
v_x_rotated = r @ v_x
|
||||
|
||||
# The desired position of the y-axis is the negative cross product of the desired z (0, 0, 1).T and the current
|
||||
# x-axis, which is already rotated around y. The negative is there because the x-axis is flipped.
|
||||
# -((0, 0, 1).T x v_x))
|
||||
y_desired = np.array([v_x_rotated[1], - v_x_rotated[0], 0])
|
||||
|
||||
# Find the signed angle from the current to the desired y-axis around the new rotated x-axis
|
||||
q2 = np.arctan2(self._cross_3d(v_y, y_desired) @ v_x_rotated, v_y @ y_desired)
|
||||
|
||||
if self.u_joint_pos_prev is not None:
|
||||
if q2 - self.u_joint_pos_prev[1] > np.pi:
|
||||
q2 -= np.pi * 2
|
||||
elif q2 - self.u_joint_pos_prev[1] < -np.pi:
|
||||
q2 += np.pi * 2
|
||||
|
||||
alpha_y = np.minimum(np.maximum(q1, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
|
||||
alpha_x = np.minimum(np.maximum(q2, -np.pi / 2 * 0.95), np.pi / 2 * 0.95)
|
||||
|
||||
if self.u_joint_pos_prev is None:
|
||||
self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
|
||||
else:
|
||||
self.u_joint_pos_des[i * 2: i * 2 + 2] += np.minimum(np.maximum(
|
||||
10 * (np.array([alpha_y, alpha_x]) - self.u_joint_pos_des[i * 2: i * 2 + 2]),
|
||||
-np.pi * 0.01), np.pi * 0.01)
|
||||
|
||||
self.u_joint_pos_des[i * 2: i * 2 + 2] = np.array([alpha_y, alpha_x])
|
||||
|
||||
return self.u_joint_pos_des
|
||||
|
||||
def _cross_3d(self, a, b):
|
||||
return np.array([a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]])
|
@ -101,6 +101,7 @@ class AntJumpEnv(AntEnvCustomXML):
|
||||
contact_force_range=(-1.0, 1.0),
|
||||
reset_noise_scale=0.1,
|
||||
exclude_current_positions_from_observation=True,
|
||||
**kwargs
|
||||
):
|
||||
self.current_step = 0
|
||||
self.max_height = 0
|
||||
@ -113,7 +114,7 @@ class AntJumpEnv(AntEnvCustomXML):
|
||||
healthy_z_range=healthy_z_range,
|
||||
contact_force_range=contact_force_range,
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation, **kwargs)
|
||||
|
||||
def step(self, action):
|
||||
self.current_step += 1
|
||||
|
@ -36,7 +36,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
|
||||
"render_fps": 50
|
||||
}
|
||||
|
||||
def __init__(self, frame_skip: int = 10, random_init: bool = False):
|
||||
def __init__(self, frame_skip: int = 10, random_init: bool = False, **kwargs):
|
||||
utils.EzPickle.__init__(**locals())
|
||||
self._steps = 0
|
||||
self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.])
|
||||
@ -58,7 +58,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
|
||||
MujocoEnv.__init__(self,
|
||||
model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"),
|
||||
frame_skip=self.frame_skip,
|
||||
observation_space=self.observation_space)
|
||||
observation_space=self.observation_space, **kwargs)
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
|
||||
|
||||
def step(self, action):
|
||||
@ -305,8 +305,8 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
|
||||
|
||||
|
||||
class BoxPushingDense(BoxPushingEnvBase):
|
||||
def __init__(self, frame_skip: int = 10, random_init: bool = False):
|
||||
super(BoxPushingDense, self).__init__(frame_skip=frame_skip, random_init=random_init)
|
||||
def __init__(self, **kwargs):
|
||||
super(BoxPushingDense, self).__init__(**kwargs)
|
||||
def _get_reward(self, episode_end, box_pos, box_quat, target_pos, target_quat,
|
||||
rod_tip_pos, rod_quat, qpos, qvel, action):
|
||||
joint_penalty = self._joint_limit_violate_penalty(qpos,
|
||||
@ -329,8 +329,8 @@ class BoxPushingDense(BoxPushingEnvBase):
|
||||
|
||||
|
||||
class BoxPushingTemporalSparse(BoxPushingEnvBase):
|
||||
def __init__(self, frame_skip: int = 10, random_init: bool = False):
|
||||
super(BoxPushingTemporalSparse, self).__init__(frame_skip=frame_skip, random_init=random_init)
|
||||
def __init__(self, **kwargs):
|
||||
super(BoxPushingTemporalSparse, self).__init__(**kwargs)
|
||||
|
||||
def _get_reward(self, episode_end, box_pos, box_quat, target_pos, target_quat,
|
||||
rod_tip_pos, rod_quat, qpos, qvel, action):
|
||||
@ -361,8 +361,8 @@ class BoxPushingTemporalSparse(BoxPushingEnvBase):
|
||||
|
||||
class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase):
|
||||
|
||||
def __init__(self, frame_skip: int = 10, random_init: bool = False):
|
||||
super(BoxPushingTemporalSpatialSparse, self).__init__(frame_skip=frame_skip, random_init=random_init)
|
||||
def __init__(self, **kwargs):
|
||||
super(BoxPushingTemporalSpatialSparse, self).__init__(**kwargs)
|
||||
|
||||
def _get_reward(self, episode_end, box_pos, box_quat, target_pos, target_quat,
|
||||
rod_tip_pos, rod_quat, qpos, qvel, action):
|
||||
@ -392,8 +392,8 @@ class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase):
|
||||
|
||||
class BoxPushingTemporalSpatialSparse2(BoxPushingEnvBase):
|
||||
|
||||
def __init__(self, frame_skip: int = 10, random_init: bool = False):
|
||||
super(BoxPushingTemporalSpatialSparse2, self).__init__(frame_skip=frame_skip, random_init=random_init)
|
||||
def __init__(self, **kwargs):
|
||||
super(BoxPushingTemporalSpatialSparse2, self).__init__(**kwargs)
|
||||
|
||||
def _get_reward(self, episode_end, box_pos, box_quat, target_pos, target_quat,
|
||||
rod_tip_pos, rod_quat, qpos, qvel, action):
|
||||
@ -428,8 +428,8 @@ class BoxPushingTemporalSpatialSparse2(BoxPushingEnvBase):
|
||||
|
||||
|
||||
class BoxPushingNoConstraintSparse(BoxPushingEnvBase):
|
||||
def __init__(self, frame_skip: int = 10, random_init: bool = False):
|
||||
super(BoxPushingNoConstraintSparse, self).__init__(frame_skip=frame_skip, random_init=random_init)
|
||||
def __init__(self, **kwargs):
|
||||
super(BoxPushingNoConstraintSparse, self).__init__(**kwargs)
|
||||
|
||||
def _get_reward(self, episode_end, box_pos, box_quat, target_pos, target_quat,
|
||||
rod_tip_pos, rod_quat, qpos, qvel, action):
|
||||
|
@ -74,7 +74,8 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
|
||||
reset_noise_scale=0.1,
|
||||
context=True,
|
||||
exclude_current_positions_from_observation=True,
|
||||
max_episode_steps=100):
|
||||
max_episode_steps=100,
|
||||
**kwargs):
|
||||
self.current_step = 0
|
||||
self.max_height = 0
|
||||
# self.max_episode_steps = max_episode_steps
|
||||
@ -85,7 +86,8 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
|
||||
forward_reward_weight=forward_reward_weight,
|
||||
ctrl_cost_weight=ctrl_cost_weight,
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
|
||||
def step(self, action):
|
||||
|
||||
|
@ -115,6 +115,7 @@ class HopperJumpEnv(HopperEnvCustomXML):
|
||||
reset_noise_scale=5e-3,
|
||||
exclude_current_positions_from_observation=False,
|
||||
sparse=False,
|
||||
**kwargs
|
||||
):
|
||||
|
||||
self.sparse = sparse
|
||||
@ -141,7 +142,8 @@ class HopperJumpEnv(HopperEnvCustomXML):
|
||||
healthy_z_range=healthy_z_range,
|
||||
healthy_angle_range=healthy_angle_range,
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
|
||||
# increase initial height
|
||||
self.init_qpos[1] = 1.5
|
||||
|
@ -29,7 +29,8 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
|
||||
reset_noise_scale=5e-3,
|
||||
context=True,
|
||||
exclude_current_positions_from_observation=True,
|
||||
max_episode_steps=250):
|
||||
max_episode_steps=250,
|
||||
**kwargs):
|
||||
self.current_step = 0
|
||||
self.max_height = 0
|
||||
self.max_episode_steps = max_episode_steps
|
||||
@ -50,7 +51,8 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
|
||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||
super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
|
||||
healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
|
||||
exclude_current_positions_from_observation)
|
||||
exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
|
||||
def step(self, action):
|
||||
|
||||
|
@ -32,7 +32,8 @@ class HopperThrowEnv(HopperEnvCustomXML):
|
||||
reset_noise_scale=5e-3,
|
||||
context=True,
|
||||
exclude_current_positions_from_observation=True,
|
||||
max_episode_steps=250):
|
||||
max_episode_steps=250,
|
||||
**kwargs):
|
||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||
self.current_step = 0
|
||||
self.max_episode_steps = max_episode_steps
|
||||
@ -57,7 +58,8 @@ class HopperThrowEnv(HopperEnvCustomXML):
|
||||
healthy_z_range=healthy_z_range,
|
||||
healthy_state_range=healthy_angle_range,
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
|
||||
def step(self, action):
|
||||
self.current_step += 1
|
||||
|
@ -36,7 +36,8 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
|
||||
context=True,
|
||||
penalty=0.0,
|
||||
exclude_current_positions_from_observation=True,
|
||||
max_episode_steps=250):
|
||||
max_episode_steps=250,
|
||||
**kwargs):
|
||||
self.hit_basket_reward = hit_basket_reward
|
||||
self.current_step = 0
|
||||
self.max_episode_steps = max_episode_steps
|
||||
@ -65,7 +66,8 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
|
||||
healthy_z_range=healthy_z_range,
|
||||
healthy_angle_range=healthy_angle_range,
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
|
||||
def step(self, action):
|
||||
|
||||
|
@ -34,7 +34,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
|
||||
goal_switching_step: int = None,
|
||||
enable_artificial_wind: bool = False):
|
||||
enable_artificial_wind: bool = False, **kwargs):
|
||||
utils.EzPickle.__init__(**locals())
|
||||
self._steps = 0
|
||||
|
||||
@ -68,7 +68,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
MujocoEnv.__init__(self,
|
||||
model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"),
|
||||
frame_skip=frame_skip,
|
||||
observation_space=self.observation_space)
|
||||
observation_space=self.observation_space,
|
||||
**kwargs)
|
||||
|
||||
if ctxt_dim == 2:
|
||||
self.context_bounds = CONTEXT_BOUNDS_2DIMS
|
||||
@ -275,11 +276,11 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
|
||||
class TableTennisWind(TableTennisEnv):
|
||||
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4):
|
||||
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, **kwargs):
|
||||
self.observation_space = spaces.Box(
|
||||
low=-np.inf, high=np.inf, shape=(22,), dtype=np.float64
|
||||
)
|
||||
super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, enable_artificial_wind=True)
|
||||
super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, enable_artificial_wind=True, **kwargs)
|
||||
|
||||
def _get_obs(self):
|
||||
obs = np.concatenate([
|
||||
@ -297,5 +298,5 @@ class TableTennisWind(TableTennisEnv):
|
||||
|
||||
|
||||
class TableTennisGoalSwitching(TableTennisEnv):
|
||||
def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99):
|
||||
super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step)
|
||||
def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99, **kwargs):
|
||||
super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step, **kwargs)
|
||||
|
@ -97,7 +97,8 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML):
|
||||
reset_noise_scale=5e-3,
|
||||
penalty=0,
|
||||
exclude_current_positions_from_observation=True,
|
||||
max_episode_steps=300):
|
||||
max_episode_steps=300,
|
||||
**kwargs):
|
||||
self.current_step = 0
|
||||
self.max_episode_steps = max_episode_steps
|
||||
self.max_height = 0
|
||||
@ -112,7 +113,8 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML):
|
||||
healthy_z_range=healthy_z_range,
|
||||
healthy_angle_range=healthy_angle_range,
|
||||
reset_noise_scale=reset_noise_scale,
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation)
|
||||
exclude_current_positions_from_observation=exclude_current_positions_from_observation,
|
||||
**kwargs)
|
||||
|
||||
def step(self, action):
|
||||
self.current_step += 1
|
||||
|
@ -16,7 +16,7 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
|
||||
"""
|
||||
# Equivalent to gym, we have a make function which can be used to create environments.
|
||||
# It takes care of seeding and enables the use of a variety of external environments using the gym interface.
|
||||
env = gym.make(env_name)
|
||||
env = gym.make(env_name, render_mode='human' if render else None)
|
||||
|
||||
returns = 0
|
||||
# env.render(mode=None)
|
||||
@ -26,14 +26,6 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
|
||||
for i in range(iterations):
|
||||
|
||||
if render and i % 1 == 0:
|
||||
# This renders the full MP trajectory
|
||||
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
|
||||
# Resetting to no rendering, can be achieved by render(mode=None).
|
||||
# It is also possible to change the mode multiple times when
|
||||
# e.g. only every second trajectory should be displayed, such as here
|
||||
# Just make sure the correct mode is set before executing the step.
|
||||
env.render(mode="human")
|
||||
else:
|
||||
env.render()
|
||||
|
||||
# Now the action space is not the raw action but the parametrization of the trajectory generator,
|
||||
@ -49,14 +41,19 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
|
||||
if terminated or truncated:
|
||||
print(reward)
|
||||
obs = env.reset()
|
||||
env.close()
|
||||
|
||||
|
||||
def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True):
|
||||
"""
|
||||
Example for running a movement primitive based environment, which is already registered
|
||||
Example for running a custom movement primitive based environments.
|
||||
Our already registered environments follow the same structure.
|
||||
Hence, this also allows to adjust hyperparameters of the movement primitives.
|
||||
Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
|
||||
We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
|
||||
for our repo: https://github.com/ALRhub/fancy_gym/
|
||||
Args:
|
||||
env_name: DMP env_id
|
||||
seed: seed for deterministic behaviour
|
||||
seed: seed
|
||||
iterations: Number of rollout steps to run
|
||||
render: Render the episode
|
||||
|
||||
@ -65,14 +62,14 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1,
|
||||
"""
|
||||
# Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.
|
||||
# E.g. here for way to many basis functions
|
||||
env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}})
|
||||
env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}, render_mode='human' if render else None)
|
||||
|
||||
returns = 0
|
||||
obs = env.reset()
|
||||
|
||||
# This time rendering every trajectory
|
||||
if render:
|
||||
env.render(mode="human")
|
||||
env.render()
|
||||
|
||||
# number of samples/full trajectories (multiple environment steps)
|
||||
for i in range(iterations):
|
||||
@ -84,8 +81,47 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1,
|
||||
print(i, reward)
|
||||
obs = env.reset()
|
||||
|
||||
env.close()
|
||||
return obs
|
||||
|
||||
class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper):
|
||||
mp_config = {
|
||||
'ProMP': {
|
||||
'trajectory_generator_kwargs': {
|
||||
'trajectory_generator_type': 'promp',
|
||||
'weights_scale': 2
|
||||
},
|
||||
'phase_generator_kwargs': {
|
||||
'phase_generator_type': 'linear'
|
||||
},
|
||||
'controller_kwargs': {
|
||||
'controller_type': 'velocity'
|
||||
},
|
||||
'basis_generator_kwargs': {
|
||||
'basis_generator_type': 'zero_rbf',
|
||||
'num_basis': 5,
|
||||
'num_basis_zero_start': 1
|
||||
}
|
||||
},
|
||||
'DMP': {
|
||||
'trajectory_generator_kwargs': {
|
||||
'trajectory_generator_type': 'dmp',
|
||||
'weights_scale': 500
|
||||
},
|
||||
'phase_generator_kwargs': {
|
||||
'phase_generator_type': 'exp',
|
||||
'alpha_phase': 2.5
|
||||
},
|
||||
'controller_kwargs': {
|
||||
'controller_type': 'velocity'
|
||||
},
|
||||
'basis_generator_kwargs': {
|
||||
'basis_generator_type': 'rbf',
|
||||
'num_basis': 5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||
"""
|
||||
@ -105,37 +141,92 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||
"""
|
||||
|
||||
base_env_id = "fancy/Reacher5d-v0"
|
||||
custom_env_id = "fancy/Reacher5d-Custom-v0"
|
||||
custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0"
|
||||
custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"
|
||||
|
||||
# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
|
||||
# You can also add other gym.Wrappers in case they are needed.
|
||||
wrappers = [fancy_gym.envs.mujoco.reacher.MPWrapper]
|
||||
fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id)
|
||||
|
||||
# For a ProMP
|
||||
trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
|
||||
'weights_scale': 2}
|
||||
phase_generator_kwargs = {'phase_generator_type': 'linear'}
|
||||
controller_kwargs = {'controller_type': 'velocity'}
|
||||
basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
|
||||
'num_basis': 5,
|
||||
'num_basis_zero_start': 1
|
||||
}
|
||||
env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)
|
||||
|
||||
# # For a DMP
|
||||
# trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
|
||||
# 'weights_scale': 500}
|
||||
# phase_generator_kwargs = {'phase_generator_type': 'exp',
|
||||
# 'alpha_phase': 2.5}
|
||||
# controller_kwargs = {'controller_type': 'velocity'}
|
||||
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
||||
# 'num_basis': 5
|
||||
# }
|
||||
env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
|
||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
||||
seed=seed)
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
|
||||
if render:
|
||||
env.render(mode="human")
|
||||
env.render()
|
||||
|
||||
# number of samples/full trajectories (multiple environment steps)
|
||||
for i in range(iterations):
|
||||
ac = env.action_space.sample()
|
||||
obs, reward, terminated, truncated, info = env.step(ac)
|
||||
rewards += reward
|
||||
|
||||
if terminated or truncated:
|
||||
print(rewards)
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
|
||||
try: # Some mujoco-based envs don't correlcty implement .close
|
||||
env.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True):
|
||||
"""
|
||||
Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.
|
||||
Args:
|
||||
seed: seed
|
||||
iterations: Number of rollout steps to run
|
||||
render: Render the episode
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
|
||||
base_env_id = "fancy/Reacher5d-v0"
|
||||
custom_env_id = "fancy/Reacher5d-Custom-v0"
|
||||
custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"
|
||||
|
||||
fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override= {'ProMP': {
|
||||
'trajectory_generator_kwargs': {
|
||||
'trajectory_generator_type': 'promp',
|
||||
'weights_scale': 2
|
||||
},
|
||||
'phase_generator_kwargs': {
|
||||
'phase_generator_type': 'linear'
|
||||
},
|
||||
'controller_kwargs': {
|
||||
'controller_type': 'velocity'
|
||||
},
|
||||
'basis_generator_kwargs': {
|
||||
'basis_generator_type': 'zero_rbf',
|
||||
'num_basis': 5,
|
||||
'num_basis_zero_start': 1
|
||||
}
|
||||
}})
|
||||
|
||||
env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)
|
||||
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
|
||||
if render:
|
||||
env.render()
|
||||
|
||||
# number of samples/full trajectories (multiple environment steps)
|
||||
for i in range(iterations):
|
||||
ac = env.action_space.sample()
|
||||
obs, reward, terminated, truncated, info = env.step(ac)
|
||||
rewards += reward
|
||||
|
||||
if terminated or truncated:
|
||||
print(rewards)
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
|
||||
if render:
|
||||
env.render()
|
||||
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
@ -151,8 +242,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
|
||||
try: # Some mujoco-based envs don't correlcty implement .close
|
||||
env.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
def main():
|
||||
render = False
|
||||
# DMP
|
||||
example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)
|
||||
@ -172,3 +268,7 @@ if __name__ == '__main__':
|
||||
|
||||
# Custom MP
|
||||
example_fully_custom_mp(seed=10, iterations=1, render=render)
|
||||
example_fully_custom_mp_alternative(seed=10, iterations=1, render=render)
|
||||
|
||||
if __name__=='__main__':
|
||||
main()
|
||||
|
1
setup.py
1
setup.py
@ -14,6 +14,7 @@ extras = {
|
||||
'box2d': ['gymnasium[box2d]>=0.26.0'],
|
||||
'mujoco-legacy': ['mujoco-py >=2.1,<2.2', 'cython<3'],
|
||||
'jax': ["jax >=0.4.0", "jaxlib >=0.4.0"],
|
||||
'mushroom-rl': ['mushroom-rl'],
|
||||
}
|
||||
|
||||
# All dependencies
|
||||
|
@ -41,7 +41,7 @@ class ToyEnv(gym.Env):
|
||||
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
def render(self, mode="human"):
|
||||
def render(self):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -33,7 +33,7 @@ class ToyEnv(gym.Env):
|
||||
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
def render(self, mode="human"):
|
||||
def render(self):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@ class ToyEnv(gym.Env):
|
||||
obs, reward, terminated, truncated, info = np.array([-1]), 1, False, False, {}
|
||||
return obs, reward, terminated, truncated, info
|
||||
|
||||
def render(self, mode="human"):
|
||||
def render(self):
|
||||
pass
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user