Merge pull request #56 from HongyiZhouCN/55-table-tennis-dev

#55 add table tennis environment to fancy_gym
This commit is contained in:
Fabian 2023-05-24 10:03:38 +02:00 committed by GitHub
commit 053a17889f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
79 changed files with 902 additions and 14 deletions

3
.gitignore vendored
View File

@ -111,3 +111,6 @@ venv.bak/
/configs/db.cfg /configs/db.cfg
legacy/ legacy/
MUJOCO_LOG.TXT MUJOCO_LOG.TXT
# vscode
.vscode

View File

@ -137,9 +137,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
def step(self, action: np.ndarray): def step(self, action: np.ndarray):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step""" """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# TODO remove this part, right now only needed for beer pong position, velocity = self.get_trajectory(action)
mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen) position, velocity = self.env.set_episode_arguments(action, position, velocity)
position, velocity = self.get_trajectory(mp_params) traj_is_valid, position, velocity = self.env.preprocessing_and_validity_callback(action, position, velocity)
trajectory_length = len(position) trajectory_length = len(position)
rewards = np.zeros(shape=(trajectory_length,)) rewards = np.zeros(shape=(trajectory_length,))
@ -151,6 +151,11 @@ class BlackBoxWrapper(gym.ObservationWrapper):
infos = dict() infos = dict()
done = False done = False
if not traj_is_valid:
obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity,
self.return_context_observation)
return self.observation(obs), trajectory_return, done, infos
self.plan_steps += 1 self.plan_steps += 1
for t, (pos, vel) in enumerate(zip(position, velocity)): for t, (pos, vel) in enumerate(zip(position, velocity)):
step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
@ -171,8 +176,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
self.env.render(**self.render_kwargs) self.env.render(**self.render_kwargs)
if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
t + 1 + self.current_traj_steps) t + 1 + self.current_traj_steps)
and self.plan_steps < self.max_planning_times): and self.plan_steps < self.max_planning_times):
if self.condition_on_desired: if self.condition_on_desired:
self.condition_pos = pos self.condition_pos = pos

View File

@ -52,8 +52,38 @@ class RawInterfaceWrapper(gym.Wrapper):
""" """
return self.env.dt return self.env.dt
def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[ def preprocessing_and_validity_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) \
np.ndarray, Union[np.ndarray, None]]: -> Tuple[bool, np.ndarray, np.ndarray]:
"""
Used to preprocess the action and check if the desired trajectory is valid.
Args:
action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if
specified, else only traj_gen parameters
pos_traj: a vector instance of the raw position trajectory
vel_traj: a vector instance of the raw velocity trajectory
Returns:
validity flag: bool, True if the raw trajectory is valid, False if not
pos_traj: a vector instance of the preprocessed position trajectory
vel_traj: a vector instance of the preprocessed velocity trajectory
"""
return True, pos_traj, vel_traj
def set_episode_arguments(self, action, pos_traj, vel_traj):
"""
Used to set the arguments for env that valid for the whole episode
deprecated, replaced by preprocessing_and_validity_callback
Args:
action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if
specified, else only traj_gen parameters
pos_traj: a vector instance of the raw position trajectory
vel_traj: a vector instance of the raw velocity trajectory
Returns:
pos_traj: a vector instance of the preprocessed position trajectory
vel_traj: a vector instance of the preprocessed velocity trajectory
"""
return pos_traj, vel_traj
def episode_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.array) -> Tuple[bool]:
""" """
Used to extract the parameters for the movement primitive and other parameters from an action array which might Used to extract the parameters for the movement primitive and other parameters from an action array which might
include other actions like ball releasing time for the beer pong environment. include other actions like ball releasing time for the beer pong environment.
@ -65,4 +95,20 @@ class RawInterfaceWrapper(gym.Wrapper):
Returns: Returns:
Tuple: mp_arguments and other arguments Tuple: mp_arguments and other arguments
""" """
return action, None return True
def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
"""
Used to return a artificial return from the env if the desired trajectory is invalid.
Args:
action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if
specified, else only traj_gen parameters
pos_traj: a vector instance of the raw position trajectory
vel_traj: a vector instance of the raw velocity trajectory
Returns:
obs: artificial observation if the trajectory is invalid, by default a zero vector
reward: artificial reward if the trajectory is invalid, by default 0
done: artificial done if the trajectory is invalid, by default True
info: artificial info if the trajectory is invalid, by default empty dict
"""
return np.zeros(1), 0, True, {}

View File

@ -18,6 +18,8 @@ from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \ from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \
BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING
from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \
MAX_EPISODE_STEPS_TABLE_TENNIS
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []}
@ -40,6 +42,8 @@ DEFAULT_BB_DICT_ProMP = {
'num_basis': 5, 'num_basis': 5,
'num_basis_zero_start': 1, 'num_basis_zero_start': 1,
'basis_bandwidth_factor': 3.0, 'basis_bandwidth_factor': 3.0,
},
"black_box_kwargs": {
} }
} }
@ -68,9 +72,12 @@ DEFAULT_BB_DICT_ProDMP = {
"wrappers": [], "wrappers": [],
"trajectory_generator_kwargs": { "trajectory_generator_kwargs": {
'trajectory_generator_type': 'prodmp', 'trajectory_generator_type': 'prodmp',
'duration': 2.0,
'weights_scale': 1.0,
}, },
"phase_generator_kwargs": { "phase_generator_kwargs": {
'phase_generator_type': 'exp', 'phase_generator_type': 'exp',
'tau': 1.5,
}, },
"controller_kwargs": { "controller_kwargs": {
'controller_type': 'motor', 'controller_type': 'motor',
@ -239,6 +246,34 @@ register(
max_episode_steps=FIXED_RELEASE_STEP, max_episode_steps=FIXED_RELEASE_STEP,
) )
# Table Tennis environments
for ctxt_dim in [2, 4]:
register(
id='TableTennis{}D-v0'.format(ctxt_dim),
entry_point='fancy_gym.envs.mujoco:TableTennisEnv',
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS,
kwargs={
"ctxt_dim": ctxt_dim,
'frame_skip': 4,
}
)
register(
id='TableTennisWind-v0',
entry_point='fancy_gym.envs.mujoco:TableTennisWind',
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS,
)
register(
id='TableTennisGoalSwitching-v0',
entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching',
max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS,
kwargs={
'goal_switching_step': 99
}
)
# movement Primitive Environments # movement Primitive Environments
## Simple Reacher ## Simple Reacher
@ -495,7 +530,8 @@ for _v in _versions:
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['disable_goal'] = True
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4
@ -507,6 +543,94 @@ for _v in _versions:
kwargs=kwargs_dict_box_pushing_prodmp kwargs=kwargs_dict_box_pushing_prodmp
) )
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
## Table Tennis
_versions = ['TableTennis2D-v0', 'TableTennis4D-v0', 'TableTennisWind-v0', 'TableTennisGoalSwitching-v0']
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_tt_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
if _v == 'TableTennisWind-v0':
kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper)
else:
kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper)
kwargs_dict_tt_promp['name'] = _v
kwargs_dict_tt_promp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0])
kwargs_dict_tt_promp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = False
kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = False
kwargs_dict_tt_promp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5]
kwargs_dict_tt_promp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15]
kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3
kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 1
kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_goal'] = 1
kwargs_dict_tt_promp['black_box_kwargs']['verbose'] = 2
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_tt_promp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProDMP-{_name[1]}'
kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
if _v == 'TableTennisWind-v0':
kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper)
else:
kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper)
kwargs_dict_tt_prodmp['name'] = _v
kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0])
kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.7
kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['relative_goal'] = True
kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['disable_goal'] = True
kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5]
kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15]
kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True
kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True
kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 3
kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25.
kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_tt_prodmp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ReplanProDMP-{_name[1]}'
kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
if _v == 'TableTennisWind-v0':
kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper)
else:
kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper)
kwargs_dict_tt_prodmp['name'] = _v
kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0])
kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = False
kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5]
kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15]
kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True
kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True
kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 2
kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25.
kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3
kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 50 == 0
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_tt_prodmp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
# #
# ## Walker2DJump # ## Walker2DJump
# _versions = ['Walker2DJump-v0'] # _versions = ['Walker2DJump-v0']

View File

@ -8,3 +8,4 @@ from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
from .reacher.reacher import ReacherEnv from .reacher.reacher import ReacherEnv
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching

View File

@ -28,10 +28,10 @@ class MPWrapper(RawInterfaceWrapper):
return self.data.qvel[0:7].copy() return self.data.qvel[0:7].copy()
# TODO: Fix this # TODO: Fix this
def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]: def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None], bool]:
if mp.learn_tau: if mp.learn_tau:
self.release_step = action[0] / self.dt # Tau value self.release_step = action[0] / self.dt # Tau value
return action, None return action, None, True
def set_context(self, context): def set_context(self, context):
xyz = np.zeros(3) xyz = np.zeros(3)

View File

@ -0,0 +1 @@
from .mp_wrapper import TT_MPWrapper, TTVelObs_MPWrapper

View File

@ -0,0 +1,36 @@
<mujocoinclude>
<actuator>
<!-- <motor name="wam/base_slide_motor" joint="wam/base_slide_joint" ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0"/>-->
<motor name="wam/base_motor" joint="wam/base_yaw_joint_right" ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0"/>
<motor name="wam/shoulder_pitch_motor" joint='wam/shoulder_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0"/>
<motor name="wam/shoulder_yaw_motor" joint='wam/shoulder_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0"/>
<motor name="wam/elbow_pitch_motor" joint='wam/elbow_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0"/>
<motor name="wam/wrist_yaw_motor" joint='wam/wrist_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
<motor name="wam/wrist_pitch_motor" joint='wam/wrist_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
<motor name="wam/palm_yaw_motor" joint='wam/palm_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0"/>
</actuator>
</mujocoinclude>
<!--<mujocoinclude>-->
<!--<actuator>-->
<!-- <motor name="wam/base_motor" joint="wam/base_yaw_joint_right"/>-->
<!-- <motor name="wam/shoulder_pitch_motor" joint='wam/shoulder_pitch_joint_right'/>-->
<!-- <motor name="wam/shoulder_yaw_motor" joint='wam/shoulder_yaw_joint_right'/>-->
<!-- <motor name="wam/elbow_pitch_motor" joint='wam/elbow_pitch_joint_right'/>-->
<!-- <motor name="wam/wrist_yaw_motor" joint='wam/wrist_yaw_joint_right'/>-->
<!-- <motor name="wam/wrist_pitch_motor" joint='wam/wrist_pitch_joint_right'/>-->
<!-- <motor name="wam/palm_yaw_motor" joint='wam/palm_yaw_joint_right'/>-->
<!--</actuator>-->
<!--</mujocoinclude>-->
<!--<mujocoinclude>-->
<!-- <actuator boastype="none">-->
<!-- <motor name="wam/shoulder_yaw_link_right_motor" joint="wam/base_yaw_joint_right"/>-->
<!-- <motor name="wam/shoulder_pitch_joint_right_motor" joint='wam/shoulder_pitch_joint_right'/>-->
<!-- <motor name="wam/shoulder_yaw_joint_right_motor" joint='wam/shoulder_yaw_joint_right'/>-->
<!-- <motor name="wam/elbow_pitch_joint_right_motor" joint='wam/elbow_pitch_joint_right'/>-->
<!-- <motor name="wam/wrist_yaw_joint_right_motor" joint='wam/wrist_yaw_joint_right'/>-->
<!-- <motor name="wam/wrist_pitch_joint_right_motor" joint='wam/wrist_pitch_joint_right'/>-->
<!-- <motor name="wam/palm_yaw_joint_right_motor" joint='wam/palm_yaw_joint_right'/>-->
<!-- </actuator>-->
<!--</mujocoinclude>-->

View File

@ -0,0 +1,104 @@
<mujocoinclue>
<body name="wam/base_link_right" pos="2.1 0 2.0" quat="0 0 1 0" childclass="wam" >
<!-- <joint name="wam/base_slide_joint" axis="0 1 0" type="slide" damping="1.98" range="-0.8 0.8"/>-->
<inertial pos="0 0 0" mass="1" diaginertia="0.1 0.1 0.1"/>
<geom name="base_link_fine" class="viz" mesh="base_link_fine" rgba="0.5 0.5 0.5 0"/>
<geom name="base_link_convex" class="col" mesh="base_link_convex" rgba="0.5 0.5 0.5 1"/>
<body name="wam/shoulder_yaw_link_right" pos="0 0 0.346">
<inertial pos="-0.00443422 -0.00066489 -0.128904" quat="0.69566 0.716713 -0.0354863 0.0334839" mass="5"
diaginertia="0.135089 0.113095 0.0904426"/>
<!-- control 0: 1.6-->
<joint name="wam/base_yaw_joint_right" range="-2.6 2.6" damping="1.98"/>
<geom name="shoulder_link_fine" class="viz" mesh="shoulder_link_fine" rgba="1 1 1 0"/>
<geom name="shoulder_link_convex_decomposition_p1" class="col"
mesh="shoulder_link_convex_decomposition_p1"/>
<geom name="shoulder_link_convex_decomposition_p2" class="col"
mesh="shoulder_link_convex_decomposition_p2"/>
<geom name="shoulder_link_convex_decomposition_p3" class="col"
mesh="shoulder_link_convex_decomposition_p3"/>
<body name="wam/shoulder_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00236981 -0.0154211 0.0310561" quat="0.961794 0.273112 -0.0169316 0.00866592"
mass="3.87494" diaginertia="0.0214195 0.0167127 0.0126452"/> <!--seems off-->
<!-- control 1: 0-->
<joint name="wam/shoulder_pitch_joint_right" range="-2 2" damping="0.55"/>
<geom name="shoulder_pitch_link_fine" class="viz" mesh="shoulder_pitch_link_fine" rgba="1 1 1 0"/>
<geom name="shoulder_pitch_link_convex" class="col" mesh="shoulder_pitch_link_convex"/>
<body name="wam/upper_arm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="0.00683259 3.309e-005 0.392492" quat="0.647136 0.0170822 0.0143038 0.762049"
mass="2.20228" diaginertia="0.0592718 0.0592207 0.00313419"/>
<!-- control 2: 0-->
<joint name="wam/shoulder_yaw_joint_right" range="-2.8 2.8" damping="1.65"/>
<geom name="upper_arm_link_fine" class="viz" mesh="upper_arm_link_fine" rgba="1 1 1 0"/>
<geom name="upper_arm_link_convex_decomposition_p1" class="col"
mesh="upper_arm_link_convex_decomposition_p1" rgba="0.094 0.48 0.804 1"/>
<geom name="upper_arm_link_convex_decomposition_p2" class="col"
mesh="upper_arm_link_convex_decomposition_p2" rgba="0.094 0.48 0.804 1"/>
<body name="wam/forearm_link_right" pos="0.045 0 0.55" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.0400149 -0.142717 -0.00022942"
quat="0.704281 0.706326 0.0180333 0.0690353" mass="0.500168"
diaginertia="0.0151047 0.0148285 0.00275805"/>
<!-- control 3: 2.4-->
<joint name="wam/elbow_pitch_joint_right" range="-0.9 3.1" damping="0.88"/>
<geom name="elbow_link_fine" class="viz" mesh="elbow_link_fine" rgba="1 1 1 0"/>
<geom name="elbow_link_convex" class="col" mesh="elbow_link_convex"/>
<geom name="forearm_link_fine" class="viz" mesh="forearm_link_fine" pos="-.045 -0.0730 0"
euler="1.57 0 0" rgba="1 1 1 0"/>
<geom name="forearm_link_convex_decomposition_p1" class="col"
mesh="forearm_link_convex_decomposition_p1" pos="-0.045 -0.0730 0"
euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
<geom name="forearm_link_convex_decomposition_p2" class="col"
mesh="forearm_link_convex_decomposition_p2" pos="-.045 -0.0730 0"
euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
<body name="wam/wrist_yaw_link_right" pos="-0.045 -0.3 0" quat="0.707107 0.707107 0 0">
<inertial pos="8.921e-005 0.00435824 -0.00511217"
quat="0.630602 0.776093 0.00401969 -0.002372" mass="1.05376"
diaginertia="0.000555168 0.00046317 0.000234072"/> <!--this is an approximation-->
<!-- control 4: 0-->
<joint name="wam/wrist_yaw_joint_right" range="-4.8 1.3" damping="0.55"/>
<geom name="wrist_yaw_link_fine" class="viz" mesh="wrist_yaw_link_fine" rgba="1 1 1 0"/>
<geom name="wrist_yaw_link_convex_decomposition_p1" class="col"
mesh="wrist_yaw_link_convex_decomposition_p1"/>
<geom name="wrist_yaw_link_convex_decomposition_p2" class="col"
mesh="wrist_yaw_link_convex_decomposition_p2"/>
<body name="wam/wrist_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
<inertial pos="-0.00012262 -0.0246834 -0.0170319"
quat="0.630602 0.776093 0.00401969 -0.002372" mass="0.517974"
diaginertia="0.000555168 0.00046317 0.000234072"/>
<!-- control 5: 0-->
<joint name="wam/wrist_pitch_joint_right" range="-1.6 1.6" damping="0.11"/>
<geom name="wrist_pitch_link_fine" class="viz" mesh="wrist_pitch_link_fine"
rgba="1 1 1 0"/>
<geom name="wrist_pitch_link_convex_decomposition_p1" rgba="1 0.5 0.313 1"
class="col" mesh="wrist_pitch_link_convex_decomposition_p1"/>
<geom name="wrist_pitch_link_convex_decomposition_p2" rgba="1 0.5 0.313 1"
class="col" mesh="wrist_pitch_link_convex_decomposition_p2"/>
<geom name="wrist_pitch_link_convex_decomposition_p3" rgba="1 0.5 0.313 1"
class="col" mesh="wrist_pitch_link_convex_decomposition_p3"/>
<body name="wam/wrist_palm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
<inertial pos="0 0 0.055" quat="0.707107 0 0 0.707107" mass="0.0828613"
diaginertia="0.00020683 0.00010859 0.00010851"/>
<!-- control 6: 1.8-->
<joint name="wam/palm_yaw_joint_right" range="-2.2 2.2" damping="0.11"/>
<geom name="wrist_palm_link_fine" class="viz" mesh="wrist_palm_link_fine"
rgba="1 1 1 0"/>
<geom name="wrist_palm_link_convex" class="col" mesh="wrist_palm_link_convex"/>
<!-- EE=wam/paddle, configure name to the end effector name-->
<body name="EE" pos="0 0 0.26" childclass="contact_geom">
<geom name="bat" type="cylinder" size="0.075 0.005" rgba="1 0 0 1"
quat="0.71 0 0.71 0"/>
<geom name="bat_back" type="cylinder" size="0.0749 0.0025" rgba="0 1 0 1"
quat="0.71 0 0.71 0" pos="-0.0026 0 0"/>
<geom name="wam/paddle_handle" type="box" size="0.005 0.01 0.05" pos="0 0 -0.08"
rgba="1 1 1 1"/>
<!-- Extract information for sampling goals.-->
<site name="wam/paddle_center" pos="0 0 0" rgba="1 1 1 1" size="0.00001"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</mujocoinclue>

View File

@ -0,0 +1,11 @@
<mujocoinclude>
<body name="target_ball" pos="0 0 0">
<!-- <joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>-->
<!-- <joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>-->
<!-- <joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>-->
<joint name="ball_joint" damping="0.0" pos="0 0 0" stiffness="0" frictionloss="0" type="free"/>
<geom size="0.025 0.025 0.025" type="sphere" condim="4" name="target_ball_contact" rgba="1 1 0 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="0.1 0.03" priority="1"/>
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
</mujocoinclude>

View File

@ -0,0 +1,30 @@
<mujocoinclude>
<body name="table_tennis_table" pos="0 0 0">
<geom class="viz" name="table_base_1" pos="1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<geom class="viz" name="table_base_2" pos="1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<geom class="viz" name="table_base_3" pos="-1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<geom class="viz" name="table_base_4" pos="-1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
<body name="table_top" pos="0 0 0.76">
<geom class="contact_geom" name="table_tennis_table" pos="0 0 0" rgba="0 0 0.5 1" size="1.37 .7625 .01" type="box" />
<site name="left_up_corner" pos="-1.37 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
<site name="middle_up_corner" pos="0 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
<site name="left_down_corner" pos="-1.37 -0.7625 0.01" rgba="1 1 1 1" size="0.00001" />
<site name="middle_down_corner" pos="0 -.7625 0.01" rgba="1 1 1 1" size="0.00001" />
<geom class="contact_geom" material="floor_plane" name="table_te_context_spacennis_net" pos="0 0 0.08625" rgba="0 0 1 0.5" size="0.01 0.915 0.07625" type="box" />
<geom class="viz" name="left_while_line" pos="0 -0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
<geom class="viz" name="center_while_line" pos="0 0 0.01" rgba="1 1 1 1" size="1.37 .01 .001" type="box" />
<geom class="viz" name="right_while_line" pos="0 0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
<geom class="viz" name="right_side_line" pos="1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
<geom class="viz" name="left_side_line" pos="-1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
</body>
<body name="achieved_pos" pos="0 0 0.5">
<geom class="viz" name="achieved_point_geom" pos="0 0 0" rgba="0 1 0 1" size="0.02 0.001" type="cylinder" />
</body>
<body name="right_achieved_pos" pos="0 0 0.5">
<geom class="viz" name="hitting_achieved_point_geom" pos="0 0 0" rgba="1 0 0 1" size="0.02 0.001" type="cylinder" />
</body>
<body name="target_point" pos="0 0 0.5">
<geom class="viz" name="target_point_geom" pos="0 0 0" rgba="1 1 0 1" size="0.02 0.001" type="cylinder" />
</body>
</body>
</mujocoinclude>

View File

@ -0,0 +1,10 @@
<mujocoinclude>
<body name="target_ball" pos="0. 0. 0.1">
<joint axis="1 0 0" damping="0.0" name="tar_x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
<joint axis="0 1 0" damping="0.0" name="tar_y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
<joint axis="0 0 1" damping="0.0" name="tar_z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
<geom size="0.025 0.025 0.025" type="sphere" condim="4" name="target_ball_contact" rgba="1 1 0 1" mass="0.1"
friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="0.1 0.03" priority="1"/>
<site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
</body>
</mujocoinclude>

View File

@ -0,0 +1,54 @@
<mujocoinclude>
<actuator>
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="162.0" />-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="122.0" />-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="102.0" />-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" />-->
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="151.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="125.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="121.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="600.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="500.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
<position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="800.0" ctrllimited="true"/>
<position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>
<position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="1000.0" ctrllimited="true"/>
<position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>
<velocity ctrlrange="-3 3" joint="wam/base_yaw_joint_right" kv="0.1" ctrllimited="true"/>
<velocity ctrlrange="-3 3" joint="wam/shoulder_pitch_joint_right" kv="0.1" ctrllimited="true"/>
<velocity ctrlrange="-3 3" joint="wam/shoulder_yaw_joint_right" kv="0.1" ctrllimited="true"/>
<velocity ctrlrange="-3 3" joint="wam/elbow_pitch_joint_right" kv="0.01" ctrllimited="true"/>
<velocity ctrlrange="-3 3" joint="wam/wrist_yaw_joint_right" kv="0.01" ctrllimited="true"/>
<velocity ctrlrange="-3 3" joint="wam/wrist_pitch_joint_right" kv="0.1" ctrllimited="true"/>
<velocity ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kv="0.1" ctrllimited="true"/>
<!-- <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="1600.0" ctrllimited="true"/>-->
<!--&lt;!&ndash; <velocity ctrlrange="-50 50" joint="wam/base_yaw_joint_right" kv="100" ctrllimited="true"/>&ndash;&gt;-->
<!-- <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="1200.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
<!-- <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
</actuator>
</mujocoinclude>

View File

@ -0,0 +1,46 @@
<mujocoinclude>
<default>
<default class="wam">
<joint type="hinge" limited="true" pos="0 0 0" axis="0 0 1"/>
</default>
<default class="viz">
<geom type="mesh" contype="0" conaffinity="0" group="1" rgba="1 1 1 1"/>
</default>
<default class="col">
<geom type="mesh" contype="0" conaffinity="1" group="0" rgba="1 1 1 1"/>
</default>
<default class="contact_geom">
<geom condim="4" friction="0.1 0.1 0.1" margin="0" solimp="1 1 0" solref="0.1 0.03"/>
</default>
</default>
<asset>
<mesh file="base_link_fine.stl"/>
<mesh file="base_link_convex.stl"/>
<mesh file="shoulder_link_fine.stl"/>
<mesh file="shoulder_link_convex_decomposition_p1.stl"/>
<mesh file="shoulder_link_convex_decomposition_p2.stl"/>
<mesh file="shoulder_link_convex_decomposition_p3.stl"/>
<mesh file="shoulder_pitch_link_fine.stl"/>
<mesh file="shoulder_pitch_link_convex.stl"/>
<mesh file="upper_arm_link_fine.stl"/>
<mesh file="upper_arm_link_convex_decomposition_p1.stl"/>
<mesh file="upper_arm_link_convex_decomposition_p2.stl"/>
<mesh file="elbow_link_fine.stl"/>
<mesh file="elbow_link_convex.stl"/>
<mesh file="forearm_link_fine.stl"/>
<mesh file="forearm_link_convex_decomposition_p1.stl"/>
<mesh file="forearm_link_convex_decomposition_p2.stl"/>
<mesh file="wrist_yaw_link_fine.stl"/>
<mesh file="wrist_yaw_link_convex_decomposition_p1.stl"/>
<mesh file="wrist_yaw_link_convex_decomposition_p2.stl"/>
<mesh file="wrist_pitch_link_fine.stl"/>
<mesh file="wrist_pitch_link_convex_decomposition_p1.stl"/>
<mesh file="wrist_pitch_link_convex_decomposition_p2.stl"/>
<mesh file="wrist_pitch_link_convex_decomposition_p3.stl"/>
<mesh file="wrist_palm_link_fine.stl"/>
<mesh file="wrist_palm_link_convex.stl"/>
<texture builtin="checker" height="512" name="texplane" rgb1=".2 .3 .4" rgb2=".1 0.15 0.2" type="2d"
width="512"/>
<material name="floor_plane" reflectance="0.5" texrepeat="1 1" texture="texplane" texuniform="true"/>
</asset>
</mujocoinclude>

View File

@ -0,0 +1,20 @@
<mujoco model="table_tennis(v0.1)">
<compiler angle="radian" coordinate="local" meshdir="../meshes/wam" />
<option gravity="0 0 -9.81" timestep="0.002">
<flag warmstart="enable" />
</option>
<custom>
<numeric data="0 0 0 0 0 0 0" name="START_ANGLES" />
</custom>
<include file="shared.xml" />
<worldbody>
<light cutoff="60" diffuse="1 1 1" dir="-.1 -.2 -1.3" directional="true" exponent="1" pos=".1 .2 1.3" specular=".1 .1 .1" />
<geom conaffinity="1" contype="1" material="floor_plane" name="floor" pos="0 0 0" size="10 5 1" type="plane" />
<include file="include_table.xml" />
<include file="include_barrett_wam_7dof_right.xml" />
<include file="include_target_ball.xml" />
<!-- <include file="include_free_ball.xml" />-->
</worldbody>
<include file="include_7_motor_actuator.xml" />
<!-- <include file="right_arm_actuator.xml"/>-->
</mujoco>

View File

@ -0,0 +1,54 @@
from typing import Union, Tuple
import numpy as np
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound
class TT_MPWrapper(RawInterfaceWrapper):
# Random x goal + random init pos
@property
def context_mask(self):
return np.hstack([
[False] * 7, # joints position
[False] * 7, # joints velocity
[True] * 2, # position ball x, y
[False] * 1, # position ball z
#[True] * 3, # velocity ball x, y, z
[True] * 2, # target landing position
# [True] * 1, # time
])
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.data.qpos[:7].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.data.qvel[:7].copy()
def preprocessing_and_validity_callback(self, action, pos_traj, vel_traj):
return self.check_traj_validity(action, pos_traj, vel_traj)
def set_episode_arguments(self, action, pos_traj, vel_traj):
return pos_traj, vel_traj
def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray,
return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]:
return self.get_invalid_traj_step_return(action, pos_traj, return_contextual_obs)
class TTVelObs_MPWrapper(TT_MPWrapper):
@property
def context_mask(self):
return np.hstack([
[False] * 7, # joints position
[False] * 7, # joints velocity
[True] * 2, # position ball x, y
[False] * 1, # position ball z
[True] * 3, # velocity ball x, y, z
[True] * 2, # target landing position
# [True] * 1, # time
])

View File

@ -0,0 +1,279 @@
import os
import numpy as np
from gym import utils, spaces
from gym.envs.mujoco import MujocoEnv
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound
import mujoco
MAX_EPISODE_STEPS_TABLE_TENNIS = 350
CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]])
CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65],
[-0.2, 0.65, -0.2, 0.65]])
CONTEXT_BOUNDS_SWICHING = np.array([[-1.0, -0.65, -1.0, 0.],
[-0.2, 0.65, -0.2, 0.65]])
class TableTennisEnv(MujocoEnv, utils.EzPickle):
"""
7 DoF table tennis environment
"""
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
goal_switching_step: int = None,
enable_artificial_wind: bool = False):
utils.EzPickle.__init__(**locals())
self._steps = 0
self._hit_ball = False
self._ball_land_on_table = False
self._ball_contact_after_hit = False
self._ball_return_success = False
self._ball_landing_pos = None
self._init_ball_state = None
self._terminated = False
self._id_set = False
# reward calculation
self.ball_landing_pos = None
self._goal_pos = np.zeros(2)
self._ball_traj = []
self._racket_traj = []
self._goal_switching_step = goal_switching_step
self._enable_artificial_wind = enable_artificial_wind
self._artificial_force = 0.
MujocoEnv.__init__(self,
model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"),
frame_skip=frame_skip,
mujoco_bindings="mujoco")
if ctxt_dim == 2:
self.context_bounds = CONTEXT_BOUNDS_2DIMS
elif ctxt_dim == 4:
self.context_bounds = CONTEXT_BOUNDS_4DIMS
if self._goal_switching_step is not None:
self.context_bounds = CONTEXT_BOUNDS_SWICHING
else:
raise NotImplementedError
self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32)
self._wind_vel = np.zeros(3)
def _set_ids(self):
self._floor_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "floor")
self._ball_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "target_ball_contact")
self._bat_front_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "bat")
self._bat_back_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "bat_back")
self._table_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "table_tennis_table")
self._id_set = True
def step(self, action):
if not self._id_set:
self._set_ids()
unstable_simulation = False
if self._steps == self._goal_switching_step and self.np_random.uniform() < 0.5:
new_goal_pos = self._generate_goal_pos(random=True)
new_goal_pos[1] = -new_goal_pos[1]
self._goal_pos = new_goal_pos
self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
mujoco.mj_forward(self.model, self.data)
for _ in range(self.frame_skip):
if self._enable_artificial_wind:
self.data.qfrc_applied[-2] = self._artificial_force
try:
self.do_simulation(action, 1)
except Exception as e:
print("Simulation get unstable return with MujocoException: ", e)
unstable_simulation = True
self._terminated = True
break
if not self._hit_ball:
self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
self._contact_checker(self._ball_contact_id, self._bat_back_id)
if not self._hit_ball:
ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id)
if ball_land_on_floor_no_hit:
self._ball_landing_pos = self.data.body("target_ball").xpos.copy()
self._terminated = True
if self._hit_ball and not self._ball_contact_after_hit:
if self._contact_checker(self._ball_contact_id, self._floor_contact_id): # first check contact with floor
self._ball_contact_after_hit = True
self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
self._terminated = True
elif self._contact_checker(self._ball_contact_id, self._table_contact_id): # second check contact with table
self._ball_contact_after_hit = True
self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
if self._ball_landing_pos[0] < 0.: # ball lands on the opponent side
self._ball_return_success = True
self._terminated = True
# update ball trajectory & racket trajectory
self._ball_traj.append(self.data.body("target_ball").xpos.copy())
self._racket_traj.append(self.data.geom("bat").xpos.copy())
self._steps += 1
self._terminated = True if self._steps >= MAX_EPISODE_STEPS_TABLE_TENNIS else self._terminated
reward = -25 if unstable_simulation else self._get_reward(self._terminated)
land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \
if self._ball_landing_pos is not None else 10.
return self._get_obs(), reward, self._terminated, {
"hit_ball": self._hit_ball,
"ball_returned_success": self._ball_return_success,
"land_dist_error": land_dist_err,
"is_success": self._ball_return_success and land_dist_err < 0.2,
"num_steps": self._steps,
}
def _contact_checker(self, id_1, id_2):
for coni in range(0, self.data.ncon):
con = self.data.contact[coni]
if (con.geom1 == id_1 and con.geom2 == id_2) or (con.geom1 == id_2 and con.geom2 == id_1):
return True
return False
def reset_model(self):
self._steps = 0
self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False)
self._goal_pos = self._generate_goal_pos(random=True)
self.data.joint("tar_x").qpos = self._init_ball_state[0]
self.data.joint("tar_y").qpos = self._init_ball_state[1]
self.data.joint("tar_z").qpos = self._init_ball_state[2]
self.data.joint("tar_x").qvel = self._init_ball_state[3]
self.data.joint("tar_y").qvel = self._init_ball_state[4]
self.data.joint("tar_z").qvel = self._init_ball_state[5]
if self._enable_artificial_wind:
self._artificial_force = self.np_random.uniform(low=-0.1, high=0.1)
self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5])
self.data.qvel[:7] = np.zeros(7)
mujoco.mj_forward(self.model, self.data)
self._hit_ball = False
self._ball_land_on_table = False
self._ball_contact_after_hit = False
self._ball_return_success = False
self._ball_landing_pos = None
self._terminated = False
self._ball_traj = []
self._racket_traj = []
return self._get_obs()
def _generate_goal_pos(self, random=True):
if random:
return self.np_random.uniform(low=self.context_bounds[0][-2:], high=self.context_bounds[1][-2:])
else:
return np.array([-0.6, 0.4])
def _get_obs(self):
obs = np.concatenate([
self.data.qpos.flat[:7].copy(),
self.data.qvel.flat[:7].copy(),
self.data.joint("tar_x").qpos.copy(),
self.data.joint("tar_y").qpos.copy(),
self.data.joint("tar_z").qpos.copy(),
self._goal_pos.copy(),
])
return obs
def _get_reward(self, terminated):
if not terminated:
return 0
min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
if not self._hit_ball:
return 0.2 * (1 - np.tanh(min_r_b_dist**2))
if self._ball_landing_pos is None:
min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1))
return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2))
min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
over_net_bonus = int(self._ball_landing_pos[0] < 0)
return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
def _generate_random_ball(self, random_pos=False, random_vel=False):
x_pos, y_pos, z_pos = -0.5, 0.35, 1.75
x_vel, y_vel, z_vel = 2.5, 0., 0.5
if random_pos:
x_pos = self.np_random.uniform(low=self.context_bounds[0][0], high=self.context_bounds[1][0])
y_pos = self.np_random.uniform(low=self.context_bounds[0][1], high=self.context_bounds[1][1])
if random_vel:
x_vel = self.np_random.uniform(low=2.0, high=3.0)
init_ball_state = np.array([x_pos, y_pos, z_pos, x_vel, y_vel, z_vel])
return init_ball_state
def _generate_valid_init_ball(self, random_pos=False, random_vel=False):
init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel)
while not is_init_state_valid(init_ball_state):
init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel)
return init_ball_state
def _get_traj_invalid_penalty(self, action, pos_traj):
tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0))
violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0))
invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \
violate_high_bound_error + violate_low_bound_error
return -invalid_penalty
def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs):
obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj
penalty = self._get_traj_invalid_penalty(action, pos_traj)
return obs, penalty, True, {
"hit_ball": [False],
"ball_returned_success": [False],
"land_dist_error": [10.],
"is_success": [False],
"trajectory_length": 1,
"num_steps": [1],
}
@staticmethod
def check_traj_validity(action, pos_traj, vel_traj):
time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \
or action[1] > delay_bound[1] or action[1] < delay_bound[0]
if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low):
return False, pos_traj, vel_traj
return True, pos_traj, vel_traj
class TableTennisWind(TableTennisEnv):
def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4):
super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, enable_artificial_wind=True)
def _get_obs(self):
obs = np.concatenate([
self.data.qpos.flat[:7].copy(),
self.data.qvel.flat[:7].copy(),
self.data.joint("tar_x").qpos.copy(),
self.data.joint("tar_y").qpos.copy(),
self.data.joint("tar_z").qpos.copy(),
self.data.joint("tar_x").qvel.copy(),
self.data.joint("tar_y").qvel.copy(),
self.data.joint("tar_z").qvel.copy(),
self._goal_pos.copy(),
])
return obs
class TableTennisGoalSwitching(TableTennisEnv):
def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99):
super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step)

View File

@ -0,0 +1,51 @@
import numpy as np
jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
delay_bound = [0.05, 0.15]
tau_bound = [0.5, 1.5]
net_height = 0.1
table_height = 0.77
table_x_min = -1.1
table_x_max = 1.1
table_y_min = -0.6
table_y_max = 0.6
g = 9.81
def is_init_state_valid(init_state):
assert len(init_state) == 6, "init_state must be a 6D vector (pos+vel),got {}".format(init_state)
x = init_state[0]
y = init_state[1]
z = init_state[2] - table_height + 0.1
v_x = init_state[3]
v_y = init_state[4]
v_z = init_state[5]
# check if the initial state is wrong
if x > -0.2:
return False
# check if the ball velocity direction is wrong
if v_x < 0.:
return False
# check if the ball can pass the net
t_n = (-2.*(-v_z)/g + np.sqrt(4*(v_z**2)/g**2 - 8*(net_height-z)/g))/2.
if x + v_x * t_n < 0.05:
return False
# check if ball landing position will violate x bounds
t_l = (-2.*(-v_z)/g + np.sqrt(4*(v_z**2)/g**2 + 8*(z)/g))/2.
if x + v_x * t_l > table_x_max:
return False
# check if ball landing position will violate y bounds
if y + v_y * t_l > table_y_max or y + v_y * t_l < table_y_min:
return False
return True
def magnus_force(top_spin=0.0, side_spin=0.0, v_ball=np.zeros(3), v_wind=np.zeros(3)):
rho = 1.225 # Air density
A = 1.256 * 10e-3 # Cross-section area of ball
C_l = 4.68 * 10e-4 - 2.0984 * 10e-5 * (np.linalg.norm(v_ball) - 50) # Lift force coeffient or simply 1.23
w = np.array([0.0, top_spin, side_spin]) # Angular velocity of ball
f_m = 0.5 * rho * A * C_l * np.linalg.norm(v_ball-v_wind) * np.cross(w, v_ball-v_wind)
return f_m

View File

View File

@ -162,9 +162,12 @@ if __name__ == '__main__':
# ProMP # ProMP
example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render)
# ProDMP # ProDMP with Replanning
example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)
example_mp("TableTennis4DReplanProDMP-v0", seed=10, iterations=20, render=render)
example_mp("TableTennisWindReplanProDMP-v0", seed=10, iterations=20, render=render)
# Altered basis functions # Altered basis functions
obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)

View File

@ -0,0 +1,10 @@
import fancy_gym
def compare_bases_shape(env1_id, env2_id):
env1 = fancy_gym.make(env1_id, seed=0)
env1.traj_gen.show_scaled_basis(plot=True)
env2 = fancy_gym.make(env2_id, seed=0)
env2.traj_gen.show_scaled_basis(plot=True)
return
if __name__ == '__main__':
compare_bases_shape("TableTennis4DProDMP-v0", "TableTennis4DProMP-v0")

View File

@ -168,11 +168,11 @@ def make_bb(
# set tau bounds to minimum of two env steps otherwise computing the velocity is not possible. # set tau bounds to minimum of two env steps otherwise computing the velocity is not possible.
# maximum is full duration of one episode. # maximum is full duration of one episode.
if phase_kwargs.get('learn_tau'): if phase_kwargs.get('learn_tau') and phase_kwargs.get('tau_bound') is None:
phase_kwargs["tau_bound"] = [env.dt * 2, black_box_kwargs['duration']] phase_kwargs["tau_bound"] = [env.dt * 2, black_box_kwargs['duration']]
# Max delay is full duration minus two steps due to above reason # Max delay is full duration minus two steps due to above reason
if phase_kwargs.get('learn_delay'): if phase_kwargs.get('learn_delay') and phase_kwargs.get('delay_bound') is None:
phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2] phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2]
phase_gen = get_phase_generator(**phase_kwargs) phase_gen = get_phase_generator(**phase_kwargs)