diff --git a/.gitignore b/.gitignore index ec01816..91a91dd 100644 --- a/.gitignore +++ b/.gitignore @@ -111,3 +111,6 @@ venv.bak/ /configs/db.cfg legacy/ MUJOCO_LOG.TXT + +# vscode +.vscode diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index b997fd5..f5677d6 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -142,10 +142,9 @@ class BlackBoxWrapper(gym.ObservationWrapper): def step(self, action: np.ndarray): """ This function generates a trajectory based on a MP and then does the usual loop over reset and step""" - # TODO remove this part, right now only needed for beer pong - mp_params, env_spec_params = self.env.episode_callback( - action, self.traj_gen) - position, velocity = self.get_trajectory(mp_params) + position, velocity = self.get_trajectory(action) + position, velocity = self.env.set_episode_arguments(action, position, velocity) + traj_is_valid, position, velocity = self.env.preprocessing_and_validity_callback(action, position, velocity) trajectory_length = len(position) rewards = np.zeros(shape=(trajectory_length,)) @@ -158,6 +157,11 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos = dict() done = False + if not traj_is_valid: + obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity, + self.return_context_observation) + return self.observation(obs), trajectory_return, done, infos + self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): step_action = self.tracking_controller.get_action( @@ -180,9 +184,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if terminated or truncated or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, - t + 1 + self.current_traj_steps) - and self.plan_steps < self.max_planning_times): + if terminated or truncated or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, t + 1 + self.current_traj_steps) and self.plan_steps < self.max_planning_times): if self.condition_on_desired: self.condition_pos = pos diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py index b1a6aaa..bf6e67d 100644 --- a/fancy_gym/black_box/raw_interface_wrapper.py +++ b/fancy_gym/black_box/raw_interface_wrapper.py @@ -52,8 +52,38 @@ class RawInterfaceWrapper(gym.Wrapper): """ return self.env.dt - def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[ - np.ndarray, Union[np.ndarray, None]]: + def preprocessing_and_validity_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) \ + -> Tuple[bool, np.ndarray, np.ndarray]: + """ + Used to preprocess the action and check if the desired trajectory is valid. + Args: + action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if + specified, else only traj_gen parameters + pos_traj: a vector instance of the raw position trajectory + vel_traj: a vector instance of the raw velocity trajectory + Returns: + validity flag: bool, True if the raw trajectory is valid, False if not + pos_traj: a vector instance of the preprocessed position trajectory + vel_traj: a vector instance of the preprocessed velocity trajectory + """ + return True, pos_traj, vel_traj + + def set_episode_arguments(self, action, pos_traj, vel_traj): + """ + Used to set the arguments for env that valid for the whole episode + deprecated, replaced by preprocessing_and_validity_callback + Args: + action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if + specified, else only traj_gen parameters + pos_traj: a vector instance of the raw position trajectory + vel_traj: a vector instance of the raw velocity trajectory + Returns: + pos_traj: a vector instance of the preprocessed position trajectory + vel_traj: a vector instance of the preprocessed velocity trajectory + """ + return pos_traj, vel_traj + + def episode_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.array) -> Tuple[bool]: """ Used to extract the parameters for the movement primitive and other parameters from an action array which might include other actions like ball releasing time for the beer pong environment. @@ -65,4 +95,20 @@ class RawInterfaceWrapper(gym.Wrapper): Returns: Tuple: mp_arguments and other arguments """ - return action, None + return True + + def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: + """ + Used to return a artificial return from the env if the desired trajectory is invalid. + Args: + action: a vector instance of the whole action space, includes traj_gen parameters and additional parameters if + specified, else only traj_gen parameters + pos_traj: a vector instance of the raw position trajectory + vel_traj: a vector instance of the raw velocity trajectory + Returns: + obs: artificial observation if the trajectory is invalid, by default a zero vector + reward: artificial reward if the trajectory is invalid, by default 0 + done: artificial done if the trajectory is invalid, by default True + info: artificial info if the trajectory is invalid, by default empty dict + """ + return np.zeros(1), 0, True, {} \ No newline at end of file diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index af66cec..65a82dc 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -18,6 +18,8 @@ from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \ BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING +from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \ + MAX_EPISODE_STEPS_TABLE_TENNIS ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} @@ -40,6 +42,8 @@ DEFAULT_BB_DICT_ProMP = { 'num_basis': 5, 'num_basis_zero_start': 1, 'basis_bandwidth_factor': 3.0, + }, + "black_box_kwargs": { } } @@ -68,9 +72,12 @@ DEFAULT_BB_DICT_ProDMP = { "wrappers": [], "trajectory_generator_kwargs": { 'trajectory_generator_type': 'prodmp', + 'duration': 2.0, + 'weights_scale': 1.0, }, "phase_generator_kwargs": { 'phase_generator_type': 'exp', + 'tau': 1.5, }, "controller_kwargs": { 'controller_type': 'motor', @@ -239,6 +246,34 @@ register( max_episode_steps=FIXED_RELEASE_STEP, ) +# Table Tennis environments +for ctxt_dim in [2, 4]: + register( + id='TableTennis{}D-v0'.format(ctxt_dim), + entry_point='fancy_gym.envs.mujoco:TableTennisEnv', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, + kwargs={ + "ctxt_dim": ctxt_dim, + 'frame_skip': 4, + } + ) + +register( + id='TableTennisWind-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisWind', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, +) + +register( + id='TableTennisGoalSwitching-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, + kwargs={ + 'goal_switching_step': 99 + } +) + + # movement Primitive Environments ## Simple Reacher @@ -495,7 +530,8 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 + kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['disable_goal'] = True + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 @@ -507,6 +543,94 @@ for _v in _versions: kwargs=kwargs_dict_box_pushing_prodmp ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) + +## Table Tennis +_versions = ['TableTennis2D-v0', 'TableTennis4D-v0', 'TableTennisWind-v0', 'TableTennisGoalSwitching-v0'] +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_tt_promp = deepcopy(DEFAULT_BB_DICT_ProMP) + if _v == 'TableTennisWind-v0': + kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) + else: + kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) + kwargs_dict_tt_promp['name'] = _v + kwargs_dict_tt_promp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) + kwargs_dict_tt_promp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) + kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = False + kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = False + kwargs_dict_tt_promp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] + kwargs_dict_tt_promp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] + kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3 + kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 1 + kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_goal'] = 1 + kwargs_dict_tt_promp['black_box_kwargs']['verbose'] = 2 + register( + id=_env_id, + entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_tt_promp + ) + ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}ProDMP-{_name[1]}' + kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) + if _v == 'TableTennisWind-v0': + kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) + else: + kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) + kwargs_dict_tt_prodmp['name'] = _v + kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) + kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.7 + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['relative_goal'] = True + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['disable_goal'] = True + kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] + kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] + kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True + kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True + kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 3 + kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. + kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 + kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 + register( + id=_env_id, + entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_tt_prodmp + ) + ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) + +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' + kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) + if _v == 'TableTennisWind-v0': + kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) + else: + kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) + kwargs_dict_tt_prodmp['name'] = _v + kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) + kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = False + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 + kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] + kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] + kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True + kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True + kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 2 + kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. + kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 + kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 + kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3 + kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 50 == 0 + register( + id=_env_id, + entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_tt_prodmp + ) + ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) # # ## Walker2DJump # _versions = ['Walker2DJump-v0'] diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py index 3254b4d..ff51711 100644 --- a/fancy_gym/envs/mujoco/__init__.py +++ b/fancy_gym/envs/mujoco/__init__.py @@ -8,3 +8,4 @@ from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv from .reacher.reacher import ReacherEnv from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse +from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching diff --git a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py index 8988f5a..17a11e1 100644 --- a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py @@ -28,10 +28,10 @@ class MPWrapper(RawInterfaceWrapper): return self.data.qvel[0:7].copy() # TODO: Fix this - def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]: + def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None], bool]: if mp.learn_tau: self.release_step = action[0] / self.dt # Tau value - return action, None + return action, None, True def set_context(self, context): xyz = np.zeros(3) diff --git a/fancy_gym/envs/mujoco/table_tennis/__init__.py b/fancy_gym/envs/mujoco/table_tennis/__init__.py new file mode 100644 index 0000000..1438432 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import TT_MPWrapper, TTVelObs_MPWrapper diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/base_link_convex.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/base_link_convex.stl new file mode 100755 index 0000000..133b112 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/base_link_convex.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/base_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/base_link_fine.stl new file mode 100755 index 0000000..047e9df Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/base_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_dist_link_convex.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_dist_link_convex.stl new file mode 100644 index 0000000..3b05c27 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_dist_link_convex.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_dist_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_dist_link_fine.stl new file mode 100644 index 0000000..5ff94a2 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_dist_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_med_link_convex.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_med_link_convex.stl new file mode 100644 index 0000000..c548448 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_med_link_convex.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_med_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_med_link_fine.stl new file mode 100644 index 0000000..495160d Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_med_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..b4bb322 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..7b2f001 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..f05174e Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_fine.stl new file mode 100644 index 0000000..eb252d9 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_finger_prox_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_fine.stl new file mode 100644 index 0000000..0a986fa Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..c039f0d Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..250acaf Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..993d0f7 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl new file mode 100644 index 0000000..8448a3f Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup.stl new file mode 100644 index 0000000..bc34058 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split1.stl new file mode 100644 index 0000000..c80aa61 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split10.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split10.stl new file mode 100644 index 0000000..bd5708b Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split10.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split11.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split11.stl new file mode 100644 index 0000000..ac81da2 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split11.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split12.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split12.stl new file mode 100644 index 0000000..a18e96e Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split12.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split13.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split13.stl new file mode 100644 index 0000000..f0e5832 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split13.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split14.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split14.stl new file mode 100644 index 0000000..41a3e94 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split14.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split15.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split15.stl new file mode 100644 index 0000000..7a26643 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split15.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split16.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split16.stl new file mode 100644 index 0000000..155b24e Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split16.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split17.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split17.stl new file mode 100644 index 0000000..2fe8d95 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split17.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split18.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split18.stl new file mode 100644 index 0000000..f5287b2 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split18.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split2.stl new file mode 100644 index 0000000..5c1e50c Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split3.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split3.stl new file mode 100644 index 0000000..ef6d547 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split3.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split4.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split4.stl new file mode 100644 index 0000000..5476296 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split4.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split5.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split5.stl new file mode 100644 index 0000000..ccfcd42 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split5.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split6.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split6.stl new file mode 100644 index 0000000..72d6287 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split6.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split7.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split7.stl new file mode 100644 index 0000000..d4918f2 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split7.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split8.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split8.stl new file mode 100644 index 0000000..8a0cd84 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split8.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split9.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split9.stl new file mode 100644 index 0000000..4281a69 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/cup_split9.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/elbow_link_convex.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/elbow_link_convex.stl new file mode 100755 index 0000000..b34963d Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/elbow_link_convex.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/elbow_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/elbow_link_fine.stl new file mode 100755 index 0000000..f6a1515 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/elbow_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl new file mode 100755 index 0000000..e6aa6b6 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl new file mode 100755 index 0000000..667902e Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_fine.stl new file mode 100755 index 0000000..ed66bbb Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/forearm_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl new file mode 100755 index 0000000..aba957d Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl new file mode 100755 index 0000000..5cca6a9 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl new file mode 100755 index 0000000..3343e27 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_fine.stl new file mode 100755 index 0000000..ae505fd Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_pitch_link_convex.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_pitch_link_convex.stl new file mode 100755 index 0000000..c36cfec Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_pitch_link_convex.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_pitch_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_pitch_link_fine.stl new file mode 100755 index 0000000..dc633c4 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/shoulder_pitch_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl new file mode 100755 index 0000000..82d0093 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl new file mode 100755 index 0000000..7fd5a55 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_fine.stl new file mode 100755 index 0000000..76353ae Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/upper_arm_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_palm_link_convex.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_palm_link_convex.stl new file mode 100755 index 0000000..a0386f6 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_palm_link_convex.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_palm_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_palm_link_fine.stl new file mode 100755 index 0000000..f6b41ad Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_palm_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..c36f88f Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..d00cac1 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl new file mode 100755 index 0000000..34d1d8b Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_fine.stl new file mode 100644 index 0000000..13d2f73 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_pitch_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl new file mode 100755 index 0000000..06e857f Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl new file mode 100755 index 0000000..48e1bb1 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_fine.stl b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_fine.stl new file mode 100644 index 0000000..0d95239 Binary files /dev/null and b/fancy_gym/envs/mujoco/table_tennis/assets/meshes/wam/wrist_yaw_link_fine.stl differ diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_7_motor_actuator.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_7_motor_actuator.xml new file mode 100644 index 0000000..dcadc5c --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_7_motor_actuator.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_barrett_wam_7dof_right.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_barrett_wam_7dof_right.xml new file mode 100644 index 0000000..28580b9 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_barrett_wam_7dof_right.xml @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_free_ball.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_free_ball.xml new file mode 100644 index 0000000..3bbd964 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_free_ball.xml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_table.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_table.xml new file mode 100644 index 0000000..c313489 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_table.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_target_ball.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_target_ball.xml new file mode 100644 index 0000000..bf77c0f --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/include_target_ball.xml @@ -0,0 +1,10 @@ + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/right_arm_actuator.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/right_arm_actuator.xml new file mode 100644 index 0000000..dfa6924 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/right_arm_actuator.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/shared.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/shared.xml new file mode 100644 index 0000000..e349992 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/shared.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/assets/xml/table_tennis_env.xml b/fancy_gym/envs/mujoco/table_tennis/assets/xml/table_tennis_env.xml new file mode 100644 index 0000000..8c2aba3 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/assets/xml/table_tennis_env.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py new file mode 100644 index 0000000..e33ed6c --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py @@ -0,0 +1,54 @@ +from typing import Union, Tuple + +import numpy as np + +from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper +from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound + + +class TT_MPWrapper(RawInterfaceWrapper): + + # Random x goal + random init pos + @property + def context_mask(self): + return np.hstack([ + [False] * 7, # joints position + [False] * 7, # joints velocity + [True] * 2, # position ball x, y + [False] * 1, # position ball z + #[True] * 3, # velocity ball x, y, z + [True] * 2, # target landing position + # [True] * 1, # time + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.data.qpos[:7].copy() + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.data.qvel[:7].copy() + + def preprocessing_and_validity_callback(self, action, pos_traj, vel_traj): + return self.check_traj_validity(action, pos_traj, vel_traj) + + def set_episode_arguments(self, action, pos_traj, vel_traj): + return pos_traj, vel_traj + + def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray, + return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]: + return self.get_invalid_traj_step_return(action, pos_traj, return_contextual_obs) + +class TTVelObs_MPWrapper(TT_MPWrapper): + + @property + def context_mask(self): + return np.hstack([ + [False] * 7, # joints position + [False] * 7, # joints velocity + [True] * 2, # position ball x, y + [False] * 1, # position ball z + [True] * 3, # velocity ball x, y, z + [True] * 2, # target landing position + # [True] * 1, # time + ]) \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py new file mode 100644 index 0000000..7fb5e9f --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -0,0 +1,279 @@ +import os + +import numpy as np +from gym import utils, spaces +from gym.envs.mujoco import MujocoEnv + +from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force +from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound + +import mujoco + +MAX_EPISODE_STEPS_TABLE_TENNIS = 350 + +CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]]) +CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65], + [-0.2, 0.65, -0.2, 0.65]]) +CONTEXT_BOUNDS_SWICHING = np.array([[-1.0, -0.65, -1.0, 0.], + [-0.2, 0.65, -0.2, 0.65]]) + + +class TableTennisEnv(MujocoEnv, utils.EzPickle): + """ + 7 DoF table tennis environment + """ + def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, + goal_switching_step: int = None, + enable_artificial_wind: bool = False): + utils.EzPickle.__init__(**locals()) + self._steps = 0 + + self._hit_ball = False + self._ball_land_on_table = False + self._ball_contact_after_hit = False + self._ball_return_success = False + self._ball_landing_pos = None + self._init_ball_state = None + self._terminated = False + + self._id_set = False + + # reward calculation + self.ball_landing_pos = None + self._goal_pos = np.zeros(2) + self._ball_traj = [] + self._racket_traj = [] + + self._goal_switching_step = goal_switching_step + + self._enable_artificial_wind = enable_artificial_wind + + self._artificial_force = 0. + + MujocoEnv.__init__(self, + model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"), + frame_skip=frame_skip, + mujoco_bindings="mujoco") + + if ctxt_dim == 2: + self.context_bounds = CONTEXT_BOUNDS_2DIMS + elif ctxt_dim == 4: + self.context_bounds = CONTEXT_BOUNDS_4DIMS + if self._goal_switching_step is not None: + self.context_bounds = CONTEXT_BOUNDS_SWICHING + else: + raise NotImplementedError + + self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) + + self._wind_vel = np.zeros(3) + + def _set_ids(self): + self._floor_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "floor") + self._ball_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "target_ball_contact") + self._bat_front_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "bat") + self._bat_back_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "bat_back") + self._table_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "table_tennis_table") + self._id_set = True + + def step(self, action): + if not self._id_set: + self._set_ids() + + unstable_simulation = False + + if self._steps == self._goal_switching_step and self.np_random.uniform() < 0.5: + new_goal_pos = self._generate_goal_pos(random=True) + new_goal_pos[1] = -new_goal_pos[1] + self._goal_pos = new_goal_pos + self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) + mujoco.mj_forward(self.model, self.data) + + for _ in range(self.frame_skip): + if self._enable_artificial_wind: + self.data.qfrc_applied[-2] = self._artificial_force + try: + self.do_simulation(action, 1) + except Exception as e: + print("Simulation get unstable return with MujocoException: ", e) + unstable_simulation = True + self._terminated = True + break + + if not self._hit_ball: + self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \ + self._contact_checker(self._ball_contact_id, self._bat_back_id) + if not self._hit_ball: + ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id) + if ball_land_on_floor_no_hit: + self._ball_landing_pos = self.data.body("target_ball").xpos.copy() + self._terminated = True + if self._hit_ball and not self._ball_contact_after_hit: + if self._contact_checker(self._ball_contact_id, self._floor_contact_id): # first check contact with floor + self._ball_contact_after_hit = True + self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy() + self._terminated = True + elif self._contact_checker(self._ball_contact_id, self._table_contact_id): # second check contact with table + self._ball_contact_after_hit = True + self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy() + if self._ball_landing_pos[0] < 0.: # ball lands on the opponent side + self._ball_return_success = True + self._terminated = True + + # update ball trajectory & racket trajectory + self._ball_traj.append(self.data.body("target_ball").xpos.copy()) + self._racket_traj.append(self.data.geom("bat").xpos.copy()) + + self._steps += 1 + self._terminated = True if self._steps >= MAX_EPISODE_STEPS_TABLE_TENNIS else self._terminated + + reward = -25 if unstable_simulation else self._get_reward(self._terminated) + + land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \ + if self._ball_landing_pos is not None else 10. + + return self._get_obs(), reward, self._terminated, { + "hit_ball": self._hit_ball, + "ball_returned_success": self._ball_return_success, + "land_dist_error": land_dist_err, + "is_success": self._ball_return_success and land_dist_err < 0.2, + "num_steps": self._steps, + } + + def _contact_checker(self, id_1, id_2): + for coni in range(0, self.data.ncon): + con = self.data.contact[coni] + if (con.geom1 == id_1 and con.geom2 == id_2) or (con.geom1 == id_2 and con.geom2 == id_1): + return True + return False + + def reset_model(self): + self._steps = 0 + self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False) + self._goal_pos = self._generate_goal_pos(random=True) + self.data.joint("tar_x").qpos = self._init_ball_state[0] + self.data.joint("tar_y").qpos = self._init_ball_state[1] + self.data.joint("tar_z").qpos = self._init_ball_state[2] + self.data.joint("tar_x").qvel = self._init_ball_state[3] + self.data.joint("tar_y").qvel = self._init_ball_state[4] + self.data.joint("tar_z").qvel = self._init_ball_state[5] + + if self._enable_artificial_wind: + self._artificial_force = self.np_random.uniform(low=-0.1, high=0.1) + + self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) + + self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5]) + self.data.qvel[:7] = np.zeros(7) + + mujoco.mj_forward(self.model, self.data) + + self._hit_ball = False + self._ball_land_on_table = False + self._ball_contact_after_hit = False + self._ball_return_success = False + self._ball_landing_pos = None + self._terminated = False + self._ball_traj = [] + self._racket_traj = [] + return self._get_obs() + + def _generate_goal_pos(self, random=True): + if random: + return self.np_random.uniform(low=self.context_bounds[0][-2:], high=self.context_bounds[1][-2:]) + else: + return np.array([-0.6, 0.4]) + + def _get_obs(self): + obs = np.concatenate([ + self.data.qpos.flat[:7].copy(), + self.data.qvel.flat[:7].copy(), + self.data.joint("tar_x").qpos.copy(), + self.data.joint("tar_y").qpos.copy(), + self.data.joint("tar_z").qpos.copy(), + self._goal_pos.copy(), + ]) + return obs + + def _get_reward(self, terminated): + if not terminated: + return 0 + min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1)) + if not self._hit_ball: + return 0.2 * (1 - np.tanh(min_r_b_dist**2)) + if self._ball_landing_pos is None: + min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:,:2] - self._goal_pos[:2], axis=1)) + return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2)) + min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2]) + over_net_bonus = int(self._ball_landing_pos[0] < 0) + return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus + + def _generate_random_ball(self, random_pos=False, random_vel=False): + x_pos, y_pos, z_pos = -0.5, 0.35, 1.75 + x_vel, y_vel, z_vel = 2.5, 0., 0.5 + if random_pos: + x_pos = self.np_random.uniform(low=self.context_bounds[0][0], high=self.context_bounds[1][0]) + y_pos = self.np_random.uniform(low=self.context_bounds[0][1], high=self.context_bounds[1][1]) + if random_vel: + x_vel = self.np_random.uniform(low=2.0, high=3.0) + init_ball_state = np.array([x_pos, y_pos, z_pos, x_vel, y_vel, z_vel]) + return init_ball_state + + def _generate_valid_init_ball(self, random_pos=False, random_vel=False): + init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel) + while not is_init_state_valid(init_ball_state): + init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel) + return init_ball_state + + def _get_traj_invalid_penalty(self, action, pos_traj): + tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) + delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) + violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) + violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0)) + invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ + violate_high_bound_error + violate_low_bound_error + return -invalid_penalty + + def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs): + obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj + penalty = self._get_traj_invalid_penalty(action, pos_traj) + return obs, penalty, True, { + "hit_ball": [False], + "ball_returned_success": [False], + "land_dist_error": [10.], + "is_success": [False], + "trajectory_length": 1, + "num_steps": [1], + } + + @staticmethod + def check_traj_validity(action, pos_traj, vel_traj): + time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \ + or action[1] > delay_bound[1] or action[1] < delay_bound[0] + if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low): + return False, pos_traj, vel_traj + return True, pos_traj, vel_traj + + +class TableTennisWind(TableTennisEnv): + def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4): + super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, enable_artificial_wind=True) + + def _get_obs(self): + obs = np.concatenate([ + self.data.qpos.flat[:7].copy(), + self.data.qvel.flat[:7].copy(), + self.data.joint("tar_x").qpos.copy(), + self.data.joint("tar_y").qpos.copy(), + self.data.joint("tar_z").qpos.copy(), + self.data.joint("tar_x").qvel.copy(), + self.data.joint("tar_y").qvel.copy(), + self.data.joint("tar_z").qvel.copy(), + self._goal_pos.copy(), + ]) + return obs + + +class TableTennisGoalSwitching(TableTennisEnv): + def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99): + super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step) diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py new file mode 100644 index 0000000..4d9a2d2 --- /dev/null +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py @@ -0,0 +1,51 @@ +import numpy as np + +jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]) +jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]) +delay_bound = [0.05, 0.15] +tau_bound = [0.5, 1.5] + +net_height = 0.1 +table_height = 0.77 +table_x_min = -1.1 +table_x_max = 1.1 +table_y_min = -0.6 +table_y_max = 0.6 +g = 9.81 + +def is_init_state_valid(init_state): + assert len(init_state) == 6, "init_state must be a 6D vector (pos+vel),got {}".format(init_state) + x = init_state[0] + y = init_state[1] + z = init_state[2] - table_height + 0.1 + v_x = init_state[3] + v_y = init_state[4] + v_z = init_state[5] + + # check if the initial state is wrong + if x > -0.2: + return False + # check if the ball velocity direction is wrong + if v_x < 0.: + return False + # check if the ball can pass the net + t_n = (-2.*(-v_z)/g + np.sqrt(4*(v_z**2)/g**2 - 8*(net_height-z)/g))/2. + if x + v_x * t_n < 0.05: + return False + # check if ball landing position will violate x bounds + t_l = (-2.*(-v_z)/g + np.sqrt(4*(v_z**2)/g**2 + 8*(z)/g))/2. + if x + v_x * t_l > table_x_max: + return False + # check if ball landing position will violate y bounds + if y + v_y * t_l > table_y_max or y + v_y * t_l < table_y_min: + return False + + return True + +def magnus_force(top_spin=0.0, side_spin=0.0, v_ball=np.zeros(3), v_wind=np.zeros(3)): + rho = 1.225 # Air density + A = 1.256 * 10e-3 # Cross-section area of ball + C_l = 4.68 * 10e-4 - 2.0984 * 10e-5 * (np.linalg.norm(v_ball) - 50) # Lift force coeffient or simply 1.23 + w = np.array([0.0, top_spin, side_spin]) # Angular velocity of ball + f_m = 0.5 * rho * A * C_l * np.linalg.norm(v_ball-v_wind) * np.cross(w, v_ball-v_wind) + return f_m diff --git a/fancy_gym/examples/example_replanning.py b/fancy_gym/examples/example_replanning.py new file mode 100644 index 0000000..e69de29 diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 885cc93..7388b4b 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -162,9 +162,12 @@ if __name__ == '__main__': # ProMP example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) + example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render) - # ProDMP + # ProDMP with Replanning example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) + example_mp("TableTennis4DReplanProDMP-v0", seed=10, iterations=20, render=render) + example_mp("TableTennisWindReplanProDMP-v0", seed=10, iterations=20, render=render) # Altered basis functions obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) diff --git a/fancy_gym/examples/mp_params_tuning.py b/fancy_gym/examples/mp_params_tuning.py new file mode 100644 index 0000000..644d86b --- /dev/null +++ b/fancy_gym/examples/mp_params_tuning.py @@ -0,0 +1,10 @@ +import fancy_gym + +def compare_bases_shape(env1_id, env2_id): + env1 = fancy_gym.make(env1_id, seed=0) + env1.traj_gen.show_scaled_basis(plot=True) + env2 = fancy_gym.make(env2_id, seed=0) + env2.traj_gen.show_scaled_basis(plot=True) + return +if __name__ == '__main__': + compare_bases_shape("TableTennis4DProDMP-v0", "TableTennis4DProMP-v0") \ No newline at end of file diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index d3642a4..778e5d8 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -183,11 +183,11 @@ def make_bb( # set tau bounds to minimum of two env steps otherwise computing the velocity is not possible. # maximum is full duration of one episode. - if phase_kwargs.get('learn_tau'): + if phase_kwargs.get('learn_tau') and phase_kwargs.get('tau_bound') is None: phase_kwargs["tau_bound"] = [env.dt * 2, black_box_kwargs['duration']] # Max delay is full duration minus two steps due to above reason - if phase_kwargs.get('learn_delay'): + if phase_kwargs.get('learn_delay') and phase_kwargs.get('delay_bound') is None: phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2] phase_gen = get_phase_generator(**phase_kwargs)