From 5a547d85f9f7b599fe5a93f16328de53be51c684 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Fri, 4 Nov 2022 21:22:32 +0100
Subject: [PATCH] updates

---
 fancy_gym/black_box/black_box_wrapper.py      | 112 +++++++-----
 fancy_gym/black_box/raw_interface_wrapper.py  |  12 +-
 fancy_gym/envs/__init__.py                    |  41 ++++-
 fancy_gym/envs/mujoco/__init__.py             |   1 +
 fancy_gym/envs/mujoco/beerpong/mp_wrapper.py  |   4 +-
 .../envs/mujoco/table_tennis/mp_wrapper.py    |  33 +++-
 .../mujoco/table_tennis/table_tennis_env.py   | 173 ++++++++++++++----
 .../mujoco/table_tennis/table_tennis_utils.py |  50 +++++
 .../examples/examples_movement_primitives.py  |  13 +-
 9 files changed, 336 insertions(+), 103 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index dc5445e..735efec 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -23,7 +23,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                  replanning_schedule: Optional[
                      Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None,
                  reward_aggregation: Callable[[np.ndarray], float] = np.sum,
-                 max_planning_times: int = 1,
+                 max_planning_times: Optional[int] = None,
                  desired_conditioning: bool = False
                  ):
        """
@@ -163,8 +163,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
             # action = np.concatenate((basis_weights, goal_weights), axis=1).flatten()
 
         # TODO remove this part, right now only needed for beer pong
-        mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
-        position, velocity = self.get_trajectory(mp_params)
+        # mp_params, env_spec_params, proceed = self.env.episode_callback(action, self.traj_gen)
+        position, velocity = self.get_trajectory(action)
+        traj_is_valid = self.env.episode_callback(action, position, velocity)
 
         trajectory_length = len(position)
         rewards = np.zeros(shape=(trajectory_length,))
@@ -176,52 +177,71 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         infos = dict()
         done = False
 
-        self.plan_counts += 1
-        for t, (pos, vel) in enumerate(zip(position, velocity)):
-            step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
-            c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
-            obs, c_reward, done, info = self.env.step(c_action)
-            rewards[t] = c_reward
+        if self.verbose >= 2:
+            desired_pos_traj = []
+            desired_vel_traj = []
+            pos_traj = []
+            vel_traj = []
+
+        if traj_is_valid:
+            self.plan_counts += 1
+            for t, (pos, vel) in enumerate(zip(position, velocity)):
+                step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
+                c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
+                obs, c_reward, done, info = self.env.step(c_action)
+                rewards[t] = c_reward
+
+                if self.verbose >= 2:
+                    actions[t, :] = c_action
+                    observations[t, :] = obs
+
+                for k, v in info.items():
+                    elems = infos.get(k, [None] * trajectory_length)
+                    elems[t] = v
+                    infos[k] = elems
+
+                if self.verbose >= 2:
+                    desired_pos_traj.append(pos)
+                    desired_vel_traj.append(vel)
+                    pos_traj.append(self.current_pos)
+                    vel_traj.append(self.current_vel)
+
+                if self.render_kwargs:
+                    self.env.render(**self.render_kwargs)
+
+                if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
+                                                    t + 1 + self.current_traj_steps):
+
+                    # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
+                    #     continue
+
+                    self.condition_pos = pos if self.desired_conditioning else self.current_pos
+                    self.condition_vel = vel if self.desired_conditioning else self.current_vel
+
+                    break
+
+            infos.update({k: v[:t+1] for k, v in infos.items()})
+            self.current_traj_steps += t + 1
 
             if self.verbose >= 2:
-                actions[t, :] = c_action
-                observations[t, :] = obs
-
-            for k, v in info.items():
-                elems = infos.get(k, [None] * trajectory_length)
-                elems[t] = v
-                infos[k] = elems
-
-            if self.render_kwargs:
-                self.env.render(**self.render_kwargs)
-
-            if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
-                                                t + 1 + self.current_traj_steps):
-
-                if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
-                    continue
-
-                self.condition_pos = pos if self.desired_conditioning else self.current_pos
-                self.condition_vel = vel if self.desired_conditioning else self.current_vel
-
-                break
-
-        infos.update({k: v[:t+1] for k, v in infos.items()})
-        self.current_traj_steps += t + 1
-
-        if self.verbose >= 2:
-            infos['desired_pos'] = position[:t+1]
-            infos['desired_vel'] = velocity[:t+1]
-            infos['current_pos'] = self.current_pos
-            infos['current_vel'] = self.current_vel
-            infos['step_actions'] = actions[:t + 1]
-            infos['step_observations'] = observations[:t + 1]
-            infos['step_rewards'] = rewards[:t + 1]
-
-        infos['trajectory_length'] = t + 1
-        trajectory_return = self.reward_aggregation(rewards[:t + 1])
-        return self.observation(obs), trajectory_return, done, infos
+                infos['desired_pos'] = position[:t+1]
+                infos['desired_vel'] = velocity[:t+1]
+                infos['current_pos'] = self.current_pos
+                infos['current_vel'] = self.current_vel
+                infos['step_actions'] = actions[:t + 1]
+                infos['step_observations'] = observations[:t + 1]
+                infos['step_rewards'] = rewards[:t + 1]
+                infos['desired_pos_traj'] = np.array(desired_pos_traj)
+                infos['desired_vel_traj'] = np.array(desired_vel_traj)
+                infos['pos_traj'] = np.array(pos_traj)
+                infos['vel_traj'] = np.array(vel_traj)
 
+            infos['trajectory_length'] = t + 1
+            trajectory_return = self.reward_aggregation(rewards[:t + 1])
+            return self.observation(obs), trajectory_return, done, infos
+        else:
+            obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity)
+            return self.observation(obs), trajectory_return, done, infos
 
     def render(self, **kwargs):
         """Only set render options here, such that they can be used during the rollout. This only needs to be called once"""
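For context, the `replanning_schedule` consulted in the loop above is a plain callable over the controller state with the signature `Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]`. A minimal sketch of one (the fixed 25-step interval is an illustrative choice, not something this patch prescribes):

import numpy as np

def fixed_interval_replanning(current_pos: np.ndarray, current_vel: np.ndarray,
                              obs: np.ndarray, action: np.ndarray, step: int) -> bool:
    # Replan every 25 env steps; `step` is t + 1 + current_traj_steps in the loop above.
    return step % 25 == 0
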
diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py
index 02945a1..de34346 100644
--- a/fancy_gym/black_box/raw_interface_wrapper.py
+++ b/fancy_gym/black_box/raw_interface_wrapper.py
@@ -52,8 +52,7 @@ class RawInterfaceWrapper(gym.Wrapper):
         """
         return self.env.dt
 
-    def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
-        np.ndarray, Union[np.ndarray, None]]:
+    def episode_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> bool:
         """
-        Used to extract the parameters for the movement primitive and other parameters from an action array which might
-        include other actions like ball releasing time for the beer pong environment.
+        Used to validate the desired position and velocity trajectories generated from an action array, which might
+        include other actions like ball releasing time for the beer pong environment.
@@ -65,4 +64,11 @@ class RawInterfaceWrapper(gym.Wrapper):
         Returns:
-            Tuple: mp_arguments and other arguments
+            bool: True if the desired trajectory is valid, False otherwise
         """
-        return action, None
+        return True
+
+    def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
+        """
+        Used to return a fake environment return (obs, reward, done, info) if the desired trajectory is invalid.
+        """
+        obs = np.zeros(1)
+        return obs, 0, True, {}
\ No newline at end of file
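To make the contract of these two hooks concrete, a minimal subclass might look like this. The joint limits here are made up for illustration; the table tennis wrapper further down implements the real thing:

import numpy as np
from typing import Tuple
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper

class ClampedTrajWrapper(RawInterfaceWrapper):
    # Hypothetical joint limits, for illustration only.
    _low, _high = -np.pi, np.pi

    def episode_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> bool:
        # Declare the trajectory invalid as soon as any desired position leaves the limits.
        return bool(np.all(pos_traj <= self._high) and np.all(pos_traj >= self._low))

    def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray,
                              vel_traj: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
        # Penalize by how far the trajectory overshoots the limits and end the episode.
        overshoot = np.sum(np.maximum(pos_traj - self._high, 0) + np.maximum(self._low - pos_traj, 0))
        return np.zeros(1), -float(overshoot), True, {'trajectory_length': 1}
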
+ """ + obs = np.zeros(1) + return obs, 0, True, {} \ No newline at end of file diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index d3dfa8e..cb074e1 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -28,7 +28,9 @@ DEFAULT_BB_DICT_ProMP = { 'trajectory_generator_type': 'promp' }, "phase_generator_kwargs": { - 'phase_generator_type': 'linear' + 'phase_generator_type': 'linear', + 'learn_tau': False, + 'learn_delay': False, }, "controller_kwargs": { 'controller_type': 'motor', @@ -40,6 +42,8 @@ DEFAULT_BB_DICT_ProMP = { 'num_basis': 5, 'num_basis_zero_start': 1, 'basis_bandwidth_factor': 3.0, + }, + "black_box_kwargs": { } } @@ -245,6 +249,18 @@ register( max_episode_steps=FIXED_RELEASE_STEP, ) +# Table Tennis environments +for ctxt_dim in [2, 4]: + register( + id='TableTennis{}D-v0'.format(ctxt_dim), + entry_point='fancy_gym.envs.mujoco:TableTennisEnv', + max_episode_steps=350, + kwargs={ + "ctxt_dim": ctxt_dim, + 'frame_skip': 4 + } + ) + # movement Primitive Environments ## Simple Reacher @@ -515,6 +531,29 @@ for _v in _versions: kwargs=kwargs_dict_box_pushing_prodmp ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) + +## Table Tennis +_versions = ['TableTennis2D-v0', 'TableTennis4D-v0'] +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_tt_promp = deepcopy(DEFAULT_BB_DICT_ProMP) + kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.MPWrapper) + kwargs_dict_tt_promp['name'] = _v + kwargs_dict_tt_promp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) + kwargs_dict_tt_promp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) + kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = True + kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = True + kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3 + kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 + kwargs_dict_tt_promp['black_box_kwargs']['duration'] = 2. 
diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py
index 3254b4d..2e84dab 100644
--- a/fancy_gym/envs/mujoco/__init__.py
+++ b/fancy_gym/envs/mujoco/__init__.py
@@ -8,3 +8,4 @@ from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
 from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
 from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse
+from .table_tennis.table_tennis_env import TableTennisEnv
diff --git a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py
index 8988f5a..17a11e1 100644
--- a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py
+++ b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py
@@ -28,10 +28,10 @@ class MPWrapper(RawInterfaceWrapper):
         return self.data.qvel[0:7].copy()
 
     # TODO: Fix this
-    def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+    def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None], bool]:
         if mp.learn_tau:
             self.release_step = action[0] / self.dt  # Tau value
-        return action, None
+        return action, None, True
 
     def set_context(self, context):
         xyz = np.zeros(3)
diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
index 218cb97..41dd482 100644
--- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
+++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
@@ -3,6 +3,7 @@
 from typing import Union, Tuple
 
 import numpy as np
 
 from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound
 
 
 class MPWrapper(RawInterfaceWrapper):
@@ -13,10 +14,8 @@ class MPWrapper(RawInterfaceWrapper):
         return np.hstack([
             [False] * 7,  # joints position
             [False] * 7,  # joints velocity
-            [False] * 3,  # position of box
-            [False] * 4,  # orientation of box
-            [True] * 3,  # position of target
-            [True] * 4,  # orientation of target
+            [False] * 3,  # ball position
+            [True] * 2,  # target landing position
             # [True] * 1,  # time
         ])
 
@@ -26,4 +25,28 @@ class MPWrapper(RawInterfaceWrapper):
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.data.qvel[:7].copy()
\ No newline at end of file
+        return self.data.qvel[:7].copy()
+
+    def episode_callback(self, action, pos_traj, vel_traj):
+        time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \
+                       or action[1] > delay_bound[1] or action[1] < delay_bound[0]
+        if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low):
+            return False
+        return True
+
+    def invalid_traj_callback(self, action, pos_traj: np.ndarray, vel_traj: np.ndarray) \
+            -> Tuple[np.ndarray, float, bool, dict]:
+        tau_invalid_penalty = np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])
+        delay_invalid_penalty = np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])
+        violate_high_bound_error = np.sum(np.maximum(pos_traj - jnt_pos_high, 0))
+        violate_low_bound_error = np.sum(np.maximum(jnt_pos_low - pos_traj, 0))
+        invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \
+                          violate_high_bound_error + violate_low_bound_error
+        return self.get_obs(), -invalid_penalty, True, {
+            "hit_ball": [False],
+            "ball_returned_success": [False],
+            "land_dist_error": [10.],
+            "is_success": [False],
+            "trajectory_length": 1,
+            "num_steps": [1]
+        }
\ No newline at end of file
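To make the penalty concrete, a hand-computed case; the action values are invented, the bounds come from table_tennis_utils above:

import numpy as np
from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import tau_bound, delay_bound

# Hypothetical invalid action: tau = 1.7 s exceeds tau_bound = [0.5, 1.5],
# delay = 0.02 s undershoots delay_bound = [0.05, 0.3].
tau, delay = 1.7, 0.02
tau_penalty = max(0, tau - tau_bound[1]) + max(0, tau_bound[0] - tau)              # 0.2
delay_penalty = max(0, delay - delay_bound[1]) + max(0, delay_bound[0] - delay)    # 0.03
# With no joint-bound violations, invalid_traj_callback returns reward = -0.23.
print(-(tau_penalty + delay_penalty))
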
diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
index fdde6f5..c384d9b 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@@ -4,6 +4,8 @@ import numpy as np
 from gym import utils, spaces
 from gym.envs.mujoco import MujocoEnv
 
+from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import check_init_state_validity, magnus_force
+
 import mujoco
 
 MAX_EPISODE_STEPS_TABLE_TENNIS = 250
@@ -22,10 +24,23 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
         utils.EzPickle.__init__(**locals())
         self._steps = 0
 
-        self.hit_ball = False
-        self.ball_land_on_table = False
+        self._hit_ball = False
+        self._ball_land_on_table = False
+        self._ball_contact_after_hit = False
+        self._ball_return_success = False
+        self._ball_landing_pos = None
+        self._init_ball_state = None
+        self._episode_end = False
+        self._id_set = False
+
+        # reward calculation
         self.ball_landing_pos = None
+        self._goal_pos = np.zeros(2)
+        self._ball_traj = []
+        self._racket_traj = []
+
         MujocoEnv.__init__(self,
                            model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"),
                            frame_skip=frame_skip,
@@ -40,11 +55,11 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
         self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32)
 
     def _set_ids(self):
-        self._floor_contact_id = self.model.geom("floor").bodyid[0]
-        self._ball_contact_id = self.model.geom("target_ball_contact").bodyid[0]
-        self._bat_front_id = self.model.geom("bat").bodyid[0]
-        self._bat_back_id = self.model.geom("bat_back").bodyid[0]
-        self._table_contact_id = self.model.geom("table_tennis_table").bodyid[0]
+        self._floor_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "floor")
+        self._ball_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "target_ball_contact")
+        self._bat_front_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "bat")
+        self._bat_back_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "bat_back")
+        self._table_contact_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, "table_tennis_table")
        self._id_set = True
 
     def step(self, action):
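The switch to mujoco.mj_name2id matters because the old lookup returned the owning body's id (geom(...).bodyid), while MuJoCo contact entries store geom ids, so the contact checks presumably never matched. A quick way to sanity-check the mapping in a REPL (illustrative; the XML path is assumed):

import mujoco

model = mujoco.MjModel.from_xml_path("table_tennis_env.xml")  # assumed local path
geom_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_GEOM, "bat")
# mj_id2name inverts the lookup; data.contact[i].geom1/geom2 hold ids from this namespace.
assert mujoco.mj_id2name(model, mujoco.mjtObj.mjOBJ_GEOM, geom_id) == "bat"
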
@@ -53,40 +68,55 @@
 
         unstable_simulation = False
 
-        done = False
-
         for _ in range(self.frame_skip):
             try:
-                self.do_simulation(action, self.frame_skip)
+                self.do_simulation(action, 1)
             except Exception as e:
                 print("Simulation became unstable, returning with MujocoException:", e)
                 unstable_simulation = True
+                self._episode_end = True
+                break
 
-            if not self.hit_ball:
-                self.hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
-                    self._contact_checker(self._ball_contact_id, self._bat_back_id)
-            if not self.hit_ball:
+            if not self._hit_ball:
+                self._hit_ball = self._contact_checker(self._ball_contact_id, self._bat_front_id) or \
+                    self._contact_checker(self._ball_contact_id, self._bat_back_id)
+            if not self._hit_ball:
                 ball_land_on_floor_no_hit = self._contact_checker(self._ball_contact_id, self._floor_contact_id)
                 if ball_land_on_floor_no_hit:
-                    self.ball_landing_pos = self.data.body("target_ball").xpos.copy()
-                    done = True
-            if self.hit_ball and not self.ball_contact_after_hit:
-                if not self.ball_contact_after_hit:
+                    self._ball_landing_pos = self.data.body("target_ball").xpos.copy()
+                    self._episode_end = True
+            if self._hit_ball and not self._ball_contact_after_hit:
+                if not self._ball_contact_after_hit:
                     if self._contact_checker(self._ball_contact_id, self._floor_contact_id):  # first check contact with floor
-                        self.ball_contact_after_hit = True
-                        self.ball_landing_pos = self.sim.data.geom("target_ball_contact").xpos.copy()
+                        self._ball_contact_after_hit = True
+                        self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
+                        self._episode_end = True
                     elif self._contact_checker(self._ball_contact_id, self._table_contact_id):  # second check contact with table
-                        self.ball_contact_after_hit = True
-                        self.ball_landing_pos = self.sim.data.geom("target_ball_contact").xpos.copy()
-                        if self.ball_landing_pos[0] < 0.:  # ball lands on the opponent side
-                            self.ball_return_success = True
+                        self._ball_contact_after_hit = True
+                        self._ball_landing_pos = self.data.geom("target_ball_contact").xpos.copy()
+                        if self._ball_landing_pos[0] < 0.:  # ball lands on the opponent side
+                            self._ball_return_success = True
+                        self._episode_end = True
+
+            # update ball trajectory & racket trajectory
+            self._ball_traj.append(self.data.body("target_ball").xpos.copy())
+            self._racket_traj.append(self.data.geom("bat").xpos.copy())
 
         self._steps += 1
-        episode_end = True if self._steps >= MAX_EPISODE_STEPS_TABLE_TENNIS else False
+        self._episode_end = True if self._steps >= MAX_EPISODE_STEPS_TABLE_TENNIS else self._episode_end
 
-        obs = self._get_obs()
+        reward = -25 if unstable_simulation else self._get_reward(self._episode_end)
 
-        return obs, 0., False, {}
+        land_dist_err = np.linalg.norm(self._ball_landing_pos[:-1] - self._goal_pos) \
+            if self._ball_landing_pos is not None else 10.
+
+        return self._get_obs(), reward, self._episode_end, {
+            "hit_ball": self._hit_ball,
+            "ball_returned_success": self._ball_return_success,
+            "land_dist_error": land_dist_err,
+            "is_success": self._ball_return_success and land_dist_err < 0.2,
+            "num_steps": self._steps,
+        }
 
     def _contact_checker(self, id_1, id_2):
         for coni in range(0, self.data.ncon):
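The body of _contact_checker falls between hunks and is elided here. For readers, the standard MuJoCo pattern it presumably follows is the following sketch, not the file's verbatim code:

    def _contact_checker(self, id_1, id_2):
        # Scan MuJoCo's active contacts for the given pair of geom ids, in either order.
        for coni in range(0, self.data.ncon):
            con = self.data.contact[coni]
            if (con.geom1 == id_1 and con.geom2 == id_2) or (con.geom1 == id_2 and con.geom2 == id_1):
                return True
        return False
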
@@ -97,31 +127,94 @@
     def reset_model(self):
         self._steps = 0
-        new_context = self._sample_context()
-        self.data.joint("tar_x").qpos = new_context[0]
-        self.data.joint("tar_y").qpos = new_context[1]
-        self.data.joint("tar_z").qvel = 2.
+        self._init_ball_state = self._generate_valid_init_ball(random_pos=False, random_vel=False)
+        self._goal_pos = self.np_random.uniform(low=self.context_bounds[0][-2:], high=self.context_bounds[1][-2:])
+        self.data.joint("tar_x").qpos = self._init_ball_state[0]
+        self.data.joint("tar_y").qpos = self._init_ball_state[1]
+        self.data.joint("tar_z").qpos = self._init_ball_state[2]
+        self.data.joint("tar_x").qvel = self._init_ball_state[3]
+        self.data.joint("tar_y").qvel = self._init_ball_state[4]
+        self.data.joint("tar_z").qvel = self._init_ball_state[5]
 
-        self.ball_landing_pos = None
-        self.hit_ball = False
+        self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]])
+
+        self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5])
+
+        mujoco.mj_forward(self.model, self.data)
+
+        self._hit_ball = False
+        self._ball_land_on_table = False
+        self._ball_contact_after_hit = False
+        self._ball_return_success = False
+        self._ball_landing_pos = None
+        self._episode_end = False
+        self._ball_traj = []
+        self._racket_traj = []
         return self._get_obs()
 
-    def _sample_context(self):
-        return self.np_random.uniform(low=self.context_bounds[0],
-                                      high=self.context_bounds[1])
-
     def _get_obs(self):
         obs = np.concatenate([
-            self.data.qpos.flat[:7],
-            self.data.qvel.flat[:7],
+            self.data.qpos.flat[:7].copy(),
+            self.data.qvel.flat[:7].copy(),
+            self.data.joint("tar_x").qpos.copy(),
+            self.data.joint("tar_y").qpos.copy(),
+            self.data.joint("tar_z").qpos.copy(),
+            # self.data.body("target_ball").xvel.copy(),
+            self._goal_pos.copy(),
         ])
         return obs
 
+    def get_obs(self):
+        return self._get_obs()
+
+    def _get_reward(self, episode_end):
+        if not episode_end:
+            return 0
+        else:
+            min_r_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj) - np.array(self._racket_traj), axis=1))
+            if not self._hit_ball:
+                return 0.2 * (1 - np.tanh(min_r_b_dist ** 2))
+            else:
+                if self._ball_landing_pos is None:
+                    min_b_des_b_dist = np.min(np.linalg.norm(np.array(self._ball_traj)[:, :2] - self._goal_pos[:2], axis=1))
+                    return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist ** 2))
+                else:
+                    min_b_des_b_land_dist = np.linalg.norm(self._goal_pos[:2] - self._ball_landing_pos[:2])
+                    over_net_bonus = int(self._ball_landing_pos[0] < 0)
+                    return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
+
+    def _generate_random_ball(self, random_pos=False, random_vel=False):
+        x_pos, y_pos, z_pos = -0.5, 0.35, 1.75
+        x_vel, y_vel, z_vel = 2.5, 0., 0.5
+        if random_pos:
+            x_pos = self.np_random.uniform(low=self.context_bounds[0][0], high=self.context_bounds[1][0], size=1)
+            y_pos = self.np_random.uniform(low=self.context_bounds[0][1], high=self.context_bounds[1][1], size=1)
+        if random_vel:
+            x_vel = self.np_random.uniform(low=2.0, high=3.0, size=1)
+        init_ball_state = np.array([x_pos, y_pos, z_pos, x_vel, y_vel, z_vel])
+        return init_ball_state
+
+    def _generate_valid_init_ball(self, random_pos=False, random_vel=False):
+        init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel)
+        while not check_init_state_validity(init_ball_state):
+            init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel)
+        return init_ball_state
+
+    def check_traj_validity(self, traj):
+        raise NotImplementedError
+
+    def get_invalid_steps(self, traj):
+        penalty = -100
+        return self._get_obs(), penalty, True, {}
 
 if __name__ == "__main__":
     env = TableTennisEnv()
-    env.reset()
     for _ in range(1000):
-        for _ in range(200):
+        obs = env.reset()
+        for _ in range(2000):
             env.render("human")
-            env.step(env.action_space.sample())
+            obs, reward, done, info = env.step(np.zeros(7))
+            print(reward)
+            if done:
+                break
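To see the scale of _get_reward's tanh shaping terms, a hand-worked case with invented distances:

import numpy as np

# Suppose the racket passed within 0.3 m of the ball, the ball was hit,
# and it landed 0.5 m from the goal on the opponent side (x < 0).
min_r_b_dist, land_dist = 0.3, 0.5
reward = 2 * (1 - np.tanh(min_r_b_dist ** 2)) \
         + 4 * (1 - np.tanh(land_dist ** 2)) \
         + 1  # over_net_bonus
print(round(reward, 2))  # ~5.84
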
diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py
index 8b13789..b7d3eda 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_utils.py
@@ -1 +1,51 @@
+import numpy as np
+
+jnt_pos_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+jnt_pos_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+delay_bound = [0.05, 0.3]
+tau_bound = [0.5, 1.5]
+
+net_height = 0.1
+table_height = 0.77
+table_x_min = -1.1
+table_x_max = 1.1
+table_y_min = -0.6
+table_y_max = 0.6
+g = 9.81
+
+def check_init_state_validity(init_state):
+    assert len(init_state) == 6, "init_state must be a 6D vector (pos+vel), got {}".format(init_state)
+    x = init_state[0]
+    y = init_state[1]
+    z = init_state[2] - table_height + 0.1
+    v_x = init_state[3]
+    v_y = init_state[4]
+    v_z = init_state[5]
+
+    # check if the initial position is wrong
+    if x > -0.2:
+        return False
+    # check if the ball velocity direction is wrong
+    if v_x < 0.:
+        return False
+    # check if the ball can pass the net
+    t_n = (-2.*(-v_z)/g + np.sqrt(4*(v_z**2)/g**2 - 8*(net_height-z)/g))/2.
+    if x + v_x * t_n < 0.05:
+        return False
+    # check if ball landing position will violate x bounds
+    t_l = (-2.*(-v_z)/g + np.sqrt(4*(v_z**2)/g**2 + 8*(z)/g))/2.
+    if x + v_x * t_l > table_x_max:
+        return False
+    # check if ball landing position will violate y bounds
+    if y + v_y * t_l > table_y_max or y + v_y * t_l < table_y_min:
+        return False
+
+    return True
+
+def magnus_force(top_spin=0.0, side_spin=0.0, v_ball=np.zeros(3), v_wind=np.zeros(3)):
+    rho = 1.225  # air density
+    A = 1.256 * 1e-3  # cross-section area of the ball
+    C_l = 4.68 * 1e-4 - 2.0984 * 1e-5 * (np.linalg.norm(v_ball) - 50)  # lift force coefficient (or simply use 1.23)
+    w = np.array([0.0, top_spin, side_spin])  # angular velocity of the ball
+    f_m = 0.5 * rho * A * C_l * np.linalg.norm(v_ball - v_wind) * np.cross(w, v_ball - v_wind)
+    return f_m
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index e19eacb..19d4cd2 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -157,17 +157,18 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 if __name__ == '__main__':
     render = True
     # DMP
-    example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
+    # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
 
     # ProMP
-    example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
-    example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
+    # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+    # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
+    example_mp("TableTennis4DProMP-v0", seed=10, iterations=5, render=render)
 
     # ProDMP
-    example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render)
+    # example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render)
 
     # Altered basis functions
-    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
+    # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
 
     # Custom MP
-    example_fully_custom_mp(seed=10, iterations=1, render=render)
+    # example_fully_custom_mp(seed=10, iterations=1, render=render)
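The net-crossing time t_n in check_init_state_validity is the positive root of the ballistic equation z + v_z*t - (g/2)*t^2 = net_height; the patch's formula is an equivalent rearrangement of the quadratic solution. A quick numeric sanity check with the default serve state from _generate_random_ball (position (-0.5, 0.35, 1.75), velocity (2.5, 0, 0.5)):

import numpy as np

g, net_height, table_height = 9.81, 0.1, 0.77
z = 1.75 - table_height + 0.1  # height above the reference used in the check
v_z = 0.5
# Positive root of (g/2) t^2 - v_z t + (net_height - z) = 0, equivalent to the patch's t_n:
t_n = (v_z + np.sqrt(v_z ** 2 + 2 * g * (z - net_height))) / g
assert abs((z + v_z * t_n - 0.5 * g * t_n ** 2) - net_height) < 1e-9
print(t_n)  # ~0.50 s; with v_x = 2.5 the ball travels from x = -0.5 to ~0.75, past the net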