diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 3732863..9d138cc 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -2,7 +2,6 @@ from typing import Tuple, Optional, Callable import gym import numpy as np -import torch from gym import spaces from mp_pytorch.mp.mp_interfaces import MPInterface @@ -23,8 +22,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): replanning_schedule: Optional[ Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None, reward_aggregation: Callable[[np.ndarray], float] = np.sum, - max_planning_times = None, - desired_conditioning: bool = False + max_planning_times: int = np.inf, + condition_on_desired: bool = False ): """ gym.Wrapper for leveraging a black box approach with a trajectory generator. @@ -59,21 +58,11 @@ class BlackBoxWrapper(gym.ObservationWrapper): # reward computation self.reward_aggregation = reward_aggregation - # self.traj_gen.basis_gn.show_basis(plot=True) # spaces self.return_context_observation = not (learn_sub_trajectories or self.do_replanning) - # self.return_context_observation = True self.traj_gen_action_space = self._get_traj_gen_action_space() self.action_space = self._get_action_space() - # no goal learning - # tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7) - # tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7) - # self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32) - self.action_space.low[0] = 0.8 - self.action_space.high[0] = 1.5 - self.action_space.low[1] = 0.05 - self.action_space.high[1] = 0.15 self.observation_space = self._get_observation_space() # rendering @@ -81,14 +70,12 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.verbose = verbose # condition value - self.desired_conditioning = False + self.condition_on_desired = condition_on_desired self.condition_pos = None self.condition_vel = None self.max_planning_times = max_planning_times - self.plan_counts = 0 - - self.tau_first_prediction = None + self.plan_steps = 0 def observation(self, observation): # return context space if we are @@ -98,9 +85,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): return observation.astype(self.observation_space.dtype) def get_trajectory(self, action: np.ndarray) -> Tuple: - # duration = self.duration - # duration = self.duration - self.current_traj_steps * self.dt - duration = action[0] + duration = self.duration if self.learn_sub_trajectories: duration = None # reset with every new call as we need to set all arguments, such as tau, delay, again. @@ -112,19 +97,11 @@ class BlackBoxWrapper(gym.ObservationWrapper): bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt) # TODO we could think about initializing with the previous desired value in order to have a smooth transition # at least from the planning point of view. 
- # self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel) - if self.current_traj_steps == 0: - self.condition_pos = self.current_pos - self.condition_vel = self.current_vel - # bc_time = torch.as_tensor(bc_time, dtype=torch.float32) - # self.condition_pos = torch.as_tensor(self.condition_pos, dtype=torch.float32) - # self.condition_vel = torch.as_tensor(self.condition_vel, dtype=torch.float32) - print("bc_time", bc_time) - print("self.condition_pos", self.condition_pos) - print("self.condition_vel", self.condition_vel) - self.traj_gen.set_boundary_conditions(bc_time, self.condition_pos, self.condition_vel) - # self.traj_gen.set_duration(duration, self.dt) + condition_pos = self.condition_pos if self.condition_pos is not None else self.current_pos + condition_vel = self.condition_vel if self.condition_vel is not None else self.current_vel + + self.traj_gen.set_boundary_conditions(bc_time, condition_pos, condition_vel) self.traj_gen.set_duration(duration, self.dt) # traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True) position = get_numpy(self.traj_gen.get_traj_pos()) @@ -167,18 +144,10 @@ class BlackBoxWrapper(gym.ObservationWrapper): def step(self, action: np.ndarray): """ This function generates a trajectory based on a MP and then does the usual loop over reset and step""" - # time_is_valid = self.env.check_time_validity(action) - # - # if time_valid: - # TODO remove this part, right now only needed for beer pong - # mp_params, env_spec_params, proceed = self.env.episode_callback(action, self.traj_gen) position, velocity = self.get_trajectory(action) - - print("position", position) - print("velocity", velocity) - - traj_is_valid = self.env.episode_callback(action, position, velocity) + position, velocity = self.env.set_episode_arguments(action, position, velocity) + traj_is_valid, position, velocity = self.env.preprocessing_and_validity_callback(action, position, velocity) trajectory_length = len(position) rewards = np.zeros(shape=(trajectory_length,)) @@ -190,14 +159,12 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos = dict() done = False - if self.verbose >= 2: - desired_pos_traj = [] - desired_vel_traj = [] - pos_traj = [] - vel_traj = [] - - if traj_is_valid: - self.plan_counts += 1 + if traj_is_valid is False: + obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity, + self.return_context_observation) + return self.observation(obs), trajectory_return, done, infos + else: + self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high) @@ -213,24 +180,15 @@ class BlackBoxWrapper(gym.ObservationWrapper): elems[t] = v infos[k] = elems - if self.verbose >= 2: - desired_pos_traj.append(pos) - desired_vel_traj.append(vel) - pos_traj.append(self.current_pos) - vel_traj.append(self.current_vel) - if self.render_kwargs: self.env.render(**self.render_kwargs) if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, - t + 1 + self.current_traj_steps) - and self.max_planning_times is not None and self.plan_counts < self.max_planning_times): + t + 1 + self.current_traj_steps) + and self.plan_steps < self.max_planning_times): - # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times: - # continue - - self.condition_pos = pos if self.desired_conditioning 
else self.current_pos - self.condition_vel = vel if self.desired_conditioning else self.current_vel + self.condition_pos = pos if self.condition_on_desired else None + self.condition_vel = vel if self.condition_on_desired else None break @@ -238,24 +196,15 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.current_traj_steps += t + 1 if self.verbose >= 2: - infos['desired_pos'] = position[:t+1] - infos['desired_vel'] = velocity[:t+1] - infos['current_pos'] = self.current_pos - infos['current_vel'] = self.current_vel + infos['positions'] = position + infos['velocities'] = velocity infos['step_actions'] = actions[:t + 1] infos['step_observations'] = observations[:t + 1] infos['step_rewards'] = rewards[:t + 1] - infos['desired_pos_traj'] = np.array(desired_pos_traj) - infos['desired_vel_traj'] = np.array(desired_vel_traj) - infos['pos_traj'] = np.array(pos_traj) - infos['vel_traj'] = np.array(vel_traj) infos['trajectory_length'] = t + 1 trajectory_return = self.reward_aggregation(rewards[:t + 1]) return self.observation(obs), trajectory_return, done, infos - else: - obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity) - return self.observation(obs), trajectory_return, done, infos def render(self, **kwargs): """Only set render options here, such that they can be used during the rollout. @@ -264,7 +213,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): self.current_traj_steps = 0 - self.plan_counts = 0 - self.tau_first_prediction = None + self.plan_steps = 0 self.traj_gen.reset() + self.condition_vel = None + self.condition_pos = None return super(BlackBoxWrapper, self).reset() diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py index de34346..f41faab 100644 --- a/fancy_gym/black_box/raw_interface_wrapper.py +++ b/fancy_gym/black_box/raw_interface_wrapper.py @@ -52,6 +52,19 @@ class RawInterfaceWrapper(gym.Wrapper): """ return self.env.dt + def preprocessing_and_validity_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) \ + -> Tuple[bool, np.ndarray, np.ndarray]: + """ + Used to preprocess the action and check if the desired trajectory is valid. + """ + return True, pos_traj, vel_traj + + def set_episode_arguments(self, action, pos_traj, vel_traj): + """ + Used to set the arguments for the env that are valid for the whole episode. + """ + return pos_traj, vel_traj + def episode_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.array) -> Tuple[bool]: """ Used to extract the parameters for the movement primitive and other parameters from an action array which might @@ -68,7 +81,6 @@ class RawInterfaceWrapper(gym.Wrapper): def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: """ - Used to return a fake return from the environment if the desired trajectory is invalid. + Used to return an artificial return from the env if the desired trajectory is invalid. 
""" - obs = np.zeros(1) - return obs, 0, True, {} \ No newline at end of file + return np.zeros(1), 0, True, {} \ No newline at end of file diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index ef52785..ea9aec7 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -18,6 +18,8 @@ from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \ BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING +from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \ + MAX_EPISODE_STEPS_TABLE_TENNIS ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} @@ -28,9 +30,7 @@ DEFAULT_BB_DICT_ProMP = { 'trajectory_generator_type': 'promp' }, "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'learn_tau': False, - 'learn_delay': False, + 'phase_generator_type': 'linear' }, "controller_kwargs": { 'controller_type': 'motor', @@ -77,8 +77,6 @@ DEFAULT_BB_DICT_ProDMP = { }, "phase_generator_kwargs": { 'phase_generator_type': 'exp', - 'learn_delay': False, - 'learn_tau': False, }, "controller_kwargs": { 'controller_type': 'motor', @@ -91,9 +89,6 @@ DEFAULT_BB_DICT_ProDMP = { 'num_basis': 5, }, "black_box_kwargs": { - 'replanning_schedule': None, - 'max_planning_times': None, - 'verbose': 2 } } @@ -255,15 +250,28 @@ for ctxt_dim in [2, 4]: register( id='TableTennis{}D-v0'.format(ctxt_dim), entry_point='fancy_gym.envs.mujoco:TableTennisEnv', - max_episode_steps=350, + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, kwargs={ "ctxt_dim": ctxt_dim, 'frame_skip': 4, - 'enable_wind': False, - 'enable_switching_goal': False, + 'goal_switching_step': None, + 'enable_artificial_wind': False, } ) +register( + id='TableTennisWind-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisWind', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, +) + +register( + id='TableTennisGoalSwitching-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, +) + + # movement Primitive Environments ## Simple Reacher @@ -510,24 +518,22 @@ for _v in _versions: for _v in _versions: _name = _v.split("-") - _env_id = f'{_name[0]}ProDMP-{_name[1]}' + _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) kwargs_dict_box_pushing_prodmp['name'] = _v kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]) kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]) - # kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = np.array([3.4944e+01, 4.3734e+01, 9.6711e+01, 2.4429e+02, 5.8272e+02]) - # kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 3.1264e-01 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 
4 - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10. - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desired'] = True register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', @@ -536,21 +542,25 @@ for _v in _versions: ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) ## Table Tennis -_versions = ['TableTennis2D-v0', 'TableTennis4D-v0'] +_versions = ['TableTennis2D-v0', 'TableTennis4D-v0', 'TableTennisWind-v0', 'TableTennisGoalSwitching-v0'] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' kwargs_dict_tt_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.MPWrapper) + if _v == 'TableTennisWind-v0': + kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) + else: + kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) kwargs_dict_tt_promp['name'] = _v kwargs_dict_tt_promp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) kwargs_dict_tt_promp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = True kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = True + kwargs_dict_tt_promp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] + kwargs_dict_tt_promp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3 kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_goal'] = 1 - kwargs_dict_tt_promp['black_box_kwargs']['duration'] = 2. 
kwargs_dict_tt_promp['black_box_kwargs']['verbose'] = 2 register( id=_env_id, @@ -561,22 +571,24 @@ for _v in _versions: for _v in _versions: _name = _v.split("-") - _env_id = f'{_name[0]}ProDMP-{_name[1]}' + _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) - kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.MPWrapper) + if _v == 'TableTennisWind-v0': + kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) + else: + kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) kwargs_dict_tt_prodmp['name'] = _v kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['weights_scale'] = 1.0 - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_scale'] = 1.0 - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = False kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 + kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] + kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. - kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try - kwargs_dict_tt_prodmp['basis_generator_kwargs']['pre_compute_length_factor'] = 5 + kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3 kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 50 == 0 diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py index 2e84dab..ff51711 100644 --- a/fancy_gym/envs/mujoco/__init__.py +++ b/fancy_gym/envs/mujoco/__init__.py @@ -8,4 +8,4 @@ from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv from .reacher.reacher import ReacherEnv from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse -from .table_tennis.table_tennis_env import TableTennisEnv +from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 32be3bf..275bba1 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -71,9 +71,6 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): qpos = self.data.qpos[:7].copy() qvel = self.data.qvel[:7].copy() - if (self._steps + 1) % 10 == 0: - self.calculateOfflineIK(np.array([0.4, 0.3, 0.14]), np.array([0, 1, 0, 0])) - if not unstable_simulation: reward = self._get_reward(episode_end, box_pos, box_quat, target_pos, target_quat, rod_tip_pos, rod_quat, qpos, qvel, action) @@ -96,9 +93,7 @@ class BoxPushingEnvBase(MujocoEnv, 
utils.EzPickle): def reset_model(self): # rest box to initial position self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing) - random_init_pos = self.sample_context() - # box_init_pos = np.array([0.4, 0.3, -0.01, 0.0, 0.0, 0.0, 1.0]) - box_init_pos = random_init_pos + box_init_pos = np.array([0.4, 0.3, -0.01, 0.0, 0.0, 0.0, 1.0]) self.data.joint("box_joint").qpos = box_init_pos # set target position @@ -224,10 +219,8 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): q_old = q q = q + dt * qd_d q = np.clip(q, q_min, q_max) - self.data.qpos[:7] = q mujoco.mj_forward(self.model, self.data) - current_cart_pos = self.data.body("tcp").xpos.copy() current_cart_quat = self.data.body("tcp").xquat.copy() @@ -239,35 +232,27 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): err = np.hstack((cart_pos_error, cart_quat_error)) err_norm = np.sum(cart_pos_error**2) + np.sum((current_cart_quat - desired_cart_quat)**2) - if err_norm > old_err_norm: - # old_err_norm = err_norm q = q_old dt = 0.7 * dt - continue - else: dt = 1.025 * dt - i += 1 - if err_norm < eps: - print("IK converged in {} iterations".format(i)) break - if i > IT_MAX: - print("IK did not converge in {} iterations".format(i)) break old_err_norm = err_norm - ### get Jacobian by mujoco self.data.qpos[:7] = q mujoco.mj_forward(self.model, self.data) + jacp = self.get_body_jacp("tcp")[:, :7].copy() jacr = self.get_body_jacr("tcp")[:, :7].copy() + J = np.concatenate((jacp, jacr), axis=0) Jw = J.dot(w) @@ -291,7 +276,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): qd_d = w.dot(J.transpose()).dot(qd_d) + qd_null - # i += 1 + i += 1 return q @@ -375,14 +360,3 @@ class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase): reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward return reward - -if __name__=="__main__": - env = BoxPushingTemporalSpatialSparse(frame_skip=10) - env.reset() - for i in range(100): - env.reset() - for _ in range(100): - env.render("human") - action = env.action_space.sample() - obs, reward, done, info = env.step(action) - # print("info: {}".format(info)) diff --git a/fancy_gym/envs/mujoco/table_tennis/__init__.py b/fancy_gym/envs/mujoco/table_tennis/__init__.py index 989b5a9..1438432 100644 --- a/fancy_gym/envs/mujoco/table_tennis/__init__.py +++ b/fancy_gym/envs/mujoco/table_tennis/__init__.py @@ -1 +1 @@ -from .mp_wrapper import MPWrapper \ No newline at end of file +from .mp_wrapper import TT_MPWrapper, TTVelObs_MPWrapper diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py index 9564a7d..e33ed6c 100644 --- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py @@ -6,7 +6,7 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound -class MPWrapper(RawInterfaceWrapper): +class TT_MPWrapper(RawInterfaceWrapper): # Random x goal + random init pos @property @@ -16,7 +16,7 @@ class MPWrapper(RawInterfaceWrapper): [False] * 7, # joints velocity [True] * 2, # position ball x, y [False] * 1, # position ball z - # [False] * 3, # velocity ball x, y, z + #[True] * 3, # velocity ball x, y, z [True] * 2, # target landing position # [True] * 1, # time ]) @@ -29,46 +29,26 @@ class MPWrapper(RawInterfaceWrapper): def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: return self.data.qvel[:7].copy() - # def check_time_validity(self, action): - # 
return action[0] <= tau_bound[1] and action[0] >= tau_bound[0] \ - # and action[1] <= delay_bound[1] and action[1] >= delay_bound[0] - # - # def time_invalid_traj_callback(self, action, pos_traj, vel_traj) \ - # -> Tuple[np.ndarray, float, bool, dict]: - # tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) - # delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) - # invalid_penalty = tau_invalid_penalty + delay_invalid_penalty - # obs = np.concatenate([self.get_obs(), np.array([0])]) - # return obs, -invalid_penalty, True, { - # "hit_ball": [False], - # "ball_returned_success": [False], - # "land_dist_error": [10.], - # "is_success": [False], - # 'trajectory_length': 1, - # "num_steps": [1] - # } + def preprocessing_and_validity_callback(self, action, pos_traj, vel_traj): + return self.check_traj_validity(action, pos_traj, vel_traj) - def episode_callback(self, action, pos_traj, vel_traj): - time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \ - or action[1] > delay_bound[1] or action[1] < delay_bound[0] - if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low): - return False - return True + def set_episode_arguments(self, action, pos_traj, vel_traj): + return pos_traj, vel_traj - def invalid_traj_callback(self, action, pos_traj: np.ndarray, vel_traj: np.ndarray) \ - -> Tuple[np.ndarray, float, bool, dict]: - tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) - delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) - violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) - violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0)) - invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ - violate_high_bound_error + violate_low_bound_error - obs = np.concatenate([self.get_obs(), np.array([0])]) - return obs, -invalid_penalty, True, { - "hit_ball": [False], - "ball_returned_success": [False], - "land_dist_error": [10.], - "is_success": [False], - 'trajectory_length': 1, - "num_steps": [1] - } + def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray, + return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]: + return self.get_invalid_traj_step_return(action, pos_traj, return_contextual_obs) + +class TTVelObs_MPWrapper(TT_MPWrapper): + + @property + def context_mask(self): + return np.hstack([ + [False] * 7, # joints position + [False] * 7, # joints velocity + [True] * 2, # position ball x, y + [False] * 1, # position ball z + [True] * 3, # velocity ball x, y, z + [True] * 2, # target landing position + # [True] * 1, # time + ]) \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 72b92e0..e574a38 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -5,10 +5,11 @@ from gym import utils, spaces from gym.envs.mujoco import MujocoEnv from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import check_init_state_validity, magnus_force +from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound import mujoco -MAX_EPISODE_STEPS_TABLE_TENNIS = 250 +MAX_EPISODE_STEPS_TABLE_TENNIS = 350 
CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]]) CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65], @@ -21,11 +22,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): """ 7 DoF table tennis environment """ - def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, - enable_switching_goal: bool = False, - enable_wind: bool = False, enable_magnus: bool = False, - enable_air: bool = False): + goal_switching_step: int = None, + enable_artificial_wind: bool = False): utils.EzPickle.__init__(**locals()) self._steps = 0 @@ -45,8 +44,11 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self._ball_traj = [] self._racket_traj = [] + self._goal_switching_step = goal_switching_step - self._enable_goal_switching = enable_switching_goal + self._enable_artificial_wind = enable_artificial_wind + + self._artificial_force = 0. MujocoEnv.__init__(self, model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"), @@ -56,20 +58,13 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.context_bounds = CONTEXT_BOUNDS_2DIMS elif ctxt_dim == 4: self.context_bounds = CONTEXT_BOUNDS_4DIMS - if self._enable_goal_switching: + if self._goal_switching_step is not None: self.context_bounds = CONTEXT_BOUNDS_SWICHING else: raise NotImplementedError self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) - # complex dynamics settings - if enable_air: - self.model.opt.density = 1.225 - self.model.opt.viscosity = 2.27e-5 - - self._enable_wind = enable_wind - self._enable_magnus = enable_magnus self._wind_vel = np.zeros(3) def _set_ids(self): @@ -86,13 +81,16 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): unstable_simulation = False - if self._enable_goal_switching: - if self._steps == 45 and self.np_random.uniform(0, 1) < 0.5: - self._goal_pos[1] = -self._goal_pos[1] + if self._steps == self._goal_switching_step and self.np_random.uniform(0, 1) < 0.5: + new_goal_pos = self._generate_goal_pos(random=True) + new_goal_pos[1] = -new_goal_pos[1] + self._goal_pos = new_goal_pos self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) mujoco.mj_forward(self.model, self.data) for _ in range(self.frame_skip): + if self._enable_artificial_wind: + self.data.qfrc_applied[-2] = self._artificial_force try: self.do_simulation(action, 1) except Exception as e: @@ -160,16 +158,16 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.data.joint("tar_y").qvel = self._init_ball_state[4] self.data.joint("tar_z").qvel = self._init_ball_state[5] + if self._enable_artificial_wind: + self._artificial_force = self.np_random.uniform(low=-0.1, high=0.1) + self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5]) + self.data.qvel[:7] = np.zeros(7) mujoco.mj_forward(self.model, self.data) - if self._enable_wind: - self._wind_vel[1] = self.np_random.uniform(low=-5, high=5, size=1) - self.model.opt.wind[:3] = self._wind_vel - self._hit_ball = False self._ball_land_on_table = False self._ball_contact_after_hit = False @@ -193,10 +191,6 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.data.joint("tar_x").qpos.copy(), self.data.joint("tar_y").qpos.copy(), self.data.joint("tar_z").qpos.copy(), - # self.data.joint("tar_x").qvel.copy(), - # self.data.joint("tar_y").qvel.copy(), - # self.data.joint("tar_z").qvel.copy(), - # self.data.body("target_ball").xvel.copy(), self._goal_pos.copy(), ]) return obs @@ -237,51 +231,55 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): init_ball_state 
= self._generate_random_ball(random_pos=random_pos, random_vel=random_vel) return init_ball_state -def plot_ball_traj(x_traj, y_traj, z_traj): - import matplotlib.pyplot as plt - fig = plt.figure() - ax = fig.add_subplot(111, projection='3d') - ax.plot(x_traj, y_traj, z_traj) - plt.show() + def _get_traj_invalid_penalty(self, action, pos_traj): + tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) + delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) + violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) + violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0)) + invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ + violate_high_bound_error + violate_low_bound_error + return -invalid_penalty -def plot_ball_traj_2d(x_traj, y_traj): - import matplotlib.pyplot as plt - fig = plt.figure() - ax = fig.add_subplot(111) - ax.plot(x_traj, y_traj) - plt.show() + def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs): + obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj + penalty = self._get_traj_invalid_penalty(action, pos_traj) + return obs, penalty, True, { + "hit_ball": [False], + "ball_returned_success": [False], + "land_dist_error": [10.], + "is_success": [False], + "trajectory_length": 1, + "num_steps": [1], + } -def plot_single_axis(traj, title): - import matplotlib.pyplot as plt - fig = plt.figure() - ax = fig.add_subplot(111) - ax.plot(traj) - ax.set_title(title) - plt.show() + @staticmethod + def check_traj_validity(action, pos_traj, vel_traj): + time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \ + or action[1] > delay_bound[1] or action[1] < delay_bound[0] + if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low): + return False, pos_traj, vel_traj + return True, pos_traj, vel_traj -if __name__ == "__main__": - env = TableTennisEnv(enable_air=True) - # env_with_air = TableTennisEnv(enable_air=True) - for _ in range(1): - obs1 = env.reset() - # obs2 = env_with_air.reset() - x_pos = [] - y_pos = [] - z_pos = [] - x_vel = [] - y_vel = [] - z_vel = [] - for _ in range(2000): - obs, reward, done, info = env.step(np.zeros(7)) - # _, _, _, _ = env_no_air.step(np.zeros(7)) - x_pos.append(env.data.joint("tar_x").qpos[0]) - y_pos.append(env.data.joint("tar_y").qpos[0]) - z_pos.append(env.data.joint("tar_z").qpos[0]) - x_vel.append(env.data.joint("tar_x").qvel[0]) - y_vel.append(env.data.joint("tar_y").qvel[0]) - z_vel.append(env.data.joint("tar_z").qvel[0]) - # print(reward) - if done: - # plot_ball_traj_2d(x_pos, y_pos) - plot_single_axis(x_pos, title="x_vel without air") - break + +class TableTennisWind(TableTennisEnv): + def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4): + super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, enable_artificial_wind=True) + + def _get_obs(self): + obs = np.concatenate([ + self.data.qpos.flat[:7].copy(), + self.data.qvel.flat[:7].copy(), + self.data.joint("tar_x").qpos.copy(), + self.data.joint("tar_y").qpos.copy(), + self.data.joint("tar_z").qpos.copy(), + self.data.joint("tar_x").qvel.copy(), + self.data.joint("tar_y").qvel.copy(), + self.data.joint("tar_z").qvel.copy(), + self._goal_pos.copy(), + ]) + return obs + + +class TableTennisGoalSwitching(TableTennisEnv): + def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99): + 
super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step) diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py index 392e9d4..977ce9e 100644 --- a/fancy_gym/examples/example_replanning_envs.py +++ b/fancy_gym/examples/example_replanning_envs.py @@ -1,38 +1,62 @@ import fancy_gym -import numpy as np -import matplotlib.pyplot as plt -def plot_trajectory(traj): - plt.figure() - plt.plot(traj[:, 3]) - plt.legend() - plt.show() - -def run_replanning_envs(env_name="BoxPushingProDMP-v0", seed=1, iterations=1, render=True): +def example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False): env = fancy_gym.make(env_name, seed=seed) env.reset() for i in range(iterations): done = False - desired_pos_traj = np.zeros((100, 7)) - desired_vel_traj = np.zeros((100, 7)) - real_pos_traj = np.zeros((100, 7)) - real_vel_traj = np.zeros((100, 7)) - t = 0 while done is False: ac = env.action_space.sample() obs, reward, done, info = env.step(ac) - desired_pos_traj[t: t + 25, :] = info['desired_pos'] - desired_vel_traj[t: t + 25, :] = info['desired_vel'] - # real_pos_traj.append(info['current_pos']) - # real_vel_traj.append(info['current_vel']) - t += 25 if render: env.render(mode="human") if done: env.reset() - plot_trajectory(desired_pos_traj) env.close() del env +def example_custom_replanning_envs(seed=0, iteration=100, render=True): + # id for a step-based environment + base_env_id = "BoxPushingDense-v0" + + wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper] + + trajectory_generator_kwargs = {'trajectory_generator_type': 'prodmp', + 'weight_scale': 1} + phase_generator_kwargs = {'phase_generator_type': 'exp'} + controller_kwargs = {'controller_type': 'velocity'} + basis_generator_kwargs = {'basis_generator_type': 'prodmp', + 'num_basis': 5} + + # max_planning_times: the maximum number of plans that can be generated + # replanning_schedule: the trigger for replanning + # condition_on_desired: use desired state as the boundary condition for the next plan + black_box_kwargs = {'max_planning_times': 4, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, + 'condition_on_desired': True} + + env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs, + traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, + phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, + seed=seed) + if render: + env.render(mode="human") + + obs = env.reset() + + for i in range(iteration): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + if done: + env.reset() + + env.close() + del env + + if __name__ == "__main__": - run_replanning_envs(env_name="BoxPushingDenseProDMP-v0", seed=1, iterations=1, render=False) \ No newline at end of file + # run a registered replanning environment + example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False) + + # run a custom replanning environment + example_custom_replanning_envs(seed=0, iteration=8, render=True) \ No newline at end of file diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index e632f54..b533e9c 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -17,8 +17,6 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True # It
takes care of seeding and enables the use of a variety of external environments using the gym interface. env = fancy_gym.make(env_name, seed) - # env.traj_gen.basis_gn.show_basis(plot=True) - returns = 0 # env.render(mode=None) obs = env.reset() @@ -40,22 +38,16 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True # Now the action space is not the raw action but the parametrization of the trajectory generator, # such as a ProMP ac = env.action_space.sample() - # ac[0] = 0.6866657733917236 - # ac[1] = 0.08587364107370377 # This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal # to the return of a trajectory. Default is the sum over the step-wise rewards. - print(f'target obs: {obs[-3:]}') obs, reward, done, info = env.step(ac) - print(f'steps: {info["num_steps"][-1]}') # Aggregated returns returns += reward if done: - # print(reward) + print(reward) obs = env.reset() - print("=================New Episode======================") - # print("steps: {}".format(info["num_steps"][-1])) def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render=True): @@ -163,21 +155,22 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = True + render = False # DMP - # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) + example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) # ProMP - # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) - # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) - # example_mp("TableTennis4DProMP-v0", seed=10, iterations=10, render=True) + example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) + example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) + example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render) - # ProDMP - # example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render) - example_mp("TableTennis4DProDMP-v0", seed=10, iterations=5000, render=render) + # ProDMP with Replanning + example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) + example_mp("TableTennis4DReplanProDMP-v0", seed=10, iterations=20, render=render) + example_mp("TableTennisWindReplanProDMP-v0", seed=10, iterations=20, render=render) # Altered basis functions - # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) + obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) # Custom MP - # example_fully_custom_mp(seed=10, iterations=1, render=render) + example_fully_custom_mp(seed=10, iterations=1, render=render) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 3c73ba9..2e04d71 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -168,11 +168,11 @@ def make_bb( # set tau bounds to minimum of two env steps otherwise computing the velocity is not possible. # maximum is full duration of one episode. 
- if phase_kwargs.get('learn_tau'): + if phase_kwargs.get('learn_tau') and phase_kwargs.get('tau_bound') is None: phase_kwargs["tau_bound"] = [env.dt * 2, black_box_kwargs['duration']] # Max delay is full duration minus two steps due to above reason - if phase_kwargs.get('learn_delay'): + if phase_kwargs.get('learn_delay') and phase_kwargs.get('delay_bound') is None: phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2] phase_gen = get_phase_generator(**phase_kwargs) diff --git a/setup.py b/setup.py index 36dbe56..20e4f3e 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ setup( ], extras_require=extras, install_requires=[ - 'gym[mujoco]<0.25.0,>=0.24.0', + 'gym[mujoco]<0.25.0,>=0.24.1', 'mp_pytorch @ git+https://github.com/ALRhub/MP_PyTorch.git@main' ], packages=[package for package in find_packages() if package.startswith("fancy_gym")], diff --git a/test/test_black_box.py b/test/test_black_box.py index d5e3a88..5ade1ae 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -67,28 +67,32 @@ def test_missing_wrapper(env_id: str): fancy_gym.make_bb(env_id, [], {}, {}, {}, {}, {}) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) def test_missing_local_state(mp_type: str): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) env.reset() with pytest.raises(NotImplementedError): env.step(env.action_space.sample()) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS)) @pytest.mark.parametrize('verbose', [1, 2]) def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]], verbose: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + env_id, wrapper_class = env_wrap env = fancy_gym.make_bb(env_id, [wrapper_class], {'verbose': verbose}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) env.reset() info_keys = list(env.step(env.action_space.sample())[3].keys()) @@ -104,15 +108,17 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]] assert all(e in info_keys for e in mp_keys) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS)) def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + env_id, wrapper_class = env_wrap env = fancy_gym.make_bb(env_id, [wrapper_class], {}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) for _ in range(5): env.reset() @@ -121,14 +127,15 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): assert length == env.spec.max_episode_steps -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) 
@pytest.mark.parametrize('reward_aggregation', [np.sum, np.mean, np.median, lambda x: np.mean(x[::2])]) def test_aggregation(mp_type: str, reward_aggregation: Callable[[np.ndarray], float]): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'reward_aggregation': reward_aggregation}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) env.reset() # ToyEnv only returns 1 as reward assert env.step(env.action_space.sample())[1] == reward_aggregation(np.ones(50, )) @@ -149,12 +156,13 @@ def test_context_space(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapp assert env.observation_space.shape == wrapper.context_mask[wrapper.context_mask].shape -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('num_dof', [0, 1, 2, 5]) @pytest.mark.parametrize('num_basis', [0, 1, 2, 5]) @pytest.mark.parametrize('learn_tau', [True, False]) @pytest.mark.parametrize('learn_delay', [True, False]) def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: bool, learn_delay: bool): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {}, {'trajectory_generator_type': mp_type, 'action_dim': num_dof @@ -164,28 +172,29 @@ def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: boo 'learn_tau': learn_tau, 'learn_delay': learn_delay }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, 'num_basis': num_basis }) base_dims = num_dof * num_basis - additional_dims = num_dof if mp_type == 'dmp' else 0 + additional_dims = num_dof if 'dmp' in mp_type else 0 traj_modification_dims = int(learn_tau) + int(learn_delay) assert env.action_space.shape[0] == base_dims + traj_modification_dims + additional_dims -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('a', [1]) @pytest.mark.parametrize('b', [1.0]) @pytest.mark.parametrize('c', [[1], [1.0], ['str'], [{'a': 'b'}], [np.ones(3, )]]) @pytest.mark.parametrize('d', [{'a': 1}, {1: 2.0}, {'a': [1.0]}, {'a': np.ones(3, )}, {'a': {'a': 'b'}}]) @pytest.mark.parametrize('e', [Object()]) def test_change_env_kwargs(mp_type: str, a: int, b: float, c: list, d: dict, e: Object): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}, + {'basis_generator_type': basis_generator_type}, a=a, b=b, c=c, d=d, e=e ) assert a is env.a @@ -196,18 +205,20 @@ def test_change_env_kwargs(mp_type: str, a: int, b: float, c: list, d: dict, e: assert e is env.e -@pytest.mark.parametrize('mp_type', ['promp']) +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1]) def test_learn_tau(mp_type: str, tau: float): + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2}, {'trajectory_generator_type': mp_type, }, {'controller_type': 'motor'}, - {'phase_generator_type': 'linear', + {'phase_generator_type': 
phase_generator_type, 'learn_tau': True, 'learn_delay': False }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, }, seed=SEED) d = True @@ -228,26 +239,29 @@ def test_learn_tau(mp_type: str, tau: float): vel = info['velocities'].flatten() # Check end is all same (only true for linear basis) - assert np.all(pos[tau_time_steps:] == pos[-1]) - assert np.all(vel[tau_time_steps:] == vel[-1]) + if phase_generator_type == "linear": + assert np.all(pos[tau_time_steps:] == pos[-1]) + assert np.all(vel[tau_time_steps:] == vel[-1]) # Check active trajectory section is different to end values assert np.all(pos[:tau_time_steps - 1] != pos[-1]) assert np.all(vel[:tau_time_steps - 2] != vel[-1]) - - -@pytest.mark.parametrize('mp_type', ['promp']) +# +# +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75]) def test_learn_delay(mp_type: str, delay: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2}, {'trajectory_generator_type': mp_type, }, {'controller_type': 'motor'}, - {'phase_generator_type': 'linear', + {'phase_generator_type': phase_generator_type, 'learn_tau': False, 'learn_delay': True }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, }, seed=SEED) d = True @@ -274,21 +288,23 @@ def test_learn_delay(mp_type: str, delay: float): # Check active trajectory section is different to beginning values assert np.all(pos[max(1, delay_time_steps):] != pos[0]) assert np.all(vel[max(1, delay_time_steps)] != vel[0]) - - -@pytest.mark.parametrize('mp_type', ['promp']) +# +# +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1]) @pytest.mark.parametrize('delay', [0.25, 0.5, 0.75, 1]) def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2}, {'trajectory_generator_type': mp_type, }, {'controller_type': 'motor'}, - {'phase_generator_type': 'linear', + {'phase_generator_type': phase_generator_type, 'learn_tau': True, 'learn_delay': True }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, }, seed=SEED) if env.spec.max_episode_steps * env.dt < delay + tau: @@ -315,8 +331,9 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): vel = info['velocities'].flatten() # Check end is all same (only true for linear basis) - assert np.all(pos[joint_time_steps:] == pos[-1]) - assert np.all(vel[joint_time_steps:] == vel[-1]) + if phase_generator_type == "linear": + assert np.all(pos[joint_time_steps:] == pos[-1]) + assert np.all(vel[joint_time_steps:] == vel[-1]) # Check beginning is all same (only true for linear basis) assert np.all(pos[:delay_time_steps - 1] == pos[0]) @@ -326,4 +343,4 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): active_pos = pos[delay_time_steps: joint_time_steps - 1] active_vel = vel[delay_time_steps: joint_time_steps - 2] assert np.all(active_pos != pos[-1]) and np.all(active_pos != pos[0]) - assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0]) + assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0]) \ No newline at end of file diff --git 
a/test/test_replanning_envs.py b/test/test_replanning_envs.py deleted file mode 100644 index 300faed..0000000 --- a/test/test_replanning_envs.py +++ /dev/null @@ -1,23 +0,0 @@ -from itertools import chain - -import pytest - -import fancy_gym -from test.utils import run_env, run_env_determinism - -Fancy_ProDMP_IDS = fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP'] - -All_ProDMP_IDS = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP'] - - - -@pytest.mark.parametrize('env_id', All_ProDMP_IDS) -def test_replanning_envs(env_id: str): - """Tests that ProDMP environments run without errors using random actions.""" - run_env(env_id) - -@pytest.mark.parametrize('env_id', All_ProDMP_IDS) -def test_replanning_determinism(env_id: str): - """Tests that ProDMP environments are deterministic.""" - run_env_determinism(env_id, 0) - diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index a42bb65..9d04d02 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -98,7 +98,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter assert length <= np.round(env.traj_gen.tau.numpy() / env.dt) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS)) @pytest.mark.parametrize('add_time_aware_wrapper_before', [True, False]) @pytest.mark.parametrize('replanning_time', [10, 100, 1000]) @@ -114,11 +114,14 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra replanning_schedule = lambda c_pos, c_vel, obs, c_action, t: t % replanning_time == 0 + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if 'dmp' in mp_type else 'linear' + env = fancy_gym.make_bb(env_id, [wrapper_class], {'replanning_schedule': replanning_schedule, 'verbose': 2}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, - {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}, seed=SEED) + {'phase_generator_type': phase_generator_type}, + {'basis_generator_type': basis_generator_type}, seed=SEED) assert env.do_replanning assert callable(env.replanning_schedule) @@ -142,3 +145,189 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra env.reset() assert replanning_schedule(None, None, None, None, length) + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_steps: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'max_planning_times': max_planning_times, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + _, _, d, _ = env.step(env.action_space.sample()) + planning_times += 1 + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) 
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +@pytest.mark.parametrize('tau', [0.5, 1.0, 1.5, 2.0]) +def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_segment_steps: int, tau: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': True, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + action = env.action_space.sample() + action[0] = tau + _, _, d, info = env.step(action) + planning_times += 1 + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +@pytest.mark.parametrize('delay', [0.1, 0.25, 0.5, 0.75]) +def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_segment_steps: int, delay: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': True + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + action = env.action_space.sample() + action[0] = delay + _, _, d, info = env.step(action) + + delay_time_steps = int(np.round(delay / env.dt)) + pos = info['positions'].flatten() + vel = info['velocities'].flatten() + + # Check beginning is all same (only true for linear basis) + if planning_times == 0: + assert np.all(pos[:max(1, delay_time_steps - 1)] == pos[0]) + assert np.all(vel[:max(1, delay_time_steps - 2)] == vel[0]) + + # only valid when delay < sub_segment_steps + elif planning_times > 0 and delay_time_steps < sub_segment_steps: + assert np.all(pos[1:max(1, delay_time_steps - 1)] != pos[0]) + assert np.all(vel[1:max(1, delay_time_steps - 2)] != vel[0]) + + # Check active trajectory section is different to beginning values + assert np.all(pos[max(1, delay_time_steps):] != pos[0]) + assert np.all(vel[max(1, delay_time_steps)] != vel[0]) + + planning_times += 1 + + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10, 15]) +@pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75]) +@pytest.mark.parametrize('tau', [0.5, 0.75, 1.0]) +def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: int, sub_segment_steps: int, + delay: float, tau: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 
'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': True, + 'learn_delay': True + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + action = env.action_space.sample() + action[0] = tau + action[1] = delay + _, _, d, info = env.step(action) + + delay_time_steps = int(np.round(delay / env.dt)) + + pos = info['positions'].flatten() + vel = info['velocities'].flatten() + + # Delay only applies to first planning time + if planning_times == 0: + # Check delay is applied + assert np.all(pos[:max(1, delay_time_steps - 1)] == pos[0]) + assert np.all(vel[:max(1, delay_time_steps - 2)] == vel[0]) + # Check active trajectory section is different to beginning values + assert np.all(pos[max(1, delay_time_steps):] != pos[0]) + assert np.all(vel[max(1, delay_time_steps)] != vel[0]) + + planning_times += 1 + + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_steps: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'max_planning_times': max_planning_times, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + for i in range(max_planning_times): + _, _, d, _ = env.step(env.action_space.sample()) + assert d
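
Usage note (editor's addition, not part of the patch): a minimal sketch of how one of the replanning environments registered in this diff could be exercised. The env id, the fancy_gym.make call, and the (obs, reward, done, info) step return follow the registrations and example scripts added above; the random-action rollout itself is purely illustrative.

import fancy_gym

# roll out random trajectory parametrizations in a replanning table tennis env
env = fancy_gym.make("TableTennis4DReplanProDMP-v0", seed=1)
obs = env.reset()
done = False
while not done:
    # each step consumes one set of ProDMP parameters and returns the aggregated
    # reward of the executed sub-trajectory (replanned every 50 steps, at most 3 times)
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()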