Merge pull request #73 from HongyiZhouCN/69-mp-params-validity

1. The tau and delay bounds can now be set correctly in __init__.py
2. Add a random initialization flag to all box-pushing environments
Hongyi Zhou 2023-09-05 10:24:37 +02:00, committed by GitHub
commit 70e2404452
7 changed files with 69 additions and 37 deletions

.gitignore
View File

@@ -106,6 +106,7 @@ venv.bak/
 # pycharm
 .DS_Store
 .idea
+.vscode
 #configs
 /configs/db.cfg

View File

@@ -55,6 +55,14 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         # self.traj_gen.set_mp_times(self.time_steps)
         self.traj_gen.set_duration(self.duration, self.dt)
+        # take the tau and delay bounds from the phase generator if it
+        # defines them; otherwise leave both parameters unbounded
+        self.tau_bound = [-np.inf, np.inf]
+        self.delay_bound = [-np.inf, np.inf]
+        if hasattr(self.traj_gen.phase_gn, "tau_bound"):
+            self.tau_bound = self.traj_gen.phase_gn.tau_bound
+        if hasattr(self.traj_gen.phase_gn, "delay_bound"):
+            self.delay_bound = self.traj_gen.phase_gn.delay_bound

         # reward computation
         self.reward_aggregation = reward_aggregation
@@ -139,7 +147,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         position, velocity = self.get_trajectory(action)
         position, velocity = self.env.set_episode_arguments(action, position, velocity)
-        traj_is_valid, position, velocity = self.env.preprocessing_and_validity_callback(action, position, velocity)
+        traj_is_valid, position, velocity = self.env.preprocessing_and_validity_callback(action, position, velocity,
+                                                                                         self.tau_bound, self.delay_bound)

         trajectory_length = len(position)
         rewards = np.zeros(shape=(trajectory_length,))
@@ -153,7 +162,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         if not traj_is_valid:
-            obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity,
-                                                                                 self.return_context_observation)
+            obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity,
+                                                                                 self.return_context_observation,
+                                                                                 self.tau_bound, self.delay_bound)
             return self.observation(obs), trajectory_return, done, infos

         self.plan_steps += 1
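The wrapper falls back to infinite bounds when the phase generator has no timing limits. A minimal, self-contained sketch of that discovery pattern (the Dummy* classes are stand-ins for illustration, not fancy_gym classes):

import numpy as np

class DummyPhaseGeneratorWithBounds:
    """Stand-in for a phase generator with learnable tau and delay."""
    tau_bound = [0.8, 1.5]      # allowed trajectory length scaling
    delay_bound = [0.05, 0.15]  # allowed wait before execution

class DummyPhaseGeneratorNoBounds:
    """Stand-in for a phase generator without timing limits."""

def resolve_bounds(phase_gn):
    # getattr with a default mirrors the hasattr checks above
    tau_bound = getattr(phase_gn, "tau_bound", [-np.inf, np.inf])
    delay_bound = getattr(phase_gn, "delay_bound", [-np.inf, np.inf])
    return tau_bound, delay_bound

print(resolve_bounds(DummyPhaseGeneratorWithBounds()))  # ([0.8, 1.5], [0.05, 0.15])
print(resolve_bounds(DummyPhaseGeneratorNoBounds()))    # ([-inf, inf], [-inf, inf])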

View File

@@ -52,7 +52,8 @@ class RawInterfaceWrapper(gym.Wrapper):
         """
         return self.env.dt

-    def preprocessing_and_validity_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) \
+    def preprocessing_and_validity_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray,
+                                            tau_bound: list = None, delay_bound: list = None) \
             -> Tuple[bool, np.ndarray, np.ndarray]:
         """
         Used to preprocess the action and check if the desired trajectory is valid.
@@ -61,6 +62,8 @@ class RawInterfaceWrapper(gym.Wrapper):
                 specified, else only traj_gen parameters
             pos_traj: a vector instance of the raw position trajectory
             vel_traj: a vector instance of the raw velocity trajectory
+            tau_bound: a list of two elements, the lower and upper bound of the trajectory length scaling factor
+            delay_bound: a list of two elements, the lower and upper bound of the time to wait before execution
         Returns:
             validity flag: bool, True if the raw trajectory is valid, False if not
             pos_traj: a vector instance of the preprocessed position trajectory
@@ -97,7 +100,8 @@ class RawInterfaceWrapper(gym.Wrapper):
         """
         return True

-    def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
+    def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray,
+                              tau_bound: list, delay_bound: list) -> Tuple[np.ndarray, float, bool, dict]:
         """
         Used to return an artificial return from the env if the desired trajectory is invalid.
         Args:
@@ -105,6 +109,8 @@ class RawInterfaceWrapper(gym.Wrapper):
                 specified, else only traj_gen parameters
             pos_traj: a vector instance of the raw position trajectory
             vel_traj: a vector instance of the raw velocity trajectory
+            tau_bound: a list of two elements, the lower and upper bound of the trajectory length scaling factor
+            delay_bound: a list of two elements, the lower and upper bound of the time to wait before execution
         Returns:
             obs: artificial observation if the trajectory is invalid, by default a zero vector
             reward: artificial reward if the trajectory is invalid, by default 0
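For a concrete picture of the new signature, here is a hedged sketch of a custom wrapper overriding the validity callback (MyMPWrapper is invented for this example, and the import path is an assumption, not part of this commit):

from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper  # assumed path

class MyMPWrapper(RawInterfaceWrapper):
    def preprocessing_and_validity_callback(self, action, pos_traj, vel_traj,
                                            tau_bound=None, delay_bound=None):
        # assuming learn_tau and learn_delay are enabled, action[0] is the
        # learned tau and action[1] the learned delay
        valid = True
        if tau_bound is not None:
            valid = valid and tau_bound[0] <= action[0] <= tau_bound[1]
        if delay_bound is not None:
            valid = valid and delay_bound[0] <= action[1] <= delay_bound[1]
        return valid, pos_traj, vel_traj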

View File

@@ -237,6 +237,12 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]:
         entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type),
         max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING,
     )
+    register(
+        id='BoxPushingRandomInit{}-v0'.format(reward_type),
+        entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type),
+        max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING,
+        kwargs={"random_init": True}
+    )

 # Here we use the same reward as in BeerPong-v0, but now consider after the release,
 # only one time step, i.e. we simulate until the end of the episode
@@ -500,7 +506,9 @@ for _v in _versions:
 # ########################################################################################################################
 ## Box Pushing
-_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0']
+_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0',
+             'BoxPushingRandomInitDense-v0', 'BoxPushingRandomInitTemporalSparse-v0',
+             'BoxPushingRandomInitTemporalSpatialSparse-v0']
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
@@ -518,6 +526,27 @@ for _v in _versions:
     )
     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProDMP-{_name[1]}'
+    kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
+    kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper)
+    kwargs_dict_box_pushing_prodmp['name'] = _v
+    kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
+    kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
+    kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3
+    kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
+    kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
+    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
+    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
+    kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
+    register(
+        id=_env_id,
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
+        kwargs=kwargs_dict_box_pushing_prodmp
+    )
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
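As a quick worked example of the id scheme in this loop, a base id is split at the dash and the generator name is spliced in:

# e.g. for one of the new random-init variants
_v = 'BoxPushingRandomInitDense-v0'
_name = _v.split("-")                     # ['BoxPushingRandomInitDense', 'v0']
_env_id = f'{_name[0]}ProDMP-{_name[1]}'  # 'BoxPushingRandomInitDenseProDMP-v0'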
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}'
@@ -529,9 +558,7 @@ for _v in _versions:
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
-    kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
-    kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['disable_goal'] = True
-    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5
+    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
     kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
     kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4
@@ -557,8 +584,8 @@ for _v in _versions:
     kwargs_dict_tt_promp['name'] = _v
     kwargs_dict_tt_promp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0])
     kwargs_dict_tt_promp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
-    kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = False
-    kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = False
+    kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = True
+    kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = True
     kwargs_dict_tt_promp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5]
     kwargs_dict_tt_promp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15]
     kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3
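After these registrations, the new environments can be created like any other fancy_gym id. A quick sanity-check sketch, mirroring the demo loop removed from the box-pushing file further down:

import fancy_gym

# random_init=True is forwarded via the registration kwargs, so the box
# pose is resampled on every reset
env = fancy_gym.make("BoxPushingRandomInitDense-v0", seed=0)
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()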

View File

@@ -370,8 +370,8 @@ class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase):

 class BoxPushingTemporalSpatialSparse2(BoxPushingEnvBase):

-    def __init__(self, frame_skip: int = 10):
-        super(BoxPushingTemporalSpatialSparse2, self).__init__(frame_skip=frame_skip)
+    def __init__(self, frame_skip: int = 10, random_init: bool = False):
+        super(BoxPushingTemporalSpatialSparse2, self).__init__(frame_skip=frame_skip, random_init=random_init)

     def _get_reward(self, episode_end, box_pos, box_quat, target_pos, target_quat,
                     rod_tip_pos, rod_quat, qpos, qvel, action):
@@ -405,9 +405,9 @@ class BoxPushingTemporalSpatialSparse2(BoxPushingEnvBase):
         return reward

-class BoxPushingBruceSparse(BoxPushingEnvBase):
+class BoxPushingNoConstraintSparse(BoxPushingEnvBase):

-    def __init__(self, frame_skip: int = 10):
-        super(BoxPushingBruceSparse, self).__init__(frame_skip=frame_skip)
+    def __init__(self, frame_skip: int = 10, random_init: bool = False):
+        super(BoxPushingNoConstraintSparse, self).__init__(frame_skip=frame_skip, random_init=random_init)

     def _get_reward(self, episode_end, box_pos, box_quat, target_pos, target_quat,
                     rod_tip_pos, rod_quat, qpos, qvel, action):
@@ -435,16 +435,3 @@ class BoxPushingNoConstraintSparse(BoxPushingEnvBase):
         box_rot_vel = box_rot_pos_vel[:3]
         box_pos_vel = box_rot_pos_vel[3:]
         return -rot_coeff * np.linalg.norm(box_rot_vel) - pos_coeff * np.linalg.norm(box_pos_vel)
-
-if __name__=="__main__":
-    import fancy_gym
-    env = fancy_gym.make("BoxPushingDenseProDMP-v0", seed=0)
-    env.reset()
-    for i in range(1000):
-        env.render(mode="human")
-        obs, rew, done, info = env.step(env.action_space.sample())
-        if done:
-            env.reset()
-        # print(f"box_end_velocity: {info['box_end_vel']}")
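The base class's sampling logic is not shown in this diff; for intuition only, here is a hypothetical sketch of what a random_init flag typically does in a reset path (DummyBoxEnv and the workspace limits are invented for illustration, not BoxPushingEnvBase's actual code):

import numpy as np

class DummyBoxEnv:
    """Illustrative stand-in; not BoxPushingEnvBase."""
    def __init__(self, random_init: bool = False):
        self.random_init = random_init
        self._rng = np.random.default_rng(0)
        self._default_box_pos = np.array([0.4, 0.3])

    def reset_box_pos(self):
        if self.random_init:
            # sample the box somewhere inside a made-up workspace
            return self._rng.uniform(low=[0.3, -0.45], high=[0.6, 0.45])
        return self._default_box_pos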

View File

@@ -29,15 +29,16 @@ class TT_MPWrapper(RawInterfaceWrapper):
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
         return self.data.qvel[:7].copy()

-    def preprocessing_and_validity_callback(self, action, pos_traj, vel_traj):
-        return self.check_traj_validity(action, pos_traj, vel_traj)
+    def preprocessing_and_validity_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray,
+                                            tau_bound: list, delay_bound: list):
+        return self.check_traj_validity(action, pos_traj, vel_traj, tau_bound, delay_bound)

     def set_episode_arguments(self, action, pos_traj, vel_traj):
         return pos_traj, vel_traj

     def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray,
-                              return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]:
-        return self.get_invalid_traj_step_return(action, pos_traj, return_contextual_obs)
+                              return_contextual_obs: bool, tau_bound: list, delay_bound: list) -> Tuple[np.ndarray, float, bool, dict]:
+        return self.get_invalid_traj_step_return(action, pos_traj, return_contextual_obs, tau_bound, delay_bound)


 class TTVelObs_MPWrapper(TT_MPWrapper):

View File

@@ -5,7 +5,7 @@ from gym import utils, spaces
 from gym.envs.mujoco import MujocoEnv

 from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import is_init_state_valid, magnus_force
-from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound
+from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high

 import mujoco
@@ -225,7 +225,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
         init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel)
         return init_ball_state

-    def _get_traj_invalid_penalty(self, action, pos_traj):
+    def _get_traj_invalid_penalty(self, action, pos_traj, tau_bound, delay_bound):
         tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
         delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
         violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0))
@@ -234,9 +234,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
                           violate_high_bound_error + violate_low_bound_error
         return -invalid_penalty

-    def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs):
+    def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs, tau_bound, delay_bound):
         obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])])  # 0 for invalid traj
-        penalty = self._get_traj_invalid_penalty(action, pos_traj)
+        penalty = self._get_traj_invalid_penalty(action, pos_traj, tau_bound, delay_bound)
         return obs, penalty, True, {
             "hit_ball": [False],
             "ball_returned_success": [False],
@@ -247,7 +247,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
         }

     @staticmethod
-    def check_traj_validity(action, pos_traj, vel_traj):
+    def check_traj_validity(action, pos_traj, vel_traj, tau_bound, delay_bound):
         time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \
                        or action[1] > delay_bound[1] or action[1] < delay_bound[0]
         if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low):
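The timing part of the penalty is a hinge on how far tau and delay fall outside their bounds. A standalone numeric check, with the bounds taken from the table tennis ProMP registration above:

import numpy as np

# reproduces the timing terms of _get_traj_invalid_penalty
tau_bound, delay_bound = [0.8, 1.5], [0.05, 0.15]
action = np.array([1.7, 0.01])  # tau = 1.7 (too long), delay = 0.01 (too short)

tau_pen = 3 * (max(0, action[0] - tau_bound[1]) + max(0, tau_bound[0] - action[0]))
delay_pen = 3 * (max(0, action[1] - delay_bound[1]) + max(0, delay_bound[0] - action[1]))

print(tau_pen)    # 3 * 0.2  -> ~0.6 (up to floating-point error)
print(delay_pen)  # 3 * 0.04 -> ~0.12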