add random initialized box pushing

This commit is contained in:
hongyi.zhou 2023-05-15 13:19:04 +02:00
parent 1c9e973bbd
commit d667cd0ff2

View File

@ -237,6 +237,12 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]:
entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type),
max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING,
) )
register(
id='BoxPushingRandomInit{}-v0'.format(reward_type),
entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type),
max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING,
kwargs={"random_init": True}
)
# Here we use the same reward as in BeerPong-v0, but now consider after the release, # Here we use the same reward as in BeerPong-v0, but now consider after the release,
# only one time step, i.e. we simulate until the end of th episode # only one time step, i.e. we simulate until the end of th episode
@ -500,7 +506,9 @@ for _v in _versions:
# ######################################################################################################################## # ########################################################################################################################
## Box Pushing ## Box Pushing
_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0'] _versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0',
'BoxPushingRandomInitDense-v0', 'BoxPushingRandomInitTemporalSparse-v0',
'BoxPushingRandomInitTemporalSpatialSparse-v0']
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
@ -518,6 +526,27 @@ for _v in _versions:
) )
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}ProDMP-{_name[1]}'
kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper)
kwargs_dict_box_pushing_prodmp['name'] = _v
kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
register(
id=_env_id,
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_box_pushing_prodmp
)
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}'
@ -529,9 +558,7 @@ for _v in _versions:
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['disable_goal'] = True
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4