add random initialized box pushing
This commit is contained in:
parent
1c9e973bbd
commit
d667cd0ff2
@ -237,6 +237,12 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]:
|
||||
entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type),
|
||||
max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING,
|
||||
)
|
||||
register(
|
||||
id='BoxPushingRandomInit{}-v0'.format(reward_type),
|
||||
entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type),
|
||||
max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING,
|
||||
kwargs={"random_init": True}
|
||||
)
|
||||
|
||||
# Here we use the same reward as in BeerPong-v0, but now consider after the release,
|
||||
# only one time step, i.e. we simulate until the end of th episode
|
||||
@ -500,7 +506,9 @@ for _v in _versions:
|
||||
# ########################################################################################################################
|
||||
|
||||
## Box Pushing
|
||||
_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0']
|
||||
_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0',
|
||||
'BoxPushingRandomInitDense-v0', 'BoxPushingRandomInitTemporalSparse-v0',
|
||||
'BoxPushingRandomInitTemporalSpatialSparse-v0']
|
||||
for _v in _versions:
|
||||
_name = _v.split("-")
|
||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
@ -518,6 +526,27 @@ for _v in _versions:
|
||||
)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
for _v in _versions:
|
||||
_name = _v.split("-")
|
||||
_env_id = f'{_name[0]}ProDMP-{_name[1]}'
|
||||
kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
|
||||
kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper)
|
||||
kwargs_dict_box_pushing_prodmp['name'] = _v
|
||||
kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
|
||||
kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
|
||||
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
|
||||
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
|
||||
kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_box_pushing_prodmp
|
||||
)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id)
|
||||
|
||||
for _v in _versions:
|
||||
_name = _v.split("-")
|
||||
_env_id = f'{_name[0]}ReplanProDMP-{_name[1]}'
|
||||
@ -529,9 +558,7 @@ for _v in _versions:
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
|
||||
kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['disable_goal'] = True
|
||||
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5
|
||||
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
|
||||
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
|
||||
kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
|
||||
kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4
|
||||
|
Loading…
Reference in New Issue
Block a user