From d667cd0ff2323210af1696e5f16bfc57d302b10c Mon Sep 17 00:00:00 2001 From: "hongyi.zhou" Date: Mon, 15 May 2023 13:19:04 +0200 Subject: [PATCH] add random initialized box pushing --- fancy_gym/envs/__init__.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index b5bc154..32bd8f8 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -237,6 +237,12 @@ for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]: entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, ) + register( + id='BoxPushingRandomInit{}-v0'.format(reward_type), + entry_point='fancy_gym.envs.mujoco:BoxPushing{}'.format(reward_type), + max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING, + kwargs={"random_init": True} + ) # Here we use the same reward as in BeerPong-v0, but now consider after the release, # only one time step, i.e. we simulate until the end of th episode @@ -500,7 +506,9 @@ for _v in _versions: # ######################################################################################################################## ## Box Pushing -_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0'] +_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0', + 'BoxPushingRandomInitDense-v0', 'BoxPushingRandomInitTemporalSparse-v0', + 'BoxPushingRandomInitTemporalSpatialSparse-v0'] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' @@ -518,6 +526,27 @@ for _v in _versions: ) ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}ProDMP-{_name[1]}' + kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) + kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) + kwargs_dict_box_pushing_prodmp['name'] = _v + kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]) + kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]) + kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3 + kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 + kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 + kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 + register( + id=_env_id, + entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_box_pushing_prodmp + ) + ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) + for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' @@ -529,9 +558,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True - kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 - kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['disable_goal'] = True - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 5 + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4