diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index 68e2177..8adea09 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -47,6 +47,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
 
         # trajectory generation
         self.traj_gen = trajectory_generator
+
+        self.traj_gen.basis_gn.show_basis(plot=True)
         self.tracking_controller = tracking_controller
         # self.time_steps = np.linspace(0, self.duration, self.traj_steps)
         # self.traj_gen.set_mp_times(self.time_steps)
@@ -159,7 +161,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                                               t + 1 + self.current_traj_steps):
                     break
 
-            infos.update({k: v[:t] for k, v in infos.items()})
+            infos.update({k: v[:t+1] for k, v in infos.items()})
             self.current_traj_steps += t + 1
 
             if self.verbose >= 2:
diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index 9f0299e..4176b1e 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -16,6 +16,7 @@ from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
 from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
+from .mujoco.box_pushing.box_pushing_env import BoxPushingEnv, MAX_EPISODE_STEPS_BOX_PUSHING
 
 ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
 
@@ -36,7 +37,8 @@ DEFAULT_BB_DICT_ProMP = {
     "basis_generator_kwargs": {
         'basis_generator_type': 'zero_rbf',
         'num_basis': 5,
-        'num_basis_zero_start': 1
+        'num_basis_zero_start': 1,
+        'basis_bandwidth_factor': 3.0,
     }
 }
 
@@ -197,6 +199,18 @@ register(
     max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )
 
+# Box pushing environments with different rewards
+for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]:
+    register(
+        id='BoxPushing{}-v0'.format(reward_type),
+        entry_point='fancy_gym.envs.mujoco:BoxPushingEnv',
+        max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING//10,  # divided by frame skip
+        kwargs={
+            "reward_type": reward_type,
+            "frame_skip": 10
+        }
+    )
+
 # Here we use the same reward as in BeerPong-v0, but now consider after the release,
 # only one time step, i.e. we simulate until the end of the episode
 register(
@@ -431,7 +445,26 @@ for _v in _versions:
     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 # ########################################################################################################################
-#
+
+## Box Pushing
+_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_box_pushing_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
+    kwargs_dict_box_pushing_promp['wrappers'].append(mujoco.box_pushing.MPWrapper)
+    kwargs_dict_box_pushing_promp['name'] = _v
+    kwargs_dict_box_pushing_promp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
+    kwargs_dict_box_pushing_promp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
+    kwargs_dict_box_pushing_promp['basis_generator_kwargs']['basis_bandwidth_factor'] = 2
+
+    register(
+        id=_env_id,
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
+        kwargs=kwargs_dict_box_pushing_promp
+    )
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
 # #
 # ## Walker2DJump
 # _versions = ['Walker2DJump-v0']
diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py
index 840691f..684594c 100644
--- a/fancy_gym/envs/mujoco/__init__.py
+++ b/fancy_gym/envs/mujoco/__init__.py
@@ -7,3 +7,4 @@ from .hopper_throw.hopper_throw import HopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
 from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
+from .box_pushing.box_pushing_env import BoxPushingEnv
diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
index 2ca6b01..a6bc27b 100644
--- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
@@ -26,21 +26,22 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
     3. time-spatial-depend sparse reward
     """
 
-    def __init__(self, reward_type: str = "dense", frame_skip: int = 10):
+    def __init__(self, reward_type: str = "Dense", frame_skip: int = 10):
         utils.EzPickle.__init__(**locals())
         self._steps = 0
         self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.])
         self.init_qvel_box_pushing = np.zeros(15)
-        assert reward_type in ["dense", "temporal_sparse", "temporal_spatial_sparse"], "unrecognized reward type"
+        self.frame_skip = frame_skip
+        assert reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"], "unrecognized reward type"
         self.reward = BoxPushingReward(reward_type, q_max, q_min, q_dot_max)
+        self._episode_energy = 0.
         MujocoEnv.__init__(self,
                            model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"),
-                           frame_skip=frame_skip,
+                           frame_skip=self.frame_skip,
                            mujoco_bindings="mujoco")
         self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
 
     def step(self, action):
-        episode_end = False
         action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
         resultant_action = np.clip(action + self.data.qfrc_bias[:7].copy(), -q_torque_max, q_torque_max)
 
@@ -53,8 +54,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
             unstable_simulation = True
 
         self._steps += 1
-        if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING:
-            episode_end = True
+        self._episode_energy += np.sum(np.square(action))
+
+        episode_end = True if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING//self.frame_skip else False
 
         box_pos = self.data.body("box_0").xpos.copy()
         box_quat = self.data.body("box_0").xquat.copy()
@@ -72,7 +74,16 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
             reward = -50
 
         obs = self._get_obs()
-        infos = dict()
+        box_goal_pos_dist = 0. if not episode_end else np.linalg.norm(box_pos - target_pos)
+        box_goal_quat_dist = 0. if not episode_end else rotation_distance(box_quat, target_quat)
+        infos = {
+            'episode_end': episode_end,
+            'box_goal_pos_dist': box_goal_pos_dist,
+            'box_goal_rot_dist': box_goal_quat_dist,
+            'episode_energy': 0. if not episode_end else self._episode_energy,
+            'is_success': True if episode_end and box_goal_pos_dist < 0.05 and box_goal_quat_dist < 0.5 else False,
+            'num_steps': self._steps
+        }
         return obs, reward, episode_end, infos
 
     def reset_model(self):
@@ -83,6 +94,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
 
         # set target position
         box_target_pos = self.sample_context()
+        box_target_pos[0] = 0.4
+        box_target_pos[1] = -0.3
+        box_target_pos[-4:] = np.array([0.0, 0.0, 0.0, 1.0])
         self.model.body_pos[2] = box_target_pos[:3]
         self.model.body_quat[2] = box_target_pos[-4:]
         self.model.body_pos[3] = box_target_pos[:3]
@@ -96,6 +110,7 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
         mujoco.mj_forward(self.model, self.data)
 
         self._steps = 0
+        self._episode_energy = 0.
 
         return self._get_obs()
 
@@ -189,7 +204,6 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
 
             err = np.hstack((cart_pos_error, cart_quat_error))
             err_norm = np.sum(cart_pos_error**2) + np.sum((current_cart_quat - desired_cart_quat)**2)
-            print("err_norm: {}, old_err_norm: {}".format(err_norm, old_err_norm))
            if err_norm > old_err_norm:
                 q = q_old
                 dt = 0.7 * dt
@@ -245,3 +259,4 @@ if __name__=="__main__":
         env.render("human")
         action = env.action_space.sample()
         obs, reward, done, info = env.step(action)
+        print("info: {}".format(info))
diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py
index cfa15ff..9b6d78b 100644
--- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py
@@ -183,11 +183,11 @@ class TemporalSpatialSparseReward(RewardBase):
 
 
 def BoxPushingReward(reward_type, q_max, q_min, q_dot_max):
-    if reward_type == 'dense':
+    if reward_type == 'Dense':
         return DenseReward(q_max, q_min, q_dot_max)
-    elif reward_type == 'temporal_sparse':
+    elif reward_type == 'TemporalSparse':
         return TemporalSparseReward(q_max, q_min, q_dot_max)
-    elif reward_type == 'temporal_spatial_sparse':
+    elif reward_type == 'TemporalSpatialSparse':
         return TemporalSpatialSparseReward(q_max, q_min, q_dot_max)
     else:
         raise NotImplementedError
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py
index 172005a..ad6fab1 100644
--- a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py
+++ b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py
@@ -16,17 +16,17 @@ class MPWrapper(RawInterfaceWrapper):
             [False] * 7,  # joints gravity compensation
             [False] * 3,  # position of rod tip
             [False] * 4,  # orientation of rod
-            [True] * 3,  # position of box
-            [True] * 4,  # orientation of box
+            [False] * 3,  # position of box
+            [False] * 4,  # orientation of box
             [True] * 3,  # position of target
             [True] * 4,  # orientation of target
-            [True] * 1,  # time
+            # [True] * 1,  # time
         ])
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.data.qpos[3:6].copy()
+        return self.data.qpos[:7].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.data.qvel[3:6].copy()
+        return self.data.qvel[:7].copy()
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index f1c25e2..c6636dc 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -161,9 +161,10 @@ if __name__ == '__main__':
 
     # ProMP
     # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+    example_mp("BoxPushingDenseProMP-v0", seed=10, iterations=50, render=render)
 
     # Altered basis functions
-    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
+    # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
 
     # Custom MP
-    example_fully_custom_mp(seed=10, iterations=1, render=render)
+    # example_fully_custom_mp(seed=10, iterations=1, render=render)
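
For reference, a minimal rollout sketch for the step-based environments registered above, assuming fancy_gym's usual `fancy_gym.make(env_id, seed)` helper and the 4-tuple gym step API used in the diff:

```python
import fancy_gym  # importing fancy_gym registers the BoxPushing*-v0 IDs

# Any of the three reward variants registered above should work here:
# BoxPushingDense-v0, BoxPushingTemporalSparse-v0, BoxPushingTemporalSpatialSparse-v0
env = fancy_gym.make("BoxPushingDense-v0", seed=1)
obs = env.reset()

done = False
while not done:
    action = env.action_space.sample()  # 7-dim normalized torque action in [-1, 1]
    obs, reward, done, info = env.step(action)

# box_goal_pos_dist, box_goal_rot_dist, episode_energy and is_success
# are only populated on the final step (see step() above).
print(info["is_success"], info["box_goal_pos_dist"], info["box_goal_rot_dist"])
```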
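The same kind of sketch for the new ProMP variant, mirroring example_mp from examples_movement_primitives.py: a single env.step() executes the whole episode, with the action interpreted as the ProMP weight vector (env ID taken from the registration loop above):

```python
import fancy_gym

env = fancy_gym.make("BoxPushingDenseProMP-v0", seed=10)
obs = env.reset()

for _ in range(5):
    # One step = one full trajectory generated from the sampled ProMP weights,
    # tracked by the PD controller with the p/d gains registered above.
    ac = env.action_space.sample()
    obs, reward, done, info = env.step(ac)
    obs = env.reset()
```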
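As a self-contained check of the context_mask change in mp_wrapper.py (pure numpy, observation values are stand-ins): of the 28 observation entries, only the 7 target entries (position and orientation) remain visible as context to the episode-based policy.

```python
import numpy as np

# Mask rebuilt exactly as in MPWrapper.context_mask above.
context_mask = np.hstack([
    [False] * 7,  # joints gravity compensation
    [False] * 3,  # position of rod tip
    [False] * 4,  # orientation of rod
    [False] * 3,  # position of box
    [False] * 4,  # orientation of box
    [True] * 3,   # position of target
    [True] * 4,   # orientation of target
])

obs = np.arange(28.0)        # stand-in for a real observation vector
context = obs[context_mask]  # what the episode-based policy conditions on
print(context.shape)         # (7,) -> target position + target orientation
```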