update infos of box pushing envs

Hongyi Zhou 2022-10-12 16:45:13 +02:00
parent 38e8958ebd
commit ad8201ea67
7 changed files with 73 additions and 21 deletions

View File

@@ -47,6 +47,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         # trajectory generation
         self.traj_gen = trajectory_generator
+        self.traj_gen.basis_gn.show_basis(plot=True)
         self.tracking_controller = tracking_controller
         # self.time_steps = np.linspace(0, self.duration, self.traj_steps)
         # self.traj_gen.set_mp_times(self.time_steps)
@@ -159,7 +161,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                         t + 1 + self.current_traj_steps):
                     break
-            infos.update({k: v[:t] for k, v in infos.items()})
+            infos.update({k: v[:t+1] for k, v in infos.items()})
             self.current_traj_steps += t + 1
             if self.verbose >= 2:
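The second hunk fixes an off-by-one when the per-step infos are truncated after early termination: with t the index of the last executed step, v[:t] drops that step's entry while v[:t+1] keeps it. A minimal sketch with made-up values:

infos = {'reward': [0.1, 0.2, 0.3]}  # one entry per executed step
t = 2                                # index of the last executed step
assert {k: v[:t] for k, v in infos.items()}['reward'] == [0.1, 0.2]          # old: final step lost
assert {k: v[:t+1] for k, v in infos.items()}['reward'] == [0.1, 0.2, 0.3]   # new: final step kept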

View File

@@ -16,6 +16,7 @@ from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
 from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
+from .mujoco.box_pushing.box_pushing_env import BoxPushingEnv, MAX_EPISODE_STEPS_BOX_PUSHING

 ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
@@ -36,7 +37,8 @@ DEFAULT_BB_DICT_ProMP = {
     "basis_generator_kwargs": {
         'basis_generator_type': 'zero_rbf',
         'num_basis': 5,
-        'num_basis_zero_start': 1
+        'num_basis_zero_start': 1,
+        'basis_bandwidth_factor': 3.0,
     }
 }
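The new basis_bandwidth_factor controls how wide each radial basis function is relative to the spacing of its neighbors; larger values mean more overlap between bases and smoother trajectories. A generic RBF sketch of the idea (not MP_PyTorch's exact zero_rbf parameterization; all names here are illustrative):

import numpy as np

def rbf_basis(t, num_basis=5, bandwidth_factor=3.0):
    # evenly spaced centers over the normalized phase [0, 1]
    centers = np.linspace(0.0, 1.0, num_basis)
    # bandwidth scales with the spacing between adjacent centers
    width = bandwidth_factor * (centers[1] - centers[0])
    return np.exp(-0.5 * ((t[:, None] - centers[None, :]) / width) ** 2)

phi = rbf_basis(np.linspace(0, 1, 100))  # shape (100, 5)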
@@ -197,6 +199,18 @@ register(
     max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )

+# Box pushing environments with different rewards
+for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]:
+    register(
+        id='BoxPushing{}-v0'.format(reward_type),
+        entry_point='fancy_gym.envs.mujoco:BoxPushingEnv',
+        max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING//10,  # divided by frame skip
+        kwargs={
+            "reward_type": reward_type,
+            "frame_skip": 10
+        }
+    )
+
 # Here we use the same reward as in BeerPong-v0, but now consider after the release,
 # only one time step, i.e. we simulate until the end of the episode
 register(
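The loop above registers the three step-based variants (BoxPushingDense-v0, BoxPushingTemporalSparse-v0, BoxPushingTemporalSpatialSparse-v0). A minimal usage sketch, assuming fancy_gym's usual make helper and the old gym four-tuple step API used elsewhere in this commit:

import fancy_gym

env = fancy_gym.make('BoxPushingDense-v0', seed=1)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
# info keys added in this commit; distances are populated at episode end
print(info['is_success'], info['box_goal_pos_dist'])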
@@ -431,7 +445,26 @@ for _v in _versions:
     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

 # ########################################################################################################################
 #
+## Box Pushing
+_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_box_pushing_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
+    kwargs_dict_box_pushing_promp['wrappers'].append(mujoco.box_pushing.MPWrapper)
+    kwargs_dict_box_pushing_promp['name'] = _v
+    kwargs_dict_box_pushing_promp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
+    kwargs_dict_box_pushing_promp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
+    kwargs_dict_box_pushing_promp['basis_generator_kwargs']['basis_bandwidth_factor'] = 2
+    register(
+        id=_env_id,
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
+        kwargs=kwargs_dict_box_pushing_promp
+    )
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 #
 # ## Walker2DJump
 # _versions = ['Walker2DJump-v0']
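For the ProMP variants registered above, one env.step() call executes a whole trajectory: make_bb_env_helper wraps the step-based env in the BlackBoxWrapper, so the action is a ProMP parameter vector rather than a torque command, and the wrapper aggregates the per-step infos (see the v[:t+1] fix in the first file). A hedged sketch of the intended usage:

import fancy_gym

env = fancy_gym.make('BoxPushingDenseProMP-v0', seed=1)
obs = env.reset()
params = env.action_space.sample()          # ProMP weights, not joint torques
obs, reward, done, info = env.step(params)  # rolls out the full trajectory
# done is True after the rollout for these episodic tasks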

View File

@@ -7,3 +7,4 @@ from .hopper_throw.hopper_throw import HopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
 from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
+from .box_pushing.box_pushing_env import BoxPushingEnv

View File

@@ -26,21 +26,22 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
     3. time-spatial-depend sparse reward
     """

-    def __init__(self, reward_type: str = "dense", frame_skip: int = 10):
+    def __init__(self, reward_type: str = "Dense", frame_skip: int = 10):
         utils.EzPickle.__init__(**locals())
         self._steps = 0
         self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.])
         self.init_qvel_box_pushing = np.zeros(15)
-        assert reward_type in ["dense", "temporal_sparse", "temporal_spatial_sparse"], "unrecognized reward type"
+        self.frame_skip = frame_skip
+        assert reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"], "unrecognized reward type"
         self.reward = BoxPushingReward(reward_type, q_max, q_min, q_dot_max)
+        self._episode_energy = 0.
         MujocoEnv.__init__(self,
                            model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"),
-                           frame_skip=frame_skip,
+                           frame_skip=self.frame_skip,
                            mujoco_bindings="mujoco")
         self.action_space = spaces.Box(low=-1, high=1, shape=(7,))

     def step(self, action):
-        episode_end = False
         action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
         resultant_action = np.clip(action + self.data.qfrc_bias[:7].copy(), -q_torque_max, q_torque_max)
@@ -53,8 +54,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
             unstable_simulation = True

         self._steps += 1
-        if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING:
-            episode_end = True
+        self._episode_energy += np.sum(np.square(action))
+
+        episode_end = True if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING//self.frame_skip else False

         box_pos = self.data.body("box_0").xpos.copy()
         box_quat = self.data.body("box_0").xquat.copy()
@@ -72,7 +74,16 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
             reward = -50

         obs = self._get_obs()
-        infos = dict()
+        box_goal_pos_dist = 0. if not episode_end else np.linalg.norm(box_pos - target_pos)
+        box_goal_quat_dist = 0. if not episode_end else rotation_distance(box_quat, target_quat)
+        infos = {
+            'episode_end': episode_end,
+            'box_goal_pos_dist': box_goal_pos_dist,
+            'box_goal_rot_dist': box_goal_quat_dist,
+            'episode_energy': 0. if not episode_end else self._episode_energy,
+            'is_success': True if episode_end and box_goal_pos_dist < 0.05 and box_goal_quat_dist < 0.5 else False,
+            'num_steps': self._steps
+        }
         return obs, reward, episode_end, infos

     def reset_model(self):
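The new info dict computes box_goal_quat_dist with rotation_distance(), and is_success thresholds it at 0.5 rad (plus 5 cm on position). The helper itself is not part of this diff; a standard quaternion geodesic distance consistent with this usage would be (a sketch, not necessarily the repo's implementation):

import numpy as np

def rotation_distance(q1, q2):
    # geodesic angle (radians) between two unit quaternions; the abs()
    # makes q and -q, which encode the same rotation, distance zero
    return 2.0 * np.arccos(np.clip(np.abs(np.dot(q1, q2)), 0.0, 1.0))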
@@ -83,6 +94,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
         # set target position
         box_target_pos = self.sample_context()
+        box_target_pos[0] = 0.4
+        box_target_pos[1] = -0.3
+        box_target_pos[-4:] = np.array([0.0, 0.0, 0.0, 1.0])
         self.model.body_pos[2] = box_target_pos[:3]
         self.model.body_quat[2] = box_target_pos[-4:]
         self.model.body_pos[3] = box_target_pos[:3]
@@ -96,6 +110,7 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
         mujoco.mj_forward(self.model, self.data)

         self._steps = 0
+        self._episode_energy = 0.

         return self._get_obs()
@@ -189,7 +204,6 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
             err = np.hstack((cart_pos_error, cart_quat_error))
             err_norm = np.sum(cart_pos_error**2) + np.sum((current_cart_quat - desired_cart_quat)**2)
-            print("err_norm: {}, old_err_norm: {}".format(err_norm, old_err_norm))
             if err_norm > old_err_norm:
                 q = q_old
                 dt = 0.7 * dt
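This hunk only removes a debug print from the IK solver, but the surrounding lines show its step-size backtracking: when the error norm grows, the step is reverted and dt shrunk by 0.7. A self-contained toy illustration of that pattern (a 1-D stand-in, not the repo's solver):

import numpy as np

def solve(q=0.0, dt=2.5, eps=1e-10, max_iter=100):
    err = lambda q: (q - 3.0) ** 2
    err_norm = err(q)
    for _ in range(max_iter):
        q_old, old_err_norm = q, err_norm
        q = q - dt * (q - 3.0)              # gradient-style update step
        err_norm = err(q)
        if err_norm > old_err_norm:         # overshot: undo and damp
            q, err_norm = q_old, old_err_norm
            dt = 0.7 * dt
        if err_norm < eps:
            break
    return q

print(solve())  # converges to ~3.0 after the first oversized step is damped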
@@ -245,3 +259,4 @@ if __name__=="__main__":
         env.render("human")
         action = env.action_space.sample()
         obs, reward, done, info = env.step(action)
+        print("info: {}".format(info))

View File

@@ -183,11 +183,11 @@ class TemporalSpatialSparseReward(RewardBase):

 def BoxPushingReward(reward_type, q_max, q_min, q_dot_max):
-    if reward_type == 'dense':
+    if reward_type == 'Dense':
         return DenseReward(q_max, q_min, q_dot_max)
-    elif reward_type == 'temporal_sparse':
+    elif reward_type == 'TemporalSparse':
         return TemporalSparseReward(q_max, q_min, q_dot_max)
-    elif reward_type == 'temporal_spatial_sparse':
+    elif reward_type == 'TemporalSpatialSparse':
         return TemporalSpatialSparseReward(q_max, q_min, q_dot_max)
     else:
         raise NotImplementedError
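Renaming the reward-type strings here keeps them in sync with the assert in BoxPushingEnv.__init__ and with the registered env ids. A purely illustrative table-based dispatch would make that coupling harder to miss; equivalent behavior, same classes:

_REWARDS = {
    'Dense': DenseReward,
    'TemporalSparse': TemporalSparseReward,
    'TemporalSpatialSparse': TemporalSpatialSparseReward,
}

def BoxPushingReward(reward_type, q_max, q_min, q_dot_max):
    if reward_type not in _REWARDS:
        raise NotImplementedError(f"unrecognized reward type: {reward_type}")
    return _REWARDS[reward_type](q_max, q_min, q_dot_max)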

View File

@@ -16,17 +16,17 @@ class MPWrapper(RawInterfaceWrapper):
             [False] * 7,  # joints gravity compensation
             [False] * 3,  # position of rod tip
             [False] * 4,  # orientation of rod
-            [True] * 3,  # position of box
-            [True] * 4,  # orientation of box
+            [False] * 3,  # position of box
+            [False] * 4,  # orientation of box
             [True] * 3,  # position of target
             [True] * 4,  # orientation of target
-            [True] * 1,  # time
+            # [True] * 1,  # time
         ])

     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.data.qpos[3:6].copy()
+        return self.data.qpos[:7].copy()

     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.data.qvel[3:6].copy()
+        return self.data.qvel[:7].copy()
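The mask change hides the box pose (and the time entry) from the movement-primitive context, so only the target pose conditions the ProMP, while current_pos/current_vel now return all seven robot joints instead of a 3-entry slice, matching the 7-dim p_gains/d_gains of the tracking controller. A small sketch of how such a context mask is typically applied (full_observation is a placeholder of matching length):

import numpy as np

context_mask = np.hstack([
    [False] * 7,                # joints (gravity compensation)
    [False] * 3, [False] * 4,   # rod tip position / orientation
    [False] * 3, [False] * 4,   # box pose, now excluded from the context
    [True] * 3, [True] * 4,     # target pose, the only remaining context
])
full_observation = np.zeros(28)
context = full_observation[context_mask]  # 7 values: target position + quaternion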

View File

@@ -161,9 +161,10 @@ if __name__ == '__main__':
     # ProMP
     # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+    example_mp("BoxPushingDenseProMP-v0", seed=10, iterations=50, render=render)

     # Altered basis functions
-    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
+    # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)

     # Custom MP
-    example_fully_custom_mp(seed=10, iterations=1, render=render)
+    # example_fully_custom_mp(seed=10, iterations=1, render=render)