update infos of box pushing envs
This commit is contained in:
parent
38e8958ebd
commit
ad8201ea67
@ -47,6 +47,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
||||
|
||||
# trajectory generation
|
||||
self.traj_gen = trajectory_generator
|
||||
|
||||
self.traj_gen.basis_gn.show_basis(plot=True)
|
||||
self.tracking_controller = tracking_controller
|
||||
# self.time_steps = np.linspace(0, self.duration, self.traj_steps)
|
||||
# self.traj_gen.set_mp_times(self.time_steps)
|
||||
@ -159,7 +161,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
||||
t + 1 + self.current_traj_steps):
|
||||
break
|
||||
|
||||
infos.update({k: v[:t] for k, v in infos.items()})
|
||||
infos.update({k: v[:t+1] for k, v in infos.items()})
|
||||
self.current_traj_steps += t + 1
|
||||
|
||||
if self.verbose >= 2:
|
||||
|
@ -16,6 +16,7 @@ from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
|
||||
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
|
||||
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
|
||||
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
|
||||
from .mujoco.box_pushing.box_pushing_env import BoxPushingEnv, MAX_EPISODE_STEPS_BOX_PUSHING
|
||||
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||
|
||||
@ -36,7 +37,8 @@ DEFAULT_BB_DICT_ProMP = {
|
||||
"basis_generator_kwargs": {
|
||||
'basis_generator_type': 'zero_rbf',
|
||||
'num_basis': 5,
|
||||
'num_basis_zero_start': 1
|
||||
'num_basis_zero_start': 1,
|
||||
'basis_bandwidth_factor': 3.0,
|
||||
}
|
||||
}
|
||||
|
||||
@ -197,6 +199,18 @@ register(
|
||||
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
|
||||
)
|
||||
|
||||
# Box pushing environments with different rewards
|
||||
for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]:
|
||||
register(
|
||||
id='BoxPushing{}-v0'.format(reward_type),
|
||||
entry_point='fancy_gym.envs.mujoco:BoxPushingEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING//10, # divided by frames skip
|
||||
kwargs={
|
||||
"reward_type": reward_type,
|
||||
"frame_skip": 10
|
||||
}
|
||||
)
|
||||
|
||||
# Here we use the same reward as in BeerPong-v0, but now consider after the release,
|
||||
# only one time step, i.e. we simulate until the end of th episode
|
||||
register(
|
||||
@ -431,7 +445,26 @@ for _v in _versions:
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
# ########################################################################################################################
|
||||
#
|
||||
|
||||
## Box Pushing
|
||||
_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0']
|
||||
for _v in _versions:
|
||||
_name = _v.split("-")
|
||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
kwargs_dict_box_pushing_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_box_pushing_promp['wrappers'].append(mujoco.box_pushing.MPWrapper)
|
||||
kwargs_dict_box_pushing_promp['name'] = _v
|
||||
kwargs_dict_box_pushing_promp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
|
||||
kwargs_dict_box_pushing_promp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
|
||||
kwargs_dict_box_pushing_promp['basis_generator_kwargs']['basis_bandwidth_factor'] = 2
|
||||
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_box_pushing_promp
|
||||
)
|
||||
ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
#
|
||||
# ## Walker2DJump
|
||||
# _versions = ['Walker2DJump-v0']
|
||||
|
@ -7,3 +7,4 @@ from .hopper_throw.hopper_throw import HopperThrowEnv
|
||||
from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
|
||||
from .reacher.reacher import ReacherEnv
|
||||
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
|
||||
from .box_pushing.box_pushing_env import BoxPushingEnv
|
||||
|
@ -26,21 +26,22 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
|
||||
3. time-spatial-depend sparse reward
|
||||
"""
|
||||
|
||||
def __init__(self, reward_type: str = "dense", frame_skip: int = 10):
|
||||
def __init__(self, reward_type: str = "Dense", frame_skip: int = 10):
|
||||
utils.EzPickle.__init__(**locals())
|
||||
self._steps = 0
|
||||
self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.])
|
||||
self.init_qvel_box_pushing = np.zeros(15)
|
||||
assert reward_type in ["dense", "temporal_sparse", "temporal_spatial_sparse"], "unrecognized reward type"
|
||||
self.frame_skip = frame_skip
|
||||
assert reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"], "unrecognized reward type"
|
||||
self.reward = BoxPushingReward(reward_type, q_max, q_min, q_dot_max)
|
||||
self._episode_energy = 0.
|
||||
MujocoEnv.__init__(self,
|
||||
model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"),
|
||||
frame_skip=frame_skip,
|
||||
frame_skip=self.frame_skip,
|
||||
mujoco_bindings="mujoco")
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
|
||||
|
||||
def step(self, action):
|
||||
episode_end = False
|
||||
action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
|
||||
resultant_action = np.clip(action + self.data.qfrc_bias[:7].copy(), -q_torque_max, q_torque_max)
|
||||
|
||||
@ -53,8 +54,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
|
||||
unstable_simulation = True
|
||||
|
||||
self._steps += 1
|
||||
if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING:
|
||||
episode_end = True
|
||||
self._episode_energy += np.sum(np.square(action))
|
||||
|
||||
episode_end = True if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING//self.frame_skip else False
|
||||
|
||||
box_pos = self.data.body("box_0").xpos.copy()
|
||||
box_quat = self.data.body("box_0").xquat.copy()
|
||||
@ -72,7 +74,16 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
|
||||
reward = -50
|
||||
|
||||
obs = self._get_obs()
|
||||
infos = dict()
|
||||
box_goal_pos_dist = 0. if not episode_end else np.linalg.norm(box_pos - target_pos)
|
||||
box_goal_quat_dist = 0. if not episode_end else rotation_distance(box_quat, target_quat)
|
||||
infos = {
|
||||
'episode_end': episode_end,
|
||||
'box_goal_pos_dist': box_goal_pos_dist,
|
||||
'box_goal_rot_dist': box_goal_quat_dist,
|
||||
'episode_energy': 0. if not episode_end else self._episode_energy,
|
||||
'is_success': True if episode_end and box_goal_pos_dist < 0.05 and box_goal_quat_dist < 0.5 else False,
|
||||
'num_steps': self._steps
|
||||
}
|
||||
return obs, reward, episode_end, infos
|
||||
|
||||
def reset_model(self):
|
||||
@ -83,6 +94,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
# set target position
|
||||
box_target_pos = self.sample_context()
|
||||
box_target_pos[0] = 0.4
|
||||
box_target_pos[1] = -0.3
|
||||
box_target_pos[-4:] = np.array([0.0, 0.0, 0.0, 1.0])
|
||||
self.model.body_pos[2] = box_target_pos[:3]
|
||||
self.model.body_quat[2] = box_target_pos[-4:]
|
||||
self.model.body_pos[3] = box_target_pos[:3]
|
||||
@ -96,6 +110,7 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
mujoco.mj_forward(self.model, self.data)
|
||||
self._steps = 0
|
||||
self._episode_energy = 0.
|
||||
|
||||
return self._get_obs()
|
||||
|
||||
@ -189,7 +204,6 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
|
||||
|
||||
err = np.hstack((cart_pos_error, cart_quat_error))
|
||||
err_norm = np.sum(cart_pos_error**2) + np.sum((current_cart_quat - desired_cart_quat)**2)
|
||||
print("err_norm: {}, old_err_norm: {}".format(err_norm, old_err_norm))
|
||||
if err_norm > old_err_norm:
|
||||
q = q_old
|
||||
dt = 0.7 * dt
|
||||
@ -245,3 +259,4 @@ if __name__=="__main__":
|
||||
env.render("human")
|
||||
action = env.action_space.sample()
|
||||
obs, reward, done, info = env.step(action)
|
||||
print("info: {}".format(info))
|
||||
|
@ -183,11 +183,11 @@ class TemporalSpatialSparseReward(RewardBase):
|
||||
|
||||
|
||||
def BoxPushingReward(reward_type, q_max, q_min, q_dot_max):
|
||||
if reward_type == 'dense':
|
||||
if reward_type == 'Dense':
|
||||
return DenseReward(q_max, q_min, q_dot_max)
|
||||
elif reward_type == 'temporal_sparse':
|
||||
elif reward_type == 'TemporalSparse':
|
||||
return TemporalSparseReward(q_max, q_min, q_dot_max)
|
||||
elif reward_type == 'temporal_spatial_sparse':
|
||||
elif reward_type == 'TemporalSpatialSparse':
|
||||
return TemporalSpatialSparseReward(q_max, q_min, q_dot_max)
|
||||
else:
|
||||
raise NotImplementedError
|
@ -16,17 +16,17 @@ class MPWrapper(RawInterfaceWrapper):
|
||||
[False] * 7, # joints gravity compensation
|
||||
[False] * 3, # position of rod tip
|
||||
[False] * 4, # orientation of rod
|
||||
[True] * 3, # position of box
|
||||
[True] * 4, # orientation of box
|
||||
[False] * 3, # position of box
|
||||
[False] * 4, # orientation of box
|
||||
[True] * 3, # position of target
|
||||
[True] * 4, # orientation of target
|
||||
[True] * 1, # time
|
||||
# [True] * 1, # time
|
||||
])
|
||||
|
||||
@property
|
||||
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.data.qpos[3:6].copy()
|
||||
return self.data.qpos[:7].copy()
|
||||
|
||||
@property
|
||||
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||
return self.data.qvel[3:6].copy()
|
||||
return self.data.qvel[:7].copy()
|
||||
|
@ -161,9 +161,10 @@ if __name__ == '__main__':
|
||||
|
||||
# ProMP
|
||||
# example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
|
||||
example_mp("BoxPushingDenseProMP-v0", seed=10, iterations=50, render=render)
|
||||
|
||||
# Altered basis functions
|
||||
obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
|
||||
# obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
|
||||
|
||||
# Custom MP
|
||||
example_fully_custom_mp(seed=10, iterations=1, render=render)
|
||||
# example_fully_custom_mp(seed=10, iterations=1, render=render)
|
||||
|
Loading…
Reference in New Issue
Block a user