update infos of box pushing envs
parent 38e8958ebd
commit ad8201ea67
@@ -47,6 +47,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
 
         # trajectory generation
         self.traj_gen = trajectory_generator
+
+        self.traj_gen.basis_gn.show_basis(plot=True)
         self.tracking_controller = tracking_controller
         # self.time_steps = np.linspace(0, self.duration, self.traj_steps)
         # self.traj_gen.set_mp_times(self.time_steps)
@@ -159,7 +161,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                                   t + 1 + self.current_traj_steps):
                     break
 
-            infos.update({k: v[:t] for k, v in infos.items()})
+            infos.update({k: v[:t+1] for k, v in infos.items()})
             self.current_traj_steps += t + 1
 
             if self.verbose >= 2:
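The slice change above fixes an off-by-one: t is the index of the step at which the rollout loop broke, so v[:t] dropped that step's info entry. A minimal standalone sketch (not repo code) of the difference:

    import numpy as np

    rewards = np.array([0.1, 0.2, 0.3, 0.4])  # hypothetical per-step info values
    t = 2                                      # loop broke at step index 2
    assert list(rewards[:t]) == [0.1, 0.2]           # old slice: final step lost
    assert list(rewards[:t+1]) == [0.1, 0.2, 0.3]    # new slice: final step kept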
@@ -16,6 +16,7 @@ from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
 from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
+from .mujoco.box_pushing.box_pushing_env import BoxPushingEnv, MAX_EPISODE_STEPS_BOX_PUSHING
 
 ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
 
@@ -36,7 +37,8 @@ DEFAULT_BB_DICT_ProMP = {
     "basis_generator_kwargs": {
         'basis_generator_type': 'zero_rbf',
         'num_basis': 5,
-        'num_basis_zero_start': 1
+        'num_basis_zero_start': 1,
+        'basis_bandwidth_factor': 3.0,
     }
 }
 
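For context on the new basis_bandwidth_factor key: the factor scales each RBF's width relative to the spacing between neighboring centers, so larger values give smoother, more overlapping basis functions. An illustrative sketch, my own simplification rather than the library's zero_rbf generator:

    import numpy as np

    def rbf_basis(times, num_basis=5, bandwidth_factor=3.0):
        # Centers are spread evenly over the phase; width grows with the factor.
        centers = np.linspace(0.0, 1.0, num_basis)
        spacing = centers[1] - centers[0]
        width = (bandwidth_factor * spacing) ** 2
        phi = np.exp(-0.5 * (times[:, None] - centers[None, :]) ** 2 / width)
        return phi / phi.sum(axis=1, keepdims=True)  # normalized RBF features

    features = rbf_basis(np.linspace(0.0, 1.0, 100))  # shape (100, 5)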
@@ -197,6 +199,18 @@ register(
     max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )
 
+# Box pushing environments with different rewards
+for reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"]:
+    register(
+        id='BoxPushing{}-v0'.format(reward_type),
+        entry_point='fancy_gym.envs.mujoco:BoxPushingEnv',
+        max_episode_steps=MAX_EPISODE_STEPS_BOX_PUSHING//10,  # divided by frame_skip
+        kwargs={
+            "reward_type": reward_type,
+            "frame_skip": 10
+        }
+    )
+
 # Here we use the same reward as in BeerPong-v0, but now consider after the release,
 # only one time step, i.e. we simulate until the end of the episode
 register(
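Assuming the usual fancy_gym entry point (a usage sketch, not part of this commit), the step-based variants registered above would be driven like any other gym env:

    import fancy_gym

    env = fancy_gym.make("BoxPushingTemporalSparse-v0", seed=1)
    obs = env.reset()
    for _ in range(100):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()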
@@ -431,7 +445,26 @@ for _v in _versions:
     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 # ########################################################################################################################
-#
+
+## Box Pushing
+_versions = ['BoxPushingDense-v0', 'BoxPushingTemporalSparse-v0', 'BoxPushingTemporalSpatialSparse-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_box_pushing_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
+    kwargs_dict_box_pushing_promp['wrappers'].append(mujoco.box_pushing.MPWrapper)
+    kwargs_dict_box_pushing_promp['name'] = _v
+    kwargs_dict_box_pushing_promp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
+    kwargs_dict_box_pushing_promp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
+    kwargs_dict_box_pushing_promp['basis_generator_kwargs']['basis_bandwidth_factor'] = 2
+
+    register(
+        id=_env_id,
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
+        kwargs=kwargs_dict_box_pushing_promp
+    )
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
 #
 # ## Walker2DJump
 # _versions = ['Walker2DJump-v0']
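For the ProMP variants registered above, a single env.step call consumes a full weight vector and rolls out the entire trajectory internally. A sketch under the same fancy_gym.make assumption:

    import fancy_gym

    env = fancy_gym.make("BoxPushingDenseProMP-v0", seed=1)
    obs = env.reset()
    params = env.action_space.sample()                  # ProMP weights, not torques
    obs, episode_reward, done, info = env.step(params)  # done after one call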
@@ -7,3 +7,4 @@ from .hopper_throw.hopper_throw import HopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
 from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
+from .box_pushing.box_pushing_env import BoxPushingEnv
@@ -26,21 +26,22 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
     3. time-spatial-dependent sparse reward
     """
 
-    def __init__(self, reward_type: str = "dense", frame_skip: int = 10):
+    def __init__(self, reward_type: str = "Dense", frame_skip: int = 10):
         utils.EzPickle.__init__(**locals())
         self._steps = 0
         self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.])
         self.init_qvel_box_pushing = np.zeros(15)
-        assert reward_type in ["dense", "temporal_sparse", "temporal_spatial_sparse"], "unrecognized reward type"
+        self.frame_skip = frame_skip
+        assert reward_type in ["Dense", "TemporalSparse", "TemporalSpatialSparse"], "unrecognized reward type"
         self.reward = BoxPushingReward(reward_type, q_max, q_min, q_dot_max)
+        self._episode_energy = 0.
         MujocoEnv.__init__(self,
                            model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"),
-                           frame_skip=frame_skip,
+                           frame_skip=self.frame_skip,
                            mujoco_bindings="mujoco")
         self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
 
     def step(self, action):
-        episode_end = False
         action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
         resultant_action = np.clip(action + self.data.qfrc_bias[:7].copy(), -q_torque_max, q_torque_max)
 
@@ -53,8 +54,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
             unstable_simulation = True
 
         self._steps += 1
-        if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING:
-            episode_end = True
+        self._episode_energy += np.sum(np.square(action))
+
+        episode_end = True if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING//self.frame_skip else False
 
         box_pos = self.data.body("box_0").xpos.copy()
         box_quat = self.data.body("box_0").xquat.copy()
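Worked numbers for the episode-end check above (the constant's value here is a hypothetical stand-in): MAX_EPISODE_STEPS_BOX_PUSHING counts raw simulator steps, while step() advances frame_skip of them at a time, so the env-level episode is frame_skip times shorter.

    MAX_EPISODE_STEPS_BOX_PUSHING = 1000  # hypothetical value for illustration
    frame_skip = 10
    steps, episode_end = 0, False
    while not episode_end:
        steps += 1
        episode_end = steps >= MAX_EPISODE_STEPS_BOX_PUSHING // frame_skip
    assert steps == 100  # env.step calls per episode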
@@ -72,7 +74,16 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
             reward = -50
 
         obs = self._get_obs()
-        infos = dict()
+        box_goal_pos_dist = 0. if not episode_end else np.linalg.norm(box_pos - target_pos)
+        box_goal_quat_dist = 0. if not episode_end else rotation_distance(box_quat, target_quat)
+        infos = {
+            'episode_end': episode_end,
+            'box_goal_pos_dist': box_goal_pos_dist,
+            'box_goal_rot_dist': box_goal_quat_dist,
+            'episode_energy': 0. if not episode_end else self._episode_energy,
+            'is_success': True if episode_end and box_goal_pos_dist < 0.05 and box_goal_quat_dist < 0.5 else False,
+            'num_steps': self._steps
+        }
         return obs, reward, episode_end, infos
 
     def reset_model(self):
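Since the distances and energy above are only filled in once episode_end is True, evaluation code should read them from the final info dict. A standalone sketch with a hypothetical helper:

    def rollout_metrics(env, policy):
        # Roll out one episode and return the terminal metrics written by step().
        obs = env.reset()
        done, info = False, {}
        while not done:
            obs, reward, done, info = env.step(policy(obs))
        return info['is_success'], info['box_goal_pos_dist'], info['episode_energy']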
@@ -83,6 +94,9 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
 
         # set target position
         box_target_pos = self.sample_context()
+        box_target_pos[0] = 0.4
+        box_target_pos[1] = -0.3
+        box_target_pos[-4:] = np.array([0.0, 0.0, 0.0, 1.0])
         self.model.body_pos[2] = box_target_pos[:3]
         self.model.body_quat[2] = box_target_pos[-4:]
         self.model.body_pos[3] = box_target_pos[:3]
@@ -96,6 +110,7 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
 
         mujoco.mj_forward(self.model, self.data)
         self._steps = 0
+        self._episode_energy = 0.
 
         return self._get_obs()
 
@@ -189,7 +204,6 @@ class BoxPushingEnv(MujocoEnv, utils.EzPickle):
 
         err = np.hstack((cart_pos_error, cart_quat_error))
         err_norm = np.sum(cart_pos_error**2) + np.sum((current_cart_quat - desired_cart_quat)**2)
-        print("err_norm: {}, old_err_norm: {}".format(err_norm, old_err_norm))
         if err_norm > old_err_norm:
             q = q_old
             dt = 0.7 * dt
@@ -245,3 +259,4 @@ if __name__=="__main__":
         env.render("human")
         action = env.action_space.sample()
         obs, reward, done, info = env.step(action)
+        print("info: {}".format(info))
@@ -183,11 +183,11 @@ class TemporalSpatialSparseReward(RewardBase):
 
 
 def BoxPushingReward(reward_type, q_max, q_min, q_dot_max):
-    if reward_type == 'dense':
+    if reward_type == 'Dense':
         return DenseReward(q_max, q_min, q_dot_max)
-    elif reward_type == 'temporal_sparse':
+    elif reward_type == 'TemporalSparse':
         return TemporalSparseReward(q_max, q_min, q_dot_max)
-    elif reward_type == 'temporal_spatial_sparse':
+    elif reward_type == 'TemporalSpatialSparse':
         return TemporalSpatialSparseReward(q_max, q_min, q_dot_max)
     else:
         raise NotImplementedError
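An equivalent dict-based dispatch (a sketch, not this commit's code) would keep the renamed CamelCase strings in one place and report the offending value on failure:

    REWARD_TYPES = {
        'Dense': DenseReward,
        'TemporalSparse': TemporalSparseReward,
        'TemporalSpatialSparse': TemporalSpatialSparseReward,
    }

    def BoxPushingReward(reward_type, q_max, q_min, q_dot_max):
        try:
            return REWARD_TYPES[reward_type](q_max, q_min, q_dot_max)
        except KeyError:
            raise NotImplementedError(f"unrecognized reward type: {reward_type}")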
@@ -16,17 +16,17 @@ class MPWrapper(RawInterfaceWrapper):
             [False] * 7,  # joints gravity compensation
             [False] * 3,  # position of rod tip
             [False] * 4,  # orientation of rod
-            [True] * 3,  # position of box
-            [True] * 4,  # orientation of box
+            [False] * 3,  # position of box
+            [False] * 4,  # orientation of box
             [True] * 3,  # position of target
             [True] * 4,  # orientation of target
-            [True] * 1,  # time
+            # [True] * 1,  # time
         ])
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.data.qpos[3:6].copy()
+        return self.data.qpos[:7].copy()
 
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.data.qvel[3:6].copy()
+        return self.data.qvel[:7].copy()
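How the mask above is consumed: the black-box wrapper keeps only the True entries as the context the episode-level policy observes. A standalone sketch with a hypothetical observation vector:

    import numpy as np

    context_mask = np.hstack([
        [False] * 7,  # joints gravity compensation
        [False] * 3,  # position of rod tip
        [False] * 4,  # orientation of rod
        [False] * 3,  # position of box
        [False] * 4,  # orientation of box
        [True] * 3,   # position of target
        [True] * 4,   # orientation of target
    ])
    obs = np.arange(context_mask.size, dtype=float)  # hypothetical observation
    context = obs[context_mask]                      # only the 7 target entries remain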
@@ -161,9 +161,10 @@ if __name__ == '__main__':
 
     # ProMP
     # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+    example_mp("BoxPushingDenseProMP-v0", seed=10, iterations=50, render=render)
 
     # Altered basis functions
-    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
+    # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
 
     # Custom MP
-    example_fully_custom_mp(seed=10, iterations=1, render=render)
+    # example_fully_custom_mp(seed=10, iterations=1, render=render)