Ported box_pushing to new mujoco bindings

This commit is contained in:
Dominik Moritz Roth 2023-05-19 14:01:31 +02:00
parent bf3ed8a06c
commit e75ab89a37

View File

@ -13,6 +13,7 @@ MAX_EPISODE_STEPS_BOX_PUSHING = 100
BOX_POS_BOUND = np.array([[0.3, -0.45, -0.01], [0.6, 0.45, -0.01]]) BOX_POS_BOUND = np.array([[0.3, -0.45, -0.01], [0.6, 0.45, -0.01]])
class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
""" """
franka box pushing environment franka box pushing environment
@ -41,8 +42,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
self._episode_energy = 0. self._episode_energy = 0.
MujocoEnv.__init__(self, MujocoEnv.__init__(self,
model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"), model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"),
frame_skip=self.frame_skip, frame_skip=self.frame_skip)
mujoco_bindings="mujoco")
self.action_space = spaces.Box(low=-1, high=1, shape=(7,)) self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
def step(self, action): def step(self, action):
@ -246,7 +246,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
old_err_norm = err_norm old_err_norm = err_norm
### get Jacobian by mujoco # get Jacobian by mujoco
self.data.qpos[:7] = q self.data.qpos[:7] = q
mujoco.mj_forward(self.model, self.data) mujoco.mj_forward(self.model, self.data)
@ -280,6 +280,7 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
return q return q
class BoxPushingDense(BoxPushingEnvBase): class BoxPushingDense(BoxPushingEnvBase):
def __init__(self, frame_skip: int = 10): def __init__(self, frame_skip: int = 10):
super(BoxPushingDense, self).__init__(frame_skip=frame_skip) super(BoxPushingDense, self).__init__(frame_skip=frame_skip)
@ -295,7 +296,7 @@ class BoxPushingDense(BoxPushingEnvBase):
energy_cost = -0.0005 * np.sum(np.square(action)) energy_cost = -0.0005 * np.sum(np.square(action))
reward = joint_penalty + tcp_box_dist_reward + \ reward = joint_penalty + tcp_box_dist_reward + \
box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost
rod_inclined_angle = rotation_distance(rod_quat, self._desired_rod_quat) rod_inclined_angle = rotation_distance(rod_quat, self._desired_rod_quat)
if rod_inclined_angle > np.pi / 4: if rod_inclined_angle > np.pi / 4:
@ -303,6 +304,7 @@ class BoxPushingDense(BoxPushingEnvBase):
return reward return reward
class BoxPushingTemporalSparse(BoxPushingEnvBase): class BoxPushingTemporalSparse(BoxPushingEnvBase):
def __init__(self, frame_skip: int = 10): def __init__(self, frame_skip: int = 10):
super(BoxPushingTemporalSparse, self).__init__(frame_skip=frame_skip) super(BoxPushingTemporalSparse, self).__init__(frame_skip=frame_skip)
@ -331,6 +333,7 @@ class BoxPushingTemporalSparse(BoxPushingEnvBase):
return reward return reward
class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase): class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase):
def __init__(self, frame_skip: int = 10): def __init__(self, frame_skip: int = 10):