diff --git a/fancy_gym/envs/mujoco/box_pushing/__init__.py b/fancy_gym/envs/mujoco/box_pushing/__init__.py new file mode 100644 index 0000000..c5e6d2f --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/box_pushing.xml b/fancy_gym/envs/mujoco/box_pushing/assets/box_pushing.xml new file mode 100644 index 0000000..516482f --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/box_pushing.xml @@ -0,0 +1,42 @@ + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/kit_lab_surrounding.xml b/fancy_gym/envs/mujoco/box_pushing/assets/kit_lab_surrounding.xml new file mode 100644 index 0000000..ca60a9c --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/kit_lab_surrounding.xml @@ -0,0 +1,118 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/d435v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/d435v.stl new file mode 100644 index 0000000..809a0da Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/d435v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/finger.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/finger.stl new file mode 100644 index 0000000..3b87289 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/finger.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/fingerv.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/fingerv.stl new file mode 100644 index 0000000..0b11382 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/fingerv.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/hand.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/hand.stl new file mode 100644 index 0000000..4e82090 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/hand.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/handv.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/handv.stl new file mode 100644 index 0000000..92f60bd Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/handv.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0.stl new file mode 100644 index 0000000..def070c Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0v.stl new file mode 100644 index 0000000..72dba55 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1.stl new file mode 100644 index 0000000..426bcf2 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1v.stl new file mode 100644 index 0000000..b42f97b Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2.stl new file mode 100644 index 0000000..b369f15 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2v.stl new file mode 100644 index 0000000..d72bbe4 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3.stl new file mode 100644 index 0000000..25162ee Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3v.stl new file mode 100644 index 0000000..904de9d Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4.stl new file mode 100644 index 0000000..76c8c33 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4v.stl new file mode 100644 index 0000000..da74ed6 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5.stl new file mode 100644 index 0000000..3006a0b Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5v.stl new file mode 100644 index 0000000..f795374 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6.stl new file mode 100644 index 0000000..2e9594a Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6v.stl new file mode 100644 index 0000000..8b2a7f3 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7.stl new file mode 100644 index 0000000..0532d05 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7v.stl new file mode 100644 index 0000000..82b5946 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/panda_rod.xml b/fancy_gym/envs/mujoco/box_pushing/assets/panda_rod.xml new file mode 100644 index 0000000..ac85629 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/panda_rod.xml @@ -0,0 +1,159 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/push_box.xml b/fancy_gym/envs/mujoco/box_pushing/assets/push_box.xml new file mode 100644 index 0000000..25313f8 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/push_box.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_mocap_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_mocap_control.xml new file mode 100644 index 0000000..a36c987 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_mocap_control.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_position_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_position_control.xml new file mode 100644 index 0000000..c83939a --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_position_control.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_torque_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_torque_control.xml new file mode 100644 index 0000000..7b70a75 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_torque_control.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_velocity_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_velocity_control.xml new file mode 100644 index 0000000..4fda9de --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_velocity_control.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/depr/panda_gym.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/depr/panda_gym.xml new file mode 100644 index 0000000..27f3415 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/depr/panda_gym.xml @@ -0,0 +1,157 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda.xml new file mode 100644 index 0000000..8718e42 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda.xml @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_bimanual.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_bimanual.xml new file mode 100644 index 0000000..b970726 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_bimanual.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_mocap.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_mocap.xml new file mode 100644 index 0000000..7de5c38 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_mocap.xml @@ -0,0 +1,140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/pandas_kit_lab.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/pandas_kit_lab.xml new file mode 100644 index 0000000..b970726 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/pandas_kit_lab.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/surroundings/base.xml b/fancy_gym/envs/mujoco/box_pushing/assets/surroundings/base.xml new file mode 100644 index 0000000..7a40fbd --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/assets/surroundings/base.xml @@ -0,0 +1,19 @@ + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/box_push_env.py b/fancy_gym/envs/mujoco/box_pushing/box_push_env.py new file mode 100644 index 0000000..63dd8b5 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/box_push_env.py @@ -0,0 +1,416 @@ +import os +import time + +import numpy as np +import mujoco_py +from gym import utils, spaces +from gym.envs.mujoco import MujocoEnv +# from alr_envs.alr.mujoco.box_pushing.box_push_utils import PushBoxReward, rot_to_quat, q_max, q_min, get_quaternion_error +# from alr_envs.alr.mujoco.box_pushing.box_push_utils import q_dot_dot_max, rotation_distance +# from alr_envs.alr.mujoco.box_pushing.box_push_utils import TrajectoryRecoder +from fancy_gym.envs.mujoco.box_pushing.box_push_utils import PushBoxReward, rot_to_quat, q_max, q_min, get_quaternion_error +from fancy_gym.envs.mujoco.box_pushing.box_push_utils import q_dot_dot_max, rotation_distance +from fancy_gym.envs.mujoco.box_pushing.box_push_utils import TrajectoryRecoder + +MAX_EPISODE_STEPS = 1000 + +INIT_BOX_POS_BOUND = np.array([[0.3, -0.45, -0.01], [0.6, 0.45, -0.01]]) + + +class BOX_PUSH_Env_Gym(MujocoEnv, utils.EzPickle): + def __init__(self, enable_gravity_comp=False, frame_skip=1, reward_type="Dense"): + model_path = os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml") + self.reward_type = reward_type + assert reward_type in ["Dense", "Sparse1", "Sparse2"], "reward_type must be one of Dense, Sparse1, Sparse2" + self.enable_gravity_comp = enable_gravity_comp + self.frame_skip = frame_skip + self.max_episode_steps = MAX_EPISODE_STEPS // self.frame_skip + + self.time_steps = 0 + self.init_qpos_box_push = np.array([ + 0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., + 0. + ]) + self.init_qvel_box_push = np.zeros(15) + self._id_set = False + self.reward = PushBoxReward() + + # utilities for IK + self.J_reg = 1e-6 + self.W = np.diag([1, 1, 1, 1, 1, 1, 1]) + self.target_th_null = np.array([ + 3.57795216e-09, + 1.74532920e-01, + 3.30500960e-08, + -8.72664630e-01, + -1.14096181e-07, + 1.22173047e00, + 7.85398126e-01, + ]) + + self.torque_bound_low = -q_dot_dot_max + self.torque_bound_high = q_dot_dot_max + + self.episode_energy = 0. + self.episode_end_pos_dist = 0. + self.episode_end_rot_dist = 0. + # self.trajectory_recorder = TrajectoryRecoder(None, max_length=100) + # end of IK utilities + super(BOX_PUSH_Env_Gym, self).__init__(model_path=model_path, + frame_skip=self.frame_skip, + mujoco_bindings="mujoco_py") + utils.EzPickle.__init__(self) + + action_space_low = np.array([-1.0] * 7) + action_space_high = np.array([1.0] * 7) + self.action_space = spaces.Box(low=action_space_low, + high=action_space_high, + dtype='float32') + # self.trajectory_recorder.update_sim(self.sim) + + def _set_ids(self): + self.box_id = self.sim.model._body_name2id["box_0"] + self.target_id = self.sim.model._body_name2id["target_pos"] + self.rod_tip_site_id = self.sim.model.site_name2id("rod_tip") + self._id_set = True + + def sample_context(self): + # return np.random.uniform(INIT_BOX_POS_BOUND[0], + # INIT_BOX_POS_BOUND[1], + # size=INIT_BOX_POS_BOUND[0].shape) + # pos = np.random.uniform(INIT_BOX_POS_BOUND[0], + # INIT_BOX_POS_BOUND[1], + # size=INIT_BOX_POS_BOUND[0].shape) + pos = np.array([0.4, 0.3, -0.01]) # was 0.45 0.4 + # theta = np.random.uniform(0, np.pi * 2) + theta = 0.0 + quat = rot_to_quat(theta, np.array([0, 0, 1])) + return np.concatenate((pos, quat)) + + def generate_specified_context(self, goal_x, goal_y, goal_rot): + pos = np.array([goal_x, goal_y, -0.01]) + quat = rot_to_quat(goal_rot, np.array([0, 0, 1])) + return np.concatenate((pos, quat)) + + def reset_model(self): + self.reward.reset() + + self.episode_energy = 0. + self.episode_end_pos_dist = 0. + self.episode_end_rot_dist = 0. + + self.set_state(self.init_qpos_box_push, self.init_qvel_box_push) + box_init_pos = self.sample_context() + box_init_pos[0] = 0.4 + box_init_pos[1] = 0.3 + box_init_pos[-4:] = np.array([0, 0, 0, 1.]) + + box_target_pos = self.sample_context() + + # if both box and target are in the same position, sample again + # while np.linalg.norm(box_init_pos[:3] - box_target_pos[:3]) < 0.3: + # box_target_pos = self.sample_context() + box_target_pos[0] = 0.4 # was 0.4 + box_target_pos[1] = -0.3 # was -0.3 + # box_target_pos = self.generate_specified_context(0.45, -0.25, np.pi) + + self.sim.model.body_pos[2] = box_target_pos[:3] + self.sim.model.body_quat[2] = box_target_pos[-4:] + + self.sim.model.body_pos[3] = box_target_pos[:3] + self.sim.model.body_quat[3] = box_target_pos[-4:] + + desired_ee_pos = box_init_pos[:3].copy() + desired_ee_pos[2] += 0.15 + desired_ee_quat = np.array([0, 1, 0, 0]) + desired_joint_pos = self.findJointPosition(desired_ee_pos, + desired_ee_quat) + self.sim.data.qpos[:7] = desired_joint_pos + self.sim.data.set_joint_qpos('box_joint', box_init_pos) + # self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7] + self.sim.forward() + self.time_steps = 0 + + # reset trajectory recorder + # self.trajectory_recorder.plot_trajectories() + # self.trajectory_recorder.save_trajectory("ppo_dense") + # self.trajectory_recorder.reset() + + return self._get_obs() + + def _get_obs(self): + box_pos = self.sim.data.get_body_xpos("box_0") + box_quat = self.sim.data.get_body_xquat("box_0") + target_pos = self.sim.data.get_body_xpos("replan_target_pos") + target_quat = self.sim.data.get_body_xquat("replan_target_pos") + rod_tip_pos = self.sim.data.site_xpos[self.rod_tip_site_id] + rod_quat = self.sim.data.get_body_xquat("push_rod") + obs = np.concatenate([ + self.sim.data.qpos[:7].copy(), + self.sim.data.qvel[:7].copy(), + self.sim.data.qfrc_bias[:7].copy(), + # self.sim.data.qfrc_actuator[:7].copy(), + rod_tip_pos, + rod_quat, + box_pos, + box_quat, + target_pos, + target_quat + # np.array([self.time_steps / 100.]) + ]) + return obs + + def step(self, action): + if not self._id_set: + self._set_ids() + invalid_flag = False + done = False + self.time_steps += 1 + + action = np.clip(action, self.action_space.low, self.action_space.high) + + if self.enable_gravity_comp: + action = action * 10. # rescale action + resultant_action = action + self.sim.data.qfrc_bias[:7].copy() + else: + resultant_action = action * 10. + + resultant_action = np.clip(resultant_action, self.torque_bound_low, + self.torque_bound_high) + + # the replan_target_pos was align with the target_pos before 800 + # if self.time_steps == 20: + # new_target_pos = np.array([1., 1., -0.01]) + # new_target_quat = np.array([0, 1, 0, 0]) + # while new_target_pos[0] < 0.4 or new_target_pos[0] > 0.6 or abs( + # new_target_pos[1]) > 0.45: + # pos_change = np.random.uniform(-0.25, 0.2, 3) + # pos_change[-1] = 0. + # # pos_change[-2] = 0. + # # self.sim.model.body_pos[3] = self.sim.data.get_body_xpos("target_pos") + pos_change + # # self.sim.model.body_quat[3] = self.sim.data.get_body_xquat("target_pos") + # new_target_pos = self.sim.data.get_body_xpos( + # "target_pos") + pos_change + # # new_target_quat = self.sim.data.get_body_xquat("target_pos") + # old_target_quat = self.sim.data.get_body_xquat("target_pos") + # new_target_quat = None + # while new_target_quat is None or rotation_distance( + # new_target_quat, old_target_quat) > np.pi / 2.: + # theta = np.random.uniform(0, np.pi * 2) + # new_target_quat = rot_to_quat(theta, np.array([0, 0, 1])) + # + # self.sim.model.body_pos[3] = new_target_pos + # self.sim.model.body_quat[3] = new_target_quat + # self.sim.forward() + + try: + self.do_simulation(resultant_action, self.frame_skip) + except mujoco_py.MujocoException as e: + print(e) + invalid_flag = True + + # record the trajectory + # if self.time_steps % (10//self.frame_skip) == 0: + # self.trajectory_recorder.record() + + box_pos = self.sim.data.get_body_xpos("box_0") + box_quat = self.sim.data.get_body_xquat("box_0") + target_pos = self.sim.data.get_body_xpos("replan_target_pos") + target_quat = self.sim.data.get_body_xquat("replan_target_pos") + rod_tip_pos = self.sim.data.site_xpos[self.rod_tip_site_id].copy() + rod_quat = self.sim.data.get_body_xquat("push_rod") + qpos = self.sim.data.qpos[:7].copy() + qvel = self.sim.data.qvel[:7].copy() + + + episode_end = False + self.episode_energy += np.sum(np.square(action)) + if self.time_steps >= 100 - 1: + episode_end = True + done = True + self.episode_end_pos_dist = np.linalg.norm(box_pos - target_pos) + self.episode_end_rot_dist = rotation_distance( + box_quat, target_quat) + + if self.reward_type == "Dense": + reward = self.reward.step_reward(box_pos, box_quat, target_pos, + target_quat, rod_tip_pos, rod_quat, + qpos, qvel, action) + elif self.reward_type == "Sparse1": + reward = self.reward.sparse1_reward(episode_end, box_pos, box_quat, + target_pos, target_quat, + rod_tip_pos, rod_quat, qpos, qvel, + action) + elif self.reward_type == "Sparse2": + reward = self.reward.sparse2_reward(episode_end, box_pos, box_quat, + target_pos, target_quat, rod_tip_pos, + rod_quat, qpos, qvel, action) + else: + raise NotImplementedError("Unknown reward type: {}".format( + self.reward_type)) + + if invalid_flag: + reward = -25 + + obs = self._get_obs() + infos = { + 'episode_end': + episode_end, + 'box_goal_pos_dist': + self.episode_end_pos_dist, + 'box_goal_rot_dist': + self.episode_end_rot_dist, + 'episode_energy': + self.episode_energy if episode_end else 0., + 'is_success': + True if episode_end and self.episode_end_pos_dist < 0.05 + and self.episode_end_rot_dist < 0.5 else False, + 'num_steps': + self.time_steps + } + return obs, reward, done, infos + + def getJacobian(self, body_id="tcp", q=np.zeros(7)): + + self.sim.data.qpos[:7] = q + self.sim.forward() + jacp = self.sim.data.get_body_jacp(body_id).reshape(3, -1)[:, :7] + jacr = self.sim.data.get_body_jacr(body_id).reshape(3, -1)[:, :7] + jac = np.concatenate((jacp, jacr), axis=0) + return jac + + def findJointPosition(self, desiredPos=None, desiredQuat=None): + + eps = 1e-5 + IT_MAX = 1000 + DT = 1e-3 + + i = 0 + self.pgain = [ + 33.9403713446798, + 30.9403713446798, + 33.9403713446798, + 27.69370238555632, + 33.98706171459314, + 30.9185531893281, + ] + self.pgain_null = 5 * np.array([ + 7.675519770796831, + 2.676935478437176, + 8.539040163444975, + 1.270446361314313, + 8.87752182480855, + 2.186782233762969, + 4.414432577659688, + ]) + self.pgain_limit = 20 + + q = self.sim.data.qpos[:7].copy() + qd_d = np.zeros(q.shape) + oldErrNorm = np.inf + + # if (desiredPos is None): + # desiredPos = self.desiredTaskPosition[:3] + # + # if (desiredQuat is None): + # desiredQuat = self.desiredTaskPosition[3:] + + while True: + oldQ = q + q = q + DT * qd_d + + q = np.clip(q, q_min, q_max) + + # cartPos, orient = self.sim.data.site_xpos[self.rod_tip_site_id] + cartPos = self.sim.data.get_body_xpos("tcp") + orient = self.sim.data.get_body_xquat("tcp") + cpos_err = desiredPos - cartPos + + if np.linalg.norm(orient - + desiredQuat) > np.linalg.norm(orient + + desiredQuat): + orient = -orient + + cpos_err = np.clip(cpos_err, -0.1, 0.1) + cquat_err = np.clip( + get_quaternion_error(orient, desiredQuat), + -0.5, + 0.5, + ) + err = np.hstack((cpos_err, cquat_err)) + + errNorm = np.sum(cpos_err**2) + np.sum((orient - desiredQuat)**2) + + if errNorm > oldErrNorm: + q = oldQ + DT = DT * 0.7 + continue + else: + DT = DT * 1.025 + + if errNorm < eps: + success = True + break + if i >= IT_MAX: + success = False + break + + # if not i % 1: + #print('%d: error = %s, %s, %s' % (i, errNorm, oldErrNorm, DT)) + + oldErrNorm = errNorm + + J = self.getJacobian(q=q) + + Jw = J.dot(self.W) + + # J * W * J' + reg * I + JwJ_reg = Jw.dot(J.T) + self.J_reg * np.eye(J.shape[0]) + + # Null space movement + qd_null = self.pgain_null * (self.target_th_null - q) + + margin_to_limit = 0.1 + qd_null_limit = np.zeros(qd_null.shape) + qd_null_limit_max = self.pgain_limit * (q_max - margin_to_limit - + q) + qd_null_limit_min = self.pgain_limit * (q_min + margin_to_limit - + q) + qd_null_limit[q > q_max - margin_to_limit] += qd_null_limit_max[ + q > q_max - margin_to_limit] + qd_null_limit[q < q_min + margin_to_limit] += qd_null_limit_min[ + q < q_min + margin_to_limit] + + qd_null += qd_null_limit + # W J.T (J W J' + reg I)^-1 xd_d + (I - W J.T (J W J' + reg I)^-1 J qd_null + qd_d = np.linalg.solve(JwJ_reg, self.pgain * err - J.dot(qd_null)) + # qd_d = self.pgain * err + qd_d = self.W.dot(J.transpose()).dot(qd_d) + qd_null + + i += 1 + + # print("Final IK error (%d iterations): %s" % (i, errNorm)) + return q + + +if __name__ == "__main__": + env = BOX_PUSH_Env_Gym(enable_gravity_comp=True, frame_skip=10) + env.reset() + for j in range(60): + old_obs = env.reset() + done = False + for _ in range(100): + env.render(mode='human') + # action = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) + # action = np.array([0.0] * 7) + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + qpos = env.sim.data.qpos + # qpos[:7] = [2.8973, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973] + # qpos[:7] = [-2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973] + # qvel = env.sim.data.qvel + # qvel[:7] = [0, 0, 0, 0, 0, 0, 0] + # env.set_state(qpos, qvel) + # print("diff between old and new obs: ", np.linalg.norm(obs - old_obs)) + old_obs = obs + print("========================================") diff --git a/fancy_gym/envs/mujoco/box_pushing/box_push_utils.py b/fancy_gym/envs/mujoco/box_pushing/box_push_utils.py new file mode 100644 index 0000000..e543607 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/box_push_utils.py @@ -0,0 +1,342 @@ +import matplotlib.pyplot as plt + +import numpy as np +import pandas as pd + +from gym.envs.mujoco.mujoco_env import MujocoEnv + +from scipy.interpolate import make_interp_spline + +# joint constraints for Franka robot +q_max = np.array([2.8973, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973]) +q_min = np.array( + [-2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973]) + +q_dot_max = np.array([2.1750, 2.1750, 2.1750, 2.1750, 2.6100, 2.6100, 2.6100]) +q_dot_dot_max = np.array([90., 90., 90., 90., 12., 12., 12.]) + +desired_rod_quat = np.array([0.0, 1.0, 0.0, 0.0]) + + +def get_quaternion_error(curr_quat, des_quat): + """ + Calculates the difference between the current quaternion and the desired quaternion. + See Siciliano textbook page 140 Eq 3.91 + + :param curr_quat: current quaternion + :param des_quat: desired quaternion + :return: difference between current quaternion and desired quaternion + """ + quatError = np.zeros((3, )) + + quatError[0] = (curr_quat[0] * des_quat[1] - des_quat[0] * curr_quat[1] - + curr_quat[3] * des_quat[2] + curr_quat[2] * des_quat[3]) + + quatError[1] = (curr_quat[0] * des_quat[2] - des_quat[0] * curr_quat[2] + + curr_quat[3] * des_quat[1] - curr_quat[1] * des_quat[3]) + + quatError[2] = (curr_quat[0] * des_quat[3] - des_quat[0] * curr_quat[3] - + curr_quat[2] * des_quat[1] + curr_quat[1] * des_quat[2]) + + return quatError + + +def rotation_distance(p: np.array, q: np.array): + """ + p: quaternion + q: quaternion + theta: rotation angle between p and q (rad) + """ + assert p.shape == q.shape, "p and q should be quaternion" + product = p[0] * q[0] + p[1] * q[1] + p[2] * q[2] + p[3] * q[3] + theta = 2 * np.arccos(abs(product)) + return theta + + +def joint_limit_violate_penalty(joint_pos, + joint_vel, + enable_pos_limit=False, + enable_vel_limit=False): + penalty = 0. + p_coeff = 1. + v_coeff = 1. + # q_limit + if enable_pos_limit: + higher_indice = np.where(joint_pos > q_max) + lower_indice = np.where(joint_pos < q_min) + higher_error = joint_pos - q_max + lower_error = q_min - joint_pos + penalty -= p_coeff * (abs(np.sum(higher_error[higher_indice])) + + abs(np.sum(lower_error[lower_indice]))) + # q_dot_limit + if enable_vel_limit: + q_dot_error = abs(joint_vel) - abs(q_dot_max) + q_dot_violate_idx = np.where(q_dot_error > 0.) + penalty -= v_coeff * abs(np.sum(q_dot_error[q_dot_violate_idx])) + return penalty + + +def rot_to_quat(theta, axis): + quant = np.zeros(4) + quant[0] = np.sin(theta / 2.) + quant[1] = np.cos(theta / 2.) * axis[0] + quant[2] = np.cos(theta / 2.) * axis[1] + quant[3] = np.cos(theta / 2.) * axis[2] + return quant + + +class PushBoxReward: + def __init__(self): + self.box_size = np.array([0.05, 0.05, 0.045]) + self.step_reward_joint_penalty = [] + self.step_reward_tcp = [] + self.step_reward_pos = [] + self.step_reward_rot = [] + self.step_reward_flip = [] + self.energy_cost = [] + + def get_reward_trajectory(self): + return np.array(self.step_reward_pos.copy()), np.array( + self.step_reward_rot.copy()) + + def reset(self): + self.step_reward_pos = [] + self.step_reward_rot = [] + self.step_reward_tcp = [] + self.step_reward_joint_penalty = [] + self.step_reward_flip = [] + self.energy_cost = [] + + def step_reward(self, box_pos, box_quat, target_pos, target_quat, + rod_tip_pos, rod_quat, qpos, qvel, action): + + joint_penalty = joint_limit_violate_penalty(qpos, + qvel, + enable_pos_limit=True, + enable_vel_limit=True) + tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100) + box_goal_pos_dist_reward = -3.5 * np.linalg.norm(box_pos - target_pos) + box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi + energy_cost = -0.0005 * np.sum(np.square(action)) + + reward = joint_penalty + tcp_box_dist_reward + \ + box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost + + rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat) + if rod_inclined_angle > np.pi / 4: + reward -= rod_inclined_angle / (np.pi) + + # self.step_reward_joint_penalty.append(joint_penalty) + # self.step_reward_tcp.append(tcp_box_dist_reward) + # self.step_reward_pos.append(box_goal_pos_dist_reward) + # self.step_reward_rot.append(box_goal_rot_dist_reward) + # self.energy_cost.append(energy_cost) + return reward + + def sparse1_reward(self, episodic_end, box_pos, box_quat, target_pos, + target_quat, rod_tip_pos, rod_quat, qpos, qvel, action): + + reward = 0. + joint_penalty = joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True) + energy_cost = -0.0005 * np.sum(np.square(action)) + tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100) + reward += joint_penalty + tcp_box_dist_reward + energy_cost + rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat) + + if rod_inclined_angle > np.pi / 4: + reward -= rod_inclined_angle / (np.pi) + + if not episodic_end: + return reward + + box_goal_dist = np.linalg.norm(box_pos - target_pos) + + box_goal_pos_dist_reward = -3.5 * box_goal_dist * 100 + box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi * 100 + + reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward + + return reward + + def sparse2_reward(self, episodic_end, box_pos, box_quat, target_pos, + target_quat, rod_tip_pos, rod_quat, qpos, qvel, action): + + reward = 0. + joint_penalty = joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True) + energy_cost = -0.0005 * np.sum(np.square(action)) + tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100) + reward += joint_penalty + tcp_box_dist_reward + energy_cost + rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat) + + if rod_inclined_angle > np.pi / 4: + reward -= rod_inclined_angle / (np.pi) + + if not episodic_end: + return reward + + box_goal_dist = np.linalg.norm(box_pos - target_pos) + + if box_goal_dist < 0.1: + reward += 300 + box_goal_pos_dist_reward = np.clip(- 3.5 * box_goal_dist * 100 * 3, -100, 0) + box_goal_rot_dist_reward = np.clip(- rotation_distance(box_quat, target_quat)/np.pi * 100 * 1.5, -100, 0) + reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward + + return reward + + def plotRewards(self): + length = np.array(self.step_reward_pos).shape[0] + t = np.arange(0, length) + fig, axs = plt.subplots(1, 1) + axs.plot(t, self.step_reward_pos, label='pos') + axs.plot(t, self.step_reward_rot, label='rot') + axs.plot(t, self.step_reward_tcp, label='tcp') + axs.plot(t, + np.array(self.step_reward_joint_penalty) / 0.5, + label='joint_penalty') + # axs.plot(t, self.step_reward_flip, label='flip') + axs.plot(t, self.energy_cost, label='energy_cost') + plt.legend(loc='upper right') + plt.xlabel('Steps') + plt.title('Reward') + plt.show() + +class TrajectoryRecoder(object): + def __init__(self, sim, prefix="episodic", max_length=1000): + self.sim = sim + self.prefix = prefix + self.max_length = max_length + self.reset() + + def update_sim(self, sim): + self.sim = sim + + def reset(self): + self.trajectory = [] + self.box_pos_dist_trajectory = [] # trajectory of box position distance to goal + self.box_rot_dist_trajectory = [] # trajectory of box rotation distance to goal + self.joint_pos_trajectory = [] # trajectory of joint position + self.joint_vel_trajectory = [] # trajectory of joint velocity + self.joint_torque_trajectory = [] # trajectory of joint torque + self.length = 0 + + def record(self): + if self.sim is None: + return + + self.joint_pos_trajectory.append(self.sim.data.qpos[:7].copy()) + self.joint_vel_trajectory.append(self.sim.data.qvel[:7].copy()) + self.joint_torque_trajectory.append(self.sim.data.qfrc_actuator[:7].copy()) + + box_pos = self.sim.data.get_body_xpos("box_0") + box_quat = self.sim.data.get_body_xquat("box_0") + target_pos = self.sim.data.get_body_xpos("replan_target_pos") + target_quat = self.sim.data.get_body_xquat("replan_target_pos") + + self.box_pos_dist_trajectory.append(np.linalg.norm(box_pos - target_pos)) + self.box_rot_dist_trajectory.append(rotation_distance(box_quat, target_quat)) + + self.length += 1 + if self.length > self.max_length: + self.joint_vel_trajectory.pop(0) + self.joint_pos_trajectory.pop(0) + self.joint_torque_trajectory.pop(0) + self.box_pos_dist_trajectory.pop(0) + self.box_rot_dist_trajectory.pop(0) + self.length -= 1 + + def get_trajectory(self): + return self.trajectory + + def get_length(self): + return self.length + + def plot_trajectories(self): + self.plot_trajectory(self.joint_pos_trajectory, + "joint_pos_trajectory") + self.plot_trajectory(self.joint_vel_trajectory, + "joint_vel_trajectory") + self.plot_trajectory(self.joint_torque_trajectory, + "joint_acc_trajectory") + self.plot_trajectory(self.box_pos_dist_trajectory, + "box_pos_dist_trajectory") + self.plot_trajectory(self.box_rot_dist_trajectory, + "box_rot_dist_trajectory") + + def plot_trajectory(self, trajectory, title: str): + if len(trajectory) == 0: + return + trajectory = np.array(trajectory) + length = trajectory.shape[0] + t = np.arange(0, length) + dim = trajectory.shape[1] if len(trajectory.shape) > 1 else 1 + fig, axs = plt.subplots(dim, 1, sharex=True) + if dim == 1: + axs.plot(t, trajectory) + else: + for i in range(dim): + axs[i].plot(t, trajectory[:, i]) + # plt.legend(loc='upper right') + plt.xlabel('Steps') + plt.title(self.prefix + ": " + title) + plt.show() + + def plot_box_trajectory(self): + pass + + def save_trajectory(self, path: str): + + joint_pos_trajectory = np.array(self.joint_pos_trajectory) + joint_vel_trajectory = np.array(self.joint_vel_trajectory) + joint_torque_trajectory = np.array(self.joint_torque_trajectory) + box_pos_dist_trajectory = np.array(self.box_pos_dist_trajectory) + box_rot_dist_trajectory = np.array(self.box_rot_dist_trajectory) + pd_dict = {} + if joint_pos_trajectory.shape[0] > 0: + for i in range(7): + pd_dict["qpos_" + str(i)] = joint_pos_trajectory[:, i] + pd_dict["qvel_" + str(i)] = joint_vel_trajectory[:, i] + pd_dict["qfrc_" + str(i)] = joint_torque_trajectory[:, i] + pd_dict["box_pos_dist"] = box_pos_dist_trajectory + pd_dict["box_rot_dist"] = box_rot_dist_trajectory + + df = pd.DataFrame(pd_dict) + folder_path = "/home/i53/student/hongyi_zhou/py_ws/alr_envs/alr_envs/alr/mujoco/box_pushing/recorded_trajectory/" + save_path = folder_path + path + ".csv" + df.to_csv(save_path) + +def plot_trajectories(traj_dict:dict, traj_name:str): + + fig, axs = plt.subplots(1, 1, sharex=True) + + for key in traj_dict.keys(): + t = traj_dict[key][traj_name].shape[0] + t = np.arange(0, t) + spline = make_interp_spline(t, traj_dict[key][traj_name]) + t_ = np.linspace(t.min(), t.max(), 1000) + y_ = spline(t_) + axs.plot(t_, y_, label=key) + + plt.legend(loc='upper right') + plt.title(traj_name) + plt.xlabel('Steps') + plt.show() + + +if __name__ == "__main__": + + ## sparse1 nodistcheck sparse2 withdistcheck + folder_path = "/home/i53/student/hongyi_zhou/py_ws/alr_envs/alr_envs/alr/mujoco/box_pushing/recorded_trajectory/" + exp_name = ["ppo_dense", "promp_dense_reward", "promp_sparse_nodistcheck", "promp_sparse_withdistcheck"] + # exp_name = ["promp_sparse_nodistcheck"] + trajectory_dict = { } + for name in exp_name: + data_path = folder_path + name + ".csv" + df = pd.read_csv(data_path) + trajectory_dict[name] = df + + # plot_trajectories(trajectory_dict, "box_pos_dist") + # plot_trajectories(trajectory_dict, "box_rot_dist") + plot_trajectories(trajectory_dict, "qpos_6") + plot_trajectories(trajectory_dict, "qvel_6") + plot_trajectories(trajectory_dict, "qfrc_6") diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py new file mode 100644 index 0000000..73a2412 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -0,0 +1,248 @@ +import os + +import numpy as np +from gym import utils, spaces +from gym.envs.mujoco import MujocoEnv +from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance +from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max +from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import BoxPushingReward + +import mujoco + +MAX_EPISODE_STEPS_BOX_PUSHING = 1000 + +BOX_POS_BOUND = np.array([[0.3, -0.45, -0.01], [0.6, 0.45, -0.01]]) + +class BoxPushingEnv(MujocoEnv, utils.EzPickle): + """ + franka box pushing environment + action space: + normalized joints torque * 7 , range [-1, 1] + observation space: + + rewards: + 1. dense reward + 2. time-depend sparse reward + 3. time-spatial-depend sparse reward + """ + + def __init__(self, reward_type: str = "dense", frame_skip: int = 10): + utils.EzPickle.__init__(**locals()) + self._steps = 0 + self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.]) + self.init_qvel_box_pushing = np.zeros(15) + assert reward_type in ["dense", "temporal_sparse", "temporal_spatial_sparse"], "unrecognized reward type" + self.reward = BoxPushingReward(reward_type, q_max, q_min, q_dot_max) + MujocoEnv.__init__(self, + model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"), + frame_skip=frame_skip, + mujoco_bindings="mujoco") + self.action_space = spaces.Box(low=-1, high=1, shape=(7,)) + + def step(self, action): + episode_end = False + action = 10 * np.clip(action, self.action_space.low, self.action_space.high) + resultant_action = np.clip(action + self.data.qfrc_bias[:7].copy(), -q_torque_max, q_torque_max) + + unstable_simulation = False + + try: + self.do_simulation(resultant_action, self.frame_skip) + except Exception as e: + print(e) + unstable_simulation = True + + self._steps += 1 + if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING: + episode_end = True + + box_pos = self.data.body("box_0").xpos.copy() + box_quat = self.data.body("box_0").xquat.copy() + target_pos = self.data.body("replan_target_pos").xpos.copy() + target_quat = self.data.body("replan_target_pos").xquat.copy() + rod_tip_pos = self.data.site("rod_tip").xpos.copy() + rod_quat = self.data.body("push_rod").xquat.copy() + qpos = self.data.qpos[:7].copy() + qvel = self.data.qvel[:7].copy() + + if not unstable_simulation: + reward = self.reward.get_reward(episode_end, box_pos, box_quat, target_pos, target_quat, + rod_tip_pos, rod_quat, qpos, qvel, action) + else: + reward = -50 + + obs = self._get_obs() + infos = dict() + return obs, reward, episode_end, infos + + def reset_model(self): + # rest box to initial position + self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing) + box_init_pos = np.array([0.4, 0.3, -0.01, 0.0, 0.0, 0.0, 1.0]) + self.data.body("box_0").xpos = box_init_pos[:3] + self.data.body("box_0").xquat = box_init_pos[3:] + + # set target position + box_target_pos = self.sample_context() + self.model.body_pos[2] = box_target_pos[:3] + self.model.body_quat[2] = box_target_pos[-4:] + self.model.body_pos[3] = box_target_pos[:3] + self.model.body_quat[3] = box_target_pos[-4:] + + # set the robot to the right configuration (rod tip in the box) + desired_tcp_pos = box_target_pos[:3] + np.array([0.0, 0.0, 0.15]) + desired_tcp_quat = np.array([0, 1, 0, 0]) + desired_joint_pos = self.calculateOfflineIK(desired_tcp_pos, desired_tcp_quat) + self.data.qpos[:7] = desired_joint_pos + + mujoco.mj_forward(self.model, self.data) + self._steps = 0 + + return self._get_obs() + + def sample_context(self): + pos = np.random.uniform(low=BOX_POS_BOUND[0], high=BOX_POS_BOUND[1], size=3) + theta = np.random.uniform(low=0, high=np.pi * 2) + quat = rot_to_quat(theta, np.array([0, 0, 1])) + return np.concatenate([pos, quat]) + + def _get_obs(self): + obs = np.concatenate([ + self.data.qpos[:7].copy(), # joint position + self.data.qvel[:7].copy(), # joint velocity + self.data.qfrc_bias[:7].copy(), # joint gravity compensation + self.data.site("rod_tip").xpos.copy(), # position of rod tip + self.data.body("push_rod").xquat.copy(), # orientation of rod + self.data.body("box_0").xpos.copy(), # position of box + self.data.body("box_0").xquat.copy(), # orientation of box + self.data.body("replan_target_pos").xpos.copy(), # position of target + self.data.body("replan_target_pos").xquat.copy() # orientation of target + ]) + return obs + + def get_body_jacp(self, name): + id = mujoco.mj_name2id(self.model, 1, name) + jacp = np.zeros((3, self.model.nv)) + mujoco.mj_jacBody(self.model, self.data, jacp, None, id) + return jacp + + def get_body_jacr(self, name): + id = mujoco.mj_name2id(self.model, 1, name) + jacr = np.zeros((3, self.model.nv)) + mujoco.mj_jacBody(self.model, self.data, None, jacr, id) + return jacr + + def calculateOfflineIK(self, desired_cart_pos, desired_cart_quat): + """ + calculate offline inverse kinematics for franka pandas + :param desired_cart_pos: desired cartesian position of tool center point + :param desired_cart_quat: desired cartesian quaternion of tool center point + :return: joint angles + """ + J_reg = 1e-6 + w = np.diag([1, 1, 1, 1, 1, 1, 1]) + target_theta_null = np.array([ + 3.57795216e-09, + 1.74532920e-01, + 3.30500960e-08, + -8.72664630e-01, + -1.14096181e-07, + 1.22173047e00, + 7.85398126e-01]) + eps = 1e-5 # threshold for convergence + IT_MAX = 1000 + dt = 1e-3 + i = 0 + pgain = [ + 33.9403713446798, + 30.9403713446798, + 33.9403713446798, + 27.69370238555632, + 33.98706171459314, + 30.9185531893281, + ] + pgain_null = 5 * np.array([ + 7.675519770796831, + 2.676935478437176, + 8.539040163444975, + 1.270446361314313, + 8.87752182480855, + 2.186782233762969, + 4.414432577659688, + ]) + pgain_limit = 20 + q = self.data.qpos[:7].copy() + qd_d = np.zeros(q.shape) + old_err_norm = np.inf + + while True: + q_old = q + q = q + dt * qd_d + q = np.clip(q, q_min, q_max) + current_cart_pos = self.data.body("tcp").xpos.copy() + current_cart_quat = self.data.body("tcp").xquat.copy() + + cart_pos_error = np.clip(desired_cart_pos - current_cart_pos, -0.1, 0.1) + + if np.linalg.norm(current_cart_quat - desired_cart_quat) > np.linalg.norm(current_cart_quat + desired_cart_quat): + current_cart_quat = -current_cart_quat + cart_quat_error = np.clip(get_quaternion_error(current_cart_quat, desired_cart_quat), -0.5, 0.5) + + err = np.hstack((cart_pos_error, cart_quat_error)) + err_norm = np.sum(cart_pos_error**2) + np.sum((current_cart_quat - desired_cart_quat)**2) + print("err_norm: {}, old_err_norm: {}".format(err_norm, old_err_norm)) + if err_norm > old_err_norm: + q = q_old + dt = 0.7 * dt + continue + else: + dt = 1.025 * dt + + if err_norm < eps: + break + if i > IT_MAX: + break + + old_err_norm = err_norm + + ### get Jacobian by mujoco + self.data.qpos[:7] = q + mujoco.mj_forward(self.model, self.data) + jacp = self.get_body_jacp("tcp")[:, :7].copy() + jacr = self.get_body_jacr("tcp")[:, :7].copy() + J = np.concatenate((jacp, jacr), axis=0) + + Jw = J.dot(w) + + # J * W * J.T + J_reg * I + JwJ_reg = Jw.dot(J.T) + J_reg * np.eye(J.shape[0]) + + # Null space velocity, points to home position + qd_null = pgain_null * (target_theta_null - q) + + margin_to_limit = 0.1 + qd_null_limit = np.zeros(qd_null.shape) + qd_null_limit_max = pgain_limit * (q_max - margin_to_limit - q) + qd_null_limit_min = pgain_limit * (q_min + margin_to_limit - q) + qd_null_limit[q > q_max - margin_to_limit] += qd_null_limit_max[q > q_max - margin_to_limit] + qd_null_limit[q < q_min + margin_to_limit] += qd_null_limit_min[q < q_min + margin_to_limit] + qd_null += qd_null_limit + + # W J.T (J W J' + reg I)^-1 xd_d + (I - W J.T (J W J' + reg I)^-1 J qd_null + qd_d = np.linalg.solve(JwJ_reg, pgain * err - J.dot(qd_null)) + + qd_d = w.dot(J.transpose()).dot(qd_d) + qd_null + + i += 1 + + return q + +if __name__=="__main__": + env = BoxPushingEnv(reward_type="dense", frame_skip=10) + env.reset() + for i in range(100): + env.reset() + for _ in range(100): + env.render("human") + action = env.action_space.sample() + obs, reward, done, info = env.step(action) diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py new file mode 100644 index 0000000..cfa15ff --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py @@ -0,0 +1,193 @@ +import numpy as np + + +# joint constraints for Franka robot +q_max = np.array([2.8973, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973]) +q_min = np.array([-2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973]) + +q_dot_max = np.array([2.1750, 2.1750, 2.1750, 2.1750, 2.6100, 2.6100, 2.6100]) +q_torque_max = np.array([90., 90., 90., 90., 12., 12., 12.]) +# +desired_rod_quat = np.array([0.0, 1.0, 0.0, 0.0]) + +def get_quaternion_error(curr_quat, des_quat): + """ + Calculates the difference between the current quaternion and the desired quaternion. + See Siciliano textbook page 140 Eq 3.91 + + :param curr_quat: current quaternion + :param des_quat: desired quaternion + :return: difference between current quaternion and desired quaternion + """ + quatError = np.zeros((3, )) + + quatError[0] = (curr_quat[0] * des_quat[1] - des_quat[0] * curr_quat[1] - + curr_quat[3] * des_quat[2] + curr_quat[2] * des_quat[3]) + + quatError[1] = (curr_quat[0] * des_quat[2] - des_quat[0] * curr_quat[2] + + curr_quat[3] * des_quat[1] - curr_quat[1] * des_quat[3]) + + quatError[2] = (curr_quat[0] * des_quat[3] - des_quat[0] * curr_quat[3] - + curr_quat[2] * des_quat[1] + curr_quat[1] * des_quat[2]) + + return quatError + + +def rotation_distance(p: np.array, q: np.array): + """ + p: quaternion + q: quaternion + theta: rotation angle between p and q (rad) + """ + assert p.shape == q.shape, "p and q should be quaternion" + product = p[0] * q[0] + p[1] * q[1] + p[2] * q[2] + p[3] * q[3] + theta = 2 * np.arccos(abs(product)) + return theta + + +def rot_to_quat(theta, axis): + quant = np.zeros(4) + quant[0] = np.sin(theta / 2.) + quant[1] = np.cos(theta / 2.) * axis[0] + quant[2] = np.cos(theta / 2.) * axis[1] + quant[3] = np.cos(theta / 2.) * axis[2] + return quant + + + +class RewardBase(): + def __init__(self, q_max, q_min, q_dot_max): + self._reward = 0. + self._done = False + self._q_max = q_max + self._q_min = q_min + self._q_dot_max = q_dot_max + + def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat, + rod_tip_pos, rod_quat, qpos, qvel, action): + raise NotImplementedError + + def _joint_limit_violate_penalty(self, + qpos, + qvel, + enable_pos_limit=False, + enable_vel_limit=False): + penalty = 0. + p_coeff = 1. + v_coeff = 1. + # q_limit + if enable_pos_limit: + higher_indice = np.where(qpos > self._q_max) + lower_indice = np.where(qpos < self._q_min) + higher_error = qpos - self._q_max + lower_error = self._q_min - qpos + penalty -= p_coeff * (abs(np.sum(higher_error[higher_indice])) + + abs(np.sum(lower_error[lower_indice]))) + # q_dot_limit + if enable_vel_limit: + q_dot_error = abs(qvel) - abs(self._q_dot_max) + q_dot_violate_idx = np.where(q_dot_error > 0.) + penalty -= v_coeff * abs(np.sum(q_dot_error[q_dot_violate_idx])) + return penalty + + + + +class DenseReward(RewardBase): + def __init__(self, q_max, q_min, q_dot_max): + super(DenseReward, self).__init__(q_max, q_min, q_dot_max) + + def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat, + rod_tip_pos, rod_quat, qpos, qvel, action): + joint_penalty = self._joint_limit_violate_penalty(qpos, + qvel, + enable_pos_limit=True, + enable_vel_limit=True) + tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100) + box_goal_pos_dist_reward = -3.5 * np.linalg.norm(box_pos - target_pos) + box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi + energy_cost = -0.0005 * np.sum(np.square(action)) + + reward = joint_penalty + tcp_box_dist_reward + \ + box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost + + rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat) + if rod_inclined_angle > np.pi / 4: + reward -= rod_inclined_angle / (np.pi) + + return reward + + + + +class TemporalSparseReward(RewardBase): + def __init__(self, q_max, q_min, q_dot_max): + super(TemporalSparseReward, self).__init__(q_max, q_min, q_dot_max) + + def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat, + rod_tip_pos, rod_quat, qpos, qvel, action): + reward = 0. + joint_penalty = self._joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True) + energy_cost = -0.0005 * np.sum(np.square(action)) + tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100) + reward += joint_penalty + tcp_box_dist_reward + energy_cost + rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat) + + if rod_inclined_angle > np.pi / 4: + reward -= rod_inclined_angle / (np.pi) + + if not episodic_end: + return reward + + box_goal_dist = np.linalg.norm(box_pos - target_pos) + + box_goal_pos_dist_reward = -3.5 * box_goal_dist * 100 + box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi * 100 + + reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward + + return reward + + + + +class TemporalSpatialSparseReward(RewardBase): + def __init__(self, q_max, q_min, q_dot_max): + super(TemporalSpatialSparseReward, self).__init__(q_max, q_min, q_dot_max) + + def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat, + rod_tip_pos, rod_quat, qpos, qvel, action): + reward = 0. + joint_penalty = self._joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True) + energy_cost = -0.0005 * np.sum(np.square(action)) + tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100) + reward += joint_penalty + tcp_box_dist_reward + energy_cost + rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat) + + if rod_inclined_angle > np.pi / 4: + reward -= rod_inclined_angle / (np.pi) + + if not episodic_end: + return reward + + box_goal_dist = np.linalg.norm(box_pos - target_pos) + + if box_goal_dist < 0.1: + reward += 300 + box_goal_pos_dist_reward = np.clip(- 3.5 * box_goal_dist * 100 * 3, -100, 0) + box_goal_rot_dist_reward = np.clip(- rotation_distance(box_quat, target_quat)/np.pi * 100 * 1.5, -100, 0) + reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward + + return reward + + + +def BoxPushingReward(reward_type, q_max, q_min, q_dot_max): + if reward_type == 'dense': + return DenseReward(q_max, q_min, q_dot_max) + elif reward_type == 'temporal_sparse': + return TemporalSparseReward(q_max, q_min, q_dot_max) + elif reward_type == 'temporal_spatial_sparse': + return TemporalSpatialSparseReward(q_max, q_min, q_dot_max) + else: + raise NotImplementedError \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box.xml new file mode 100644 index 0000000..3e71094 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box.xml @@ -0,0 +1,9 @@ + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box_with_lid.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box_with_lid.xml new file mode 100644 index 0000000..e4bc5d6 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box_with_lid.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/cubic_with_sites.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/cubic_with_sites.xml new file mode 100644 index 0000000..e88e02f --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/cubic_with_sites.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/goal.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/goal.xml new file mode 100644 index 0000000..74bd7a9 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/goal.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/push_box.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/push_box.xml new file mode 100644 index 0000000..25313f8 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/push_box.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/tray.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/tray.xml new file mode 100644 index 0000000..cca0617 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/tray.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_mocap_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_mocap_control.xml new file mode 100644 index 0000000..a36c987 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_mocap_control.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_position_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_position_control.xml new file mode 100644 index 0000000..c83939a --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_position_control.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_torque_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_torque_control.xml new file mode 100644 index 0000000..7b70a75 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_torque_control.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_velocity_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_velocity_control.xml new file mode 100644 index 0000000..4fda9de --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_velocity_control.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/depr/panda_gym.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/depr/panda_gym.xml new file mode 100644 index 0000000..27f3415 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/depr/panda_gym.xml @@ -0,0 +1,157 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/d435v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/d435v.stl new file mode 100644 index 0000000..809a0da Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/d435v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/finger.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/finger.stl new file mode 100644 index 0000000..3b87289 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/finger.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/fingerv.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/fingerv.stl new file mode 100644 index 0000000..0b11382 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/fingerv.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/hand.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/hand.stl new file mode 100644 index 0000000..4e82090 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/hand.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/handv.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/handv.stl new file mode 100644 index 0000000..92f60bd Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/handv.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0.stl new file mode 100644 index 0000000..def070c Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0v.stl new file mode 100644 index 0000000..72dba55 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1.stl new file mode 100644 index 0000000..426bcf2 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1v.stl new file mode 100644 index 0000000..b42f97b Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2.stl new file mode 100644 index 0000000..b369f15 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2v.stl new file mode 100644 index 0000000..d72bbe4 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3.stl new file mode 100644 index 0000000..25162ee Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3v.stl new file mode 100644 index 0000000..904de9d Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4.stl new file mode 100644 index 0000000..76c8c33 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4v.stl new file mode 100644 index 0000000..da74ed6 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5.stl new file mode 100644 index 0000000..3006a0b Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5v.stl new file mode 100644 index 0000000..f795374 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6.stl new file mode 100644 index 0000000..2e9594a Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6v.stl new file mode 100644 index 0000000..8b2a7f3 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7.stl new file mode 100644 index 0000000..0532d05 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7v.stl new file mode 100644 index 0000000..82b5946 Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7v.stl differ diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda.xml new file mode 100644 index 0000000..8718e42 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda.xml @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_bimanual.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_bimanual.xml new file mode 100644 index 0000000..b970726 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_bimanual.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_mocap.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_mocap.xml new file mode 100644 index 0000000..7de5c38 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_mocap.xml @@ -0,0 +1,140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_rod.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_rod.xml new file mode 100644 index 0000000..0a3e433 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_rod.xml @@ -0,0 +1,159 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/pandas_kit_lab.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/pandas_kit_lab.xml new file mode 100644 index 0000000..b970726 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/pandas_kit_lab.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/surroundings/assembled_scene.xml b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/assembled_scene.xml new file mode 100644 index 0000000..832c53e --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/assembled_scene.xml @@ -0,0 +1,42 @@ + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/surroundings/base.xml b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/base.xml new file mode 100644 index 0000000..7a40fbd --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/base.xml @@ -0,0 +1,19 @@ + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/surroundings/kit_lab_surrounding.xml b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/kit_lab_surrounding.xml new file mode 100644 index 0000000..ca60a9c --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/kit_lab_surrounding.xml @@ -0,0 +1,118 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fancy_gym/envs/mujoco/box_pushing/model/workspace/large_workspace.xml b/fancy_gym/envs/mujoco/box_pushing/model/workspace/large_workspace.xml new file mode 100644 index 0000000..a92eaf4 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/workspace/large_workspace.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/workspace/medium_workspace.xml b/fancy_gym/envs/mujoco/box_pushing/model/workspace/medium_workspace.xml new file mode 100644 index 0000000..7c0bf13 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/workspace/medium_workspace.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/model/workspace/small_workspace.xml b/fancy_gym/envs/mujoco/box_pushing/model/workspace/small_workspace.xml new file mode 100644 index 0000000..1a34694 --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/model/workspace/small_workspace.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py new file mode 100644 index 0000000..172005a --- /dev/null +++ b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py @@ -0,0 +1,32 @@ +from typing import Union, Tuple + +import numpy as np + +from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper + + +class MPWrapper(RawInterfaceWrapper): + + # Random x goal + random init pos + @property + def context_mask(self): + return np.hstack([ + [False] * 7, # joints position + [False] * 7, # joints velocity + [False] * 7, # joints gravity compensation + [False] * 3, # position of rod tip + [False] * 4, # orientation of rod + [True] * 3, # position of box + [True] * 4, # orientation of box + [True] * 3, # position of target + [True] * 4, # orientation of target + [True] * 1, # time + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.data.qpos[3:6].copy() + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.data.qvel[3:6].copy() diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index da7c94d..f1c25e2 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -155,12 +155,12 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = False + render = True # DMP - example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) + # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) # ProMP - example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) + # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) # Altered basis functions obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)