diff --git a/fancy_gym/envs/mujoco/box_pushing/__init__.py b/fancy_gym/envs/mujoco/box_pushing/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/box_pushing.xml b/fancy_gym/envs/mujoco/box_pushing/assets/box_pushing.xml
new file mode 100644
index 0000000..516482f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/box_pushing.xml
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/kit_lab_surrounding.xml b/fancy_gym/envs/mujoco/box_pushing/assets/kit_lab_surrounding.xml
new file mode 100644
index 0000000..ca60a9c
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/kit_lab_surrounding.xml
@@ -0,0 +1,118 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/d435v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/d435v.stl
new file mode 100644
index 0000000..809a0da
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/d435v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/finger.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/finger.stl
new file mode 100644
index 0000000..3b87289
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/finger.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/fingerv.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/fingerv.stl
new file mode 100644
index 0000000..0b11382
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/fingerv.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/hand.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/hand.stl
new file mode 100644
index 0000000..4e82090
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/hand.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/handv.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/handv.stl
new file mode 100644
index 0000000..92f60bd
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/handv.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0.stl
new file mode 100644
index 0000000..def070c
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0v.stl
new file mode 100644
index 0000000..72dba55
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link0v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1.stl
new file mode 100644
index 0000000..426bcf2
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1v.stl
new file mode 100644
index 0000000..b42f97b
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link1v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2.stl
new file mode 100644
index 0000000..b369f15
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2v.stl
new file mode 100644
index 0000000..d72bbe4
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link2v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3.stl
new file mode 100644
index 0000000..25162ee
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3v.stl
new file mode 100644
index 0000000..904de9d
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link3v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4.stl
new file mode 100644
index 0000000..76c8c33
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4v.stl
new file mode 100644
index 0000000..da74ed6
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link4v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5.stl
new file mode 100644
index 0000000..3006a0b
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5v.stl
new file mode 100644
index 0000000..f795374
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link5v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6.stl
new file mode 100644
index 0000000..2e9594a
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6v.stl
new file mode 100644
index 0000000..8b2a7f3
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link6v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7.stl
new file mode 100644
index 0000000..0532d05
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7v.stl b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7v.stl
new file mode 100644
index 0000000..82b5946
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/assets/meshes/panda/link7v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/panda_rod.xml b/fancy_gym/envs/mujoco/box_pushing/assets/panda_rod.xml
new file mode 100644
index 0000000..ac85629
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/panda_rod.xml
@@ -0,0 +1,159 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/push_box.xml b/fancy_gym/envs/mujoco/box_pushing/assets/push_box.xml
new file mode 100644
index 0000000..25313f8
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/push_box.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_mocap_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_mocap_control.xml
new file mode 100644
index 0000000..a36c987
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_mocap_control.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_position_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_position_control.xml
new file mode 100644
index 0000000..c83939a
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_position_control.xml
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_torque_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_torque_control.xml
new file mode 100644
index 0000000..7b70a75
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_torque_control.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_velocity_control.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_velocity_control.xml
new file mode 100644
index 0000000..4fda9de
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/controller/panda_velocity_control.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/depr/panda_gym.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/depr/panda_gym.xml
new file mode 100644
index 0000000..27f3415
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/depr/panda_gym.xml
@@ -0,0 +1,157 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda.xml
new file mode 100644
index 0000000..8718e42
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda.xml
@@ -0,0 +1,155 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_bimanual.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_bimanual.xml
new file mode 100644
index 0000000..b970726
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_bimanual.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_mocap.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_mocap.xml
new file mode 100644
index 0000000..7de5c38
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/panda_mocap.xml
@@ -0,0 +1,140 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/robots/pandas_kit_lab.xml b/fancy_gym/envs/mujoco/box_pushing/assets/robots/pandas_kit_lab.xml
new file mode 100644
index 0000000..b970726
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/robots/pandas_kit_lab.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/assets/surroundings/base.xml b/fancy_gym/envs/mujoco/box_pushing/assets/surroundings/base.xml
new file mode 100644
index 0000000..7a40fbd
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/assets/surroundings/base.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/box_push_env.py b/fancy_gym/envs/mujoco/box_pushing/box_push_env.py
new file mode 100644
index 0000000..63dd8b5
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/box_push_env.py
@@ -0,0 +1,416 @@
+import os
+import time
+
+import numpy as np
+import mujoco_py
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+# from alr_envs.alr.mujoco.box_pushing.box_push_utils import PushBoxReward, rot_to_quat, q_max, q_min, get_quaternion_error
+# from alr_envs.alr.mujoco.box_pushing.box_push_utils import q_dot_dot_max, rotation_distance
+# from alr_envs.alr.mujoco.box_pushing.box_push_utils import TrajectoryRecoder
+from fancy_gym.envs.mujoco.box_pushing.box_push_utils import PushBoxReward, rot_to_quat, q_max, q_min, get_quaternion_error
+from fancy_gym.envs.mujoco.box_pushing.box_push_utils import q_dot_dot_max, rotation_distance
+from fancy_gym.envs.mujoco.box_pushing.box_push_utils import TrajectoryRecoder
+
+MAX_EPISODE_STEPS = 1000
+
+INIT_BOX_POS_BOUND = np.array([[0.3, -0.45, -0.01], [0.6, 0.45, -0.01]])
+
+
+class BOX_PUSH_Env_Gym(MujocoEnv, utils.EzPickle):
+ def __init__(self, enable_gravity_comp=False, frame_skip=1, reward_type="Dense"):
+ model_path = os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml")
+ self.reward_type = reward_type
+ assert reward_type in ["Dense", "Sparse1", "Sparse2"], "reward_type must be one of Dense, Sparse1, Sparse2"
+ self.enable_gravity_comp = enable_gravity_comp
+ self.frame_skip = frame_skip
+ self.max_episode_steps = MAX_EPISODE_STEPS // self.frame_skip
+
+ self.time_steps = 0
+ self.init_qpos_box_push = np.array([
+ 0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0.,
+ 0.
+ ])
+ self.init_qvel_box_push = np.zeros(15)
+ self._id_set = False
+ self.reward = PushBoxReward()
+
+ # utilities for IK
+ self.J_reg = 1e-6
+ self.W = np.diag([1, 1, 1, 1, 1, 1, 1])
+ self.target_th_null = np.array([
+ 3.57795216e-09,
+ 1.74532920e-01,
+ 3.30500960e-08,
+ -8.72664630e-01,
+ -1.14096181e-07,
+ 1.22173047e00,
+ 7.85398126e-01,
+ ])
+
+ self.torque_bound_low = -q_dot_dot_max
+ self.torque_bound_high = q_dot_dot_max
+
+ self.episode_energy = 0.
+ self.episode_end_pos_dist = 0.
+ self.episode_end_rot_dist = 0.
+ # self.trajectory_recorder = TrajectoryRecoder(None, max_length=100)
+ # end of IK utilities
+ super(BOX_PUSH_Env_Gym, self).__init__(model_path=model_path,
+ frame_skip=self.frame_skip,
+ mujoco_bindings="mujoco_py")
+ utils.EzPickle.__init__(self)
+
+ action_space_low = np.array([-1.0] * 7)
+ action_space_high = np.array([1.0] * 7)
+ self.action_space = spaces.Box(low=action_space_low,
+ high=action_space_high,
+ dtype='float32')
+ # self.trajectory_recorder.update_sim(self.sim)
+
+ def _set_ids(self):
+ self.box_id = self.sim.model._body_name2id["box_0"]
+ self.target_id = self.sim.model._body_name2id["target_pos"]
+ self.rod_tip_site_id = self.sim.model.site_name2id("rod_tip")
+ self._id_set = True
+
+ def sample_context(self):
+ # return np.random.uniform(INIT_BOX_POS_BOUND[0],
+ # INIT_BOX_POS_BOUND[1],
+ # size=INIT_BOX_POS_BOUND[0].shape)
+ # pos = np.random.uniform(INIT_BOX_POS_BOUND[0],
+ # INIT_BOX_POS_BOUND[1],
+ # size=INIT_BOX_POS_BOUND[0].shape)
+ pos = np.array([0.4, 0.3, -0.01]) # was 0.45 0.4
+ # theta = np.random.uniform(0, np.pi * 2)
+ theta = 0.0
+ quat = rot_to_quat(theta, np.array([0, 0, 1]))
+ return np.concatenate((pos, quat))
+
+ def generate_specified_context(self, goal_x, goal_y, goal_rot):
+ pos = np.array([goal_x, goal_y, -0.01])
+ quat = rot_to_quat(goal_rot, np.array([0, 0, 1]))
+ return np.concatenate((pos, quat))
+
+ def reset_model(self):
+ self.reward.reset()
+
+ self.episode_energy = 0.
+ self.episode_end_pos_dist = 0.
+ self.episode_end_rot_dist = 0.
+
+ self.set_state(self.init_qpos_box_push, self.init_qvel_box_push)
+ box_init_pos = self.sample_context()
+ box_init_pos[0] = 0.4
+ box_init_pos[1] = 0.3
+ box_init_pos[-4:] = np.array([0, 0, 0, 1.])
+
+ box_target_pos = self.sample_context()
+
+ # if both box and target are in the same position, sample again
+ # while np.linalg.norm(box_init_pos[:3] - box_target_pos[:3]) < 0.3:
+ # box_target_pos = self.sample_context()
+ box_target_pos[0] = 0.4 # was 0.4
+ box_target_pos[1] = -0.3 # was -0.3
+ # box_target_pos = self.generate_specified_context(0.45, -0.25, np.pi)
+
+ self.sim.model.body_pos[2] = box_target_pos[:3]
+ self.sim.model.body_quat[2] = box_target_pos[-4:]
+
+ self.sim.model.body_pos[3] = box_target_pos[:3]
+ self.sim.model.body_quat[3] = box_target_pos[-4:]
+
+ desired_ee_pos = box_init_pos[:3].copy()
+ desired_ee_pos[2] += 0.15
+ desired_ee_quat = np.array([0, 1, 0, 0])
+ desired_joint_pos = self.findJointPosition(desired_ee_pos,
+ desired_ee_quat)
+ self.sim.data.qpos[:7] = desired_joint_pos
+ self.sim.data.set_joint_qpos('box_joint', box_init_pos)
+ # self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
+ self.sim.forward()
+ self.time_steps = 0
+
+ # reset trajectory recorder
+ # self.trajectory_recorder.plot_trajectories()
+ # self.trajectory_recorder.save_trajectory("ppo_dense")
+ # self.trajectory_recorder.reset()
+
+ return self._get_obs()
+
+ def _get_obs(self):
+ box_pos = self.sim.data.get_body_xpos("box_0")
+ box_quat = self.sim.data.get_body_xquat("box_0")
+ target_pos = self.sim.data.get_body_xpos("replan_target_pos")
+ target_quat = self.sim.data.get_body_xquat("replan_target_pos")
+ rod_tip_pos = self.sim.data.site_xpos[self.rod_tip_site_id]
+ rod_quat = self.sim.data.get_body_xquat("push_rod")
+ obs = np.concatenate([
+ self.sim.data.qpos[:7].copy(),
+ self.sim.data.qvel[:7].copy(),
+ self.sim.data.qfrc_bias[:7].copy(),
+ # self.sim.data.qfrc_actuator[:7].copy(),
+ rod_tip_pos,
+ rod_quat,
+ box_pos,
+ box_quat,
+ target_pos,
+ target_quat
+ # np.array([self.time_steps / 100.])
+ ])
+ return obs
+
+ def step(self, action):
+ if not self._id_set:
+ self._set_ids()
+ invalid_flag = False
+ done = False
+ self.time_steps += 1
+
+ action = np.clip(action, self.action_space.low, self.action_space.high)
+
+ if self.enable_gravity_comp:
+ action = action * 10. # rescale action
+ resultant_action = action + self.sim.data.qfrc_bias[:7].copy()
+ else:
+ resultant_action = action * 10.
+
+ resultant_action = np.clip(resultant_action, self.torque_bound_low,
+ self.torque_bound_high)
+
+ # the replan_target_pos was align with the target_pos before 800
+ # if self.time_steps == 20:
+ # new_target_pos = np.array([1., 1., -0.01])
+ # new_target_quat = np.array([0, 1, 0, 0])
+ # while new_target_pos[0] < 0.4 or new_target_pos[0] > 0.6 or abs(
+ # new_target_pos[1]) > 0.45:
+ # pos_change = np.random.uniform(-0.25, 0.2, 3)
+ # pos_change[-1] = 0.
+ # # pos_change[-2] = 0.
+ # # self.sim.model.body_pos[3] = self.sim.data.get_body_xpos("target_pos") + pos_change
+ # # self.sim.model.body_quat[3] = self.sim.data.get_body_xquat("target_pos")
+ # new_target_pos = self.sim.data.get_body_xpos(
+ # "target_pos") + pos_change
+ # # new_target_quat = self.sim.data.get_body_xquat("target_pos")
+ # old_target_quat = self.sim.data.get_body_xquat("target_pos")
+ # new_target_quat = None
+ # while new_target_quat is None or rotation_distance(
+ # new_target_quat, old_target_quat) > np.pi / 2.:
+ # theta = np.random.uniform(0, np.pi * 2)
+ # new_target_quat = rot_to_quat(theta, np.array([0, 0, 1]))
+ #
+ # self.sim.model.body_pos[3] = new_target_pos
+ # self.sim.model.body_quat[3] = new_target_quat
+ # self.sim.forward()
+
+ try:
+ self.do_simulation(resultant_action, self.frame_skip)
+ except mujoco_py.MujocoException as e:
+ print(e)
+ invalid_flag = True
+
+ # record the trajectory
+ # if self.time_steps % (10//self.frame_skip) == 0:
+ # self.trajectory_recorder.record()
+
+ box_pos = self.sim.data.get_body_xpos("box_0")
+ box_quat = self.sim.data.get_body_xquat("box_0")
+ target_pos = self.sim.data.get_body_xpos("replan_target_pos")
+ target_quat = self.sim.data.get_body_xquat("replan_target_pos")
+ rod_tip_pos = self.sim.data.site_xpos[self.rod_tip_site_id].copy()
+ rod_quat = self.sim.data.get_body_xquat("push_rod")
+ qpos = self.sim.data.qpos[:7].copy()
+ qvel = self.sim.data.qvel[:7].copy()
+
+
+ episode_end = False
+ self.episode_energy += np.sum(np.square(action))
+ if self.time_steps >= 100 - 1:
+ episode_end = True
+ done = True
+ self.episode_end_pos_dist = np.linalg.norm(box_pos - target_pos)
+ self.episode_end_rot_dist = rotation_distance(
+ box_quat, target_quat)
+
+ if self.reward_type == "Dense":
+ reward = self.reward.step_reward(box_pos, box_quat, target_pos,
+ target_quat, rod_tip_pos, rod_quat,
+ qpos, qvel, action)
+ elif self.reward_type == "Sparse1":
+ reward = self.reward.sparse1_reward(episode_end, box_pos, box_quat,
+ target_pos, target_quat,
+ rod_tip_pos, rod_quat, qpos, qvel,
+ action)
+ elif self.reward_type == "Sparse2":
+ reward = self.reward.sparse2_reward(episode_end, box_pos, box_quat,
+ target_pos, target_quat, rod_tip_pos,
+ rod_quat, qpos, qvel, action)
+ else:
+ raise NotImplementedError("Unknown reward type: {}".format(
+ self.reward_type))
+
+ if invalid_flag:
+ reward = -25
+
+ obs = self._get_obs()
+ infos = {
+ 'episode_end':
+ episode_end,
+ 'box_goal_pos_dist':
+ self.episode_end_pos_dist,
+ 'box_goal_rot_dist':
+ self.episode_end_rot_dist,
+ 'episode_energy':
+ self.episode_energy if episode_end else 0.,
+ 'is_success':
+ True if episode_end and self.episode_end_pos_dist < 0.05
+ and self.episode_end_rot_dist < 0.5 else False,
+ 'num_steps':
+ self.time_steps
+ }
+ return obs, reward, done, infos
+
+ def getJacobian(self, body_id="tcp", q=np.zeros(7)):
+
+ self.sim.data.qpos[:7] = q
+ self.sim.forward()
+ jacp = self.sim.data.get_body_jacp(body_id).reshape(3, -1)[:, :7]
+ jacr = self.sim.data.get_body_jacr(body_id).reshape(3, -1)[:, :7]
+ jac = np.concatenate((jacp, jacr), axis=0)
+ return jac
+
+ def findJointPosition(self, desiredPos=None, desiredQuat=None):
+
+ eps = 1e-5
+ IT_MAX = 1000
+ DT = 1e-3
+
+ i = 0
+ self.pgain = [
+ 33.9403713446798,
+ 30.9403713446798,
+ 33.9403713446798,
+ 27.69370238555632,
+ 33.98706171459314,
+ 30.9185531893281,
+ ]
+ self.pgain_null = 5 * np.array([
+ 7.675519770796831,
+ 2.676935478437176,
+ 8.539040163444975,
+ 1.270446361314313,
+ 8.87752182480855,
+ 2.186782233762969,
+ 4.414432577659688,
+ ])
+ self.pgain_limit = 20
+
+ q = self.sim.data.qpos[:7].copy()
+ qd_d = np.zeros(q.shape)
+ oldErrNorm = np.inf
+
+ # if (desiredPos is None):
+ # desiredPos = self.desiredTaskPosition[:3]
+ #
+ # if (desiredQuat is None):
+ # desiredQuat = self.desiredTaskPosition[3:]
+
+ while True:
+ oldQ = q
+ q = q + DT * qd_d
+
+ q = np.clip(q, q_min, q_max)
+
+ # cartPos, orient = self.sim.data.site_xpos[self.rod_tip_site_id]
+ cartPos = self.sim.data.get_body_xpos("tcp")
+ orient = self.sim.data.get_body_xquat("tcp")
+ cpos_err = desiredPos - cartPos
+
+ if np.linalg.norm(orient -
+ desiredQuat) > np.linalg.norm(orient +
+ desiredQuat):
+ orient = -orient
+
+ cpos_err = np.clip(cpos_err, -0.1, 0.1)
+ cquat_err = np.clip(
+ get_quaternion_error(orient, desiredQuat),
+ -0.5,
+ 0.5,
+ )
+ err = np.hstack((cpos_err, cquat_err))
+
+ errNorm = np.sum(cpos_err**2) + np.sum((orient - desiredQuat)**2)
+
+ if errNorm > oldErrNorm:
+ q = oldQ
+ DT = DT * 0.7
+ continue
+ else:
+ DT = DT * 1.025
+
+ if errNorm < eps:
+ success = True
+ break
+ if i >= IT_MAX:
+ success = False
+ break
+
+ # if not i % 1:
+ #print('%d: error = %s, %s, %s' % (i, errNorm, oldErrNorm, DT))
+
+ oldErrNorm = errNorm
+
+ J = self.getJacobian(q=q)
+
+ Jw = J.dot(self.W)
+
+ # J * W * J' + reg * I
+ JwJ_reg = Jw.dot(J.T) + self.J_reg * np.eye(J.shape[0])
+
+ # Null space movement
+ qd_null = self.pgain_null * (self.target_th_null - q)
+
+ margin_to_limit = 0.1
+ qd_null_limit = np.zeros(qd_null.shape)
+ qd_null_limit_max = self.pgain_limit * (q_max - margin_to_limit -
+ q)
+ qd_null_limit_min = self.pgain_limit * (q_min + margin_to_limit -
+ q)
+ qd_null_limit[q > q_max - margin_to_limit] += qd_null_limit_max[
+ q > q_max - margin_to_limit]
+ qd_null_limit[q < q_min + margin_to_limit] += qd_null_limit_min[
+ q < q_min + margin_to_limit]
+
+ qd_null += qd_null_limit
+ # W J.T (J W J' + reg I)^-1 xd_d + (I - W J.T (J W J' + reg I)^-1 J qd_null
+ qd_d = np.linalg.solve(JwJ_reg, self.pgain * err - J.dot(qd_null))
+ # qd_d = self.pgain * err
+ qd_d = self.W.dot(J.transpose()).dot(qd_d) + qd_null
+
+ i += 1
+
+ # print("Final IK error (%d iterations): %s" % (i, errNorm))
+ return q
+
+
+if __name__ == "__main__":
+ env = BOX_PUSH_Env_Gym(enable_gravity_comp=True, frame_skip=10)
+ env.reset()
+ for j in range(60):
+ old_obs = env.reset()
+ done = False
+ for _ in range(100):
+ env.render(mode='human')
+ # action = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
+ # action = np.array([0.0] * 7)
+ action = env.action_space.sample()
+ obs, reward, done, info = env.step(action)
+ qpos = env.sim.data.qpos
+ # qpos[:7] = [2.8973, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973]
+ # qpos[:7] = [-2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973]
+ # qvel = env.sim.data.qvel
+ # qvel[:7] = [0, 0, 0, 0, 0, 0, 0]
+ # env.set_state(qpos, qvel)
+ # print("diff between old and new obs: ", np.linalg.norm(obs - old_obs))
+ old_obs = obs
+ print("========================================")
diff --git a/fancy_gym/envs/mujoco/box_pushing/box_push_utils.py b/fancy_gym/envs/mujoco/box_pushing/box_push_utils.py
new file mode 100644
index 0000000..e543607
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/box_push_utils.py
@@ -0,0 +1,342 @@
+import matplotlib.pyplot as plt
+
+import numpy as np
+import pandas as pd
+
+from gym.envs.mujoco.mujoco_env import MujocoEnv
+
+from scipy.interpolate import make_interp_spline
+
+# joint constraints for Franka robot
+q_max = np.array([2.8973, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973])
+q_min = np.array(
+ [-2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973])
+
+q_dot_max = np.array([2.1750, 2.1750, 2.1750, 2.1750, 2.6100, 2.6100, 2.6100])
+q_dot_dot_max = np.array([90., 90., 90., 90., 12., 12., 12.])
+
+desired_rod_quat = np.array([0.0, 1.0, 0.0, 0.0])
+
+
+def get_quaternion_error(curr_quat, des_quat):
+ """
+ Calculates the difference between the current quaternion and the desired quaternion.
+ See Siciliano textbook page 140 Eq 3.91
+
+ :param curr_quat: current quaternion
+ :param des_quat: desired quaternion
+ :return: difference between current quaternion and desired quaternion
+ """
+ quatError = np.zeros((3, ))
+
+ quatError[0] = (curr_quat[0] * des_quat[1] - des_quat[0] * curr_quat[1] -
+ curr_quat[3] * des_quat[2] + curr_quat[2] * des_quat[3])
+
+ quatError[1] = (curr_quat[0] * des_quat[2] - des_quat[0] * curr_quat[2] +
+ curr_quat[3] * des_quat[1] - curr_quat[1] * des_quat[3])
+
+ quatError[2] = (curr_quat[0] * des_quat[3] - des_quat[0] * curr_quat[3] -
+ curr_quat[2] * des_quat[1] + curr_quat[1] * des_quat[2])
+
+ return quatError
+
+
+def rotation_distance(p: np.array, q: np.array):
+ """
+ p: quaternion
+ q: quaternion
+ theta: rotation angle between p and q (rad)
+ """
+ assert p.shape == q.shape, "p and q should be quaternion"
+ product = p[0] * q[0] + p[1] * q[1] + p[2] * q[2] + p[3] * q[3]
+ theta = 2 * np.arccos(abs(product))
+ return theta
+
+
+def joint_limit_violate_penalty(joint_pos,
+ joint_vel,
+ enable_pos_limit=False,
+ enable_vel_limit=False):
+ penalty = 0.
+ p_coeff = 1.
+ v_coeff = 1.
+ # q_limit
+ if enable_pos_limit:
+ higher_indice = np.where(joint_pos > q_max)
+ lower_indice = np.where(joint_pos < q_min)
+ higher_error = joint_pos - q_max
+ lower_error = q_min - joint_pos
+ penalty -= p_coeff * (abs(np.sum(higher_error[higher_indice])) +
+ abs(np.sum(lower_error[lower_indice])))
+ # q_dot_limit
+ if enable_vel_limit:
+ q_dot_error = abs(joint_vel) - abs(q_dot_max)
+ q_dot_violate_idx = np.where(q_dot_error > 0.)
+ penalty -= v_coeff * abs(np.sum(q_dot_error[q_dot_violate_idx]))
+ return penalty
+
+
+def rot_to_quat(theta, axis):
+ quant = np.zeros(4)
+ quant[0] = np.sin(theta / 2.)
+ quant[1] = np.cos(theta / 2.) * axis[0]
+ quant[2] = np.cos(theta / 2.) * axis[1]
+ quant[3] = np.cos(theta / 2.) * axis[2]
+ return quant
+
+
+class PushBoxReward:
+ def __init__(self):
+ self.box_size = np.array([0.05, 0.05, 0.045])
+ self.step_reward_joint_penalty = []
+ self.step_reward_tcp = []
+ self.step_reward_pos = []
+ self.step_reward_rot = []
+ self.step_reward_flip = []
+ self.energy_cost = []
+
+ def get_reward_trajectory(self):
+ return np.array(self.step_reward_pos.copy()), np.array(
+ self.step_reward_rot.copy())
+
+ def reset(self):
+ self.step_reward_pos = []
+ self.step_reward_rot = []
+ self.step_reward_tcp = []
+ self.step_reward_joint_penalty = []
+ self.step_reward_flip = []
+ self.energy_cost = []
+
+ def step_reward(self, box_pos, box_quat, target_pos, target_quat,
+ rod_tip_pos, rod_quat, qpos, qvel, action):
+
+ joint_penalty = joint_limit_violate_penalty(qpos,
+ qvel,
+ enable_pos_limit=True,
+ enable_vel_limit=True)
+ tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100)
+ box_goal_pos_dist_reward = -3.5 * np.linalg.norm(box_pos - target_pos)
+ box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi
+ energy_cost = -0.0005 * np.sum(np.square(action))
+
+ reward = joint_penalty + tcp_box_dist_reward + \
+ box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost
+
+ rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat)
+ if rod_inclined_angle > np.pi / 4:
+ reward -= rod_inclined_angle / (np.pi)
+
+ # self.step_reward_joint_penalty.append(joint_penalty)
+ # self.step_reward_tcp.append(tcp_box_dist_reward)
+ # self.step_reward_pos.append(box_goal_pos_dist_reward)
+ # self.step_reward_rot.append(box_goal_rot_dist_reward)
+ # self.energy_cost.append(energy_cost)
+ return reward
+
+ def sparse1_reward(self, episodic_end, box_pos, box_quat, target_pos,
+ target_quat, rod_tip_pos, rod_quat, qpos, qvel, action):
+
+ reward = 0.
+ joint_penalty = joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True)
+ energy_cost = -0.0005 * np.sum(np.square(action))
+ tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100)
+ reward += joint_penalty + tcp_box_dist_reward + energy_cost
+ rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat)
+
+ if rod_inclined_angle > np.pi / 4:
+ reward -= rod_inclined_angle / (np.pi)
+
+ if not episodic_end:
+ return reward
+
+ box_goal_dist = np.linalg.norm(box_pos - target_pos)
+
+ box_goal_pos_dist_reward = -3.5 * box_goal_dist * 100
+ box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi * 100
+
+ reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward
+
+ return reward
+
+ def sparse2_reward(self, episodic_end, box_pos, box_quat, target_pos,
+ target_quat, rod_tip_pos, rod_quat, qpos, qvel, action):
+
+ reward = 0.
+ joint_penalty = joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True)
+ energy_cost = -0.0005 * np.sum(np.square(action))
+ tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100)
+ reward += joint_penalty + tcp_box_dist_reward + energy_cost
+ rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat)
+
+ if rod_inclined_angle > np.pi / 4:
+ reward -= rod_inclined_angle / (np.pi)
+
+ if not episodic_end:
+ return reward
+
+ box_goal_dist = np.linalg.norm(box_pos - target_pos)
+
+ if box_goal_dist < 0.1:
+ reward += 300
+ box_goal_pos_dist_reward = np.clip(- 3.5 * box_goal_dist * 100 * 3, -100, 0)
+ box_goal_rot_dist_reward = np.clip(- rotation_distance(box_quat, target_quat)/np.pi * 100 * 1.5, -100, 0)
+ reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward
+
+ return reward
+
+ def plotRewards(self):
+ length = np.array(self.step_reward_pos).shape[0]
+ t = np.arange(0, length)
+ fig, axs = plt.subplots(1, 1)
+ axs.plot(t, self.step_reward_pos, label='pos')
+ axs.plot(t, self.step_reward_rot, label='rot')
+ axs.plot(t, self.step_reward_tcp, label='tcp')
+ axs.plot(t,
+ np.array(self.step_reward_joint_penalty) / 0.5,
+ label='joint_penalty')
+ # axs.plot(t, self.step_reward_flip, label='flip')
+ axs.plot(t, self.energy_cost, label='energy_cost')
+ plt.legend(loc='upper right')
+ plt.xlabel('Steps')
+ plt.title('Reward')
+ plt.show()
+
+class TrajectoryRecoder(object):
+ def __init__(self, sim, prefix="episodic", max_length=1000):
+ self.sim = sim
+ self.prefix = prefix
+ self.max_length = max_length
+ self.reset()
+
+ def update_sim(self, sim):
+ self.sim = sim
+
+ def reset(self):
+ self.trajectory = []
+ self.box_pos_dist_trajectory = [] # trajectory of box position distance to goal
+ self.box_rot_dist_trajectory = [] # trajectory of box rotation distance to goal
+ self.joint_pos_trajectory = [] # trajectory of joint position
+ self.joint_vel_trajectory = [] # trajectory of joint velocity
+ self.joint_torque_trajectory = [] # trajectory of joint torque
+ self.length = 0
+
+ def record(self):
+ if self.sim is None:
+ return
+
+ self.joint_pos_trajectory.append(self.sim.data.qpos[:7].copy())
+ self.joint_vel_trajectory.append(self.sim.data.qvel[:7].copy())
+ self.joint_torque_trajectory.append(self.sim.data.qfrc_actuator[:7].copy())
+
+ box_pos = self.sim.data.get_body_xpos("box_0")
+ box_quat = self.sim.data.get_body_xquat("box_0")
+ target_pos = self.sim.data.get_body_xpos("replan_target_pos")
+ target_quat = self.sim.data.get_body_xquat("replan_target_pos")
+
+ self.box_pos_dist_trajectory.append(np.linalg.norm(box_pos - target_pos))
+ self.box_rot_dist_trajectory.append(rotation_distance(box_quat, target_quat))
+
+ self.length += 1
+ if self.length > self.max_length:
+ self.joint_vel_trajectory.pop(0)
+ self.joint_pos_trajectory.pop(0)
+ self.joint_torque_trajectory.pop(0)
+ self.box_pos_dist_trajectory.pop(0)
+ self.box_rot_dist_trajectory.pop(0)
+ self.length -= 1
+
+ def get_trajectory(self):
+ return self.trajectory
+
+ def get_length(self):
+ return self.length
+
+ def plot_trajectories(self):
+ self.plot_trajectory(self.joint_pos_trajectory,
+ "joint_pos_trajectory")
+ self.plot_trajectory(self.joint_vel_trajectory,
+ "joint_vel_trajectory")
+ self.plot_trajectory(self.joint_torque_trajectory,
+ "joint_acc_trajectory")
+ self.plot_trajectory(self.box_pos_dist_trajectory,
+ "box_pos_dist_trajectory")
+ self.plot_trajectory(self.box_rot_dist_trajectory,
+ "box_rot_dist_trajectory")
+
+ def plot_trajectory(self, trajectory, title: str):
+ if len(trajectory) == 0:
+ return
+ trajectory = np.array(trajectory)
+ length = trajectory.shape[0]
+ t = np.arange(0, length)
+ dim = trajectory.shape[1] if len(trajectory.shape) > 1 else 1
+ fig, axs = plt.subplots(dim, 1, sharex=True)
+ if dim == 1:
+ axs.plot(t, trajectory)
+ else:
+ for i in range(dim):
+ axs[i].plot(t, trajectory[:, i])
+ # plt.legend(loc='upper right')
+ plt.xlabel('Steps')
+ plt.title(self.prefix + ": " + title)
+ plt.show()
+
+ def plot_box_trajectory(self):
+ pass
+
+ def save_trajectory(self, path: str):
+
+ joint_pos_trajectory = np.array(self.joint_pos_trajectory)
+ joint_vel_trajectory = np.array(self.joint_vel_trajectory)
+ joint_torque_trajectory = np.array(self.joint_torque_trajectory)
+ box_pos_dist_trajectory = np.array(self.box_pos_dist_trajectory)
+ box_rot_dist_trajectory = np.array(self.box_rot_dist_trajectory)
+ pd_dict = {}
+ if joint_pos_trajectory.shape[0] > 0:
+ for i in range(7):
+ pd_dict["qpos_" + str(i)] = joint_pos_trajectory[:, i]
+ pd_dict["qvel_" + str(i)] = joint_vel_trajectory[:, i]
+ pd_dict["qfrc_" + str(i)] = joint_torque_trajectory[:, i]
+ pd_dict["box_pos_dist"] = box_pos_dist_trajectory
+ pd_dict["box_rot_dist"] = box_rot_dist_trajectory
+
+ df = pd.DataFrame(pd_dict)
+ folder_path = "/home/i53/student/hongyi_zhou/py_ws/alr_envs/alr_envs/alr/mujoco/box_pushing/recorded_trajectory/"
+ save_path = folder_path + path + ".csv"
+ df.to_csv(save_path)
+
+def plot_trajectories(traj_dict:dict, traj_name:str):
+
+ fig, axs = plt.subplots(1, 1, sharex=True)
+
+ for key in traj_dict.keys():
+ t = traj_dict[key][traj_name].shape[0]
+ t = np.arange(0, t)
+ spline = make_interp_spline(t, traj_dict[key][traj_name])
+ t_ = np.linspace(t.min(), t.max(), 1000)
+ y_ = spline(t_)
+ axs.plot(t_, y_, label=key)
+
+ plt.legend(loc='upper right')
+ plt.title(traj_name)
+ plt.xlabel('Steps')
+ plt.show()
+
+
+if __name__ == "__main__":
+
+ ## sparse1 nodistcheck sparse2 withdistcheck
+ folder_path = "/home/i53/student/hongyi_zhou/py_ws/alr_envs/alr_envs/alr/mujoco/box_pushing/recorded_trajectory/"
+ exp_name = ["ppo_dense", "promp_dense_reward", "promp_sparse_nodistcheck", "promp_sparse_withdistcheck"]
+ # exp_name = ["promp_sparse_nodistcheck"]
+ trajectory_dict = { }
+ for name in exp_name:
+ data_path = folder_path + name + ".csv"
+ df = pd.read_csv(data_path)
+ trajectory_dict[name] = df
+
+ # plot_trajectories(trajectory_dict, "box_pos_dist")
+ # plot_trajectories(trajectory_dict, "box_rot_dist")
+ plot_trajectories(trajectory_dict, "qpos_6")
+ plot_trajectories(trajectory_dict, "qvel_6")
+ plot_trajectories(trajectory_dict, "qfrc_6")
diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
new file mode 100644
index 0000000..73a2412
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
@@ -0,0 +1,248 @@
+import os
+
+import numpy as np
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import rot_to_quat, get_quaternion_error, rotation_distance
+from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import q_max, q_min, q_dot_max, q_torque_max
+from fancy_gym.envs.mujoco.box_pushing.box_pushing_utils import BoxPushingReward
+
+import mujoco
+
+MAX_EPISODE_STEPS_BOX_PUSHING = 1000
+
+BOX_POS_BOUND = np.array([[0.3, -0.45, -0.01], [0.6, 0.45, -0.01]])
+
+class BoxPushingEnv(MujocoEnv, utils.EzPickle):
+ """
+ franka box pushing environment
+ action space:
+ normalized joints torque * 7 , range [-1, 1]
+ observation space:
+
+ rewards:
+ 1. dense reward
+ 2. time-depend sparse reward
+ 3. time-spatial-depend sparse reward
+ """
+
+ def __init__(self, reward_type: str = "dense", frame_skip: int = 10):
+ utils.EzPickle.__init__(**locals())
+ self._steps = 0
+ self.init_qpos_box_pushing = np.array([0., 0., 0., -1.5, 0., 1.5, 0., 0., 0., 0.6, 0.45, 0.0, 1., 0., 0., 0.])
+ self.init_qvel_box_pushing = np.zeros(15)
+ assert reward_type in ["dense", "temporal_sparse", "temporal_spatial_sparse"], "unrecognized reward type"
+ self.reward = BoxPushingReward(reward_type, q_max, q_min, q_dot_max)
+ MujocoEnv.__init__(self,
+ model_path=os.path.join(os.path.dirname(__file__), "assets", "box_pushing.xml"),
+ frame_skip=frame_skip,
+ mujoco_bindings="mujoco")
+ self.action_space = spaces.Box(low=-1, high=1, shape=(7,))
+
+ def step(self, action):
+ episode_end = False
+ action = 10 * np.clip(action, self.action_space.low, self.action_space.high)
+ resultant_action = np.clip(action + self.data.qfrc_bias[:7].copy(), -q_torque_max, q_torque_max)
+
+ unstable_simulation = False
+
+ try:
+ self.do_simulation(resultant_action, self.frame_skip)
+ except Exception as e:
+ print(e)
+ unstable_simulation = True
+
+ self._steps += 1
+ if self._steps >= MAX_EPISODE_STEPS_BOX_PUSHING:
+ episode_end = True
+
+ box_pos = self.data.body("box_0").xpos.copy()
+ box_quat = self.data.body("box_0").xquat.copy()
+ target_pos = self.data.body("replan_target_pos").xpos.copy()
+ target_quat = self.data.body("replan_target_pos").xquat.copy()
+ rod_tip_pos = self.data.site("rod_tip").xpos.copy()
+ rod_quat = self.data.body("push_rod").xquat.copy()
+ qpos = self.data.qpos[:7].copy()
+ qvel = self.data.qvel[:7].copy()
+
+ if not unstable_simulation:
+ reward = self.reward.get_reward(episode_end, box_pos, box_quat, target_pos, target_quat,
+ rod_tip_pos, rod_quat, qpos, qvel, action)
+ else:
+ reward = -50
+
+ obs = self._get_obs()
+ infos = dict()
+ return obs, reward, episode_end, infos
+
+ def reset_model(self):
+ # rest box to initial position
+ self.set_state(self.init_qpos_box_pushing, self.init_qvel_box_pushing)
+ box_init_pos = np.array([0.4, 0.3, -0.01, 0.0, 0.0, 0.0, 1.0])
+ self.data.body("box_0").xpos = box_init_pos[:3]
+ self.data.body("box_0").xquat = box_init_pos[3:]
+
+ # set target position
+ box_target_pos = self.sample_context()
+ self.model.body_pos[2] = box_target_pos[:3]
+ self.model.body_quat[2] = box_target_pos[-4:]
+ self.model.body_pos[3] = box_target_pos[:3]
+ self.model.body_quat[3] = box_target_pos[-4:]
+
+ # set the robot to the right configuration (rod tip in the box)
+ desired_tcp_pos = box_target_pos[:3] + np.array([0.0, 0.0, 0.15])
+ desired_tcp_quat = np.array([0, 1, 0, 0])
+ desired_joint_pos = self.calculateOfflineIK(desired_tcp_pos, desired_tcp_quat)
+ self.data.qpos[:7] = desired_joint_pos
+
+ mujoco.mj_forward(self.model, self.data)
+ self._steps = 0
+
+ return self._get_obs()
+
+ def sample_context(self):
+ pos = np.random.uniform(low=BOX_POS_BOUND[0], high=BOX_POS_BOUND[1], size=3)
+ theta = np.random.uniform(low=0, high=np.pi * 2)
+ quat = rot_to_quat(theta, np.array([0, 0, 1]))
+ return np.concatenate([pos, quat])
+
+ def _get_obs(self):
+ obs = np.concatenate([
+ self.data.qpos[:7].copy(), # joint position
+ self.data.qvel[:7].copy(), # joint velocity
+ self.data.qfrc_bias[:7].copy(), # joint gravity compensation
+ self.data.site("rod_tip").xpos.copy(), # position of rod tip
+ self.data.body("push_rod").xquat.copy(), # orientation of rod
+ self.data.body("box_0").xpos.copy(), # position of box
+ self.data.body("box_0").xquat.copy(), # orientation of box
+ self.data.body("replan_target_pos").xpos.copy(), # position of target
+ self.data.body("replan_target_pos").xquat.copy() # orientation of target
+ ])
+ return obs
+
+ def get_body_jacp(self, name):
+ id = mujoco.mj_name2id(self.model, 1, name)
+ jacp = np.zeros((3, self.model.nv))
+ mujoco.mj_jacBody(self.model, self.data, jacp, None, id)
+ return jacp
+
+ def get_body_jacr(self, name):
+ id = mujoco.mj_name2id(self.model, 1, name)
+ jacr = np.zeros((3, self.model.nv))
+ mujoco.mj_jacBody(self.model, self.data, None, jacr, id)
+ return jacr
+
+ def calculateOfflineIK(self, desired_cart_pos, desired_cart_quat):
+ """
+ calculate offline inverse kinematics for franka pandas
+ :param desired_cart_pos: desired cartesian position of tool center point
+ :param desired_cart_quat: desired cartesian quaternion of tool center point
+ :return: joint angles
+ """
+ J_reg = 1e-6
+ w = np.diag([1, 1, 1, 1, 1, 1, 1])
+ target_theta_null = np.array([
+ 3.57795216e-09,
+ 1.74532920e-01,
+ 3.30500960e-08,
+ -8.72664630e-01,
+ -1.14096181e-07,
+ 1.22173047e00,
+ 7.85398126e-01])
+ eps = 1e-5 # threshold for convergence
+ IT_MAX = 1000
+ dt = 1e-3
+ i = 0
+ pgain = [
+ 33.9403713446798,
+ 30.9403713446798,
+ 33.9403713446798,
+ 27.69370238555632,
+ 33.98706171459314,
+ 30.9185531893281,
+ ]
+ pgain_null = 5 * np.array([
+ 7.675519770796831,
+ 2.676935478437176,
+ 8.539040163444975,
+ 1.270446361314313,
+ 8.87752182480855,
+ 2.186782233762969,
+ 4.414432577659688,
+ ])
+ pgain_limit = 20
+ q = self.data.qpos[:7].copy()
+ qd_d = np.zeros(q.shape)
+ old_err_norm = np.inf
+
+ while True:
+ q_old = q
+ q = q + dt * qd_d
+ q = np.clip(q, q_min, q_max)
+ current_cart_pos = self.data.body("tcp").xpos.copy()
+ current_cart_quat = self.data.body("tcp").xquat.copy()
+
+ cart_pos_error = np.clip(desired_cart_pos - current_cart_pos, -0.1, 0.1)
+
+ if np.linalg.norm(current_cart_quat - desired_cart_quat) > np.linalg.norm(current_cart_quat + desired_cart_quat):
+ current_cart_quat = -current_cart_quat
+ cart_quat_error = np.clip(get_quaternion_error(current_cart_quat, desired_cart_quat), -0.5, 0.5)
+
+ err = np.hstack((cart_pos_error, cart_quat_error))
+ err_norm = np.sum(cart_pos_error**2) + np.sum((current_cart_quat - desired_cart_quat)**2)
+ print("err_norm: {}, old_err_norm: {}".format(err_norm, old_err_norm))
+ if err_norm > old_err_norm:
+ q = q_old
+ dt = 0.7 * dt
+ continue
+ else:
+ dt = 1.025 * dt
+
+ if err_norm < eps:
+ break
+ if i > IT_MAX:
+ break
+
+ old_err_norm = err_norm
+
+ ### get Jacobian by mujoco
+ self.data.qpos[:7] = q
+ mujoco.mj_forward(self.model, self.data)
+ jacp = self.get_body_jacp("tcp")[:, :7].copy()
+ jacr = self.get_body_jacr("tcp")[:, :7].copy()
+ J = np.concatenate((jacp, jacr), axis=0)
+
+ Jw = J.dot(w)
+
+ # J * W * J.T + J_reg * I
+ JwJ_reg = Jw.dot(J.T) + J_reg * np.eye(J.shape[0])
+
+ # Null space velocity, points to home position
+ qd_null = pgain_null * (target_theta_null - q)
+
+ margin_to_limit = 0.1
+ qd_null_limit = np.zeros(qd_null.shape)
+ qd_null_limit_max = pgain_limit * (q_max - margin_to_limit - q)
+ qd_null_limit_min = pgain_limit * (q_min + margin_to_limit - q)
+ qd_null_limit[q > q_max - margin_to_limit] += qd_null_limit_max[q > q_max - margin_to_limit]
+ qd_null_limit[q < q_min + margin_to_limit] += qd_null_limit_min[q < q_min + margin_to_limit]
+ qd_null += qd_null_limit
+
+ # W J.T (J W J' + reg I)^-1 xd_d + (I - W J.T (J W J' + reg I)^-1 J qd_null
+ qd_d = np.linalg.solve(JwJ_reg, pgain * err - J.dot(qd_null))
+
+ qd_d = w.dot(J.transpose()).dot(qd_d) + qd_null
+
+ i += 1
+
+ return q
+
+if __name__=="__main__":
+ env = BoxPushingEnv(reward_type="dense", frame_skip=10)
+ env.reset()
+ for i in range(100):
+ env.reset()
+ for _ in range(100):
+ env.render("human")
+ action = env.action_space.sample()
+ obs, reward, done, info = env.step(action)
diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py
new file mode 100644
index 0000000..cfa15ff
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_utils.py
@@ -0,0 +1,193 @@
+import numpy as np
+
+
+# joint constraints for Franka robot
+q_max = np.array([2.8973, 1.7628, 2.8973, -0.0698, 2.8973, 3.7525, 2.8973])
+q_min = np.array([-2.8973, -1.7628, -2.8973, -3.0718, -2.8973, -0.0175, -2.8973])
+
+q_dot_max = np.array([2.1750, 2.1750, 2.1750, 2.1750, 2.6100, 2.6100, 2.6100])
+q_torque_max = np.array([90., 90., 90., 90., 12., 12., 12.])
+#
+desired_rod_quat = np.array([0.0, 1.0, 0.0, 0.0])
+
+def get_quaternion_error(curr_quat, des_quat):
+ """
+ Calculates the difference between the current quaternion and the desired quaternion.
+ See Siciliano textbook page 140 Eq 3.91
+
+ :param curr_quat: current quaternion
+ :param des_quat: desired quaternion
+ :return: difference between current quaternion and desired quaternion
+ """
+ quatError = np.zeros((3, ))
+
+ quatError[0] = (curr_quat[0] * des_quat[1] - des_quat[0] * curr_quat[1] -
+ curr_quat[3] * des_quat[2] + curr_quat[2] * des_quat[3])
+
+ quatError[1] = (curr_quat[0] * des_quat[2] - des_quat[0] * curr_quat[2] +
+ curr_quat[3] * des_quat[1] - curr_quat[1] * des_quat[3])
+
+ quatError[2] = (curr_quat[0] * des_quat[3] - des_quat[0] * curr_quat[3] -
+ curr_quat[2] * des_quat[1] + curr_quat[1] * des_quat[2])
+
+ return quatError
+
+
+def rotation_distance(p: np.array, q: np.array):
+ """
+ p: quaternion
+ q: quaternion
+ theta: rotation angle between p and q (rad)
+ """
+ assert p.shape == q.shape, "p and q should be quaternion"
+ product = p[0] * q[0] + p[1] * q[1] + p[2] * q[2] + p[3] * q[3]
+ theta = 2 * np.arccos(abs(product))
+ return theta
+
+
+def rot_to_quat(theta, axis):
+ quant = np.zeros(4)
+ quant[0] = np.sin(theta / 2.)
+ quant[1] = np.cos(theta / 2.) * axis[0]
+ quant[2] = np.cos(theta / 2.) * axis[1]
+ quant[3] = np.cos(theta / 2.) * axis[2]
+ return quant
+
+
+
+class RewardBase():
+ def __init__(self, q_max, q_min, q_dot_max):
+ self._reward = 0.
+ self._done = False
+ self._q_max = q_max
+ self._q_min = q_min
+ self._q_dot_max = q_dot_max
+
+ def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat,
+ rod_tip_pos, rod_quat, qpos, qvel, action):
+ raise NotImplementedError
+
+ def _joint_limit_violate_penalty(self,
+ qpos,
+ qvel,
+ enable_pos_limit=False,
+ enable_vel_limit=False):
+ penalty = 0.
+ p_coeff = 1.
+ v_coeff = 1.
+ # q_limit
+ if enable_pos_limit:
+ higher_indice = np.where(qpos > self._q_max)
+ lower_indice = np.where(qpos < self._q_min)
+ higher_error = qpos - self._q_max
+ lower_error = self._q_min - qpos
+ penalty -= p_coeff * (abs(np.sum(higher_error[higher_indice])) +
+ abs(np.sum(lower_error[lower_indice])))
+ # q_dot_limit
+ if enable_vel_limit:
+ q_dot_error = abs(qvel) - abs(self._q_dot_max)
+ q_dot_violate_idx = np.where(q_dot_error > 0.)
+ penalty -= v_coeff * abs(np.sum(q_dot_error[q_dot_violate_idx]))
+ return penalty
+
+
+
+
+class DenseReward(RewardBase):
+ def __init__(self, q_max, q_min, q_dot_max):
+ super(DenseReward, self).__init__(q_max, q_min, q_dot_max)
+
+ def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat,
+ rod_tip_pos, rod_quat, qpos, qvel, action):
+ joint_penalty = self._joint_limit_violate_penalty(qpos,
+ qvel,
+ enable_pos_limit=True,
+ enable_vel_limit=True)
+ tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100)
+ box_goal_pos_dist_reward = -3.5 * np.linalg.norm(box_pos - target_pos)
+ box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi
+ energy_cost = -0.0005 * np.sum(np.square(action))
+
+ reward = joint_penalty + tcp_box_dist_reward + \
+ box_goal_pos_dist_reward + box_goal_rot_dist_reward + energy_cost
+
+ rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat)
+ if rod_inclined_angle > np.pi / 4:
+ reward -= rod_inclined_angle / (np.pi)
+
+ return reward
+
+
+
+
+class TemporalSparseReward(RewardBase):
+ def __init__(self, q_max, q_min, q_dot_max):
+ super(TemporalSparseReward, self).__init__(q_max, q_min, q_dot_max)
+
+ def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat,
+ rod_tip_pos, rod_quat, qpos, qvel, action):
+ reward = 0.
+ joint_penalty = self._joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True)
+ energy_cost = -0.0005 * np.sum(np.square(action))
+ tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100)
+ reward += joint_penalty + tcp_box_dist_reward + energy_cost
+ rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat)
+
+ if rod_inclined_angle > np.pi / 4:
+ reward -= rod_inclined_angle / (np.pi)
+
+ if not episodic_end:
+ return reward
+
+ box_goal_dist = np.linalg.norm(box_pos - target_pos)
+
+ box_goal_pos_dist_reward = -3.5 * box_goal_dist * 100
+ box_goal_rot_dist_reward = -rotation_distance(box_quat, target_quat) / np.pi * 100
+
+ reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward
+
+ return reward
+
+
+
+
+class TemporalSpatialSparseReward(RewardBase):
+ def __init__(self, q_max, q_min, q_dot_max):
+ super(TemporalSpatialSparseReward, self).__init__(q_max, q_min, q_dot_max)
+
+ def get_reward(self, episodic_end, box_pos, box_quat, target_pos, target_quat,
+ rod_tip_pos, rod_quat, qpos, qvel, action):
+ reward = 0.
+ joint_penalty = self._joint_limit_violate_penalty(qpos, qvel, enable_pos_limit=True, enable_vel_limit=True)
+ energy_cost = -0.0005 * np.sum(np.square(action))
+ tcp_box_dist_reward = -2 * np.clip(np.linalg.norm(box_pos - rod_tip_pos), 0.05, 100)
+ reward += joint_penalty + tcp_box_dist_reward + energy_cost
+ rod_inclined_angle = rotation_distance(rod_quat, desired_rod_quat)
+
+ if rod_inclined_angle > np.pi / 4:
+ reward -= rod_inclined_angle / (np.pi)
+
+ if not episodic_end:
+ return reward
+
+ box_goal_dist = np.linalg.norm(box_pos - target_pos)
+
+ if box_goal_dist < 0.1:
+ reward += 300
+ box_goal_pos_dist_reward = np.clip(- 3.5 * box_goal_dist * 100 * 3, -100, 0)
+ box_goal_rot_dist_reward = np.clip(- rotation_distance(box_quat, target_quat)/np.pi * 100 * 1.5, -100, 0)
+ reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward
+
+ return reward
+
+
+
+def BoxPushingReward(reward_type, q_max, q_min, q_dot_max):
+ if reward_type == 'dense':
+ return DenseReward(q_max, q_min, q_dot_max)
+ elif reward_type == 'temporal_sparse':
+ return TemporalSparseReward(q_max, q_min, q_dot_max)
+ elif reward_type == 'temporal_spatial_sparse':
+ return TemporalSpatialSparseReward(q_max, q_min, q_dot_max)
+ else:
+ raise NotImplementedError
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box.xml
new file mode 100644
index 0000000..3e71094
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box_with_lid.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box_with_lid.xml
new file mode 100644
index 0000000..e4bc5d6
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/box_with_lid.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/cubic_with_sites.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/cubic_with_sites.xml
new file mode 100644
index 0000000..e88e02f
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/cubic_with_sites.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/goal.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/goal.xml
new file mode 100644
index 0000000..74bd7a9
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/goal.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/push_box.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/push_box.xml
new file mode 100644
index 0000000..25313f8
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/push_box.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/tray.xml b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/tray.xml
new file mode 100644
index 0000000..cca0617
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/objects/assets/tray.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_mocap_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_mocap_control.xml
new file mode 100644
index 0000000..a36c987
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_mocap_control.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_position_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_position_control.xml
new file mode 100644
index 0000000..c83939a
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_position_control.xml
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_torque_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_torque_control.xml
new file mode 100644
index 0000000..7b70a75
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_torque_control.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_velocity_control.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_velocity_control.xml
new file mode 100644
index 0000000..4fda9de
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/controller/panda_velocity_control.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/depr/panda_gym.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/depr/panda_gym.xml
new file mode 100644
index 0000000..27f3415
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/depr/panda_gym.xml
@@ -0,0 +1,157 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/d435v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/d435v.stl
new file mode 100644
index 0000000..809a0da
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/d435v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/finger.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/finger.stl
new file mode 100644
index 0000000..3b87289
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/finger.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/fingerv.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/fingerv.stl
new file mode 100644
index 0000000..0b11382
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/fingerv.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/hand.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/hand.stl
new file mode 100644
index 0000000..4e82090
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/hand.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/handv.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/handv.stl
new file mode 100644
index 0000000..92f60bd
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/handv.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0.stl
new file mode 100644
index 0000000..def070c
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0v.stl
new file mode 100644
index 0000000..72dba55
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link0v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1.stl
new file mode 100644
index 0000000..426bcf2
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1v.stl
new file mode 100644
index 0000000..b42f97b
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link1v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2.stl
new file mode 100644
index 0000000..b369f15
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2v.stl
new file mode 100644
index 0000000..d72bbe4
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link2v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3.stl
new file mode 100644
index 0000000..25162ee
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3v.stl
new file mode 100644
index 0000000..904de9d
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link3v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4.stl
new file mode 100644
index 0000000..76c8c33
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4v.stl
new file mode 100644
index 0000000..da74ed6
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link4v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5.stl
new file mode 100644
index 0000000..3006a0b
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5v.stl
new file mode 100644
index 0000000..f795374
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link5v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6.stl
new file mode 100644
index 0000000..2e9594a
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6v.stl
new file mode 100644
index 0000000..8b2a7f3
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link6v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7.stl
new file mode 100644
index 0000000..0532d05
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7v.stl b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7v.stl
new file mode 100644
index 0000000..82b5946
Binary files /dev/null and b/fancy_gym/envs/mujoco/box_pushing/model/robots/meshes/panda/link7v.stl differ
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda.xml
new file mode 100644
index 0000000..8718e42
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda.xml
@@ -0,0 +1,155 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_bimanual.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_bimanual.xml
new file mode 100644
index 0000000..b970726
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_bimanual.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_mocap.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_mocap.xml
new file mode 100644
index 0000000..7de5c38
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_mocap.xml
@@ -0,0 +1,140 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_rod.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_rod.xml
new file mode 100644
index 0000000..0a3e433
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/panda_rod.xml
@@ -0,0 +1,159 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/robots/pandas_kit_lab.xml b/fancy_gym/envs/mujoco/box_pushing/model/robots/pandas_kit_lab.xml
new file mode 100644
index 0000000..b970726
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/robots/pandas_kit_lab.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/surroundings/assembled_scene.xml b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/assembled_scene.xml
new file mode 100644
index 0000000..832c53e
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/assembled_scene.xml
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/surroundings/base.xml b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/base.xml
new file mode 100644
index 0000000..7a40fbd
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/base.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/surroundings/kit_lab_surrounding.xml b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/kit_lab_surrounding.xml
new file mode 100644
index 0000000..ca60a9c
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/surroundings/kit_lab_surrounding.xml
@@ -0,0 +1,118 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/workspace/large_workspace.xml b/fancy_gym/envs/mujoco/box_pushing/model/workspace/large_workspace.xml
new file mode 100644
index 0000000..a92eaf4
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/workspace/large_workspace.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/workspace/medium_workspace.xml b/fancy_gym/envs/mujoco/box_pushing/model/workspace/medium_workspace.xml
new file mode 100644
index 0000000..7c0bf13
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/workspace/medium_workspace.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/model/workspace/small_workspace.xml b/fancy_gym/envs/mujoco/box_pushing/model/workspace/small_workspace.xml
new file mode 100644
index 0000000..1a34694
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/model/workspace/small_workspace.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py
new file mode 100644
index 0000000..172005a
--- /dev/null
+++ b/fancy_gym/envs/mujoco/box_pushing/mp_wrapper.py
@@ -0,0 +1,32 @@
+from typing import Union, Tuple
+
+import numpy as np
+
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+
+ # Random x goal + random init pos
+ @property
+ def context_mask(self):
+ return np.hstack([
+ [False] * 7, # joints position
+ [False] * 7, # joints velocity
+ [False] * 7, # joints gravity compensation
+ [False] * 3, # position of rod tip
+ [False] * 4, # orientation of rod
+ [True] * 3, # position of box
+ [True] * 4, # orientation of box
+ [True] * 3, # position of target
+ [True] * 4, # orientation of target
+ [True] * 1, # time
+ ])
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.data.qpos[3:6].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.data.qvel[3:6].copy()
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index da7c94d..f1c25e2 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -155,12 +155,12 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
if __name__ == '__main__':
- render = False
+ render = True
# DMP
- example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
+ # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
# ProMP
- example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+ # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
# Altered basis functions
obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)