diff --git a/alr_envs/mujoco/gym_table_tennis/__init__.py b/alr_envs/mujoco/gym_table_tennis/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/__init__.py
@@ -0,0 +1 @@
+
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/__init__.py b/alr_envs/mujoco/gym_table_tennis/envs/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/__init__.py
@@ -0,0 +1 @@
+
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml
new file mode 100644
index 0000000..7772d14
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml
@@ -0,0 +1,12 @@
+<!-- motor actuator definitions for the seven WAM joints (XML content not captured) -->
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml
new file mode 100644
index 0000000..a8df915
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml
@@ -0,0 +1,76 @@
+<!-- Barrett WAM 7-DoF left arm model (XML content not captured) -->
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml
new file mode 100644
index 0000000..011b95a
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml
@@ -0,0 +1,95 @@
+<!-- Barrett WAM 7-DoF right arm model (XML content not captured) -->
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml
new file mode 100644
index 0000000..ad1ae35
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml
@@ -0,0 +1,38 @@
+<!-- table tennis table model (XML content not captured) -->
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml
new file mode 100644
index 0000000..eb2b347
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml
@@ -0,0 +1,10 @@
+<!-- target ball definition (XML content not captured) -->
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml
new file mode 100644
index 0000000..29a21e1
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml
@@ -0,0 +1,80 @@
+<!-- test ball definitions (XML content not captured) -->
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl
new file mode 100644
index 0000000..133b112
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl
new file mode 100644
index 0000000..047e9df
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl
new file mode 100644
index 0000000..3b05c27
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl
new file mode 100644
index 0000000..5ff94a2
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl
new file mode 100644
index 0000000..c548448
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl
new file mode 100644
index 0000000..495160d
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..b4bb322
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..7b2f001
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..f05174e
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl
new file mode 100644
index 0000000..eb252d9
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl
new file mode 100644
index 0000000..0a986fa
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c039f0d
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..250acaf
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..993d0f7
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl
new file mode 100644
index 0000000..8448a3f
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl
new file mode 100644
index 0000000..b34963d
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl
new file mode 100644
index 0000000..f6a1515
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..e6aa6b6
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..667902e
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl
new file mode 100644
index 0000000..ed66bbb
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..aba957d
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..5cca6a9
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..3343e27
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl
new file mode 100644
index 0000000..ae505fd
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl
new file mode 100644
index 0000000..c36cfec
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl
new file mode 100644
index 0000000..dc633c4
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..82d0093
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..7fd5a55
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl
new file mode 100644
index 0000000..76353ae
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl
new file mode 100644
index 0000000..a0386f6
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl
new file mode 100644
index 0000000..f6b41ad
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c36f88f
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..d00cac1
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..34d1d8b
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl
new file mode 100644
index 0000000..13d2f73
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..06e857f
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..48e1bb1
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl
new file mode 100644
index 0000000..0d95239
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml
new file mode 100644
index 0000000..9abf102
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml
@@ -0,0 +1,19 @@
+<!-- right arm position actuators (XML content not captured) -->
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml
new file mode 100644
index 0000000..dfbc37a
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml
@@ -0,0 +1,49 @@
+<!-- shared asset definitions (XML content not captured) -->
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml
new file mode 100644
index 0000000..f2432bb
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml
@@ -0,0 +1,41 @@
+<!-- top-level environment model combining the include files above (XML content not captured) -->
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py b/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py
new file mode 100644
index 0000000..625a122
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py
@@ -0,0 +1,233 @@
+import numpy as np
+from gym import spaces
+from gym.envs.robotics import robot_env, utils
+# import xml.etree.ElementTree as ET
+from alr_envs.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis
+import glfw  # used in close() to destroy the viewer window
+from alr_envs.mujoco.gym_table_tennis.utils.experiment import ball_initialize
+from pathlib import Path
+import os
+
+
+class TableTennisEnv(robot_env.RobotEnv):
+ """Class for Table Tennis environment.
+ """
+ def __init__(self, n_substeps=1,
+ model_path=None,
+ initial_qpos=None,
+ initial_ball_state=None,
+ config=None,
+ reward_obj=None
+ ):
+        """Initializes a new MuJoCo-based table tennis environment.
+
+        Args:
+            n_substeps (int): number of simulation substeps per call to step
+            model_path (string): path to the environment's XML file
+            initial_qpos (dict): joint names and values defining the initial configuration
+            initial_ball_state: initial ball position and velocity used on reset
+            config: configuration providing the action space, time step and initial qpos
+            reward_obj: reward object, defaults to HierarchicalRewardTableTennis
+        """
+        if model_path is None:
+            current_dir = Path(os.path.split(os.path.realpath(__file__))[0])
+            table_tennis_env_xml_path = current_dir / "robotics" / "assets" / "table_tennis" / "table_tennis_env.xml"
+            model_path = str(table_tennis_env_xml_path)
+        assert config is not None, "a config with trajectory, mujoco_sim_env and robot_config entries is required"
+        self.config = config
+        action_space = self.config['trajectory']['args']['action_space']
+        time_step = self.config['mujoco_sim_env']['args']["time_step"]
+        if initial_qpos is None:
+            initial_qpos = self.config['robot_config']['args']['initial_qpos']
+
+ # TODO should read all configuration in config
+ assert initial_qpos is not None, "Must initialize the initial q position of robot arm"
+ n_actions = 7
+ self.initial_qpos_value = np.array(list(initial_qpos.values())).copy()
+ # # change time step in .xml file
+ # tree = ET.parse(model_path)
+ # root = tree.getroot()
+ # for option in root.findall('option'):
+ # option.set("timestep", str(time_step))
+ #
+ # tree.write(model_path)
+
+ super(TableTennisEnv, self).__init__(
+ model_path=model_path, n_substeps=n_substeps, n_actions=n_actions,
+ initial_qpos=initial_qpos)
+
+ if action_space:
+ self.action_space = spaces.Box(low=np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]),
+ high=np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]),
+ dtype='float64')
+        else:
+            # unbounded action space (the upper bound must be +inf, not -inf)
+            self.action_space = spaces.Box(low=np.array([-np.inf] * 7),
+                                           high=np.array([np.inf] * 7),
+                                           dtype='float64')
+ self.scale = None
+ self.desired_pos = None
+ self.n_actions = n_actions
+ self.action = None
+ self.time_step = time_step
+ self.paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
+ if reward_obj is None:
+ self.reward_obj = HierarchicalRewardTableTennis()
+ else:
+ self.reward_obj = reward_obj
+
+ if initial_ball_state is not None:
+ self.initial_ball_state = initial_ball_state
+ else:
+ self.initial_ball_state = ball_initialize(random=False)
+ self.target_ball_pos = self.sim.data.get_site_xpos("target_ball")
+ self.racket_center_pos = self.sim.data.get_site_xpos("wam/paddle_center")
+
+ def close(self):
+ if self.viewer is not None:
+ glfw.destroy_window(self.viewer.window)
+ # self.viewer.window.close()
+ self.viewer = None
+ self._viewers = {}
+
+ # GoalEnv methods
+ # ----------------------------
+ def compute_reward(self, achieved_goal, goal, info):
+ # reset the reward, if action valid
+ # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
+ # right_court_contact_detector = self.reward_obj.contact_detection(self, right_court_contact_obj)
+ # if right_court_contact_detector:
+ # print("can detect the table ball contact")
+ self.reward_obj.total_reward = 0
+ # Stage 1 Hitting
+ self.reward_obj.hitting(self)
+        # if the ball was not hit, return the highest reward reached so far
+ if not self.reward_obj.goal_achievement:
+ return self.reward_obj.highest_reward
+ # # Stage 2 Right Table Contact
+ # self.reward_obj.right_table_contact(self)
+ # if not self.reward_obj.goal_achievement:
+ # return self.reward_obj.highest_reward
+ # # Stage 2 Net Contact
+ # self.reward_obj.net_contact(self)
+ # if not self.reward_obj.goal_achievement:
+ # return self.reward_obj.highest_reward
+ # Stage 3 Opponent court Contact
+ # self.reward_obj.landing_on_opponent_court(self)
+ # if not self.reward_obj.goal_achievement:
+ # print("self.reward_obj.highest_reward: ", self.reward_obj.highest_reward)
+ # TODO
+ self.reward_obj.target_achievement(self)
+ return self.reward_obj.highest_reward
+
+ def _reset_sim(self):
+ self.sim.set_state(self.initial_state)
+ [initial_x, initial_y, initial_z, v_x, v_y, v_z] = self.initial_ball_state
+ self.sim.data.set_joint_qpos('tar:x', initial_x)
+ self.sim.data.set_joint_qpos('tar:y', initial_y)
+ self.sim.data.set_joint_qpos('tar:z', initial_z)
+ self.energy_corrected = True
+ self.give_reflection_reward = False
+
+ # velocity is positive direction
+ self.sim.data.set_joint_qvel('tar:x', v_x)
+ self.sim.data.set_joint_qvel('tar:y', v_y)
+ self.sim.data.set_joint_qvel('tar:z', v_z)
+
+        # Apply gravity compensation to the arm joints. NumPy slices create new
+        # array objects, so an `is not` identity check is always True; assign directly.
+        self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
+ self.sim.forward()
+ return True
+
+ def _env_setup(self, initial_qpos):
+ for name, value in initial_qpos.items():
+ self.sim.data.set_joint_qpos(name, value)
+
+        # Apply gravity compensation (see note in _reset_sim)
+        self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
+ self.sim.forward()
+
+ # Get the target position
+ self.initial_paddle_center_xpos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
+ self.initial_paddle_center_vel = None # self.sim.get_site_
+
+ def _sample_goal(self):
+ goal = self.initial_paddle_center_xpos[:3] + self.np_random.uniform(-0.2, 0.2, size=3)
+ return goal.copy()
+
+ def _get_obs(self):
+
+ # positions of racket center
+ paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
+ ball_pos = self.sim.data.get_site_xpos("target_ball")
+
+ dt = self.sim.nsubsteps * self.sim.model.opt.timestep
+ paddle_center_velp = self.sim.data.get_site_xvelp('wam/paddle_center') * dt
+ robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
+
+ wrist_state = robot_qpos[-3:]
+ wrist_vel = robot_qvel[-3:] * dt # change to a scalar if the gripper is made symmetric
+
+ # achieved_goal = paddle_body_EE_pos
+ obs = np.concatenate([
+ paddle_center_pos, paddle_center_velp, wrist_state, wrist_vel
+ ])
+
+ out_dict = {
+ 'observation': obs.copy(),
+ 'achieved_goal': paddle_center_pos.copy(),
+ 'desired_goal': self.goal.copy(),
+ 'q_pos': self.sim.data.qpos[:].copy(),
+ "ball_pos": ball_pos.copy(),
+ # "hitting_flag": self.reward_obj.hitting_flag
+ }
+
+ return out_dict
+
+ def _step_callback(self):
+ pass
+
+ def _set_action(self, action):
+        # Apply gravity compensation (see note in _reset_sim)
+        self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
+ # print("set action process running")
+ assert action.shape == (self.n_actions,)
+ self.action = action.copy() # ensure that we don't change the action outside of this scope
+ pos_ctrl = self.action[:] # limit maximum change in position
+ pos_ctrl = np.clip(pos_ctrl, self.action_space.low, self.action_space.high)
+
+ # get desired trajectory
+ self.sim.data.qpos[:7] = pos_ctrl
+ self.sim.forward()
+ self.desired_pos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
+
+ self.sim.data.ctrl[:] = pos_ctrl
+
+ def _is_success(self, achieved_goal, desired_goal):
+ pass
+
+
+if __name__ == '__main__':
+ render_mode = "human" # "human" or "partial" or "final"
+ env = TableTennisEnv()
+ env.reset()
+ # env.render(mode=render_mode)
+
+ for i in range(200):
+ # objective.load_result("/tmp/cma")
+ # test with random actions
+ ac = 2 * env.action_space.sample()
+ # ac[0] += np.pi/2
+ obs, rew, d, info = env.step(ac)
+ env.render(mode=render_mode)
+
+ print(rew)
+
+ if d:
+ break
+
+ env.close()
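The constructor above reads exactly three entries from `config`: `trajectory.args.action_space`, `mujoco_sim_env.args.time_step`, and `robot_config.args.initial_qpos`. A minimal sketch of building such a config by hand follows; the joint names in `initial_qpos` are illustrative assumptions, not taken from the XML, and must match the joints actually defined in table_tennis_env.xml.

    # Sketch: the minimal config structure TableTennisEnv.__init__ actually reads.
    config = {
        'trajectory': {'args': {'action_space': True}},    # use the bounded joint-limit Box
        'mujoco_sim_env': {'args': {'time_step': 0.002}},
        'robot_config': {'args': {'initial_qpos': {
            # placeholder joint names for illustration only
            'wam/joint_{}'.format(i): 0.0 for i in range(1, 8)
        }}},
    }
    env = TableTennisEnv(config=config)
    obs = env.reset()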
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/__init__.py b/alr_envs/mujoco/gym_table_tennis/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/experiment.py b/alr_envs/mujoco/gym_table_tennis/utils/experiment.py
new file mode 100644
index 0000000..addd6c5
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/utils/experiment.py
@@ -0,0 +1,83 @@
+import numpy as np
+from gym.utils import seeding
+from alr_envs.mujoco.gym_table_tennis.utils.util import read_yaml, read_json
+from pathlib import Path
+
+
+def ball_initialize(random=False, scale=False, context_range=None, scale_value=None):
+ if random:
+ if scale:
+ # if scale_value is None:
+ scale_value = context_scale_initialize(context_range)
+            v_x, v_y, v_z = np.array([2.5, 2, 0.5]) * scale_value
+ dx = 1
+ dy = 0
+ dz = 0.05
+ else:
+ seed = None
+ np_random, seed = seeding.np_random(seed)
+ dx = np_random.uniform(-0.1, 0.1)
+ dy = np_random.uniform(-0.1, 0.1)
+ dz = np_random.uniform(-0.1, 0.1)
+
+ v_x = np_random.uniform(1.7, 1.8)
+ v_y = np_random.uniform(0.7, 0.8)
+ v_z = np_random.uniform(0.1, 0.2)
+ # print(dx, dy, dz, v_x, v_y, v_z)
+ # else:
+ # dx = -0.1
+ # dy = 0.05
+ # dz = 0.05
+ # v_x = 1.5
+ # v_y = 0.7
+ # v_z = 0.06
+ # initial_x = -0.6 + dx
+ # initial_y = -0.3 + dy
+ # initial_z = 0.8 + dz
+ else:
+ if scale:
+            v_x, v_y, v_z = np.array([2.5, 2, 0.5]) * scale_value
+ else:
+ v_x = 2.5
+ v_y = 2
+ v_z = 0.5
+ dx = 1
+ dy = 0
+ dz = 0.05
+
+ initial_x = 0 + dx
+ initial_y = -0.2 + dy
+ initial_z = 0.3 + dz
+ # print("initial ball state: ", initial_x, initial_y, initial_z, v_x, v_y, v_z)
+ initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
+ return initial_ball_state
+
+
+def context_scale_initialize(context_range):
+    """Sample a scale factor uniformly from the (low, high) context range.
+
+    Returns:
+        np.ndarray of shape (1,) containing the sampled scale.
+    """
+    low, high = context_range
+    scale = np.random.uniform(low, high, 1)
+    return scale
+
+
+def config_handle_generation(config_file_path):
+    """Generate a config handle for multiprocessing.
+
+    Args:
+        config_file_path: path to a .json or .yml configuration file
+
+    Returns:
+        the parsed configuration
+    """
+    cfg_fname = Path(config_file_path)
+    if cfg_fname.suffix == ".json":
+        config = read_json(cfg_fname)
+    elif cfg_fname.suffix == ".yml":
+        config = read_yaml(cfg_fname)
+    else:
+        raise ValueError("Unsupported config suffix: {}".format(cfg_fname.suffix))
+
+    return config
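For reference, `ball_initialize` packs the ball state into a flat 6-vector that `_reset_sim` later unpacks into joint positions and velocities. A quick sketch mirroring that consumer:

    # Sketch: ball_initialize returns [x, y, z, v_x, v_y, v_z].
    init_state = ball_initialize(random=True)
    x, y, z, v_x, v_y, v_z = init_state  # initial position (m) and velocity (m/s)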
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/__init__.py b/alr_envs/mujoco/gym_table_tennis/utils/rewards/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py b/alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
new file mode 100644
index 0000000..fe69104
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
@@ -0,0 +1,402 @@
+import numpy as np
+import logging
+
+
+class HierarchicalRewardTableTennis(object):
+    """Hierarchical reward function for the table tennis experiment.
+
+    Returns the highest reward achieved so far. Starting from Reward = 0:
+
+    Step 1: Action validity. Upper bound 0, range [-inf, 0].
+        Reward += -1 * |hit_duration - hit_duration_threshold| * [hit_duration < hit_duration_threshold] * 10
+    Step 2: Hitting. Upper bound 2.
+        if hitting:
+            Reward += 2 * (1 - tanh(|shortest_hitting_dist|))      in [0, 2]
+        if not hitting:
+            Reward += 0.2 * (1 - tanh(|shortest_hitting_dist|))    in [0, 0.2]
+    Step 3: Target point achievement. Upper bound 6, this stage in [0, 4].
+        if table contact:
+            Reward += 1
+            Reward += 2 * (1 - tanh(|dist_target|))
+            Reward += 1 if contact_coordinate[0] < 0 else 0
+        elif floor contact:
+            Reward += 1 - tanh(|dist_target_virtual|)
+    """
+
+ def __init__(self):
+ self.reward = None
+ self.goal_achievement = False
+ self.total_reward = 0
+ self.shortest_hitting_dist = 1000
+ self.highest_reward = -1000
+ self.lowest_corner_dist = 100
+ self.right_court_contact_detector = False
+ self.table_contact_detector = False
+ self.floor_contact_detector = False
+ self.radius = 0.025
+ self.min_ball_x_pos = 100
+ self.hit_contact_detector = False
+ self.net_contact_detector = False
+ self.ratio = 1
+ self.lowest_z = 100
+ self.target_flag = False
+ self.dist_target_virtual = 100
+ self.ball_z_pos_lowest = 100
+ self.hitting_flag = False
+ self.hitting_time_point = None
+ self.ctxt_dim = None
+ self.context_range_bounds = None
+ # self.ctxt_out_of_range_punishment = None
+ # self.ctxt_in_side_of_range_punishment = None
+ #
+ # def check_where_invalid(self, ctxt, context_range_bounds, set_to_valid_region=False):
+ # idx_max = []
+ # idx_min = []
+ # for dim in range(self.ctxt_dim):
+ # min_dim = context_range_bounds[0][dim]
+ # max_dim = context_range_bounds[1][dim]
+ # idx_max_c = np.where(ctxt[:, dim] > max_dim)[0]
+ # idx_min_c = np.where(ctxt[:, dim] < min_dim)[0]
+ # if set_to_valid_region:
+ # if idx_max_c.shape[0] != 0:
+ # ctxt[idx_max_c, dim] = max_dim
+ # if idx_min_c.shape[0] != 0:
+ # ctxt[idx_min_c, dim] = min_dim
+ # idx_max.append(idx_max_c)
+ # idx_min.append(idx_min_c)
+ # return idx_max, idx_min, ctxt
+
+ def check_valid(self, scale, context_range_bounds):
+
+ min_dim = context_range_bounds[0][0]
+ max_dim = context_range_bounds[1][0]
+ valid = (scale < max_dim) and (scale > min_dim)
+ return valid
+
+ @classmethod
+ def goal_distance(cls, goal_a, goal_b):
+ assert goal_a.shape == goal_b.shape
+ return np.linalg.norm(goal_a - goal_b, axis=-1)
+
+ def refresh_highest_reward(self):
+ if self.total_reward >= self.highest_reward:
+ self.highest_reward = self.total_reward
+
+ def duration_valid(self):
+ pass
+
+ def huge_value_unstable(self):
+ self.total_reward += -10
+ self.highest_reward = -1
+
+    def context_valid(self, context):
+        """If the context is inside the valid range, add 1 to the reward, otherwise 0."""
+        valid = self.check_valid(context.copy(), context_range_bounds=self.context_range_bounds)
+        # when using dirac punishments
+        if valid:
+            self.total_reward += 1  # if action valid and context valid, total_reward = 0
+        else:
+            self.total_reward += 0
+        self.refresh_highest_reward()
+
+ def action_valid(self, durations=None):
+        """Ensure the robot movement is executed with parameters in a valid domain.
+
+        Time should always be positive and the joint positions of the robot
+        should lie within [-pi, pi]. If all parameters are valid, the robot gets
+        a zero score; otherwise it gets a negative score proportional to how far
+        it is beyond the valid parameter domain.
+
+        Returns:
+            rewards: 0 if valid; otherwise negative, proportional to the distance
+            beyond the valid parameter domain.
+        """
+ assert durations.shape[0] == 2, "durations type should be np.array and the shape should be 2"
+ # pre_duration = durations[0]
+ hit_duration = durations[1]
+ # pre_duration_thres = 0.01
+ hit_duration_thres = 1
+ # self.goal_achievement = np.all(
+ # [(pre_duration > pre_duration_thres), (hit_duration > hit_duration_thres), (0.3 < pre_duration < 0.6)])
+ self.goal_achievement = (hit_duration > hit_duration_thres)
+ if self.goal_achievement:
+ self.total_reward = -1
+ self.goal_achievement = True
+ else:
+ # self.total_reward += -1 * ((np.abs(pre_duration - pre_duration_thres) * int(
+ # pre_duration < pre_duration_thres) + np.abs(hit_duration - hit_duration_thres) * int(
+ # hit_duration < hit_duration_thres)) * 10)
+ self.total_reward = -1 * ((np.abs(hit_duration - hit_duration_thres) * int(
+ hit_duration < hit_duration_thres)) * 10)
+ self.total_reward += -1
+ self.goal_achievement = False
+ self.refresh_highest_reward()
+
+ def motion_penalty(self, action, high_motion_penalty):
+ """Protects the robot from high acceleration and dangerous movement.
+ """
+ if not high_motion_penalty:
+ reward_ctrl = - 0.05 * np.square(action).sum()
+ else:
+ reward_ctrl = - 0.075 * np.square(action).sum()
+ self.total_reward += reward_ctrl
+ self.refresh_highest_reward()
+ self.goal_achievement = True
+
+    def hitting(self, env):
+        """Hitting reward calculation.
+
+        If the racket hits the ball, reward = 2 * (1 - tanh(shortest_hitting_dist)),
+        which approaches 2 as the hit distance shrinks. If the racket misses,
+        the shortest distance between racket center and ball center is tracked and
+        reward = 0.2 * (1 - tanh(shortest_hitting_dist)), bounded by [0, 0.2].
+
+        Args:
+            env: environment providing ball and racket positions.
+        """
+
+ hit_contact_obj = ["target_ball", "bat"]
+ target_ball_pos = env.target_ball_pos
+ racket_center_pos = env.racket_center_pos
+ # hit contact detection
+ # Record the hitting history
+ self.hitting_flag = False
+ if not self.hit_contact_detector:
+ self.hit_contact_detector = self.contact_detection(env, hit_contact_obj)
+ if self.hit_contact_detector:
+ print("First time detect hitting")
+ self.hitting_flag = True
+ if self.hit_contact_detector:
+
+ # TODO
+            # goal_distance is a norm and therefore non-negative
+            dist = self.goal_distance(target_ball_pos, racket_center_pos)
+ # print("goal distance is:", dist)
+ if dist <= self.shortest_hitting_dist:
+ self.shortest_hitting_dist = dist
+ # print("shortest_hitting_dist is:", self.shortest_hitting_dist)
+ # Keep the shortest hitting distance.
+ dist_reward = 2 * (1 - np.tanh(np.abs(self.shortest_hitting_dist)))
+
+ # TODO sparse
+ # dist_reward = 2
+
+ self.total_reward += dist_reward
+ self.goal_achievement = True
+
+ # if self.hitting_time_point is not None and self.hitting_time_point > 600:
+ # self.total_reward += 1
+
+ else:
+ dist = self.goal_distance(target_ball_pos, racket_center_pos)
+ if dist <= self.shortest_hitting_dist:
+ self.shortest_hitting_dist = dist
+ dist_reward = 1 - np.tanh(self.shortest_hitting_dist)
+ reward = 0.2 * dist_reward # because it does not hit the ball, so multiply 0.2
+ self.total_reward += reward
+ self.goal_achievement = False
+
+ self.refresh_highest_reward()
+
+ @classmethod
+ def relu(cls, x):
+ return np.maximum(0, x)
+
+ # def right_table_contact(self, env):
+ # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
+ # if env.target_ball_pos[0] >= 0 and env.target_ball_pos[2] >= 0.7:
+ # # update right court contact detection
+ # if not self.right_court_contact_detector:
+ # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
+ # if self.right_court_contact_detector:
+ # self.contact_x_pos = env.target_ball_pos[0]
+ # if self.right_court_contact_detector:
+ # self.total_reward += 1 - norm(0.685, 1).pdf(self.contact_x_pos) # x axis middle of right table
+ # self.goal_achievement = False
+ # else:
+ # self.total_reward += 1
+ # self.goal_achievement = True
+ # # else:
+ # # self.total_reward += 0
+ # # self.goal_achievement = False
+ # self.refresh_highest_reward()
+
+ # def net_contact(self, env):
+ # net_contact_obj = ["target_ball", "table_tennis_net"]
+ # # net_contact_detector = self.contact_detection(env, net_contact_obj)
+ # # ball_x_pos = env.target_ball_pos[0]
+ # # if self.min_ball_x_pos >= ball_x_pos:
+ # # self.min_ball_x_pos = ball_x_pos
+ # # table_left_edge_x_pos = -1.37
+ # # if np.abs(ball_x_pos) <= 0.01: # x threshold of net
+ # # if self.lowest_z >= env.target_ball_pos[2]:
+ # # self.lowest_z = env.target_ball_pos[2]
+ # # # construct a gaussian distribution of z
+ # # z_reward = 4 - norm(0, 0.1).pdf(self.lowest_z - 0.07625) # maximum 4
+ # # self.total_reward += z_reward
+ # # self.total_reward += 2 - np.minimum(1, self.relu(np.abs(self.min_ball_x_pos)))
+ # if not self.net_contact_detector:
+ # self.net_contact_detector = self.contact_detection(env, net_contact_obj)
+ # if self.net_contact_detector:
+ # self.total_reward += 0 # very high cost
+ # self.goal_achievement = False
+ # else:
+ # self.total_reward += 1
+ # self.goal_achievement = True
+ # self.refresh_highest_reward()
+
+ # def landing_on_opponent_court(self, env):
+ # # Very sparse reward
+ # # don't contact the right side court
+ # # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
+ # # right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
+ # left_court_contact_obj = ["target_ball", "table_tennis_table_left_side"]
+ # # left_court_contact_detector = self.contact_detection(env, left_court_contact_obj)
+ # # record the contact history
+ # # if not self.right_court_contact_detector:
+ # # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
+ # if not self.table_contact_detector:
+ # self.table_contact_detector = self.contact_detection(env, left_court_contact_obj)
+ #
+ # dist_left_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_up_corner"))
+ # dist_middle_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("middle_up_corner"))
+ # dist_left_down_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_down_corner"))
+ # dist_middle_down_corner = self.goal_distance(env.target_ball_pos,
+ # env.sim.data.get_site_xpos("middle_down_corner"))
+ # dist_array = np.array(
+ # [dist_left_up_corner, dist_middle_up_corner, dist_left_down_corner, dist_middle_down_corner])
+ # dist_corner = np.amin(dist_array)
+ # if self.lowest_corner_dist >= dist_corner:
+ # self.lowest_corner_dist = dist_corner
+ #
+ # right_contact_cost = 1
+ # left_contact_reward = 2
+ # dist_left_table_reward = (2 - np.tanh(self.lowest_corner_dist))
+ # # TODO Try multi dimensional gaussian distribution
+ # # contact only the left side court
+ # if self.right_court_contact_detector:
+ # self.total_reward += 0
+ # self.goal_achievement = False
+ # if self.table_contact_detector:
+ # self.total_reward += left_contact_reward
+ # self.goal_achievement = False
+ # else:
+ # self.total_reward += dist_left_table_reward
+ # self.goal_achievement = False
+ # else:
+ # self.total_reward += right_contact_cost
+ # if self.table_contact_detector:
+ # self.total_reward += left_contact_reward
+ # self.goal_achievement = True
+ # else:
+ # self.total_reward += dist_left_table_reward
+ # self.goal_achievement = False
+ # self.refresh_highest_reward()
+ # # if self.left_court_contact_detector and not self.right_court_contact_detector:
+ # # self.total_reward += self.ratio * left_contact_reward
+ # # print("only left court reward return!!!!!!!!!")
+ # # print("contact only left court!!!!!!")
+ # # self.goal_achievement = True
+ # # # no contact with table
+ # # elif not self.right_court_contact_detector and not self.left_court_contact_detector:
+ # # self.total_reward += 0 + self.ratio * dist_left_table_reward
+ # # self.goal_achievement = False
+ # # # contact both side
+ # # elif self.right_court_contact_detector and self.left_court_contact_detector:
+ # # self.total_reward += self.ratio * (left_contact_reward - right_contact_cost) # cost of contact of right court
+ # # self.goal_achievement = False
+ # # # contact only the right side court
+ # # elif self.right_court_contact_detector and not self.left_court_contact_detector:
+ # # self.total_reward += 0 + self.ratio * (
+ # # dist_left_table_reward - right_contact_cost) # cost of contact of right court
+ # # self.goal_achievement = False
+
+ def target_achievement(self, env):
+ target_coordinate = np.array([-0.5, -0.5])
+ # net_contact_obj = ["target_ball", "table_tennis_net"]
+ table_contact_obj = ["target_ball", "table_tennis_table"]
+ floor_contact_obj = ["target_ball", "floor"]
+
+ if 0.78 < env.target_ball_pos[2] < 0.8:
+ dist_target_virtual = np.linalg.norm(env.target_ball_pos[:2] - target_coordinate)
+ if self.dist_target_virtual > dist_target_virtual:
+ self.dist_target_virtual = dist_target_virtual
+ if -0.07 < env.target_ball_pos[0] < 0.07 and env.sim.data.get_joint_qvel('tar:x') < 0:
+ if self.ball_z_pos_lowest > env.target_ball_pos[2]:
+ self.ball_z_pos_lowest = env.target_ball_pos[2].copy()
+ # if not self.net_contact_detector:
+ # self.net_contact_detector = self.contact_detection(env, net_contact_obj)
+ if not self.table_contact_detector:
+ self.table_contact_detector = self.contact_detection(env, table_contact_obj)
+ if not self.floor_contact_detector:
+ self.floor_contact_detector = self.contact_detection(env, floor_contact_obj)
+ if not self.target_flag:
+ # Table Contact Reward.
+ if self.table_contact_detector:
+ self.total_reward += 1
+ # only update when the first contact because of the flag
+ contact_coordinate = env.target_ball_pos[:2].copy()
+ print("contact table ball coordinate: ", env.target_ball_pos)
+ logging.info("contact table ball coordinate: {}".format(env.target_ball_pos))
+ dist_target = np.linalg.norm(contact_coordinate - target_coordinate)
+ self.total_reward += (1 - np.tanh(dist_target)) * 2
+ self.target_flag = True
+ # Net Contact Reward. Precondition: Table Contact exits.
+ if contact_coordinate[0] < 0:
+ print("left table contact")
+ logging.info("~~~~~~~~~~~~~~~left table contact~~~~~~~~~~~~~~~")
+ self.total_reward += 1
+ # TODO Z coordinate reward
+ # self.total_reward += np.maximum(np.tanh(self.ball_z_pos_lowest), 0)
+ self.goal_achievement = True
+ else:
+ print("right table contact")
+ logging.info("~~~~~~~~~~~~~~~right table contact~~~~~~~~~~~~~~~")
+ self.total_reward += 0
+ self.goal_achievement = False
+ # if self.net_contact_detector:
+ # self.total_reward += 0
+ # self.goal_achievement = False
+ # else:
+ # self.total_reward += 1
+ # self.goal_achievement = False
+ # Floor Contact Reward. Precondition: Table Contact exits.
+ elif self.floor_contact_detector:
+ self.total_reward += (1 - np.tanh(self.dist_target_virtual))
+ self.target_flag = True
+ self.goal_achievement = False
+ # No Contact of Floor or Table, flying
+ else:
+ pass
+ # else:
+ # print("Flag is True already")
+ self.refresh_highest_reward()
+
+ def distance_to_target(self):
+ pass
+
+    @classmethod
+    def contact_detection(cls, env, goal_contact):
+        # Return True as soon as any active contact pair matches the goal geoms;
+        # only return False once all contacts have been checked.
+        for i in range(env.sim.data.ncon):
+            contact = env.sim.data.contact[i]
+            achieved_geom1_name = env.sim.model.geom_id2name(contact.geom1)
+            achieved_geom2_name = env.sim.model.geom_id2name(contact.geom2)
+            if (achieved_geom1_name in goal_contact) and (achieved_geom2_name in goal_contact):
+                print("contact of " + achieved_geom1_name + " " + achieved_geom2_name)
+                return True
+        return False
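To make the Stage 2 shaping above concrete, here is a self-contained sketch of the hitting term (a restatement of `hitting`, not part of the environment): the reward approaches 2 as the racket-ball distance shrinks, and is damped to at most 0.2 when no hit occurred.

    import numpy as np

    def hitting_reward(shortest_dist, hit):
        # 2 * (1 - tanh(d)) on a hit, 0.2 * (1 - tanh(d)) on a miss,
        # where d is the shortest racket-ball distance seen so far.
        base = 1 - np.tanh(abs(shortest_dist))
        return 2 * base if hit else 0.2 * base

    print(hitting_reward(0.0, hit=True))    # 2.0 for a perfect hit
    print(hitting_reward(0.5, hit=False))   # ~0.108 for a near miss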
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py b/alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py
new file mode 100644
index 0000000..6e6aa32
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py
@@ -0,0 +1,136 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# """Soft indicator function evaluating whether a number is within bounds."""
+#
+# from __future__ import absolute_import
+# from __future__ import division
+# from __future__ import print_function
+
+# Internal dependencies.
+import numpy as np
+
+# The value returned by tolerance() at `margin` distance from `bounds` interval.
+_DEFAULT_VALUE_AT_MARGIN = 0.1
+
+
+def _sigmoids(x, value_at_1, sigmoid):
+ """Returns 1 when `x` == 0, between 0 and 1 otherwise.
+
+ Args:
+ x: A scalar or numpy array.
+ value_at_1: A float between 0 and 1 specifying the output when `x` == 1.
+ sigmoid: String, choice of sigmoid type.
+
+ Returns:
+ A numpy array with values between 0.0 and 1.0.
+
+ Raises:
+ ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and
+ `quadratic` sigmoids which allow `value_at_1` == 0.
+ ValueError: If `sigmoid` is of an unknown type.
+ """
+ if sigmoid in ('cosine', 'linear', 'quadratic'):
+ if not 0 <= value_at_1 < 1:
+ raise ValueError('`value_at_1` must be nonnegative and smaller than 1, '
+ 'got {}.'.format(value_at_1))
+ else:
+ if not 0 < value_at_1 < 1:
+ raise ValueError('`value_at_1` must be strictly between 0 and 1, '
+ 'got {}.'.format(value_at_1))
+
+ if sigmoid == 'gaussian':
+ scale = np.sqrt(-2 * np.log(value_at_1))
+ return np.exp(-0.5 * (x*scale)**2)
+
+ elif sigmoid == 'hyperbolic':
+ scale = np.arccosh(1/value_at_1)
+ return 1 / np.cosh(x*scale)
+
+ elif sigmoid == 'long_tail':
+ scale = np.sqrt(1/value_at_1 - 1)
+ return 1 / ((x*scale)**2 + 1)
+
+ elif sigmoid == 'cosine':
+ scale = np.arccos(2*value_at_1 - 1) / np.pi
+ scaled_x = x*scale
+ return np.where(abs(scaled_x) < 1, (1 + np.cos(np.pi*scaled_x))/2, 0.0)
+
+ elif sigmoid == 'linear':
+ scale = 1-value_at_1
+ scaled_x = x*scale
+ return np.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0)
+
+ elif sigmoid == 'quadratic':
+ scale = np.sqrt(1-value_at_1)
+ scaled_x = x*scale
+ return np.where(abs(scaled_x) < 1, 1 - scaled_x**2, 0.0)
+
+ elif sigmoid == 'tanh_squared':
+ scale = np.arctanh(np.sqrt(1-value_at_1))
+ return 1 - np.tanh(x*scale)**2
+
+ else:
+ raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid))
+
+
+def tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian',
+ value_at_margin=_DEFAULT_VALUE_AT_MARGIN):
+ """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise.
+
+ Args:
+ x: A scalar or numpy array.
+ bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for
+ the target interval. These can be infinite if the interval is unbounded
+ at one or both ends, or they can be equal to one another if the target
+ value is exact.
+ margin: Float. Parameter that controls how steeply the output decreases as
+ `x` moves out-of-bounds.
+ * If `margin == 0` then the output will be 0 for all values of `x`
+ outside of `bounds`.
+ * If `margin > 0` then the output will decrease sigmoidally with
+ increasing distance from the nearest bound.
+ sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian',
+ 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'.
+ value_at_margin: A float between 0 and 1 specifying the output value when
+ the distance from `x` to the nearest bound is equal to `margin`. Ignored
+ if `margin == 0`.
+
+ Returns:
+ A float or numpy array with values between 0.0 and 1.0.
+
+ Raises:
+ ValueError: If `bounds[0] > bounds[1]`.
+ ValueError: If `margin` is negative.
+ """
+ lower, upper = bounds
+ if lower > upper:
+ raise ValueError('Lower bound must be <= upper bound.')
+ if margin < 0:
+ raise ValueError('`margin` must be non-negative.')
+
+ in_bounds = np.logical_and(lower <= x, x <= upper)
+ if margin == 0:
+ value = np.where(in_bounds, 1.0, 0.0)
+ else:
+ d = np.where(x < lower, lower - x, x - upper) / margin
+ value = np.where(in_bounds, 1.0, _sigmoids(d, value_at_margin, sigmoid))
+
+ return float(value) if np.isscalar(x) else value
+
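A quick usage sketch for `tolerance` as defined above: inside `bounds` it returns 1.0; with `margin == 0` it drops to 0 immediately outside; with a positive `margin` it decays smoothly, hitting `value_at_margin` when the distance from the nearest bound equals `margin`.

    print(tolerance(0.0, bounds=(-0.1, 0.1)))              # 1.0, inside the bounds
    print(tolerance(0.5, bounds=(-0.1, 0.1)))              # 0.0, outside with margin == 0
    print(tolerance(0.5, bounds=(-0.1, 0.1), margin=0.4))  # ~0.1, value_at_margin at distance == margin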
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/util.py b/alr_envs/mujoco/gym_table_tennis/utils/util.py
new file mode 100644
index 0000000..716b3c6
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/utils/util.py
@@ -0,0 +1,49 @@
+import json
+import yaml
+import xml.etree.ElementTree as ET
+from collections import OrderedDict
+from pathlib import Path
+
+
+def read_json(fname):
+ fname = Path(fname)
+ with fname.open('rt') as handle:
+ return json.load(handle, object_hook=OrderedDict)
+
+
+def write_json(content, fname):
+ fname = Path(fname)
+ with fname.open('wt') as handle:
+ json.dump(content, handle, indent=4, sort_keys=False)
+
+
+def read_yaml(fname):
+ fname = Path(fname)
+ with fname.open('rt') as handle:
+ return yaml.load(handle, Loader=yaml.FullLoader)
+
+
+def write_yaml(content, fname):
+ fname = Path(fname)
+ with fname.open('wt') as handle:
+ yaml.dump(content, handle)
+
+
+def config_save(dir_path, config):
+ dir_path = Path(dir_path)
+ config_path_json = dir_path / "config.json"
+ config_path_yaml = dir_path / "config.yml"
+    # save the configuration twice, as .json and as .yml
+ write_json(config, config_path_json)
+ write_yaml(config, config_path_yaml)
+
+
+def change_kp_in_xml(kp_list,
+ model_path="/home/zhou/slow/table_tennis_rl/simulation/gymTableTennis/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml"):
+ tree = ET.parse(model_path)
+ root = tree.getroot()
+ # for actuator in root.find("actuator"):
+ for position, kp in zip(root.iter('position'), kp_list):
+ position.set("kp", str(kp))
+ tree.write(model_path)
+
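A short sketch of using `change_kp_in_xml`: it rewrites the `kp` attribute of each `<position>` actuator element in document order, so pass one gain per actuator. The gains and the relative path below are illustrative assumptions.

    # Sketch: one kp gain per <position> actuator, in document order.
    kp_list = [200.0, 300.0, 100.0, 100.0, 10.0, 10.0, 2.5]  # illustrative values
    change_kp_in_xml(kp_list,
                     model_path="alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml")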
diff --git a/example.py b/example.py
index cc96611..fd11c73 100644
--- a/example.py
+++ b/example.py
@@ -4,8 +4,8 @@ import gym
if __name__ == '__main__':
- # env = gym.make('alr_envs:ALRReacher-v0')
- env = gym.make('alr_envs:SimpleReacher-v0')
+ env = gym.make('alr_envs:ALRReacher-v0')
+ # env = gym.make('alr_envs:SimpleReacher-v0')
# env = gym.make('alr_envs:ALRReacher7-v0')
state = env.reset()