diff --git a/alr_envs/mujoco/gym_table_tennis/__init__.py b/alr_envs/mujoco/gym_table_tennis/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/__init__.py
@@ -0,0 +1 @@
+
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/__init__.py b/alr_envs/mujoco/gym_table_tennis/envs/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/__init__.py
@@ -0,0 +1 @@
+
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml
new file mode 100644
index 0000000..7772d14
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml
@@ -0,0 +1,12 @@
[12 added lines of MuJoCo XML; the markup was stripped during extraction]
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml
new file mode 100644
index 0000000..a8df915
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml
@@ -0,0 +1,76 @@
[76 added lines of MuJoCo XML; the markup was stripped during extraction]
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml
new file mode 100644
index 0000000..011b95a
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml
@@ -0,0 +1,95 @@
[95 added lines of MuJoCo XML; the markup was stripped during extraction]
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml
new file mode 100644
index 0000000..ad1ae35
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml
@@ -0,0 +1,38 @@
[38 added lines of MuJoCo XML; the markup was stripped during extraction]
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml
new file mode 100644
index 0000000..eb2b347
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml
@@ -0,0 +1,10 @@
[10 added lines of MuJoCo XML; the markup was stripped during extraction]
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml
new file mode 100644
index 0000000..29a21e1
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml
@@ -0,0 +1,80 @@
[80 added lines of MuJoCo XML; the markup was stripped during extraction]
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl new
file mode 100644 index 0000000..133b112 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl new file mode 100644 index 0000000..047e9df Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl new file mode 100644 index 0000000..3b05c27 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl new file mode 100644 index 0000000..5ff94a2 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl new file mode 100644 index 0000000..c548448 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl new file mode 100644 index 0000000..495160d Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..b4bb322 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..7b2f001 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..f05174e Binary files /dev/null and 
b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl new file mode 100644 index 0000000..eb252d9 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl new file mode 100644 index 0000000..0a986fa Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..c039f0d Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..250acaf Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..993d0f7 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl new file mode 100644 index 0000000..8448a3f Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl new file mode 100644 index 0000000..b34963d Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl new file mode 100644 index 0000000..f6a1515 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl differ diff --git 
a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..e6aa6b6 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..667902e Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl new file mode 100644 index 0000000..ed66bbb Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..aba957d Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..5cca6a9 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..3343e27 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl new file mode 100644 index 0000000..ae505fd Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl new file mode 100644 index 0000000..c36cfec Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl 
b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl new file mode 100644 index 0000000..dc633c4 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..82d0093 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..7fd5a55 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl new file mode 100644 index 0000000..76353ae Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl new file mode 100644 index 0000000..a0386f6 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl new file mode 100644 index 0000000..f6b41ad Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..c36f88f Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..d00cac1 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl new file mode 100644 index 
0000000..34d1d8b
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl
new file mode 100644
index 0000000..13d2f73
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..06e857f
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..48e1bb1
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl
new file mode 100644
index 0000000..0d95239
Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl differ
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml
new file mode 100644
index 0000000..9abf102
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml
@@ -0,0 +1,19 @@
[19 added lines of MuJoCo XML; the markup was stripped during extraction]
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml
new file mode 100644
index 0000000..dfbc37a
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml
@@ -0,0 +1,49 @@
[49 added lines of MuJoCo XML; the markup was stripped during extraction]
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml
new file mode 100644
index 0000000..f2432bb
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml
@@ -0,0 +1,41 @@
[41 added lines of MuJoCo XML; the markup was stripped during extraction]
\ No newline at end of file
diff --git a/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py b/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py
new file mode 100644
index 0000000..625a122
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py
@@ -0,0 +1,233 @@
+import numpy as np
+from gym import spaces
+from gym.envs.robotics import robot_env, utils
+# import xml.etree.ElementTree as ET
+from alr_envs.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis
+import glfw
+from alr_envs.mujoco.gym_table_tennis.utils.experiment import ball_initialize
+from pathlib import Path
+import os
+
+
+class TableTennisEnv(robot_env.RobotEnv):
+    """Class for the table tennis environment."""
+
+    def __init__(self, n_substeps=1,
+                 model_path=None,
+                 initial_qpos=None,
+                 initial_ball_state=None,
+                 config=None,
+                 reward_obj=None
+                 ):
+        """Initializes a new MuJoCo-based table tennis environment.
+
+        Args:
+            model_path (string): path to the environment's XML file
+            initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
+            n_actions: number of joints
+            n_substeps (int): number of substeps the simulation runs on every call to step
+            scale (double): limit maximum change in position
+            initial_ball_state: ball state applied on reset
+        """
+        # self.config = config.config
+        if model_path is None:
+            path_cws = Path.cwd()
+            print(path_cws)
+            current_dir = Path(os.path.split(os.path.realpath(__file__))[0])
+            table_tennis_env_xml_path = current_dir / "robotics" / "assets" / "table_tennis" / "table_tennis_env.xml"
+            model_path = str(table_tennis_env_xml_path)
+        self.config = config
+        action_space = self.config['trajectory']['args']['action_space']
+        time_step = self.config['mujoco_sim_env']['args']["time_step"]
+        if initial_qpos is None:
+            initial_qpos = self.config['robot_config']['args']['initial_qpos']
+
+        # TODO should read all configuration from config
+        assert initial_qpos is not None, "Must initialize the initial q position of the robot arm"
+        n_actions = 7
+        self.initial_qpos_value = np.array(list(initial_qpos.values())).copy()
+        # # change time step in .xml file
+        # tree = ET.parse(model_path)
+        # root = tree.getroot()
+        # for option in root.findall('option'):
+        #     option.set("timestep", str(time_step))
+        #
+        # tree.write(model_path)
+
+        super(TableTennisEnv, self).__init__(
+            model_path=model_path, n_substeps=n_substeps, n_actions=n_actions,
+            initial_qpos=initial_qpos)
+
+        if action_space:
+            self.action_space = spaces.Box(low=np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]),
+                                           high=np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]),
+                                           dtype='float64')
+        else:
+            self.action_space = spaces.Box(low=np.array([-np.inf] * 7),
+                                           high=np.array([np.inf] * 7),
+                                           dtype='float64')
+        self.scale = None
+        self.desired_pos = None
+        self.n_actions = n_actions
+        self.action = None
+        self.time_step = time_step
+        self.paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
+        if reward_obj is None:
+            self.reward_obj = HierarchicalRewardTableTennis()
+        else:
+            self.reward_obj = reward_obj
+
+        if initial_ball_state is not None:
+            self.initial_ball_state = initial_ball_state
+        else:
+            self.initial_ball_state = ball_initialize(random=False)
+        self.target_ball_pos = self.sim.data.get_site_xpos("target_ball")
+        self.racket_center_pos = self.sim.data.get_site_xpos("wam/paddle_center")
+
+    def close(self):
+        if self.viewer is not None:
+            glfw.destroy_window(self.viewer.window)
+            # self.viewer.window.close()
+            self.viewer = None
+            self._viewers = {}
+
+    # GoalEnv methods
+    # ----------------------------
+
+    def compute_reward(self, achieved_goal, goal, info):
+        # reset the reward, if action valid
+        # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
+        # right_court_contact_detector = self.reward_obj.contact_detection(self, right_court_contact_obj)
+        # if right_court_contact_detector:
+        #     print("can detect the table ball contact")
+        self.reward_obj.total_reward = 0
+        # Stage 1 Hitting
+        self.reward_obj.hitting(self)
+        # if the ball was not hit, return the highest reward reached so far
+        if not self.reward_obj.goal_achievement:
+            return self.reward_obj.highest_reward
+        # # Stage 2 Right Table Contact
+        # self.reward_obj.right_table_contact(self)
+        # if not self.reward_obj.goal_achievement:
+        #     return self.reward_obj.highest_reward
+        # # Stage 2 Net Contact
+        # self.reward_obj.net_contact(self)
+        # if not self.reward_obj.goal_achievement:
+        #     return self.reward_obj.highest_reward
+        # Stage 3 Opponent court Contact
+        # self.reward_obj.landing_on_opponent_court(self)
+        # if not self.reward_obj.goal_achievement:
+        #     print("self.reward_obj.highest_reward: ", self.reward_obj.highest_reward)
+        # TODO
+        self.reward_obj.target_achievement(self)
+        return self.reward_obj.highest_reward
+
+    def _reset_sim(self):
+        self.sim.set_state(self.initial_state)
+        [initial_x, initial_y, initial_z, v_x, v_y, v_z] = self.initial_ball_state
+        self.sim.data.set_joint_qpos('tar:x', initial_x)
+        self.sim.data.set_joint_qpos('tar:y', initial_y)
+        self.sim.data.set_joint_qpos('tar:z', initial_z)
+        self.energy_corrected = True
+        self.give_reflection_reward = False
+
+        # velocity is positive direction
+        self.sim.data.set_joint_qvel('tar:x', v_x)
+        self.sim.data.set_joint_qvel('tar:y', v_y)
+        self.sim.data.set_joint_qvel('tar:z', v_z)
+
+        # Apply gravity compensation: offset the bias torques on the arm joints
+        self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
+        self.sim.forward()
+        return True
+
+    def _env_setup(self, initial_qpos):
+        for name, value in initial_qpos.items():
+            self.sim.data.set_joint_qpos(name, value)
+
+        # Apply gravity compensation
+        self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
+        self.sim.forward()
+
+        # Get the target position
+        self.initial_paddle_center_xpos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
+        self.initial_paddle_center_vel = None  # self.sim.get_site_
+
+    def _sample_goal(self):
+        goal = self.initial_paddle_center_xpos[:3] + self.np_random.uniform(-0.2, 0.2, size=3)
+        return goal.copy()
+
+    def _get_obs(self):
+        # positions of racket center
+        paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
+        ball_pos = self.sim.data.get_site_xpos("target_ball")
+
+        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
+        paddle_center_velp = self.sim.data.get_site_xvelp('wam/paddle_center') * dt
+        robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
+
+        wrist_state = robot_qpos[-3:]
+        wrist_vel = robot_qvel[-3:] * dt  # change to a scalar if the gripper is made symmetric
+
+        # achieved_goal = paddle_body_EE_pos
+        obs = np.concatenate([
+            paddle_center_pos, paddle_center_velp, wrist_state, wrist_vel
+        ])
+
+        out_dict = {
+            'observation': obs.copy(),
+            'achieved_goal': paddle_center_pos.copy(),
+            'desired_goal': self.goal.copy(),
+            'q_pos': self.sim.data.qpos[:].copy(),
+            "ball_pos": ball_pos.copy(),
+            # "hitting_flag": self.reward_obj.hitting_flag
+        }
+
+        return out_dict
+
+    def _step_callback(self):
+        pass
+
+    def _set_action(self, action):
+        # Apply gravity compensation
+        self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
+        # print("set action process running")
+        assert action.shape == (self.n_actions,)
+        self.action = action.copy()  # ensure that we don't change the action outside of this scope
+        pos_ctrl = self.action[:]  # limit maximum change in position
+        pos_ctrl = np.clip(pos_ctrl, self.action_space.low, self.action_space.high)
+
+        # get desired trajectory
+        self.sim.data.qpos[:7] = pos_ctrl
+        self.sim.forward()
+        self.desired_pos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
+
+        self.sim.data.ctrl[:] = pos_ctrl
+
+    def _is_success(self, achieved_goal, desired_goal):
+        pass
+
+
+if __name__ == '__main__':
+    render_mode = "human"  # "human" or "partial" or "final"
+    # NOTE: a config dict with 'trajectory', 'mujoco_sim_env' and 'robot_config' entries is required
+    env = TableTennisEnv()
+    env.reset()
+    # env.render(mode=render_mode)
+
+    for i in range(200):
+        # objective.load_result("/tmp/cma")
+        # test with random actions
+        ac = 2 * env.action_space.sample()
+        # ac[0] += np.pi/2
+        obs, rew, d, info = env.step(ac)
+        env.render(mode=render_mode)
+
+        print(rew)
+
+        if d:
+            break
+
+    env.close()
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/__init__.py b/alr_envs/mujoco/gym_table_tennis/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/experiment.py b/alr_envs/mujoco/gym_table_tennis/utils/experiment.py
new file mode 100644
index 0000000..addd6c5
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/utils/experiment.py
@@ -0,0 +1,83 @@
+import numpy as np
+from gym.utils import seeding
+from alr_envs.mujoco.gym_table_tennis.utils.util import read_yaml, read_json
+from pathlib import Path
+
+
+def ball_initialize(random=False, scale=False, context_range=None, scale_value=None):
+    if random:
+        if scale:
+            # if scale_value is None:
+            scale_value = context_scale_initialize(context_range)
+            v_x, v_y, v_z = np.array([2.5, 2, 0.5]) * scale_value
+            dx = 1
+            dy = 0
+            dz = 0.05
+        else:
+            seed = None
+            np_random, seed = seeding.np_random(seed)
+            dx = np_random.uniform(-0.1, 0.1)
+            dy = np_random.uniform(-0.1, 0.1)
+            dz = np_random.uniform(-0.1, 0.1)
+
+            v_x = np_random.uniform(1.7, 1.8)
+            v_y = np_random.uniform(0.7, 0.8)
+            v_z = np_random.uniform(0.1, 0.2)
+            # print(dx, dy, dz, v_x, v_y, v_z)
+        # else:
+        #     dx = -0.1
+        #     dy = 0.05
+        #     dz = 0.05
+        #     v_x = 1.5
+        #     v_y = 0.7
+        #     v_z = 0.06
+        #     initial_x = -0.6 + dx
+        #     initial_y = -0.3 + dy
+        #     initial_z = 0.8 + dz
+    else:
+        if scale:
+            v_x, v_y, v_z = np.array([2.5, 2, 0.5]) * scale_value
+        else:
+            v_x = 2.5
+            v_y = 2
+            v_z = 0.5
+        dx = 1
+        dy = 0
+        dz = 0.05
+
+    initial_x = 0 + dx
+    initial_y = -0.2 + dy
+    initial_z = 0.3 + dz
+    # print("initial ball state: ", initial_x, initial_y, initial_z, v_x, v_y, v_z)
+    initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
+    return initial_ball_state
+
+
+def context_scale_initialize(context_range):
+    """Sample a scale factor uniformly from `context_range`.
+
+    Returns:
+        A 1-element numpy array drawn from U(low, high).
+    """
+    low, high = context_range
+    scale = np.random.uniform(low, high, 1)
+    return scale
+
+
+def config_handle_generation(config_file_path):
+    """Generate a config handle for multiprocessing.
+
+    Args:
+        config_file_path: path to a .json or .yml configuration file.
+
+    Returns:
+        The parsed configuration.
+    """
+    cfg_fname = Path(config_file_path)
+    # .json and .yml files are supported
+    if cfg_fname.suffix == ".json":
+        config = read_json(cfg_fname)
+    elif cfg_fname.suffix == ".yml":
+        config = read_yaml(cfg_fname)
+    else:
+        raise ValueError("Unsupported config file type: {}".format(cfg_fname.suffix))
+
+    return config
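A note on the velocity scaling in `ball_initialize` above: `scale_value` comes back from `context_scale_initialize` as a one-element ndarray, so multiplying a plain Python list by it only works through NumPy broadcasting; with a plain float it would be list repetition or a TypeError. The standalone snippet below (not part of the diff) illustrates why the velocity template is wrapped in `np.array`, and what the deterministic branch returns:

import numpy as np

print([2.5, 2, 0.5] * 2)               # list repetition: [2.5, 2, 0.5, 2.5, 2, 0.5]
print(np.array([2.5, 2, 0.5]) * 2.0)   # element-wise scaling: [5. 4. 1.]

# Deterministic branch (random=False, scale=False): dx, dy, dz = 1, 0, 0.05, so
# ball_initialize() returns [0 + 1, -0.2 + 0, 0.3 + 0.05, 2.5, 2.0, 0.5], i.e.
print(np.array([1.0, -0.2, 0.35, 2.5, 2.0, 0.5]))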
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/__init__.py b/alr_envs/mujoco/gym_table_tennis/utils/rewards/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py b/alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
new file mode 100644
index 0000000..fe69104
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
@@ -0,0 +1,402 @@
+import numpy as np
+import logging
+
+
+class HierarchicalRewardTableTennis(object):
+    """Hierarchical reward function for the table tennis experiment.
+
+    Always returns the highest reward reached so far.
+    Reward = 0
+    Step 1: Action Valid. Upper bound 0, i.e. reward in (-inf, 0].
+        Reward += -1 * |hit_duration - hit_duration_threshold| * [hit_duration < hit_duration_threshold] * 10
+    Step 2: Hitting. Upper bound 2.
+        if hitting, reward in [0, 2]:
+            Reward = 2 * (1 - tanh(|shortest_hitting_dist|))
+        if not hitting, reward in [0, 0.2]:
+            Reward = 0.2 * (1 - tanh(|shortest_hitting_dist|))
+    Step 3: Target Point Achievement. Adds up to 4 (cumulative upper bound 6).
+        if table_contact_detector:
+            Reward += 1
+            Reward += (1 - tanh(|dist_target|)) * 2
+            if contact_coordinate[0] < 0:
+                Reward += 1
+            else:
+                Reward += 0
+        elif floor_contact_detector:
+            Reward += (1 - tanh(|dist_target_virtual|))
+    """
+
+    def __init__(self):
+        self.reward = None
+        self.goal_achievement = False
+        self.total_reward = 0
+        self.shortest_hitting_dist = 1000
+        self.highest_reward = -1000
+        self.lowest_corner_dist = 100
+        self.right_court_contact_detector = False
+        self.table_contact_detector = False
+        self.floor_contact_detector = False
+        self.radius = 0.025
+        self.min_ball_x_pos = 100
+        self.hit_contact_detector = False
+        self.net_contact_detector = False
+        self.ratio = 1
+        self.lowest_z = 100
+        self.target_flag = False
+        self.dist_target_virtual = 100
+        self.ball_z_pos_lowest = 100
+        self.hitting_flag = False
+        self.hitting_time_point = None
+        self.ctxt_dim = None
+        self.context_range_bounds = None
+        # self.ctxt_out_of_range_punishment = None
+        # self.ctxt_in_side_of_range_punishment = None
+
+    # def check_where_invalid(self, ctxt, context_range_bounds, set_to_valid_region=False):
+    #     idx_max = []
+    #     idx_min = []
+    #     for dim in range(self.ctxt_dim):
+    #         min_dim = context_range_bounds[0][dim]
+    #         max_dim = context_range_bounds[1][dim]
+    #         idx_max_c = np.where(ctxt[:, dim] > max_dim)[0]
+    #         idx_min_c = np.where(ctxt[:, dim] < min_dim)[0]
+    #         if set_to_valid_region:
+    #             if idx_max_c.shape[0] != 0:
+    #                 ctxt[idx_max_c, dim] = max_dim
+    #             if idx_min_c.shape[0] != 0:
+    #                 ctxt[idx_min_c, dim] = min_dim
+    #         idx_max.append(idx_max_c)
+    #         idx_min.append(idx_min_c)
+    #     return idx_max, idx_min, ctxt
+
+    def check_valid(self, scale, context_range_bounds):
+        min_dim = context_range_bounds[0][0]
+        max_dim = context_range_bounds[1][0]
+        valid = (scale < max_dim) and (scale > min_dim)
+        return valid
+
+    @classmethod
+    def goal_distance(cls, goal_a, goal_b):
+        assert goal_a.shape == goal_b.shape
+        return np.linalg.norm(goal_a - goal_b, axis=-1)
+
+    def refresh_highest_reward(self):
+        if self.total_reward >= self.highest_reward:
+            self.highest_reward = self.total_reward
+
+    def duration_valid(self):
+        pass
+
+    def huge_value_unstable(self):
+        self.total_reward += -10
+        self.highest_reward = -1
+
+    def context_valid(self, context):
+        valid = self.check_valid(context.copy(), context_range_bounds=self.context_range_bounds)
+        # when using dirac punishments
+        if valid:
+            self.total_reward += 1  # if action and context are both valid, total_reward = 0
+        else:
+            self.total_reward += 0
+        self.refresh_highest_reward()
+        # If the context is inside the range, add 1; otherwise add 0.
+
+    def action_valid(self, durations=None):
+        """Ensure the robot movement is executed with parameters from a valid domain.
+
+        Time should always be positive, and the joint positions of the robot
+        should be a subset of [-pi, pi].
+        If all parameters are valid, the robot gets a zero score; otherwise it
+        gets a negative score proportional to how far it is beyond the valid
+        parameter domain.
+
+        Returns:
+            reward: 0 if valid; otherwise negative, proportional to the distance
+                beyond the valid parameter domain.
+        """
+        assert durations.shape[0] == 2, "durations should be an np.array of shape (2,)"
+        # pre_duration = durations[0]
+        hit_duration = durations[1]
+        # pre_duration_thres = 0.01
+        hit_duration_thres = 1
+        # self.goal_achievement = np.all(
+        #     [(pre_duration > pre_duration_thres), (hit_duration > hit_duration_thres), (0.3 < pre_duration < 0.6)])
+        self.goal_achievement = (hit_duration > hit_duration_thres)
+        if self.goal_achievement:
+            self.total_reward = -1
+            self.goal_achievement = True
+        else:
+            # self.total_reward += -1 * ((np.abs(pre_duration - pre_duration_thres) * int(
+            #     pre_duration < pre_duration_thres) + np.abs(hit_duration - hit_duration_thres) * int(
+            #     hit_duration < hit_duration_thres)) * 10)
+            self.total_reward = -1 * ((np.abs(hit_duration - hit_duration_thres) * int(
+                hit_duration < hit_duration_thres)) * 10)
+            self.total_reward += -1
+            self.goal_achievement = False
+        self.refresh_highest_reward()
+
+    def motion_penalty(self, action, high_motion_penalty):
+        """Protect the robot from high accelerations and dangerous movement."""
+        if not high_motion_penalty:
+            reward_ctrl = - 0.05 * np.square(action).sum()
+        else:
+            reward_ctrl = - 0.075 * np.square(action).sum()
+        self.total_reward += reward_ctrl
+        self.refresh_highest_reward()
+        self.goal_achievement = True
+
+    def hitting(self, env):  # , target_ball_pos, racket_center_pos, hit_contact_detector=False
+        """Hitting reward calculation.
+
+        If the racket hits the ball, reward = 2 * (1 - tanh(|shortest_hitting_dist|)),
+        which approaches 2 as the recorded racket-ball distance goes to 0.
+        If the racket never hits the ball, the same proximity term is scaled
+        by 0.2, so the reward stays in [0, 0.2].
+
+        Args:
+            env: the TableTennisEnv instance.
+        """
+        hit_contact_obj = ["target_ball", "bat"]
+        target_ball_pos = env.target_ball_pos
+        racket_center_pos = env.racket_center_pos
+        # hit contact detection
+        # Record the hitting history
+        self.hitting_flag = False
+        if not self.hit_contact_detector:
+            self.hit_contact_detector = self.contact_detection(env, hit_contact_obj)
+            if self.hit_contact_detector:
+                print("First time detect hitting")
+                self.hitting_flag = True
+        if self.hit_contact_detector:
+            # TODO
+            dist = self.goal_distance(target_ball_pos, racket_center_pos)
+            if dist < 0:
+                dist = 0
+            # print("goal distance is:", dist)
+            if dist <= self.shortest_hitting_dist:
+                self.shortest_hitting_dist = dist
+                # print("shortest_hitting_dist is:", self.shortest_hitting_dist)
+            # Keep the shortest hitting distance.
+            dist_reward = 2 * (1 - np.tanh(np.abs(self.shortest_hitting_dist)))
+
+            # TODO sparse
+            # dist_reward = 2
+
+            self.total_reward += dist_reward
+            self.goal_achievement = True
+
+            # if self.hitting_time_point is not None and self.hitting_time_point > 600:
+            #     self.total_reward += 1
+        else:
+            dist = self.goal_distance(target_ball_pos, racket_center_pos)
+            if dist <= self.shortest_hitting_dist:
+                self.shortest_hitting_dist = dist
+            dist_reward = 1 - np.tanh(self.shortest_hitting_dist)
+            reward = 0.2 * dist_reward  # because it does not hit the ball, so multiply 0.2
+            self.total_reward += reward
+            self.goal_achievement = False
+
+        self.refresh_highest_reward()
+
+    @classmethod
+    def relu(cls, x):
+        return np.maximum(0, x)
+
+    # def right_table_contact(self, env):
+    #     right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
+    #     if env.target_ball_pos[0] >= 0 and env.target_ball_pos[2] >= 0.7:
+    #         # update right court contact detection
+    #         if not self.right_court_contact_detector:
+    #             self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
+    #             if self.right_court_contact_detector:
+    #                 self.contact_x_pos = env.target_ball_pos[0]
+    #     if self.right_court_contact_detector:
+    #         self.total_reward += 1 - norm(0.685, 1).pdf(self.contact_x_pos)  # x axis middle of right table
+    #         self.goal_achievement = False
+    #     else:
+    #         self.total_reward += 1
+    #         self.goal_achievement = True
+    #     # else:
+    #     #     self.total_reward += 0
+    #     #     self.goal_achievement = False
+    #     self.refresh_highest_reward()
+
+    # def net_contact(self, env):
+    #     net_contact_obj = ["target_ball", "table_tennis_net"]
+    #     # net_contact_detector = self.contact_detection(env, net_contact_obj)
+    #     # ball_x_pos = env.target_ball_pos[0]
+    #     # if self.min_ball_x_pos >= ball_x_pos:
+    #     #     self.min_ball_x_pos = ball_x_pos
+    #     # table_left_edge_x_pos = -1.37
+    #     # if np.abs(ball_x_pos) <= 0.01:  # x threshold of net
+    #     #     if self.lowest_z >= env.target_ball_pos[2]:
+    #     #         self.lowest_z = env.target_ball_pos[2]
+    #     #         # construct a gaussian distribution of z
+    #     #         z_reward = 4 - norm(0, 0.1).pdf(self.lowest_z - 0.07625)  # maximum 4
+    #     #         self.total_reward += z_reward
+    #     # self.total_reward += 2 - np.minimum(1, self.relu(np.abs(self.min_ball_x_pos)))
+    #     if not self.net_contact_detector:
+    #         self.net_contact_detector = self.contact_detection(env, net_contact_obj)
+    #     if self.net_contact_detector:
+    #         self.total_reward += 0  # very high cost
+    #         self.goal_achievement = False
+    #     else:
+    #         self.total_reward += 1
+    #         self.goal_achievement = True
+    #     self.refresh_highest_reward()
+
+    # def landing_on_opponent_court(self, env):
+    #     # Very sparse reward
+    #     # don't contact the right side court
+    #     # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
+    #     # right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
+    #     left_court_contact_obj = ["target_ball", "table_tennis_table_left_side"]
+    #     # left_court_contact_detector = self.contact_detection(env, left_court_contact_obj)
+    #     # record the contact history
+    #     # if not self.right_court_contact_detector:
+    #     #     self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
+    #     if not self.table_contact_detector:
+    #         self.table_contact_detector = self.contact_detection(env, left_court_contact_obj)
+    #
+    #     dist_left_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_up_corner"))
+    #     dist_middle_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("middle_up_corner"))
+    #     dist_left_down_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_down_corner"))
+    #     dist_middle_down_corner = self.goal_distance(env.target_ball_pos,
+    #                                                  env.sim.data.get_site_xpos("middle_down_corner"))
+    #     dist_array = np.array(
+    #         [dist_left_up_corner, dist_middle_up_corner, dist_left_down_corner, dist_middle_down_corner])
+    #     dist_corner = np.amin(dist_array)
+    #     if self.lowest_corner_dist >= dist_corner:
+    #         self.lowest_corner_dist = dist_corner
+    #
+    #     right_contact_cost = 1
+    #     left_contact_reward = 2
+    #     dist_left_table_reward = (2 - np.tanh(self.lowest_corner_dist))
+    #     # TODO Try multi dimensional gaussian distribution
+    #     # contact only the left side court
+    #     if self.right_court_contact_detector:
+    #         self.total_reward += 0
+    #         self.goal_achievement = False
+    #         if self.table_contact_detector:
+    #             self.total_reward += left_contact_reward
+    #             self.goal_achievement = False
+    #         else:
+    #             self.total_reward += dist_left_table_reward
+    #             self.goal_achievement = False
+    #     else:
+    #         self.total_reward += right_contact_cost
+    #         if self.table_contact_detector:
+    #             self.total_reward += left_contact_reward
+    #             self.goal_achievement = True
+    #         else:
+    #             self.total_reward += dist_left_table_reward
+    #             self.goal_achievement = False
+    #     self.refresh_highest_reward()
+    #     # if self.left_court_contact_detector and not self.right_court_contact_detector:
+    #     #     self.total_reward += self.ratio * left_contact_reward
+    #     #     print("only left court reward return!!!!!!!!!")
+    #     #     print("contact only left court!!!!!!")
+    #     #     self.goal_achievement = True
+    #     # # no contact with table
+    #     # elif not self.right_court_contact_detector and not self.left_court_contact_detector:
+    #     #     self.total_reward += 0 + self.ratio * dist_left_table_reward
+    #     #     self.goal_achievement = False
+    #     # # contact both side
+    #     # elif self.right_court_contact_detector and self.left_court_contact_detector:
+    #     #     self.total_reward += self.ratio * (left_contact_reward - right_contact_cost)  # cost of contact of right court
+    #     #     self.goal_achievement = False
+    #     # # contact only the right side court
+    #     # elif self.right_court_contact_detector and not self.left_court_contact_detector:
+    #     #     self.total_reward += 0 + self.ratio * (
+    #     #         dist_left_table_reward - right_contact_cost)  # cost of contact of right court
+    #     #     self.goal_achievement = False
+
+    def target_achievement(self, env):
+        target_coordinate = np.array([-0.5, -0.5])
+        # net_contact_obj = ["target_ball", "table_tennis_net"]
+        table_contact_obj = ["target_ball", "table_tennis_table"]
+        floor_contact_obj = ["target_ball", "floor"]
+
+        if 0.78 < env.target_ball_pos[2] < 0.8:
+            dist_target_virtual = np.linalg.norm(env.target_ball_pos[:2] - target_coordinate)
+            if self.dist_target_virtual > dist_target_virtual:
+                self.dist_target_virtual = dist_target_virtual
+        if -0.07 < env.target_ball_pos[0] < 0.07 and env.sim.data.get_joint_qvel('tar:x') < 0:
+            if self.ball_z_pos_lowest > env.target_ball_pos[2]:
+                self.ball_z_pos_lowest = env.target_ball_pos[2].copy()
+        # if not self.net_contact_detector:
+        #     self.net_contact_detector = self.contact_detection(env, net_contact_obj)
+        if not self.table_contact_detector:
+            self.table_contact_detector = self.contact_detection(env, table_contact_obj)
+        if not self.floor_contact_detector:
+            self.floor_contact_detector = self.contact_detection(env, floor_contact_obj)
+        if not self.target_flag:
+            # Table Contact Reward.
+            if self.table_contact_detector:
+                self.total_reward += 1
+                # only update on the first contact because of the flag
+                contact_coordinate = env.target_ball_pos[:2].copy()
+                print("contact table ball coordinate: ", env.target_ball_pos)
+                logging.info("contact table ball coordinate: {}".format(env.target_ball_pos))
+                dist_target = np.linalg.norm(contact_coordinate - target_coordinate)
+                self.total_reward += (1 - np.tanh(dist_target)) * 2
+                self.target_flag = True
+                # Landing side check. Precondition: Table Contact exists.
+                if contact_coordinate[0] < 0:
+                    print("left table contact")
+                    logging.info("~~~~~~~~~~~~~~~left table contact~~~~~~~~~~~~~~~")
+                    self.total_reward += 1
+                    # TODO Z coordinate reward
+                    # self.total_reward += np.maximum(np.tanh(self.ball_z_pos_lowest), 0)
+                    self.goal_achievement = True
+                else:
+                    print("right table contact")
+                    logging.info("~~~~~~~~~~~~~~~right table contact~~~~~~~~~~~~~~~")
+                    self.total_reward += 0
+                    self.goal_achievement = False
+                # if self.net_contact_detector:
+                #     self.total_reward += 0
+                #     self.goal_achievement = False
+                # else:
+                #     self.total_reward += 1
+                #     self.goal_achievement = False
+            # Floor Contact Reward. Precondition: Table Contact exists.
+            elif self.floor_contact_detector:
+                self.total_reward += (1 - np.tanh(self.dist_target_virtual))
+                self.target_flag = True
+                self.goal_achievement = False
+            # no contact with floor or table yet: the ball is still in flight
+            else:
+                pass
+        # else:
+        #     print("Flag is True already")
+        self.refresh_highest_reward()
+
+    def distance_to_target(self):
+        pass
+
+    @classmethod
+    def contact_detection(cls, env, goal_contact):
+        for i in range(env.sim.data.ncon):
+            contact = env.sim.data.contact[i]
+            achieved_geom1_name = env.sim.model.geom_id2name(contact.geom1)
+            achieved_geom2_name = env.sim.model.geom_id2name(contact.geom2)
+            if np.all([(achieved_geom1_name in goal_contact), (achieved_geom2_name in goal_contact)]):
+                print("contact of " + achieved_geom1_name + " " + achieved_geom2_name)
+                return True
+        # no matching contact pair was found in this step
+        return False
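To make the staged bounds described in the HierarchicalRewardTableTennis docstring concrete, here is a small numerical walk-through of the formulas implemented in `hitting` and `target_achievement` above; the distances are made-up illustration values, not taken from a rollout:

import numpy as np

shortest_hitting_dist = 0.05   # hypothetical closest racket-ball distance
# Stage "Hitting", contact case: approaches 2 as the distance goes to 0.
hit_reward = 2 * (1 - np.tanh(abs(shortest_hitting_dist)))   # ~1.900
# No-contact case: the same proximity term scaled by 0.2, so at most 0.2.
miss_reward = 0.2 * (1 - np.tanh(shortest_hitting_dist))     # ~0.190

dist_target = 0.3              # hypothetical landing distance to (-0.5, -0.5)
# Stage "Target Point Achievement": contact bonus + proximity + left-court bonus.
table_reward = 1 + (1 - np.tanh(dist_target)) * 2 + 1        # ~3.417
print(hit_reward, miss_reward, hit_reward + table_reward)    # total stays below 6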
diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py b/alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py
new file mode 100644
index 0000000..6e6aa32
--- /dev/null
+++ b/alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py
@@ -0,0 +1,136 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# """Soft indicator function evaluating whether a number is within bounds."""
+#
+# from __future__ import absolute_import
+# from __future__ import division
+# from __future__ import print_function
+
+# Internal dependencies.
+import numpy as np
+
+# The value returned by tolerance() at `margin` distance from `bounds` interval.
+_DEFAULT_VALUE_AT_MARGIN = 0.1
+
+
+def _sigmoids(x, value_at_1, sigmoid):
+    """Returns 1 when `x` == 0, between 0 and 1 otherwise.
+
+    Args:
+        x: A scalar or numpy array.
+        value_at_1: A float between 0 and 1 specifying the output when `x` == 1.
+        sigmoid: String, choice of sigmoid type.
+
+    Returns:
+        A numpy array with values between 0.0 and 1.0.
+
+    Raises:
+        ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and
+            `quadratic` sigmoids which allow `value_at_1` == 0.
+        ValueError: If `sigmoid` is of an unknown type.
+    """
+    if sigmoid in ('cosine', 'linear', 'quadratic'):
+        if not 0 <= value_at_1 < 1:
+            raise ValueError('`value_at_1` must be nonnegative and smaller than 1, '
+                             'got {}.'.format(value_at_1))
+    else:
+        if not 0 < value_at_1 < 1:
+            raise ValueError('`value_at_1` must be strictly between 0 and 1, '
+                             'got {}.'.format(value_at_1))
+
+    if sigmoid == 'gaussian':
+        scale = np.sqrt(-2 * np.log(value_at_1))
+        return np.exp(-0.5 * (x*scale)**2)
+
+    elif sigmoid == 'hyperbolic':
+        scale = np.arccosh(1/value_at_1)
+        return 1 / np.cosh(x*scale)
+
+    elif sigmoid == 'long_tail':
+        scale = np.sqrt(1/value_at_1 - 1)
+        return 1 / ((x*scale)**2 + 1)
+
+    elif sigmoid == 'cosine':
+        scale = np.arccos(2*value_at_1 - 1) / np.pi
+        scaled_x = x*scale
+        return np.where(abs(scaled_x) < 1, (1 + np.cos(np.pi*scaled_x))/2, 0.0)
+
+    elif sigmoid == 'linear':
+        scale = 1-value_at_1
+        scaled_x = x*scale
+        return np.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0)
+
+    elif sigmoid == 'quadratic':
+        scale = np.sqrt(1-value_at_1)
+        scaled_x = x*scale
+        return np.where(abs(scaled_x) < 1, 1 - scaled_x**2, 0.0)
+
+    elif sigmoid == 'tanh_squared':
+        scale = np.arctanh(np.sqrt(1-value_at_1))
+        return 1 - np.tanh(x*scale)**2
+
+    else:
+        raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid))
+
+
+def tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian',
+              value_at_margin=_DEFAULT_VALUE_AT_MARGIN):
+    """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise.
+
+    Args:
+        x: A scalar or numpy array.
+        bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for
+            the target interval. These can be infinite if the interval is unbounded
+            at one or both ends, or they can be equal to one another if the target
+            value is exact.
+        margin: Float. Parameter that controls how steeply the output decreases as
+            `x` moves out-of-bounds.
+            * If `margin == 0` then the output will be 0 for all values of `x`
+              outside of `bounds`.
+            * If `margin > 0` then the output will decrease sigmoidally with
+              increasing distance from the nearest bound.
+        sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian',
+            'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'.
+        value_at_margin: A float between 0 and 1 specifying the output value when
+            the distance from `x` to the nearest bound is equal to `margin`. Ignored
+            if `margin == 0`.
+
+    Returns:
+        A float or numpy array with values between 0.0 and 1.0.
+
+    Raises:
+        ValueError: If `bounds[0] > bounds[1]`.
+        ValueError: If `margin` is negative.
+    """
+    lower, upper = bounds
+    if lower > upper:
+        raise ValueError('Lower bound must be <= upper bound.')
+    if margin < 0:
+        raise ValueError('`margin` must be non-negative.')
+
+    in_bounds = np.logical_and(lower <= x, x <= upper)
+    if margin == 0:
+        value = np.where(in_bounds, 1.0, 0.0)
+    else:
+        d = np.where(x < lower, lower - x, x - upper) / margin
+        value = np.where(in_bounds, 1.0, _sigmoids(d, value_at_margin, sigmoid))
+
+    return float(value) if np.isscalar(x) else value
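`tolerance` is dm_control's soft indicator: it returns 1.0 inside `bounds`, decays through the chosen sigmoid outside them, and hits `value_at_margin` at exactly `margin` distance from the nearest bound. A quick sketch of that shape (the import path is inferred from the file location in this diff):

import numpy as np
from alr_envs.mujoco.gym_table_tennis.utils.rewards.rewards import tolerance

print(tolerance(0.0, bounds=(-0.1, 0.1), margin=0.5))   # 1.0: inside the bounds
print(tolerance(0.35, bounds=(-0.1, 0.1), margin=0.5))  # ~0.56: halfway into the margin
print(tolerance(0.6, bounds=(-0.1, 0.1), margin=0.5))   # 0.1: exactly `margin` outside
print(tolerance(2.0, bounds=(-0.1, 0.1), margin=0.5))   # ~4e-15: effectively zero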
+ """ + lower, upper = bounds + if lower > upper: + raise ValueError('Lower bound must be <= upper bound.') + if margin < 0: + raise ValueError('`margin` must be non-negative.') + + in_bounds = np.logical_and(lower <= x, x <= upper) + if margin == 0: + value = np.where(in_bounds, 1.0, 0.0) + else: + d = np.where(x < lower, lower - x, x - upper) / margin + value = np.where(in_bounds, 1.0, _sigmoids(d, value_at_margin, sigmoid)) + + return float(value) if np.isscalar(x) else value + + + + + diff --git a/alr_envs/mujoco/gym_table_tennis/utils/util.py b/alr_envs/mujoco/gym_table_tennis/utils/util.py new file mode 100644 index 0000000..716b3c6 --- /dev/null +++ b/alr_envs/mujoco/gym_table_tennis/utils/util.py @@ -0,0 +1,49 @@ +import json +import yaml +import xml.etree.ElementTree as ET +from collections import OrderedDict +from pathlib import Path + + +def read_json(fname): + fname = Path(fname) + with fname.open('rt') as handle: + return json.load(handle, object_hook=OrderedDict) + + +def write_json(content, fname): + fname = Path(fname) + with fname.open('wt') as handle: + json.dump(content, handle, indent=4, sort_keys=False) + + +def read_yaml(fname): + fname = Path(fname) + with fname.open('rt') as handle: + return yaml.load(handle, Loader=yaml.FullLoader) + + +def write_yaml(content, fname): + fname = Path(fname) + with fname.open('wt') as handle: + yaml.dump(content, handle) + + +def config_save(dir_path, config): + dir_path = Path(dir_path) + config_path_json = dir_path / "config.json" + config_path_yaml = dir_path / "config.yml" + # .json and .yml file,save 2 version of configuration. + write_json(config, config_path_json) + write_yaml(config, config_path_yaml) + + +def change_kp_in_xml(kp_list, + model_path="/home/zhou/slow/table_tennis_rl/simulation/gymTableTennis/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml"): + tree = ET.parse(model_path) + root = tree.getroot() + # for actuator in root.find("actuator"): + for position, kp in zip(root.iter('position'), kp_list): + position.set("kp", str(kp)) + tree.write(model_path) + diff --git a/example.py b/example.py index cc96611..fd11c73 100644 --- a/example.py +++ b/example.py @@ -4,8 +4,8 @@ import gym if __name__ == '__main__': - # env = gym.make('alr_envs:ALRReacher-v0') - env = gym.make('alr_envs:SimpleReacher-v0') + env = gym.make('alr_envs:ALRReacher-v0') + # env = gym.make('alr_envs:SimpleReacher-v0') # env = gym.make('alr_envs:ALRReacher7-v0') state = env.reset()