diff --git a/alr_envs/mujoco/__init__.py b/alr_envs/mujoco/__init__.py index 77588f7..e0ecd85 100644 --- a/alr_envs/mujoco/__init__.py +++ b/alr_envs/mujoco/__init__.py @@ -1 +1 @@ -from alr_envs.mujoco.alr_reacher import ALRReacherEnv \ No newline at end of file +from alr_envs.mujoco.reacher.alr_reacher import ALRReacherEnv \ No newline at end of file diff --git a/alr_envs/mujoco/ball_in_a_cup/__init__.py b/alr_envs/mujoco/ball_in_a_cup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/ball-in-a-cup_base.xml b/alr_envs/mujoco/ball_in_a_cup/assets/ball-in-a-cup_base.xml new file mode 100644 index 0000000..b534205 --- /dev/null +++ b/alr_envs/mujoco/ball_in_a_cup/assets/ball-in-a-cup_base.xml @@ -0,0 +1,366 @@ + + + diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/base_link_convex.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_convex.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/base_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/base_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/base_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/base_link_fine.stl diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup.stl new file mode 100644 index 0000000..bc34058 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split1.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split1.stl new file mode 100644 
index 0000000..c80aa61 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split1.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split10.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split10.stl new file mode 100644 index 0000000..bd5708b Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split10.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split11.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split11.stl new file mode 100644 index 0000000..ac81da2 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split11.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split12.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split12.stl new file mode 100644 index 0000000..a18e96e Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split12.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split13.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split13.stl new file mode 100644 index 0000000..f0e5832 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split13.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split14.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split14.stl new file mode 100644 index 0000000..41a3e94 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split14.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split15.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split15.stl new file mode 100644 index 0000000..7a26643 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split15.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split16.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split16.stl new file mode 100644 index 0000000..155b24e Binary files /dev/null and 
b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split16.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split17.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split17.stl new file mode 100644 index 0000000..2fe8d95 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split17.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split18.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split18.stl new file mode 100644 index 0000000..f5287b2 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split18.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split2.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split2.stl new file mode 100644 index 0000000..5c1e50c Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split2.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split3.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split3.stl new file mode 100644 index 0000000..ef6d547 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split3.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split4.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split4.stl new file mode 100644 index 0000000..5476296 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split4.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split5.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split5.stl new file mode 100644 index 0000000..ccfcd42 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split5.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split6.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split6.stl new file mode 100644 index 0000000..72d6287 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split6.stl differ diff --git 
a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split7.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split7.stl new file mode 100644 index 0000000..d4918f2 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split7.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split8.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split8.stl new file mode 100644 index 0000000..8a0cd84 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split8.stl differ diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split9.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split9.stl new file mode 100644 index 0000000..4281a69 Binary files /dev/null and b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/cup_split9.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/elbow_link_convex.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_convex.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/elbow_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/elbow_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/elbow_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/elbow_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/forearm_link_convex_decomposition_p1.stl old mode 100644 new mode 100755 similarity index 100% rename from 
alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p1.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/forearm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/forearm_link_convex_decomposition_p2.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_convex_decomposition_p2.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/forearm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/forearm_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/forearm_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/forearm_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_convex_decomposition_p1.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p1.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_convex_decomposition_p2.stl old mode 100644 new mode 100755 similarity index 100% rename from 
alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p2.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_convex_decomposition_p3.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_convex_decomposition_p3.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_pitch_link_convex.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_convex.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_pitch_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_pitch_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/shoulder_pitch_link_fine.stl rename to 
alr_envs/mujoco/ball_in_a_cup/assets/meshes/shoulder_pitch_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/upper_arm_link_convex_decomposition_p1.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p1.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/upper_arm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/upper_arm_link_convex_decomposition_p2.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_convex_decomposition_p2.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/upper_arm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/upper_arm_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/upper_arm_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/upper_arm_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_palm_link_convex.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_convex.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_palm_link_convex.stl diff --git 
a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_palm_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_palm_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_palm_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p1.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p2.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_convex_decomposition_p3.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl diff --git 
a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_pitch_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_pitch_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p1.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_convex_decomposition_p2.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl b/alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_yaw_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/wrist_yaw_link_fine.stl rename to alr_envs/mujoco/ball_in_a_cup/assets/meshes/wrist_yaw_link_fine.stl diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py new file mode 100644 index 0000000..093e7f1 --- 
/dev/null +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py @@ -0,0 +1,117 @@ +from gym.envs.mujoco import mujoco_env +from gym import utils +import os +import numpy as np +from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward +import mujoco_py + + +class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self, ): + self._steps = 0 + + self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", + "ball-in-a-cup_base" + ".xml") + + self.sim_time = 8 # seconds + self.sim_steps = int(self.sim_time / (0.0005 * 4)) # circular dependency.. sim.dt <-> mujocoenv init <-> reward fct + self.reward_function = BallInACupReward(self.sim_steps) + + self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) + self._q_pos = [] + + utils.EzPickle.__init__(self) + mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"), + frame_skip=4) + + def reset_model(self): + start_pos = self.init_qpos.copy() + start_pos[0:7] = self.start_pos + start_vel = np.zeros_like(start_pos) + self.set_state(start_pos, start_vel) + self._steps = 0 + self.reward_function.reset() + self._q_pos = [] + + def do_simulation(self, ctrl, n_frames): + self.sim.data.ctrl[:] = ctrl + for _ in range(n_frames): + try: + self.sim.step() + except mujoco_py.builder.MujocoException as e: + # print("Error in simulation: " + str(e)) + # error = True + # Copy the current torque as if it would have been applied until the end of the trajectory + # for i in range(k + 1, sim_time): + # torques.append(trq) + return True + + return False + + def step(self, a): + # Apply gravity compensation + if not np.all(self.sim.data.qfrc_applied[:7] == self.sim.data.qfrc_bias[:7]): + self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7] + + reward_dist = 0.0 + angular_vel = 0.0 + # if self._steps >= self.steps_before_reward: + # vec = self.get_body_com("fingertip") - 
self.get_body_com("target") + # reward_dist -= self.reward_weight * np.linalg.norm(vec) + # angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links]) + reward_ctrl = - np.square(a).sum() + # reward_balance = - self.balance_weight * np.abs( + # angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad")) + # + # reward = reward_dist + reward_ctrl + angular_vel + reward_balance + # self.do_simulation(a, self.frame_skip) + + crash = self.do_simulation(a, self.frame_skip) + + self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy()) + + ob = self._get_obs() + + if not crash: + reward, success, collision = self.reward_function.compute_reward(a, self.sim, self._steps) + done = success or self._steps == self.sim_steps - 1 or collision + self._steps += 1 + else: + reward = -1000 + done = True + return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, + velocity=angular_vel, # reward_balance=reward_balance, + # end_effector=self.get_body_com("fingertip").copy(), + goal=self.goal if hasattr(self, "goal") else None, + traj=self._q_pos) + + def _get_obs(self): + theta = self.sim.data.qpos.flat[:7] + return np.concatenate([ + np.cos(theta), + np.sin(theta), + # self.get_body_com("target"), # only return target to make problem harder + [self._steps], + ]) + + + +if __name__ == "__main__": + env = ALRBallInACupEnv() + env.reset() + for i in range(2000): + # objective.load_result("/tmp/cma") + # test with random actions + # ac = 0.0 * env.action_space.sample() + ac = env.start_pos + # ac[0] += np.pi/2 + obs, rew, d, info = env.step(ac) + env.render() + + print(rew) + + if d: + break + + env.close() + diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py new file mode 100644 index 0000000..69932db --- /dev/null +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py @@ -0,0 +1,127 @@ +import numpy as np + + +class BallInACupReward: + def __init__(self, 
sim_time): + self.sim_time = sim_time + + self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom", + "wrist_pitch_link_convex_decomposition_p1_geom", + "wrist_pitch_link_convex_decomposition_p2_geom", + "wrist_pitch_link_convex_decomposition_p3_geom", + "wrist_yaw_link_convex_decomposition_p1_geom", + "wrist_yaw_link_convex_decomposition_p2_geom", + "forearm_link_convex_decomposition_p1_geom", + "forearm_link_convex_decomposition_p2_geom"] + + self.ctxt_id = None + self.ball_id = None + self.ball_collision_id = None + self.goal_id = None + self.goal_final_id = None + self.collision_ids = None + + self.ball_traj = None + self.dists = None + self.dists_final = None + self.costs = None + + self.reset() + + def reset(self): + self.ball_traj = np.zeros(shape=(self.sim_time, 3)) + self.dists = [] + self.dists_final = [] + self.costs = [] + + def compute_reward(self, action, sim, step): + self.ctxt_id = sim.model._site_name2id['context_point'] + self.ball_id = sim.model._body_name2id["ball"] + self.ball_collision_id = sim.model._geom_name2id["ball_geom"] + self.goal_id = sim.model._site_name2id["cup_goal"] + self.goal_final_id = sim.model._site_name2id["cup_goal_final"] + self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects] + + ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id) + + # Compute the current distance from the ball to the inner part of the cup + goal_pos = sim.data.site_xpos[self.goal_id] + ball_pos = sim.data.body_xpos[self.ball_id] + goal_final_pos = sim.data.site_xpos[self.goal_final_id] + self.dists.append(np.linalg.norm(goal_pos - ball_pos)) + self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) + # dists_ctxt.append(np.linalg.norm(ball_pos - ctxt)) + self.ball_traj[step, :] = ball_pos + + if self.check_collision(sim): + return -1000, False, True + + # self._get_cost(ball_pos, goal_pos, goal_final_pos, action, + # sim.data.get_site_xpos('context_point').copy(), step) 
+ + # min_dist = np.min(self.dists) + # dist_final = self.dists_final[-1] + action_cost = np.sum(np.square(action)) + + # cost = self.get_stage_wise_cost(ball_in_cup, min_dist, self.dists_final[-1]) # , self.dists_ctxt[-1]) + if step == self.sim_time - 1: + min_dist = np.min(self.dists) + dist_final = self.dists_final[-1] + + cost = 0.5 * min_dist + 0.5 * dist_final + # cost = 3 + 2 * (0.5 * min_dist ** 2 + 0.5 * dist_final ** 2) + reward = np.exp(-2 * min_dist) - 1e-5 * action_cost + success = dist_final < 0.05 and min_dist < 0.05 + else: + cost = 0 + reward = - 1e-5 * action_cost + success = False + # action_cost = np.mean(np.sum(np.square(torques), axis=1), axis=0) + + return reward, success, False + + def get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final): #, dist_to_ctxt): + # stop_sim = False + cost = 3 + 2 * (0.5 * min_dist ** 2 + 0.5 * dist_final ** 2) + # if not ball_in_cup: + # # cost = 3 + 2*(0.5 * min_dist + 0.5 * dist_final) + # cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2) + # else: + # # cost = 2*dist_to_ctxt + # cost = 2*dist_to_ctxt**2 + # stop_sim = True + # # print(dist_to_ctxt-0.02) + # print('Context Distance:', dist_to_ctxt) + return cost + + def _get_cost(self, ball_pos, goal_pos, goal_pos_final, u, ctxt, t): + + cost = 0 + if t == self.sim_time*0.8: + dist = 0.5*np.linalg.norm(goal_pos-ball_pos)**2 + 0.5*np.linalg.norm(goal_pos_final-ball_pos)**2 + # dist_ctxt = np.linalg.norm(ctxt-goal_pos)**2 + cost = dist # +dist_ctxt + return cost + + def check_ball_in_cup(self, sim, ball_collision_id): + cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"] + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id + collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id + + if collision or collision_trans: + return True + return False + + def check_collision(self, sim): + for coni in range(0, 
sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id + collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids + + if collision or collision_trans: + return True + return False diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py new file mode 100644 index 0000000..9161f17 --- /dev/null +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py @@ -0,0 +1,210 @@ +from gym.envs.mujoco import mujoco_env +from gym import utils, spaces +import os +import numpy as np +from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward +import mujoco_py + + +class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self, pd_control=True): + self._steps = 0 + self.pd_control = pd_control + + self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", + "ball-in-a-cup_base" + ".xml") + + self.sim_time = 8 # seconds + self.sim_steps = int(self.sim_time / (0.0005 * 4)) # circular dependency.. 
sim.dt <-> mujocoenv init <-> reward fct + self.reward_function = BallInACupReward(self.sim_steps) + + self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) + self.start_vel = np.zeros(7) + # self.start_pos = np.array([0.58760536, 1.36004913, -0.32072943]) + self._q_pos = [] + self._q_vel = [] + # self.weight_matrix_scale = 50 + self.p_gains = 1*np.array([200, 300, 100, 100, 10, 10, 2.5]) + self.d_gains = 1*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]) + + self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) + self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) + + utils.EzPickle.__init__(self) + mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"), + frame_skip=4) + + @property + def current_pos(self): + return self.sim.data.qpos[0:7].copy() + + @property + def current_vel(self): + return self.sim.data.qvel[0:7].copy() + + def reset_model(self): + init_pos_all = self.init_qpos.copy() + init_pos_robot = self.start_pos + init_vel = np.zeros_like(init_pos_all) + ball_id = self.sim.model._body_name2id["ball"] + goal_final_id = self.sim.model._site_name2id["cup_goal_final"] + # self.set_state(start_pos, start_vel) + self._steps = 0 + self.reward_function.reset() + self._q_pos = [] + self._q_vel = [] + + # Reset the system + self.sim.data.qpos[:] = init_pos_all + self.sim.data.qvel[:] = init_vel + self.sim.data.qpos[0:7] = init_pos_robot + + self.sim.step() + + self.sim.data.qpos[:] = init_pos_all + self.sim.data.qvel[:] = init_vel + self.sim.data.qpos[0:7] = init_pos_robot + self.sim.data.body_xpos[ball_id, :] = np.copy(self.sim.data.site_xpos[goal_final_id, :]) - np.array([0., 0., 0.329]) + + # Stabilize the system around the initial position + for i in range(0, 2000): + self.sim.data.qpos[7:] = 0. + self.sim.data.qvel[7:] = 0. 
+ # self.sim.data.qpos[7] = -0.2 + cur_pos = self.sim.data.qpos[0:7].copy() + cur_vel = self.sim.data.qvel[0:7].copy() + trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel) + self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy() + self.sim.step() + # self.render() + + for i in range(0, 2000): + cur_pos = self.sim.data.qpos[0:7].copy() + cur_vel = self.sim.data.qvel[0:7].copy() + trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel) + self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy() + self.sim.step() + # self.render() + + def do_simulation(self, ctrl, n_frames): + # cur_pos = self.sim.data.qpos[0:7].copy() + # cur_vel = self.sim.data.qvel[0:7].copy() + # des_pos = ctrl[:7] + # des_vel = ctrl[7:] + # trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel) + if self.pd_control: + self.sim.data.qfrc_applied[0:7] = ctrl + self.sim.data.qfrc_bias[:7].copy() + else: + self.sim.data.ctrl[:] = ctrl + + for _ in range(n_frames): + try: + self.sim.step() + except mujoco_py.builder.MujocoException as e: + print("Error in simulation: " + str(e)) + # error = True + # Copy the current torque as if it would have been applied until the end of the trajectory + # for i in range(k + 1, sim_time): + # torques.append(trq) + return True + + return False + + def step(self, a): + # Apply gravity compensation + # if not np.all(self.sim.data.qfrc_applied[:7] == self.sim.data.qfrc_bias[:7]): + # self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7] + + reward_dist = 0.0 + angular_vel = 0.0 + # if self._steps >= self.steps_before_reward: + # vec = self.get_body_com("fingertip") - self.get_body_com("target") + # reward_dist -= self.reward_weight * np.linalg.norm(vec) + # angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links]) + reward_ctrl = - np.square(a).sum() + # reward_balance = - 
self.balance_weight * np.abs( + # angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad")) + # + # reward = reward_dist + reward_ctrl + angular_vel + reward_balance + # self.do_simulation(a, self.frame_skip) + + joint_cons_viol = self.check_traj_in_joint_limits() + + crash = self.do_simulation(a, self.frame_skip) + + self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy()) + self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy()) + + ob = self._get_obs() + + if not crash and not joint_cons_viol: + reward, success, collision = self.reward_function.compute_reward(a, self.sim, self._steps) + done = success or self._steps == self.sim_steps - 1 or collision + self._steps += 1 + else: + reward = -1000 + done = True + return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, + velocity=angular_vel, # reward_balance=reward_balance, + # end_effector=self.get_body_com("fingertip").copy(), + goal=self.goal if hasattr(self, "goal") else None, + traj=self._q_pos) + + def check_traj_in_joint_limits(self): + return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) + + def check_collision(self): + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id + collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids + + if collision or collision_trans: + return True + return False + + def _get_obs(self): + theta = self.sim.data.qpos.flat[:7] + return np.concatenate([ + np.cos(theta), + np.sin(theta), + # self.get_body_com("target"), # only return target to make problem harder + [self._steps], + ]) + + def extend_des_pos(self, des_pos): + des_pos_full = self.start_pos.copy() + des_pos_full[1] = des_pos[0] + des_pos_full[3] = des_pos[1] + des_pos_full[5] = des_pos[2] + return des_pos_full + + def extend_des_vel(self, des_vel): + des_vel_full = self.start_vel.copy() + des_vel_full[1] = 
des_vel[0] + des_vel_full[3] = des_vel[1] + des_vel_full[5] = des_vel[2] + return des_vel_full + +if __name__ == "__main__": + env = ALRBallInACupEnv() + env.reset() + env.render() + for i in range(4000): + # objective.load_result("/tmp/cma") + # test with random actions + # ac = 0.1 * env.action_space.sample() + # ac = -np.array([i, i, i]) / 10000 + np.array([env.start_pos[1], env.start_pos[3], env.start_pos[5]]) + ac = np.array([0., -0.1, 0, 0, 0, 0, 0]) + # ac[0] += np.pi/2 + obs, rew, d, info = env.step(ac) + env.render() + + print(rew) + + if d: + break + + env.close() + diff --git a/alr_envs/mujoco/gym_table_tennis/envs/MUJOCO_LOG.TXT b/alr_envs/mujoco/gym_table_tennis/envs/MUJOCO_LOG.TXT new file mode 100644 index 0000000..91c2162 --- /dev/null +++ b/alr_envs/mujoco/gym_table_tennis/envs/MUJOCO_LOG.TXT @@ -0,0 +1,3 @@ +Mon Jan 25 15:45:30 2021 +ERROR: GLEW initalization error: Missing GL version + diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_7_motor_actuator.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_left.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml similarity index 100% rename from 
alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_barrett_wam_7dof_right.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/include_table.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_table.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/include_table.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_target_ball.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/include_test_balls.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl new file mode 100644 index 0000000..133b112 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl new file mode 100644 index 0000000..047e9df Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl differ diff --git 
a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_convex.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_convex.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_dist_link_fine.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_convex.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_med_link_fine.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl 
similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_finger_prox_link_fine.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_fine.stl similarity index 100% rename from 
alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_fine.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p1.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p2.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p3.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl similarity index 100% rename from 
alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/meshes/bhand_palm_link_convex_decomposition_p4.stl rename to alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl new file mode 100644 index 0000000..b34963d Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_fine.stl new file mode 100644 index 0000000..f6a1515 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..e6aa6b6 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..667902e Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl new file mode 100644 index 0000000..ed66bbb Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl differ diff --git 
a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..aba957d Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..5cca6a9 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..3343e27 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl new file mode 100644 index 0000000..ae505fd Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl new file mode 100644 index 0000000..c36cfec Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl new file mode 100644 index 
0000000..dc633c4 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..82d0093 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..7fd5a55 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl new file mode 100644 index 0000000..76353ae Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl new file mode 100644 index 0000000..a0386f6 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_fine.stl new file mode 100644 index 0000000..f6b41ad Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl 
b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..c36f88f Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..d00cac1 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..34d1d8b Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl new file mode 100644 index 0000000..13d2f73 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..06e857f Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl new file mode 100644 index 
0000000..48e1bb1 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl new file mode 100644 index 0000000..0d95239 Binary files /dev/null and b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/shared.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/shared.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/shared.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml b/alr_envs/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/robotics/assets/table_tennis/table_tennis_env.xml rename to alr_envs/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py b/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py index 625a122..f155f06 100644 --- a/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py +++ b/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py @@ -3,7 +3,7 @@ from gym import spaces from gym.envs.robotics import robot_env, utils # import xml.etree.ElementTree as ET from 
alr_envs.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis -# import glfw +import glfw from alr_envs.mujoco.gym_table_tennis.utils.experiment import ball_initialize from pathlib import Path import os @@ -34,18 +34,26 @@ class TableTennisEnv(robot_env.RobotEnv): path_cws = Path.cwd() print(path_cws) current_dir = Path(os.path.split(os.path.realpath(__file__))[0]) - table_tennis_env_xml_path = current_dir / "robotics"/"assets"/"table_tennis"/"table_tennis_env.xml" + table_tennis_env_xml_path = current_dir / "assets"/"table_tennis_env.xml" model_path = str(table_tennis_env_xml_path) self.config = config - action_space = self.config['trajectory']['args']['action_space'] - time_step = self.config['mujoco_sim_env']['args']["time_step"] + action_space = True # self.config['trajectory']['args']['action_space'] + time_step = 0.002 # self.config['mujoco_sim_env']['args']["time_step"] if initial_qpos is None: - initial_qpos = self.config['robot_config']['args']['initial_qpos'] + initial_qpos = {"wam/base_yaw_joint_right": 1.5, + "wam/shoulder_pitch_joint_right": 1, + "wam/shoulder_yaw_joint_right": 0, + "wam/elbow_pitch_joint_right": 1, + "wam/wrist_yaw_joint_right": 1, + "wam/wrist_pitch_joint_right": 0, + "wam/palm_yaw_joint_right": 0} + # initial_qpos = [1.5, 1, 0, 1, 1, 0, 0] # self.config['robot_config']['args']['initial_qpos'] # TODO should read all configuration in config assert initial_qpos is not None, "Must initialize the initial q position of robot arm" n_actions = 7 self.initial_qpos_value = np.array(list(initial_qpos.values())).copy() + # self.initial_qpos_value = np.array(initial_qpos) # # change time step in .xml file # tree = ET.parse(model_path) # root = tree.getroot() @@ -71,6 +79,7 @@ class TableTennisEnv(robot_env.RobotEnv): self.n_actions = n_actions self.action = None self.time_step = time_step + self._dt = time_step self.paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center') if reward_obj is 
None: self.reward_obj = HierarchicalRewardTableTennis() @@ -104,7 +113,8 @@ class TableTennisEnv(robot_env.RobotEnv): self.reward_obj.hitting(self) # if not hitted, return the highest reward if not self.reward_obj.goal_achievement: - return self.reward_obj.highest_reward + # return self.reward_obj.highest_reward + return self.reward_obj.total_reward # # Stage 2 Right Table Contact # self.reward_obj.right_table_contact(self) # if not self.reward_obj.goal_achievement: @@ -119,7 +129,8 @@ class TableTennisEnv(robot_env.RobotEnv): # print("self.reward_obj.highest_reward: ", self.reward_obj.highest_reward) # TODO self.reward_obj.target_achievement(self) - return self.reward_obj.highest_reward + # return self.reward_obj.highest_reward + return self.reward_obj.total_reward def _reset_sim(self): self.sim.set_state(self.initial_state) @@ -217,10 +228,10 @@ if __name__ == '__main__': env.reset() # env.render(mode=render_mode) - for i in range(200): + for i in range(500): # objective.load_result("/tmp/cma") # test with random actions - ac = 2 * env.action_space.sample() + ac = env.action_space.sample() # ac[0] += np.pi/2 obs, rew, d, info = env.step(ac) env.render(mode=render_mode) diff --git a/alr_envs/mujoco/reacher/__init__.py b/alr_envs/mujoco/reacher/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/mujoco/alr_reacher.py b/alr_envs/mujoco/reacher/alr_reacher.py similarity index 80% rename from alr_envs/mujoco/alr_reacher.py rename to alr_envs/mujoco/reacher/alr_reacher.py index 7ae28da..85adf58 100644 --- a/alr_envs/mujoco/alr_reacher.py +++ b/alr_envs/mujoco/reacher/alr_reacher.py @@ -29,9 +29,20 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): else: raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.") + self._q_pos = [] + self._q_vel = [] + utils.EzPickle.__init__(self) mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2) + @property + def current_pos(self): 
+ return self.sim.data.qpos[0:5].copy() + + @property + def current_vel(self): + return self.sim.data.qvel[0:5].copy() + def step(self, a): self._steps += 1 @@ -47,28 +58,36 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): reward = reward_dist + reward_ctrl + angular_vel + reward_balance self.do_simulation(a, self.frame_skip) + + self._q_pos.append(self.sim.data.qpos[0:5].ravel().copy()) + self._q_vel.append(self.sim.data.qvel[0:5].ravel().copy()) + ob = self._get_obs() done = False return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, velocity=angular_vel, reward_balance=reward_balance, end_effector=self.get_body_com("fingertip").copy(), - goal=self.goal if hasattr(self, "goal") else None) + goal=self.goal if hasattr(self, "goal") else None, + traj=self._q_pos, vel=self._q_vel) def viewer_setup(self): self.viewer.cam.trackbodyid = 0 def reset_model(self): - qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos + qpos = self.init_qpos # self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos while True: self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2) if np.linalg.norm(self.goal) < self.n_links / 10: break qpos[-2:] = self.goal - qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) + qvel = self.init_qvel # + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) qvel[-2:] = 0 self.set_state(qpos, qvel) self._steps = 0 + self._q_pos = [] + self._q_vel = [] + return self._get_obs() def _get_obs(self): diff --git a/alr_envs/mujoco/assets/reacher_5links.xml b/alr_envs/mujoco/reacher/assets/reacher_5links.xml similarity index 100% rename from alr_envs/mujoco/assets/reacher_5links.xml rename to alr_envs/mujoco/reacher/assets/reacher_5links.xml diff --git a/alr_envs/mujoco/assets/reacher_7links.xml b/alr_envs/mujoco/reacher/assets/reacher_7links.xml similarity index 100% rename from 
alr_envs/mujoco/assets/reacher_7links.xml rename to alr_envs/mujoco/reacher/assets/reacher_7links.xml diff --git a/alr_envs/utils/detpmp_env_wrapper.py b/alr_envs/utils/detpmp_env_wrapper.py new file mode 100644 index 0000000..64986a5 --- /dev/null +++ b/alr_envs/utils/detpmp_env_wrapper.py @@ -0,0 +1,88 @@ +from mp_lib import det_promp +import numpy as np +import gym + + +class DetPMPEnvWrapperBase(gym.Wrapper): + def __init__(self, + env, + num_dof, + num_basis, + width, + start_pos=None, + duration=1, + dt=0.01, + post_traj_time=0., + policy=None, + weights_scale=1): + super(DetPMPEnvWrapperBase, self).__init__(env) + self.num_dof = num_dof + self.num_basis = num_basis + self.dim = num_dof * num_basis + self.pmp = det_promp.DeterministicProMP(n_basis=num_basis, width=width, off=0.01) + weights = np.zeros(shape=(num_basis, num_dof)) + self.pmp.set_weights(duration, weights) + self.weights_scale = weights_scale + + self.duration = duration + self.dt = dt + self.post_traj_steps = int(post_traj_time / dt) + + self.start_pos = start_pos + + self.policy = policy + + def __call__(self, params): + params = np.atleast_2d(params) + observations = [] + rewards = [] + dones = [] + infos = [] + for p in params: + observation, reward, done, info = self.rollout(p) + observations.append(observation) + rewards.append(reward) + dones.append(done) + infos.append(info) + + return np.array(rewards), infos + + def rollout(self, params, render=False): + """ This function generates a trajectory based on a ProMP and then does the usual loop over reset and step""" + raise NotImplementedError + + +class DetPMPEnvWrapperPD(DetPMPEnvWrapperBase): + """ + Wrapper for gym environments which tracks the ProMP position and velocity trajectory using the given policy + """ + def rollout(self, params, render=False): + params = np.reshape(params, newshape=(self.num_basis, self.num_dof)) * self.weights_scale + self.pmp.set_weights(self.duration, params) + t, des_pos, des_vel, des_acc = self.pmp.compute_trajectory(1/self.dt, 1.)
+ des_pos += self.start_pos[None, :] + + if self.post_traj_steps > 0: + des_pos = np.vstack([des_pos, np.tile(des_pos[-1, :], [self.post_traj_steps, 1])]) + des_vel = np.vstack([des_vel, np.zeros(shape=(self.post_traj_steps, self.num_dof))]) + + self._trajectory = des_pos + + rews = [] + infos = [] + + self.env.reset() + + for t, pos_vel in enumerate(zip(des_pos, des_vel)): + ac = self.policy.get_action(self.env, pos_vel[0], pos_vel[1]) + obs, rew, done, info = self.env.step(ac) + rews.append(rew) + infos.append(info) + if render: + self.env.render(mode="human") + if done: + break + + reward = np.sum(rews) + + return obs, reward, done, info diff --git a/alr_envs/utils/dmp_env_wrapper.py b/alr_envs/utils/dmp_env_wrapper.py index 9b1bdf1..823df83 100644 --- a/alr_envs/utils/dmp_env_wrapper.py +++ b/alr_envs/utils/dmp_env_wrapper.py @@ -6,19 +6,31 @@ import gym class DmpEnvWrapperBase(gym.Wrapper): - def __init__(self, env, num_dof, num_basis, duration=1, dt=0.01, learn_goal=False): + def __init__(self, + env, + num_dof, + num_basis, + start_pos=None, + final_pos=None, + duration=1, + alpha_phase=2, + dt=0.01, + learn_goal=False, + post_traj_time=0., + policy=None): super(DmpEnvWrapperBase, self).__init__(env) self.num_dof = num_dof self.num_basis = num_basis self.dim = num_dof * num_basis if learn_goal: self.dim += num_dof - self.learn_goal = True + self.learn_goal = learn_goal self.duration = duration # seconds time_steps = int(duration / dt) self.t = np.linspace(0, duration, time_steps) + self.post_traj_steps = int(post_traj_time / dt) - phase_generator = ExpDecayPhaseGenerator(alpha_phase=5, duration=duration) + phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration) basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=self.num_basis) self.dmp = dmps.DMP(num_dof=num_dof, @@ -28,13 +40,18 @@ class DmpEnvWrapperBase(gym.Wrapper): dt=dt ) - self.dmp.dmp_start_pos = env.start_pos.reshape((1, num_dof)) + 
self.dmp.dmp_start_pos = start_pos.reshape((1, num_dof)) dmp_weights = np.zeros((num_basis, num_dof)) - dmp_goal_pos = np.zeros(num_dof) + if learn_goal: + dmp_goal_pos = np.zeros(num_dof) + else: + dmp_goal_pos = final_pos self.dmp.set_weights(dmp_weights, dmp_goal_pos) + self.policy = policy + def __call__(self, params): params = np.atleast_2d(params) observations = [] @@ -48,7 +65,7 @@ class DmpEnvWrapperBase(gym.Wrapper): dones.append(done) infos.append(info) - return np.array(rewards) + return np.array(rewards), infos def goal_and_weights(self, params): if len(params.shape) > 1: @@ -71,7 +88,7 @@ class DmpEnvWrapperBase(gym.Wrapper): raise NotImplementedError -class DmpEnvWrapperAngle(DmpEnvWrapperBase): +class DmpEnvWrapperPos(DmpEnvWrapperBase): """ Wrapper for gym environments which creates a trajectory in joint angle space """ @@ -80,7 +97,12 @@ class DmpEnvWrapperAngle(DmpEnvWrapperBase): if hasattr(self.env, "weight_matrix_scale"): weight_matrix = weight_matrix * self.env.weight_matrix_scale self.dmp.set_weights(weight_matrix, goal_pos) - trajectory, velocities = self.dmp.reference_trajectory(self.t) + trajectory, _ = self.dmp.reference_trajectory(self.t) + + if self.post_traj_steps > 0: + trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])]) + + self._trajectory = trajectory rews = [] @@ -95,8 +117,6 @@ class DmpEnvWrapperAngle(DmpEnvWrapperBase): break reward = np.sum(rews) - # done = True - info = {} return obs, reward, done, info @@ -110,7 +130,7 @@ class DmpEnvWrapperVel(DmpEnvWrapperBase): if hasattr(self.env, "weight_matrix_scale"): weight_matrix = weight_matrix * self.env.weight_matrix_scale self.dmp.set_weights(weight_matrix, goal_pos) - trajectory, velocities = self.dmp.reference_trajectory(self.t) + _, velocities = self.dmp.reference_trajectory(self.t) rews = [] infos = [] @@ -129,3 +149,41 @@ class DmpEnvWrapperVel(DmpEnvWrapperBase): reward = np.sum(rews) return obs, reward, done, info + + +class 
DmpEnvWrapperPD(DmpEnvWrapperBase): + """ + Wrapper for gym environments which tracks the DMP position and velocity trajectory using the given policy + """ + def rollout(self, action, render=False): + goal_pos, weight_matrix = self.goal_and_weights(action) + if hasattr(self.env, "weight_matrix_scale"): + weight_matrix = weight_matrix * self.env.weight_matrix_scale + self.dmp.set_weights(weight_matrix, goal_pos) + trajectory, velocity = self.dmp.reference_trajectory(self.t) + + if self.post_traj_steps > 0: + trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])]) + velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.num_dof))]) + + self._trajectory = trajectory + self._velocity = velocity + + rews = [] + infos = [] + + self.env.reset() + + for t, pos_vel in enumerate(zip(trajectory, velocity)): + ac = self.policy.get_action(self.env, pos_vel[0], pos_vel[1]) + obs, rew, done, info = self.env.step(ac) + rews.append(rew) + infos.append(info) + if render: + self.env.render(mode="human") + if done: + break + + reward = np.sum(rews) + + return obs, reward, done, info diff --git a/alr_envs/utils/policies.py b/alr_envs/utils/policies.py new file mode 100644 index 0000000..5d251fa --- /dev/null +++ b/alr_envs/utils/policies.py @@ -0,0 +1,15 @@ +class PDController: + def __init__(self, p_gains, d_gains): + self.p_gains = p_gains + self.d_gains = d_gains + + def get_action(self, env, des_pos, des_vel): + # TODO: make standardized ALRenv such that all of them have current_pos/vel attributes + cur_pos = env.current_pos + cur_vel = env.current_vel + if len(des_pos) != len(cur_pos): + des_pos = env.extend_des_pos(des_pos) + if len(des_vel) != len(cur_vel): + des_vel = env.extend_des_vel(des_vel) + trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel) + return trq diff --git a/dmp_env_wrapper_example.py b/dmp_env_wrapper_example.py index 9d699be..fcde324 100644 --- a/dmp_env_wrapper_example.py +++ b/dmp_env_wrapper_example.py
@@ -1,36 +1,9 @@ from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker from alr_envs.classic_control.hole_reacher import HoleReacher -from gym.vector.async_vector_env import AsyncVectorEnv import numpy as np -# env = gym.make('alr_envs:SimpleReacher-v0') -# env = HoleReacher(num_links=5, -# allow_self_collision=False, -# allow_wall_collision=True, -# hole_width=0.15, -# hole_depth=1, -# hole_x=1) -# -# env = DmpEnvWrapperVel(env, -# num_dof=5, -# num_basis=5, -# duration=2, -# dt=env._dt, -# learn_goal=True) -# -# params = np.hstack([50 * np.random.randn(25), np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4])]) -# -# print(params) -# -# env.reset() -# obs, rew, done, info = env.step(params, render=True) -# -# print(env.env._joint_angles) -# -# print(rew) - if __name__ == "__main__": def make_env(rank, seed=0): diff --git a/dmp_pd_control_example.py b/dmp_pd_control_example.py new file mode 100644 index 0000000..29e7cda --- /dev/null +++ b/dmp_pd_control_example.py @@ -0,0 +1,48 @@ +from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperPD +from alr_envs.utils.policies import PDController +from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker +from alr_envs.mujoco.reacher.alr_reacher import ALRReacherEnv +import numpy as np + + +if __name__ == "__main__": + + def make_env(rank, seed=0): + """ + Utility function for multiprocessed env. 
+ + :param env_id: (str) the environment ID + :param num_env: (int) the number of environments you wish to have in subprocesses + :param seed: (int) the initial seed for RNG + :param rank: (int) index of the subprocess + """ + def _init(): + p_gains = np.array([100, 100, 100, 100, 100]) + d_gains = np.array([1, 1, 1, 1, 1]) + policy = PDController(p_gains, d_gains) + + env = ALRReacherEnv() + + env = DmpEnvWrapperPD(env, + num_dof=5, + num_basis=5, + duration=4, + dt=env.dt, + learn_goal=False, + start_pos=env.init_qpos[:5], + final_pos=env.init_qpos[:5], + alpha_phase=2, + policy=policy + ) + env.seed(seed + rank) + return env + return _init + + dim = 25 + env = make_env(0, 0)() + + params = 10 * np.random.randn(dim) + + out = env.rollout(params, render=True) + + print(out)