diff --git a/README.md b/README.md
index 2232efa..edd1aac 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,25 @@
 ## ALR Robotics Control Environments
-This project offers a large verity of reinforcement learning environments under a unifying interface base on OpenAI gym.
-Besides, some custom environments we also provide support for the benchmark suites
-[OpenAI gym](https://gym.openai.com/),
+This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
+In addition, we provide support (under the OpenAI gym interface) for the benchmark suites
 [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
-(DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according
+(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
 to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
-further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
+additionally support controlling agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
 we only consider the mean usually).
 
 ## Motion Primitive Environments (Episodic environments)
 
 Unlike step-based environments, motion primitive (MP) environments are more closely related to stochastic search, black box
-optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full
+optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
 trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
 generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
 however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
-velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
+velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
 represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
-framework we support the above setting for the contextual setting, for which we expose all changing substates of the
+framework we also support all of the above in the contextual setting, for which we expose all changing substates of the
 task as a single observation in the beginning. This requires predicting a new action/MP parametrization for each
-trajectory. All environments provide the next to the cumulative episode reward also all collected information from each
+trajectory. All environments provide, next to the cumulative episode reward, all collected information from each
 step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.
 
 |Key| Description|
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 6853f0d..f835328 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -1,3 +1,4 @@
+import numpy as np
 from gym import register
 from .
import classic_control, mujoco @@ -9,6 +10,8 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv from .mujoco.reacher.alr_reacher import ALRReacherEnv from .mujoco.reacher.balancing import BalancingEnv +from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} # Classic Control @@ -193,6 +196,31 @@ register( } ) +## Table Tennis +register(id='TableTennis2DCtxt-v0', + entry_point='alr_envs.alr.mujoco:TT_Env_Gym', + max_episode_steps=MAX_EPISODE_STEPS, + kwargs={'ctxt_dim':2}) + +register(id='TableTennis4DCtxt-v0', + entry_point='alr_envs.alr.mujoco:TT_Env_Gym', + max_episode_steps=MAX_EPISODE_STEPS, + kwargs={'ctxt_dim':4}) + +## BeerPong +difficulties = ["simple", "intermediate", "hard", "hardest"] + +for v, difficulty in enumerate(difficulties): + register( + id='ALRBeerPong-v{}'.format(v), + entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv', + max_episode_steps=600, + kwargs={ + "difficulty": difficulty, + "reward_type": "staged", + } + ) + # Motion Primitive Environments ## Simple Reacher @@ -330,3 +358,59 @@ for _v in _versions: } ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## Beerpong +register( + id='BeerpongProMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": "alr_envs:ALRBeerPong-v0", + "wrappers": [mujoco.beerpong.MPWrapper], + "mp_kwargs": { + "num_dof": 7, + "num_basis": 2, + "n_zero_bases": 2, + "duration": 0.5, + "post_traj_time": 2.5, + # "width": 0.01, + # "off": 0.01, + "policy_type": "motor", + "weights_scale": 0.08, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]), + "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) + } + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0") + +## Table Tennis +register( + id='TableTennisProMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": "alr_envs:TableTennis4DCtxt-v0", + "wrappers": [mujoco.table_tennis.MPWrapper], + "mp_kwargs": { + "num_dof": 7, + "num_basis": 2, + "n_zero_bases": 2, + "duration": 1.25, + "post_traj_time": 4.5, + # "width": 0.01, + # "off": 0.01, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), + "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) + } + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v0") diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index dd7321a..208f005 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -106,7 +106,7 @@ class HoleReacherEnv(BaseReacherDirectEnv): # self._tmp_hole_depth, self.end_effector - self._goal, self._steps - ]) + ]).astype(np.float32) def _get_line_points(self, num_points_per_link=1): theta = self._joint_angles[:, None] diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py index 758f824..dac06a3 100644 --- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py +++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py @@ -73,7 +73,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): self._angle_velocity, 
self.end_effector - self._goal, self._steps - ]) + ]).astype(np.float32) def _generate_goal(self): diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py index 3a1f0e5..292e40a 100644 --- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py @@ -110,7 +110,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): self.end_effector - self._via_point, self.end_effector - self._goal, self._steps - ]) + ]).astype(np.float32) def _check_collisions(self) -> bool: return self._check_self_collision() diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py index 6744d15..30e1e7c 100644 --- a/alr_envs/alr/mujoco/__init__.py +++ b/alr_envs/alr/mujoco/__init__.py @@ -1,4 +1,6 @@ from .reacher.alr_reacher import ALRReacherEnv from .reacher.balancing import BalancingEnv from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv -from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv \ No newline at end of file +from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv +from .table_tennis.tt_gym import TT_Env_Gym +from .beerpong.beerpong import ALRBeerBongEnv \ No newline at end of file diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py index e69de29..989b5a9 100644 --- a/alr_envs/alr/mujoco/beerpong/__init__.py +++ b/alr_envs/alr/mujoco/beerpong/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml new file mode 100644 index 0000000..e96d2bc --- /dev/null +++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml @@ -0,0 +1,187 @@ + + + diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py index 4d8f389..a10e54a 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -1,3 +1,4 @@ +import mujoco_py.builder import os import numpy as np @@ -5,44 +6,67 @@ from gym import utils from gym.envs.mujoco import MujocoEnv - -class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): - def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None): - utils.EzPickle.__init__(**locals()) - MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip) +class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): + def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False, + context: np.ndarray = None, difficulty='simple'): self._steps = 0 self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", - "beerpong" + ".xml") - - self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59]) - self.start_vel = np.zeros(7) - - self._q_pos = [] - self._q_vel = [] - # self.weight_matrix_scale = 50 - self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.]) - self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5]) - self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]) + "beerpong_wo_cup" + ".xml") self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) - self.context = None - # alr_mujoco_env.AlrMujocoEnv.__init__(self, - # self.xml_path, - # apply_gravity_comp=apply_gravity_comp, - # n_substeps=n_substeps) + 
self.context = context + self.apply_gravity_comp = apply_gravity_comp + self.add_noise = noisy - self.sim_time = 8 # seconds - self.sim_steps = int(self.sim_time / self.dt) - if reward_function is None: - from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward - reward_function = BeerpongReward - self.reward_function = reward_function(self.sim, self.sim_steps) - self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] - self.ball_id = self.sim.model._body_name2id["ball"] - self.cup_table_id = self.sim.model._body_name2id["cup_table"] + self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59]) + self._start_vel = np.zeros(7) + + self.ball_site_id = 0 + self.ball_id = 11 + + self._release_step = 100 # time step of ball release + + self.sim_time = 4 # seconds + self.ep_length = 600 # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt) + self.cup_table_id = 10 + + if noisy: + self.noise_std = 0.01 + else: + self.noise_std = 0 + + if difficulty == 'simple': + self.cup_goal_pos = np.array([0, -1.7, 0.840]) + elif difficulty == 'intermediate': + self.cup_goal_pos = np.array([0.3, -1.5, 0.840]) + elif difficulty == 'hard': + self.cup_goal_pos = np.array([-0.3, -2.2, 0.840]) + elif difficulty == 'hardest': + self.cup_goal_pos = np.array([-0.3, -1.2, 0.840]) + + if reward_type == "no_context": + from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward + reward_function = BeerPongReward + elif reward_type == "staged": + from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward + reward_function = BeerPongReward + else: + raise ValueError("Unknown reward type: {}".format(reward_type)) + self.reward_function = reward_function() + + MujocoEnv.__init__(self, self.xml_path, frame_skip) + utils.EzPickle.__init__(self) + + @property + def start_pos(self): + return self._start_pos + + @property + def start_vel(self): + return self._start_vel @property def current_pos(self): @@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): def current_vel(self): return self.sim.data.qvel[0:7].copy() - def configure(self, context): - self.context = context - self.reward_function.reset(context) + def reset(self): + self.reward_function.reset(self.add_noise) + return super().reset() def reset_model(self): init_pos_all = self.init_qpos.copy() @@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): init_vel = np.zeros_like(init_pos_all) self._steps = 0 - self._q_pos = [] - self._q_vel = [] start_pos = init_pos_all start_pos[0:7] = init_pos_robot - # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) self.set_state(start_pos, init_vel) - - ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05]) - self.sim.model.body_pos[self.ball_id] = ball_pos.copy() - self.sim.model.body_pos[self.cup_table_id] = self.context.copy() - + self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos + start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy() + self.set_state(start_pos, init_vel) return self._get_obs() def step(self, a): @@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): angular_vel = 0.0 reward_ctrl = - np.square(a).sum() - crash = self.do_simulation(a) - joint_cons_viol = self.check_traj_in_joint_limits() - - self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy()) - self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy()) + if self.apply_gravity_comp: + a = a + 
self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0] + try: + self.do_simulation(a, self.frame_skip) + if self._steps < self._release_step: + self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy() + self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy() + elif self._steps == self._release_step and self.add_noise: + self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3) + crash = False + except mujoco_py.builder.MujocoException: + crash = True + # joint_cons_viol = self.check_traj_in_joint_limits() ob = self._get_obs() - if not crash and not joint_cons_viol: - reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps) - done = success or self._steps == self.sim_steps - 1 or stop_sim + if not crash: + reward, reward_infos = self.reward_function.compute_reward(self, a) + success = reward_infos['success'] + is_collided = reward_infos['is_collided'] + ball_pos = reward_infos['ball_pos'] + ball_vel = reward_infos['ball_vel'] + done = is_collided or self._steps == self.ep_length - 1 self._steps += 1 else: - reward = -1000 + reward = -30 success = False + is_collided = False done = True + ball_pos = np.zeros(3) + ball_vel = np.zeros(3) + return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, + reward=reward, velocity=angular_vel, - traj=self._q_pos, is_success=success, - is_collided=crash or joint_cons_viol) + # traj=self._q_pos, + action=a, + q_pos=self.sim.data.qpos[0:7].ravel().copy(), + q_vel=self.sim.data.qvel[0:7].ravel().copy(), + ball_pos=ball_pos, + ball_vel=ball_vel, + is_success=success, + is_collided=is_collided, sim_crash=crash) def check_traj_in_joint_limits(self): return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) - # TODO + # TODO: extend observation space def _get_obs(self): theta = self.sim.data.qpos.flat[:7] return np.concatenate([ @@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): ]) # TODO + @property def active_obs(self): - pass - + return np.hstack([ + [False] * 7, # cos + [False] * 7, # sin + # [True] * 2, # x-y coordinates of target distance + [False] # env steps + ]) if __name__ == "__main__": - env = ALRBeerpongEnv() - ctxt = np.array([0, -2, 0.840]) # initial + env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest') - env.configure(ctxt) + # env.configure(ctxt) env.reset() - env.render() - for i in range(16000): - # test with random actions - ac = 0.0 * env.action_space.sample()[0:7] - ac[1] = -0.8 - ac[3] = - 0.3 - ac[5] = -0.2 - # ac = env.start_pos - # ac[0] += np.pi/2 + env.render("human") + for i in range(800): + ac = 10 * env.action_space.sample()[0:7] obs, rew, d, info = env.step(ac) - env.render() + env.render("human") print(rew) @@ -147,4 +187,3 @@ if __name__ == "__main__": break env.close() - diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py index c3ca3c6..3896e82 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py @@ -1,124 +1,169 @@ import numpy as np -from alr_envs.alr.mujoco import alr_reward_fct -class BeerpongReward(alr_reward_fct.AlrReward): - def __init__(self, sim, sim_time): +class BeerPongReward: + def __init__(self): - self.sim = sim - self.sim_time = sim_time + self.robot_collision_objects = ["wrist_palm_link_convex_geom", + "wrist_pitch_link_convex_decomposition_p1_geom", + "wrist_pitch_link_convex_decomposition_p2_geom", + 
"wrist_pitch_link_convex_decomposition_p3_geom", + "wrist_yaw_link_convex_decomposition_p1_geom", + "wrist_yaw_link_convex_decomposition_p2_geom", + "forearm_link_convex_decomposition_p1_geom", + "forearm_link_convex_decomposition_p2_geom", + "upper_arm_link_convex_decomposition_p1_geom", + "upper_arm_link_convex_decomposition_p2_geom", + "shoulder_link_convex_decomposition_p1_geom", + "shoulder_link_convex_decomposition_p2_geom", + "shoulder_link_convex_decomposition_p3_geom", + "base_link_convex_geom", "table_contact_geom"] - self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom", - "wrist_pitch_link_convex_decomposition_p1_geom", - "wrist_pitch_link_convex_decomposition_p2_geom", - "wrist_pitch_link_convex_decomposition_p3_geom", - "wrist_yaw_link_convex_decomposition_p1_geom", - "wrist_yaw_link_convex_decomposition_p2_geom", - "forearm_link_convex_decomposition_p1_geom", - "forearm_link_convex_decomposition_p2_geom"] + self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6", + "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10", + # "cup_base_table", "cup_base_table_contact", + "cup_geom_table15", + "cup_geom_table16", + "cup_geom_table17", "cup_geom1_table8", + # "cup_base_table_contact", + # "cup_base_table" + ] - self.ball_id = None - self.ball_collision_id = None - self.goal_id = None - self.goal_final_id = None - self.collision_ids = None self.ball_traj = None self.dists = None - self.dists_ctxt = None self.dists_final = None self.costs = None - + self.action_costs = None + self.angle_rewards = None + self.cup_angles = None + self.cup_z_axes = None + self.collision_penalty = 500 self.reset(None) def reset(self, context): - self.ball_traj = np.zeros(shape=(self.sim_time, 3)) + self.ball_traj = [] self.dists = [] - self.dists_ctxt = [] self.dists_final = [] self.costs = [] - self.context = context - self.ball_in_cup = False - self.dist_ctxt = 5 + self.action_costs = [] + self.angle_rewards = [] + self.cup_angles = [] + self.cup_z_axes = [] + self.ball_ground_contact = False + self.ball_table_contact = False + self.ball_wall_contact = False + self.ball_cup_contact = False - self.ball_id = self.sim.model._body_name2id["ball"] - self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"] - self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] - self.goal_id = self.sim.model._site_name2id["cup_goal_table"] - self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"] - self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects] - self.cup_table_id = self.sim.model._body_name2id["cup_table"] + def compute_reward(self, env, action): + self.ball_id = env.sim.model._body_name2id["ball"] + self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"] + self.goal_id = env.sim.model._site_name2id["cup_goal_table"] + self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"] + self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects] + self.cup_table_id = env.sim.model._body_name2id["cup_table"] + self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"] + self.wall_collision_id = env.sim.model._geom_name2id["wall"] + self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"] + self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"] + self.ground_collision_id = env.sim.model._geom_name2id["ground"] + 
self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects] - def compute_reward(self, action, sim, step): - action_cost = np.sum(np.square(action)) - - stop_sim = False - success = False - - if self.check_collision(sim): - reward = - 1e-4 * action_cost - 1000 - stop_sim = True - return reward, success, stop_sim - - # Compute the current distance from the ball to the inner part of the cup - goal_pos = sim.data.site_xpos[self.goal_id] - ball_pos = sim.data.body_xpos[self.ball_id] - goal_final_pos = sim.data.site_xpos[self.goal_final_id] + goal_pos = env.sim.data.site_xpos[self.goal_id] + ball_pos = env.sim.data.body_xpos[self.ball_id] + goal_final_pos = env.sim.data.site_xpos[self.goal_final_id] self.dists.append(np.linalg.norm(goal_pos - ball_pos)) self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) - self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context)) - self.ball_traj[step, :] = ball_pos - # Determine the first time when ball is in cup - if not self.ball_in_cup: - ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id) - self.ball_in_cup = ball_in_cup - if ball_in_cup: - dist_to_ctxt = np.linalg.norm(ball_pos - self.context) - self.dist_ctxt = dist_to_ctxt + action_cost = np.sum(np.square(action)) + self.action_costs.append(action_cost) + + ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id, + self.table_collision_id) + + if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con + self.ball_table_contact = True + + ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id, + self.cup_collision_ids) + if ball_cup_cont: + self.ball_cup_contact = True + + ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id) + if ball_wall_cont and not self.ball_table_contact: + self.ball_wall_contact = True + + ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id, + self.ground_collision_id) + if ball_ground_contact and not self.ball_table_contact: + self.ball_ground_contact = True + + self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) + if env._steps == env.ep_length - 1 or self._is_collided: - if step == self.sim_time - 1: min_dist = np.min(self.dists) - dist_final = self.dists_final[-1] - # dist_ctxt = self.dists_ctxt[-1] - # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt) - cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt) - reward = np.exp(-1 * cost) - 1e-4 * action_cost - success = dist_final < 0.05 and self.dist_ctxt < 0.05 + ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id) + + cost_offset = 0 + + if self.ball_ground_contact: # or self.ball_wall_contact: + cost_offset += 2 + + if not self.ball_table_contact: + cost_offset += 2 + + if not ball_in_cup: + cost_offset += 2 + cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2 + else: + if self.ball_cup_contact: + cost_offset += 1 + cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost + + reward = - 1*cost - self.collision_penalty * int(self._is_collided) + success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact else: reward = - 1e-4 * action_cost success = False - return reward, success, stop_sim + infos = {} + infos["success"] 
= success + infos["is_collided"] = self._is_collided + infos["ball_pos"] = ball_pos.copy() + infos["action_cost"] = 5e-4 * action_cost - def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt): - if not ball_in_cup: - cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2) - else: - cost = 2 * dist_to_ctxt ** 2 - print('Context Distance:', dist_to_ctxt) - return cost + return reward, infos - def check_ball_in_cup(self, sim, ball_collision_id): - cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"] + def _check_collision_single_objects(self, sim, id_1, id_2): for coni in range(0, sim.data.ncon): con = sim.data.contact[coni] - collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id - collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id + collision = con.geom1 == id_1 and con.geom2 == id_2 + collision_trans = con.geom1 == id_2 and con.geom2 == id_1 if collision or collision_trans: return True return False - def check_collision(self, sim): + def _check_collision_with_itself(self, sim, collision_ids): + col_1, col_2 = False, False + for j, id in enumerate(collision_ids): + col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j]) + if j != len(collision_ids) - 1: + col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:]) + else: + col_2 = False + collision = True if col_1 or col_2 else False + return collision + + def _check_collision_with_set_of_objects(self, sim, id_1, id_list): for coni in range(0, sim.data.ncon): con = sim.data.contact[coni] - collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id - collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids + collision = con.geom1 in id_list and con.geom2 == id_1 + collision_trans = con.geom1 == id_1 and con.geom2 in id_list if collision or collision_trans: return True - return False + return False \ No newline at end of file diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py new file mode 100644 index 0000000..9d1d878 --- /dev/null +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py @@ -0,0 +1,169 @@ +import numpy as np + + +class BeerPongReward: + def __init__(self): + + self.robot_collision_objects = ["wrist_palm_link_convex_geom", + "wrist_pitch_link_convex_decomposition_p1_geom", + "wrist_pitch_link_convex_decomposition_p2_geom", + "wrist_pitch_link_convex_decomposition_p3_geom", + "wrist_yaw_link_convex_decomposition_p1_geom", + "wrist_yaw_link_convex_decomposition_p2_geom", + "forearm_link_convex_decomposition_p1_geom", + "forearm_link_convex_decomposition_p2_geom", + "upper_arm_link_convex_decomposition_p1_geom", + "upper_arm_link_convex_decomposition_p2_geom", + "shoulder_link_convex_decomposition_p1_geom", + "shoulder_link_convex_decomposition_p2_geom", + "shoulder_link_convex_decomposition_p3_geom", + "base_link_convex_geom", "table_contact_geom"] + + self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6", + "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10", + # "cup_base_table", "cup_base_table_contact", + "cup_geom_table15", + "cup_geom_table16", + "cup_geom_table17", "cup_geom1_table8", + # "cup_base_table_contact", + # "cup_base_table" + ] + + + self.ball_traj = None + self.dists = None + self.dists_final = None + self.costs = None + self.action_costs = None + 
self.angle_rewards = None + self.cup_angles = None + self.cup_z_axes = None + self.collision_penalty = 500 + self.reset(None) + + def reset(self, noisy): + self.ball_traj = [] + self.dists = [] + self.dists_final = [] + self.costs = [] + self.action_costs = [] + self.angle_rewards = [] + self.cup_angles = [] + self.cup_z_axes = [] + self.ball_ground_contact = False + self.ball_table_contact = False + self.ball_wall_contact = False + self.ball_cup_contact = False + self.noisy_bp = noisy + self._t_min_final_dist = -1 + + def compute_reward(self, env, action): + self.ball_id = env.sim.model._body_name2id["ball"] + self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"] + self.goal_id = env.sim.model._site_name2id["cup_goal_table"] + self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"] + self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects] + self.cup_table_id = env.sim.model._body_name2id["cup_table"] + self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"] + self.wall_collision_id = env.sim.model._geom_name2id["wall"] + self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"] + self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"] + self.ground_collision_id = env.sim.model._geom_name2id["ground"] + self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects] + + goal_pos = env.sim.data.site_xpos[self.goal_id] + ball_pos = env.sim.data.body_xpos[self.ball_id] + ball_vel = env.sim.data.body_xvelp[self.ball_id] + goal_final_pos = env.sim.data.site_xpos[self.goal_final_id] + self.dists.append(np.linalg.norm(goal_pos - ball_pos)) + self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) + + action_cost = np.sum(np.square(action)) + self.action_costs.append(action_cost) + + self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) + if env._steps == env.ep_length - 1 or self._is_collided: + + min_dist = np.min(self.dists) + ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id, + self.table_collision_id) + ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id, + self.cup_collision_ids) + ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, + self.wall_collision_id) + ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, + self.cup_table_collision_id) + if not ball_in_cup: + cost_offset = 2 + if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont: + cost_offset += 2 + cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost + else: + cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7 + + reward = - 1 * cost - self.collision_penalty * int(self._is_collided) + success = ball_in_cup + crash = self._is_collided + else: + reward = - 1e-7 * action_cost + cost = 0 + success = False + crash = False + + infos = {} + infos["success"] = success + infos["is_collided"] = self._is_collided + infos["ball_pos"] = ball_pos.copy() + infos["ball_vel"] = ball_vel.copy() + infos["action_cost"] = 5e-4 * action_cost + infos["task_cost"] = cost + + return reward, infos + + def get_cost_offset(self): + if self.ball_ground_contact: + return 200 + + if not self.ball_table_contact: + return 100 + + if not self.ball_in_cup: + return 50 + + if self.ball_in_cup and self.ball_cup_contact and not 
self.noisy_bp: + return 10 + + return 0 + + def _check_collision_single_objects(self, sim, id_1, id_2): + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 == id_1 and con.geom2 == id_2 + collision_trans = con.geom1 == id_2 and con.geom2 == id_1 + + if collision or collision_trans: + return True + return False + + def _check_collision_with_itself(self, sim, collision_ids): + col_1, col_2 = False, False + for j, id in enumerate(collision_ids): + col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j]) + if j != len(collision_ids) - 1: + col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:]) + else: + col_2 = False + collision = True if col_1 or col_2 else False + return collision + + def _check_collision_with_set_of_objects(self, sim, id_1, id_list): + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 in id_list and con.geom2 == id_1 + collision_trans = con.geom1 == id_1 and con.geom2 in id_list + + if collision or collision_trans: + return True + return False \ No newline at end of file diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py new file mode 100644 index 0000000..87c6b7e --- /dev/null +++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py @@ -0,0 +1,39 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # TODO: @Max Filter observations correctly + return np.hstack([ + [False] * 7, # cos + [False] * 7, # sin + # [True] * 2, # x-y coordinates of target distance + [False] # env steps + ]) + + @property + def start_pos(self): + return self._start_pos + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qpos[0:7].copy() + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qvel[0:7].copy() + + @property + def goal_pos(self): + # TODO: @Max I think the default value of returning to the start is reasonable here + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl new file mode 100644 index 0000000..3b05c27 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl new file mode 100644 index 0000000..5ff94a2 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl new file mode 100644 index 0000000..c548448 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl new file mode 100644 index 0000000..495160d Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl differ diff --git 
a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..b4bb322 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..7b2f001 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..f05174e Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl new file mode 100644 index 0000000..eb252d9 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl new file mode 100644 index 0000000..0a986fa Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..c039f0d Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..250acaf Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl new file mode 100644 index 0000000..993d0f7 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl new file mode 100644 index 0000000..8448a3f Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl new file mode 100644 index 0000000..c36f88f Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl new file mode 100644 index 0000000..d00cac1 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl 
b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl new file mode 100644 index 0000000..13d2f73 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl differ diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl new file mode 100644 index 0000000..0d95239 Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl differ diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py new file mode 100644 index 0000000..c5e6d2f --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py new file mode 100644 index 0000000..86d5a00 --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py @@ -0,0 +1,38 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # TODO: @Max Filter observations correctly + return np.hstack([ + [True] * 7, # Joint Pos + [True] * 3, # Ball pos + [True] * 3 # goal pos + ]) + + @property + def start_pos(self): + return self.init_qpos_tt + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qpos[:7].copy() + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qvel[:7].copy() + + @property + def goal_pos(self): + # TODO: @Max I think the default value of returning to the start is reasonable here + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py new file mode 100644 index 0000000..635d49d --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py @@ -0,0 +1,168 @@ +import os + +import numpy as np +import mujoco_py +from gym import utils, spaces +from gym.envs.mujoco import MujocoEnv + +from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init +from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward + +#TODO: Check for simulation stability. 
Make sure the code runs even for sim crash + +MAX_EPISODE_STEPS = 1375 +BALL_NAME_CONTACT = "target_ball_contact" +BALL_NAME = "target_ball" +TABLE_NAME = 'table_tennis_table' +FLOOR_NAME = 'floor' +PADDLE_CONTACT_1_NAME = 'bat' +PADDLE_CONTACT_2_NAME = 'bat_back' +RACKET_NAME = 'bat' +# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]]) +CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]]) +CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]]) + +class TT_Env_Gym(MujocoEnv, utils.EzPickle): + + def __init__(self, ctxt_dim=2): + model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml') + + self.ctxt_dim = ctxt_dim + if ctxt_dim == 2: + self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM + self.goal = np.zeros(3) # 2 x,y + 1z + elif ctxt_dim == 4: + self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM + self.goal = np.zeros(3) + else: + raise ValueError("either 2 or 4 dimensional Contexts available") + + action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]) + action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]) + self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64') + + self.time_steps = 0 + self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0]) + self.init_qvel_tt = np.zeros(10) + + self.reward_func = TT_Reward(self.ctxt_dim) + self.ball_landing_pos = None + self.hited_ball = False + self.ball_contact_after_hit = False + self._ids_set = False + super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1) + self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func. + self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT] + self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME] + self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME] + self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this + self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this + self.racket_id = self.sim.model._geom_name2id[RACKET_NAME] + + def _set_ids(self): + self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func. 
+ self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME] + self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME] + self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this + self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this + self.racket_id = self.sim.model._geom_name2id[RACKET_NAME] + self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT] + self._ids_set = True + + def _get_obs(self): + ball_pos = self.sim.data.body_xpos[self.ball_id] + obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions + ball_pos, + self.goal.copy()]) + return obs + + def sample_context(self): + return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim) + + def reset_model(self): + self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state + self.time_steps = 0 + self.ball_landing_pos = None + self.hited_ball = False + self.ball_contact_after_hit = False + self.goal = self.sample_context()[:2] + if self.ctxt_dim == 2: + initial_ball_state = ball_init(random=False) # fixed velocity, fixed position + elif self.ctxt_dim == 4: + initial_ball_state = ball_init(random=False)#raise NotImplementedError + + self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0]) + self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1]) + self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2]) + + self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3]) + self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4]) + self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5]) + + z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77])) + self.goal = z_extended_goal_pos + self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow + self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green + self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red + self.sim.forward() + + self.reward_func.reset(self.goal) # reset the reward function + return self._get_obs() + + def _contact_checker(self, id_1, id_2): + for coni in range(0, self.sim.data.ncon): + con = self.sim.data.contact[coni] + collision = con.geom1 == id_1 and con.geom2 == id_2 + collision_trans = con.geom1 == id_2 and con.geom2 == id_1 + if collision or collision_trans: + return True + return False + + def step(self, action): + if not self._ids_set: + self._set_ids() + done = False + episode_end = False if self.time_steps+1 + + + + + + + + + + + diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml new file mode 100644 index 0000000..7e04c28 --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml new file mode 100644 index 0000000..c313489 --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml 
b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml new file mode 100644 index 0000000..19543e2 --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml @@ -0,0 +1,10 @@ + + + + + + + + + \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml new file mode 100644 index 0000000..25d0366 --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml new file mode 100644 index 0000000..e349992 --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml new file mode 100644 index 0000000..9609a6e --- /dev/null +++ b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/alr_envs/dmc/dmc_wrapper.py b/alr_envs/dmc/dmc_wrapper.py index 10f1af9..aa6c7aa 100644 --- a/alr_envs/dmc/dmc_wrapper.py +++ b/alr_envs/dmc/dmc_wrapper.py @@ -15,10 +15,10 @@ def _spec_to_box(spec): assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found" dim = int(np.prod(s.shape)) if type(s) == specs.Array: - bound = np.inf * np.ones(dim, dtype=np.float32) + bound = np.inf * np.ones(dim, dtype=s.dtype) return -bound, bound elif type(s) == specs.BoundedArray: - zeros = np.zeros(dim, dtype=np.float32) + zeros = np.zeros(dim, dtype=s.dtype) return s.minimum + zeros, s.maximum + zeros mins, maxs = [], [] @@ -29,7 +29,7 @@ def _spec_to_box(spec): low = np.concatenate(mins, axis=0) high = np.concatenate(maxs, axis=0) assert low.shape == high.shape - return spaces.Box(low, high, dtype=np.float32) + return spaces.Box(low, high, dtype=s.dtype) def _flatten_obs(obs: collections.MutableMapping): @@ -113,7 +113,7 @@ class DMCWrapper(core.Env): if self._channels_first: obs = obs.transpose(2, 0, 1).copy() else: - obs = _flatten_obs(time_step.observation) + obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype) return obs @property
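
A minimal usage sketch (not part of the diff above) of one of the episodic motion-primitive environments registered in alr_envs/alr/__init__.py, e.g. "BeerpongProMP-v0". It assumes alr_envs and its dependencies (mujoco_py, mp_env_api) are installed and that gym's "module:env-id" syntax is used to trigger the registrations, the same convention the registrations themselves use for the underlying step-based environments.

```python
import gym

# The "alr_envs:" prefix makes gym import the package first, so the register()
# calls from alr_envs/alr/__init__.py (see diff above) have run before lookup.
env = gym.make("alr_envs:BeerpongProMP-v0")

obs = env.reset()  # context / initial observation, exposed once at episode start

# One action is one ProMP parametrization (here 7 DoF, 2 basis functions, see
# the "mp_kwargs" in the registration); sampling assumes the MP wrapper exposes
# the parameter space as its action space.
action = env.action_space.sample()

# The wrapper generates the full trajectory, tracks it with the PD controller
# (policy_type="motor"), and returns the cumulative episode reward; the per-step
# data collected by the underlying env ends up in the info dictionary.
obs, episode_reward, done, info = env.step(action)
print(episode_reward, done)
```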