diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 179839c..d9843c0 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -1,3 +1,4 @@
+import numpy as np
 from gym import register
 
 from . import classic_control, mujoco
@@ -193,6 +194,32 @@ register(
     }
 )
 
+## Table Tennis
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+register(id='TableTennis2DCtxt-v0',
+         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim': 2})
+
+register(id='TableTennis4DCtxt-v0',
+         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim': 4})
+
+## BeerPong
+difficulties = ["simple", "intermediate", "hard", "hardest"]
+
+for v, difficulty in enumerate(difficulties):
+    register(
+        id='ALRBeerPong-v{}'.format(v),
+        entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+        max_episode_steps=600,
+        kwargs={
+            "difficulty": difficulty,
+            "reward_type": "staged",
+        }
+    )
+
 # Motion Primitive Environments
 
 ## Simple Reacher
@@ -327,3 +354,59 @@ for _v in _versions:
         }
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id)
+
+## Beerpong
+register(
+    id='BeerpongDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "alr_envs:ALRBeerPong-v0",
+        "wrappers": [mujoco.beerpong.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 7,
+            "num_basis": 2,
+            "n_zero_bases": 2,
+            "duration": 0.5,
+            "post_traj_time": 2.5,
+            "width": 0.01,
+            "off": 0.01,
+            "policy_type": "motor",
+            "weights_scale": 0.08,
+            "zero_start": True,
+            "zero_goal": False,
+            "policy_kwargs": {
+                "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
+                "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+            }
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("BeerpongDetPMP-v0")
+
+## Table Tennis
+register(
+    id='TableTennisDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "alr_envs:TableTennis4DCtxt-v0",
+        "wrappers": [mujoco.table_tennis.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 7,
+            "num_basis": 2,
+            "n_zero_bases": 2,
+            "duration": 1.25,
+            "post_traj_time": 4.5,
+            "width": 0.01,
+            "off": 0.01,
+            "policy_type": "motor",
+            "weights_scale": 1.0,
+            "zero_start": True,
+            "zero_goal": False,
+            "policy_kwargs": {
+                "p_gains": 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+                "d_gains": 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+            }
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("TableTennisDetPMP-v0")
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 6744d15..30e1e7c 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,4 +1,6 @@
 from .reacher.alr_reacher import ALRReacherEnv
 from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
\ No newline at end of file
+from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
+from .table_tennis.tt_gym import TT_Env_Gym
+from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py
index e69de29..989b5a9 100644
--- a/alr_envs/alr/mujoco/beerpong/__init__.py
+++ b/alr_envs/alr/mujoco/beerpong/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
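Review note: a minimal smoke test for the registrations above; a sketch only, assuming alr_envs is installed with working MuJoCo bindings, and using the exact IDs passed to register():

    import gym
    import alr_envs  # noqa: F401 -- importing the package runs the register() calls above

    env = gym.make("alr_envs:ALRBeerPong-v0")  # or e.g. "alr_envs:TableTennis2DCtxt-v0"
    obs = env.reset()
    for _ in range(100):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()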
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
new file mode 100644
index 0000000..e96d2bc
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -0,0 +1,187 @@
[187 added lines of MuJoCo XML (scene, arm, table, cup and ball definitions, per the geom names referenced in the reward files below) were stripped during extraction and are not recoverable from this copy.]
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 4d8f389..a10e54a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -1,3 +1,4 @@
+import mujoco_py.builder
 import os
 
 import numpy as np
@@ -5,44 +6,67 @@ from gym import utils
 from gym.envs.mujoco import MujocoEnv
 
-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
-    def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
-        utils.EzPickle.__init__(**locals())
-        MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
+class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
+                 context: np.ndarray = None, difficulty='simple'):
         self._steps = 0
 
         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
-                                     "beerpong" + ".xml")
-
-        self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
-        self.start_vel = np.zeros(7)
-
-        self._q_pos = []
-        self._q_vel = []
-        # self.weight_matrix_scale = 50
-        self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-        self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
-        self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
+                                     "beerpong_wo_cup" + ".xml")
 
         self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
         self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
 
-        self.context = None
-
-        # alr_mujoco_env.AlrMujocoEnv.__init__(self,
-        #                                      self.xml_path,
-        #                                      apply_gravity_comp=apply_gravity_comp,
-        #                                      n_substeps=n_substeps)
+        self.context = context
+        self.apply_gravity_comp = apply_gravity_comp
+        self.add_noise = noisy
 
-        self.sim_time = 8  # seconds
-        self.sim_steps = int(self.sim_time / self.dt)
-        if reward_function is None:
-            from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward
-            reward_function = BeerpongReward
-        self.reward_function = reward_function(self.sim, self.sim_steps)
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+        self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
+        self._start_vel = np.zeros(7)
+
+        self.ball_site_id = 0  # hard-coded model index of the ball spawn site
+        self.ball_id = 11      # hard-coded model index of the ball body
+
+        self._release_step = 100  # time step at which the ball is released
+
+        self.sim_time = 4  # seconds
+        self.ep_length = 600  # 600 steps at dt = 0.005 is 3 s; was int(self.sim_time / self.dt)
+        self.cup_table_id = 10  # hard-coded model index of the cup body on the table
+
+        if noisy:
+            self.noise_std = 0.01
+        else:
+            self.noise_std = 0
+
+        if difficulty == 'simple':
+            self.cup_goal_pos = np.array([0, -1.7, 0.840])
+        elif difficulty == 'intermediate':
+            self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
+        elif difficulty == 'hard':
+            self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
+        elif difficulty == 'hardest':
+            self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
+        else:
+            # guard: without this, an unknown difficulty would leave cup_goal_pos unset
+            raise ValueError("Unknown difficulty: {}".format(difficulty))
+
+        if reward_type == "no_context":
+            from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
+            reward_function = BeerPongReward
+        elif reward_type == "staged":
+            from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+            reward_function = BeerPongReward
+        else:
+            raise ValueError("Unknown reward type: {}".format(reward_type))
+        self.reward_function = reward_function()
+
+        MujocoEnv.__init__(self, self.xml_path, frame_skip)
+        utils.EzPickle.__init__(self)
+
+    @property
+    def start_pos(self):
+        return self._start_pos
+
+    @property
+    def start_vel(self):
+        return self._start_vel
 
     @property
     def current_pos(self):
@@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
     def current_vel(self):
         return self.sim.data.qvel[0:7].copy()
 
-    def configure(self, context):
-        self.context = context
-        self.reward_function.reset(context)
+    def reset(self):
+        self.reward_function.reset(self.add_noise)
+        return super().reset()
 
     def reset_model(self):
         init_pos_all = self.init_qpos.copy()
@@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
         init_vel = np.zeros_like(init_pos_all)
         self._steps = 0
 
-        self._q_pos = []
-        self._q_vel = []
-
         start_pos = init_pos_all
         start_pos[0:7] = init_pos_robot
-        # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
 
         self.set_state(start_pos, init_vel)
-
-        ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
-        self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
-        self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
-
+        self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
+        # read the ball spawn site after the first set_state, then write the full state again
+        start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+        self.set_state(start_pos, init_vel)
         return self._get_obs()
 
     def step(self, a):
@@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
         angular_vel = 0.0
         reward_ctrl = - np.square(a).sum()
 
-        crash = self.do_simulation(a)
-        joint_cons_viol = self.check_traj_in_joint_limits()
-
-        self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
-        self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
+        if self.apply_gravity_comp:
+            # add gravity/Coriolis bias forces, mapped into ctrl units via the gear ratio
+            a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+        try:
+            self.do_simulation(a, self.frame_skip)
+            if self._steps < self._release_step:
+                # keep the ball attached to the hand site until release
+                self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+                self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
+            elif self._steps == self._release_step and self.add_noise:
+                self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+            crash = False
+        except mujoco_py.builder.MujocoException:
+            crash = True
+        # joint_cons_viol = self.check_traj_in_joint_limits()
 
         ob = self._get_obs()
 
-        if not crash and not joint_cons_viol:
-            reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
-            done = success or self._steps == self.sim_steps - 1 or stop_sim
+        if not crash:
+            reward, reward_infos = self.reward_function.compute_reward(self, a)
+            success = reward_infos['success']
+            is_collided = reward_infos['is_collided']
+            ball_pos = reward_infos['ball_pos']
+            ball_vel = reward_infos['ball_vel']
+            done = is_collided or self._steps == self.ep_length - 1
            self._steps += 1
         else:
-            reward = -1000
+            reward = -30
             success = False
+            is_collided = False
             done = True
+            ball_pos = np.zeros(3)
+            ball_vel = np.zeros(3)
 
         return ob, reward, done, dict(reward_dist=reward_dist,
                                       reward_ctrl=reward_ctrl,
+                                      reward=reward,
                                       velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
-                                      is_collided=crash or joint_cons_viol)
+                                      # traj=self._q_pos,
+                                      action=a,
+                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                                      ball_pos=ball_pos,
+                                      ball_vel=ball_vel,
+                                      is_success=success,
+                                      is_collided=is_collided, sim_crash=crash)
 
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
 
-    # TODO
+    # TODO: extend observation space
     def _get_obs(self):
         theta = self.sim.data.qpos.flat[:7]
         return np.concatenate([
@@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
         ])
 
     # TODO
+    @property
     def active_obs(self):
-        pass
-
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
 
 if __name__ == "__main__":
-    env = ALRBeerpongEnv()
-    ctxt = np.array([0, -2, 0.840])  # initial
+    env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')
 
-    env.configure(ctxt)
+    # env.configure(ctxt)
     env.reset()
-    env.render()
-    for i in range(16000):
-        # test with random actions
-        ac = 0.0 * env.action_space.sample()[0:7]
-        ac[1] = -0.8
-        ac[3] = - 0.3
-        ac[5] = -0.2
-        # ac = env.start_pos
-        # ac[0] += np.pi/2
+    env.render("human")
+    for i in range(800):
+        ac = 10 * env.action_space.sample()[0:7]
         obs, rew, d, info = env.step(ac)
-        env.render()
+        env.render("human")
 
         print(rew)
 
@@ -147,4 +187,3 @@ if __name__ == "__main__":
             break
 
     env.close()
-
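Review note: with apply_gravity_comp=True, step() adds MuJoCo's bias forces (gravity, Coriolis and centrifugal terms from qfrc_bias), rescaled by the actuator gear, on top of the commanded action. A standalone sketch of the same computation (hypothetical helper, mirroring the line in step()):

    import numpy as np

    def gravity_compensated(action, sim, model):
        # qfrc_bias holds the generalized bias forces for the current state;
        # dividing by the gear ratio maps joint torques into actuator ctrl units
        n = len(action)
        return action + sim.data.qfrc_bias[:n].copy() / model.actuator_gear[:n, 0]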
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
index c3ca3c6..3896e82 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
@@ -1,124 +1,169 @@
 import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
 
-class BeerpongReward(alr_reward_fct.AlrReward):
-    def __init__(self, sim, sim_time):
+class BeerPongReward:
+    def __init__(self):
 
-        self.sim = sim
-        self.sim_time = sim_time
+        self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+                                        "wrist_pitch_link_convex_decomposition_p1_geom",
+                                        "wrist_pitch_link_convex_decomposition_p2_geom",
+                                        "wrist_pitch_link_convex_decomposition_p3_geom",
+                                        "wrist_yaw_link_convex_decomposition_p1_geom",
+                                        "wrist_yaw_link_convex_decomposition_p2_geom",
+                                        "forearm_link_convex_decomposition_p1_geom",
+                                        "forearm_link_convex_decomposition_p2_geom",
+                                        "upper_arm_link_convex_decomposition_p1_geom",
+                                        "upper_arm_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p1_geom",
+                                        "shoulder_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p3_geom",
+                                        "base_link_convex_geom", "table_contact_geom"]
 
-        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
-                                  "wrist_pitch_link_convex_decomposition_p1_geom",
-                                  "wrist_pitch_link_convex_decomposition_p2_geom",
-                                  "wrist_pitch_link_convex_decomposition_p3_geom",
-                                  "wrist_yaw_link_convex_decomposition_p1_geom",
-                                  "wrist_yaw_link_convex_decomposition_p2_geom",
-                                  "forearm_link_convex_decomposition_p1_geom",
-                                  "forearm_link_convex_decomposition_p2_geom"]
+        self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+                                      "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+                                      # "cup_base_table", "cup_base_table_contact",
+                                      "cup_geom_table15",
+                                      "cup_geom_table16",
+                                      "cup_geom_table17", "cup_geom1_table8",
+                                      # "cup_base_table_contact",
+                                      # "cup_base_table"
+                                      ]
 
-        self.ball_id = None
-        self.ball_collision_id = None
-        self.goal_id = None
-        self.goal_final_id = None
-        self.collision_ids = None
         self.ball_traj = None
         self.dists = None
-        self.dists_ctxt = None
         self.dists_final = None
         self.costs = None
-
+        self.action_costs = None
+        self.angle_rewards = None
+        self.cup_angles = None
+        self.cup_z_axes = None
+        self.collision_penalty = 500
         self.reset(None)
 
     def reset(self, context):
-        self.ball_traj = np.zeros(shape=(self.sim_time, 3))
+        self.ball_traj = []
         self.dists = []
-        self.dists_ctxt = []
         self.dists_final = []
         self.costs = []
-        self.context = context
-        self.ball_in_cup = False
-        self.dist_ctxt = 5
+        self.action_costs = []
+        self.angle_rewards = []
+        self.cup_angles = []
+        self.cup_z_axes = []
+        self.ball_ground_contact = False
+        self.ball_table_contact = False
+        self.ball_wall_contact = False
+        self.ball_cup_contact = False
 
-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
-        self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
-        self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+    def compute_reward(self, env, action):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+        self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+        self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+        self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+        self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+        self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+        self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+        self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+        self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
 
-    def compute_reward(self, action, sim, step):
-        action_cost = np.sum(np.square(action))
-
-        stop_sim = False
-        success = False
-
-        if self.check_collision(sim):
-            reward = - 1e-4 * action_cost - 1000
-            stop_sim = True
-            return reward, success, stop_sim
-
-        # Compute the current distance from the ball to the inner part of the cup
-        goal_pos = sim.data.site_xpos[self.goal_id]
-        ball_pos = sim.data.body_xpos[self.ball_id]
-        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
         self.dists.append(np.linalg.norm(goal_pos - ball_pos))
         self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-        self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
-        self.ball_traj[step, :] = ball_pos
 
-        # Determine the first time when ball is in cup
-        if not self.ball_in_cup:
-            ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
-            self.ball_in_cup = ball_in_cup
-            if ball_in_cup:
-                dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
-                self.dist_ctxt = dist_to_ctxt
+        action_cost = np.sum(np.square(action))
+        self.action_costs.append(action_cost)
+
+        ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                 self.table_collision_id)
+
+        if ball_table_bounce:  # or ball_cup_table_cont or ball_wall_con
+            self.ball_table_contact = True
+
+        ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+                                                                  self.cup_collision_ids)
+        if ball_cup_cont:
+            self.ball_cup_contact = True
+
+        ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
+        if ball_wall_cont and not self.ball_table_contact:
+            self.ball_wall_contact = True
+
+        ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                   self.ground_collision_id)
+        if ball_ground_contact and not self.ball_table_contact:
+            self.ball_ground_contact = True
+
+        self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        if env._steps == env.ep_length - 1 or self._is_collided:
 
-        if step == self.sim_time - 1:
             min_dist = np.min(self.dists)
-            dist_final = self.dists_final[-1]
-            # dist_ctxt = self.dists_ctxt[-1]
-            # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
-            cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
-            reward = np.exp(-1 * cost) - 1e-4 * action_cost
-            success = dist_final < 0.05 and self.dist_ctxt < 0.05
+            ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                               self.cup_table_collision_id)
+
+            cost_offset = 0
+
+            if self.ball_ground_contact:  # or self.ball_wall_contact:
+                cost_offset += 2
+
+            if not self.ball_table_contact:
+                cost_offset += 2
+
+            if not ball_in_cup:
+                cost_offset += 2
+                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost
+            else:
+                if self.ball_cup_contact:
+                    cost_offset += 1
+                cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
+
+            reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+            success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
         else:
             reward = - 1e-4 * action_cost
             success = False
 
-        return reward, success, stop_sim
+        infos = {}
+        infos["success"] = success
+        infos["is_collided"] = self._is_collided
+        infos["ball_pos"] = ball_pos.copy()
+        # step() reads 'ball_vel' unconditionally; provide it here too, for parity with the staged reward
+        infos["ball_vel"] = env.sim.data.body_xvelp[self.ball_id].copy()
+        infos["action_cost"] = 5e-4 * action_cost
 
-    def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
-        if not ball_in_cup:
-            cost = 3 + 2 * (0.5 * min_dist ** 2 + 0.5 * dist_final ** 2)
-        else:
-            cost = 2 * dist_to_ctxt ** 2
-            print('Context Distance:', dist_to_ctxt)
-        return cost
+        return reward, infos
 
-    def check_ball_in_cup(self, sim, ball_collision_id):
-        cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
+    def _check_collision_single_objects(self, sim, id_1, id_2):
         for coni in range(0, sim.data.ncon):
             con = sim.data.contact[coni]
 
-            collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
-            collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
 
             if collision or collision_trans:
                 return True
         return False
 
-    def check_collision(self, sim):
+    def _check_collision_with_itself(self, sim, collision_ids):
+        # checks every geom against all others; returns as soon as any pair is in contact
+        for j, id_1 in enumerate(collision_ids):
+            col_1 = self._check_collision_with_set_of_objects(sim, id_1, collision_ids[:j])
+            col_2 = self._check_collision_with_set_of_objects(sim, id_1, collision_ids[j + 1:]) \
+                if j != len(collision_ids) - 1 else False
+            if col_1 or col_2:
+                return True
+        return False
+
+    def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
         for coni in range(0, sim.data.ncon):
             con = sim.data.contact[coni]
 
-            collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
-            collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
+            collision = con.geom1 in id_list and con.geom2 == id_1
+            collision_trans = con.geom1 == id_1 and con.geom2 in id_list
 
             if collision or collision_trans:
                 return True
-        return False
+        return False
\ No newline at end of file
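Review note: each _check_collision_* helper above rescans sim.data.contact once per query. Collecting the active geom pairs in a single pass would keep the per-step cost at O(ncon) no matter how many pair queries follow; a possible refactor, not part of this patch:

    def contact_pairs(sim):
        # unordered geom-id pairs currently in contact
        return {frozenset((sim.data.contact[i].geom1, sim.data.contact[i].geom2))
                for i in range(sim.data.ncon)}

    # usage sketch:
    #   pairs = contact_pairs(env.sim)
    #   ball_table_bounce = frozenset((ball_collision_id, table_collision_id)) in pairs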
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
new file mode 100644
index 0000000..9d1d878
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -0,0 +1,169 @@
+import numpy as np
+
+
+class BeerPongReward:
+    def __init__(self):
+
+        self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+                                        "wrist_pitch_link_convex_decomposition_p1_geom",
+                                        "wrist_pitch_link_convex_decomposition_p2_geom",
+                                        "wrist_pitch_link_convex_decomposition_p3_geom",
+                                        "wrist_yaw_link_convex_decomposition_p1_geom",
+                                        "wrist_yaw_link_convex_decomposition_p2_geom",
+                                        "forearm_link_convex_decomposition_p1_geom",
+                                        "forearm_link_convex_decomposition_p2_geom",
+                                        "upper_arm_link_convex_decomposition_p1_geom",
+                                        "upper_arm_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p1_geom",
+                                        "shoulder_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p3_geom",
+                                        "base_link_convex_geom", "table_contact_geom"]
+
+        self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+                                      "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+                                      # "cup_base_table", "cup_base_table_contact",
+                                      "cup_geom_table15",
+                                      "cup_geom_table16",
+                                      "cup_geom_table17", "cup_geom1_table8",
+                                      # "cup_base_table_contact",
+                                      # "cup_base_table"
+                                      ]
+
+        self.ball_traj = None
+        self.dists = None
+        self.dists_final = None
+        self.costs = None
+        self.action_costs = None
+        self.angle_rewards = None
+        self.cup_angles = None
+        self.cup_z_axes = None
+        self.collision_penalty = 500
+        self.reset(None)
+
+    def reset(self, noisy):
+        self.ball_traj = []
+        self.dists = []
+        self.dists_final = []
+        self.costs = []
+        self.action_costs = []
+        self.angle_rewards = []
+        self.cup_angles = []
+        self.cup_z_axes = []
+        self.ball_ground_contact = False
+        self.ball_table_contact = False
+        self.ball_wall_contact = False
+        self.ball_cup_contact = False
+        self.noisy_bp = noisy
+        self._t_min_final_dist = -1
+
+    def compute_reward(self, env, action):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+        self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+        self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+        self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+        self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+        self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+        self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+        self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+        self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        ball_vel = env.sim.data.body_xvelp[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
+        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
+
+        action_cost = np.sum(np.square(action))
+        self.action_costs.append(action_cost)
+
+        self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        if env._steps == env.ep_length - 1 or self._is_collided:
+
+            min_dist = np.min(self.dists)
+            ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                     self.table_collision_id)
+            ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+                                                                            self.cup_collision_ids)
+            ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                  self.wall_collision_id)
+            ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                               self.cup_table_collision_id)
+            if not ball_in_cup:
+                cost_offset = 2
+                if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
+                    cost_offset += 2
+                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
+            else:
+                cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
+
+            reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+            success = ball_in_cup
+            crash = self._is_collided
+        else:
+            reward = - 1e-7 * action_cost
+            cost = 0
+            success = False
+            crash = False
+
+        infos = {}
+        infos["success"] = success
+        infos["is_collided"] = self._is_collided
+        infos["ball_pos"] = ball_pos.copy()
+        infos["ball_vel"] = ball_vel.copy()
+        infos["action_cost"] = 5e-4 * action_cost
+        infos["task_cost"] = cost
+
+        return reward, infos
+
+    def get_cost_offset(self):
+        # NOTE: not called anywhere in this patch; it also reads self.ball_in_cup,
+        # which is never set on this class
+        if self.ball_ground_contact:
+            return 200
+
+        if not self.ball_table_contact:
+            return 100
+
+        if not self.ball_in_cup:
+            return 50
+
+        if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
+            return 10
+
+        return 0
+
+    def _check_collision_single_objects(self, sim, id_1, id_2):
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+
+            if collision or collision_trans:
+                return True
+        return False
+
+    def _check_collision_with_itself(self, sim, collision_ids):
+        # checks every geom against all others; returns as soon as any pair is in contact
+        for j, id_1 in enumerate(collision_ids):
+            col_1 = self._check_collision_with_set_of_objects(sim, id_1, collision_ids[:j])
+            col_2 = self._check_collision_with_set_of_objects(sim, id_1, collision_ids[j + 1:]) \
+                if j != len(collision_ids) - 1 else False
+            if col_1 or col_2:
+                return True
+        return False
+
+    def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 in id_list and con.geom2 == id_1
+            collision_trans = con.geom1 == id_1 and con.geom2 in id_list
+
+            if collision or collision_trans:
+                return True
+        return False
\ No newline at end of file
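Review note: at episode end the staged reward boils down to three cost tiers: offset 0 when the ball lands in the cup, 2 for a miss that touched table, cup or wall, and 4 for a complete miss, plus the squared distance terms and a tiny action cost. An illustrative mirror of that branch (not part of the patch):

    def stage_offset(ball_in_cup, any_contact):
        if ball_in_cup:
            return 0  # success: only the final-distance and action terms remain
        return 2 if any_contact else 4  # miss; penalized harder if nothing was even hit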
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
new file mode 100644
index 0000000..87c6b7e
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -0,0 +1,39 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # TODO: @Max Filter observations correctly
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
+    @property
+    def start_pos(self):
+        return self._start_pos
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[0:7].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qvel[0:7].copy()
+
+    @property
+    def goal_pos(self):
+        # TODO: @Max I think the default value of returning to the start is reasonable here
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
new file mode 100644
index 0000000..3b05c27
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
new file mode 100644
index 0000000..5ff94a2
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
new file mode 100644
index 0000000..c548448
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
new file mode 100644
index 0000000..495160d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..b4bb322
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..7b2f001
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..f05174e
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
new file mode 100644
index 0000000..eb252d9
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
new file mode 100644
index 0000000..0a986fa
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c039f0d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..250acaf
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..993d0f7
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
new file mode 100644
index 0000000..8448a3f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c36f88f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..d00cac1
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
new file mode 100644
index 0000000..13d2f73
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
new file mode 100644
index 0000000..0d95239
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
new file mode 100644
index 0000000..86d5a00
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@@ -0,0 +1,38 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # TODO: @Max Filter observations correctly
+        return np.hstack([
+            [True] * 7,  # Joint Pos
+            [True] * 3,  # Ball pos
+            [True] * 3   # goal pos
+        ])
+
+    @property
+    def start_pos(self):
+        # resolved on the wrapped env ("self.self.init_qpos_tt" was a typo that
+        # raises AttributeError on first access)
+        return self.env.init_qpos_tt
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[:7].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qvel[:7].copy()
+
+    @property
+    def goal_pos(self):
+        # TODO: @Max I think the default value of returning to the start is reasonable here
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
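Review note: these properties lean on attribute forwarding: gym.Wrapper resolves names it does not define itself on the wrapped env, which is why current_pos can read self.sim directly and start_pos can reach init_qpos_tt. This assumes MPEnvWrapper ultimately subclasses gym.Wrapper, which this diff does not show:

    import gym

    class Passthrough(gym.Wrapper):
        pass  # defines nothing of its own

    # wrapped = Passthrough(TT_Env_Gym())
    # wrapped.init_qpos_tt  # found on the inner env via Wrapper.__getattr__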
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
new file mode 100644
index 0000000..635d49d
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -0,0 +1,168 @@
+import os
+
+import numpy as np
+import mujoco_py
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+
+from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
+from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
+
+# TODO: Check for simulation stability. Make sure the code runs even for a sim crash.
+
+MAX_EPISODE_STEPS = 1375
+BALL_NAME_CONTACT = "target_ball_contact"
+BALL_NAME = "target_ball"
+TABLE_NAME = 'table_tennis_table'
+FLOOR_NAME = 'floor'
+PADDLE_CONTACT_1_NAME = 'bat'
+PADDLE_CONTACT_2_NAME = 'bat_back'
+RACKET_NAME = 'bat'
+# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
+CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
+CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
+
+
+class TT_Env_Gym(MujocoEnv, utils.EzPickle):
+
+    def __init__(self, ctxt_dim=2):
+        model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
+
+        self.ctxt_dim = ctxt_dim
+        if ctxt_dim == 2:
+            self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
+            self.goal = np.zeros(3)  # 2 for x, y + 1 for z
+        elif ctxt_dim == 4:
+            self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
+            self.goal = np.zeros(3)
+        else:
+            raise ValueError("Only 2- or 4-dimensional contexts are available.")
+
+        action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+        action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+        self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+
+        self.time_steps = 0
+        self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
+        self.init_qvel_tt = np.zeros(10)
+
+        self.reward_func = TT_Reward(self.ctxt_dim)
+        self.ball_landing_pos = None
+        self.hited_ball = False
+        self.ball_contact_after_hit = False
+        self._ids_set = False
+        super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+        # NOTE: duplicates _set_ids() below; the ids are resolved again on the first step()
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # find the proper -> not protected func.
+        self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+        self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+        self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+        self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME]  # check if we need both or only this
+        self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME]  # check if we need both or only this
+        self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+
+    def _set_ids(self):
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]
+        self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+        self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+        self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME]
+        self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME]
+        self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+        self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+        self._ids_set = True
+
+    def _get_obs(self):
+        ball_pos = self.sim.data.body_xpos[self.ball_id]
+        obs = np.concatenate([self.sim.data.qpos[:7].copy(),  # 7 joint positions
+                              ball_pos,
+                              self.goal.copy()])
+        return obs
+
+    def sample_context(self):
+        return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+
+    def reset_model(self):
+        self.set_state(self.init_qpos_tt, self.init_qvel_tt)  # reset to initial sim state
+        self.time_steps = 0
+        self.ball_landing_pos = None
+        self.hited_ball = False
+        self.ball_contact_after_hit = False
+        self.goal = self.sample_context()[:2]
+        if self.ctxt_dim == 2:
+            initial_ball_state = ball_init(random=False)  # fixed velocity, fixed position
+        elif self.ctxt_dim == 4:
+            initial_ball_state = ball_init(random=False)  # TODO: a randomized 4D-context init is not implemented yet
+
+        self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
+        self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
+        self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
+
+        self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
+        self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
+        self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
+
+        z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
+        self.goal = z_extended_goal_pos
+        self.sim.model.body_pos[5] = self.goal[:3]  # Desired Landing Position, Yellow
+        self.sim.model.body_pos[3] = np.array([0, 0, 0.5])  # Outgoing Ball Landing Position, Green
+        self.sim.model.body_pos[4] = np.array([0, 0, 0.5])  # Incoming Ball Landing Position, Red
+        self.sim.forward()
+
+        self.reward_func.reset(self.goal)  # reset the reward function
+        return self._get_obs()
+
+    def _contact_checker(self, id_1, id_2):
+        for coni in range(0, self.sim.data.ncon):
+            con = self.sim.data.contact[coni]
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+            if collision or collision_trans:
+                return True
+        return False
+
+    def step(self, action):
+        if not self._ids_set:
+            self._set_ids()
+        done = False
+        episode_end = False if self.time_steps + 1 < MAX_EPISODE_STEPS else True
[The remainder of tt_gym.py (roughly 60 lines: the rest of step(), reward and termination handling) was destroyed during extraction, most likely starting at the '<' comparison above; the comparison itself is reconstructed from context, everything after it is not recoverable from this copy. The diffs for tt_utils.py (ball_init) and tt_reward.py (TT_Reward), both imported at the top of this file, appear to have been lost in the same span.]
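Review note: sample_context() draws goals uniformly between the per-dimension bounds, and reset_model() keeps the x-y part and appends the fixed table height. A worked 2D example under exactly those definitions:

    import numpy as np

    bounds = np.array([[-1.2, -0.6], [-0.2, 0.0]])  # CONTEXT_RANGE_BOUNDS_2DIM
    np.random.seed(0)  # seeded only to make the illustration reproducible
    goal_xy = np.random.uniform(bounds[0], bounds[1], size=2)
    goal = np.concatenate((goal_xy, [0.77]))  # z = table height, as in reset_model()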
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
new file mode 100644
index 0000000..7e04c28
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
@@ -0,0 +1,103 @@
[103 added XML lines stripped during extraction; not recoverable. Per its name, the file defines the Barrett WAM 7-DoF right arm.]
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
new file mode 100644
index 0000000..c313489
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
@@ -0,0 +1,30 @@
[30 added XML lines stripped during extraction; not recoverable. Per its name, the file defines the table geometry.]
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
new file mode 100644
index 0000000..19543e2
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
@@ -0,0 +1,10 @@
[10 added XML lines stripped during extraction; not recoverable. Per its name, the file defines the target ball.]
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
new file mode 100644
index 0000000..25d0366
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
@@ -0,0 +1,47 @@
[47 added XML lines stripped during extraction; not recoverable. Per its name, the file defines the right-arm actuators.]
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
new file mode 100644
index 0000000..e349992
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
@@ -0,0 +1,46 @@
[46 added XML lines stripped during extraction; not recoverable. Per its name, the file holds shared assets and defaults.]
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
new file mode 100644
index 0000000..9609a6e
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
@@ -0,0 +1,18 @@
[18 added XML lines stripped during extraction; not recoverable. Per its name, this is the top-level scene referenced by tt_gym.py, presumably including the files above.]
\ No newline at end of file