diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 179839c..d9843c0 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -1,3 +1,4 @@
+import numpy as np
from gym import register
from . import classic_control, mujoco
@@ -193,6 +194,32 @@ register(
}
)
+## Table Tennis
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+register(id='TableTennis2DCtxt-v0',
+ entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+ max_episode_steps=MAX_EPISODE_STEPS,
+ kwargs={'ctxt_dim':2})
+
+register(id='TableTennis4DCtxt-v0',
+ entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+ max_episode_steps=MAX_EPISODE_STEPS,
+ kwargs={'ctxt_dim':4})
+
+## BeerPong
+difficulties = ["simple", "intermediate", "hard", "hardest"]
+
+for v, difficulty in enumerate(difficulties):
+ register(
+ id='ALRBeerPong-v{}'.format(v),
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=600,
+ kwargs={
+ "difficulty": difficulty,
+ "reward_type": "staged",
+ }
+ )
+
# Motion Primitive Environments
## Simple Reacher
@@ -327,3 +354,59 @@ for _v in _versions:
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id)
+
+## Beerpong
+register(
+ id='BeerpongDetPMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRBeerPong-v0",
+ "wrappers": [mujoco.beerpong.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 2,
+ "n_zero_bases": 2,
+ "duration": 0.5,
+ "post_traj_time": 2.5,
+ "width": 0.01,
+ "off": 0.01,
+ "policy_type": "motor",
+ "weights_scale": 0.08,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+ "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("BeerpongDetPMP-v0")
+
+## Table Tennis
+register(
+ id='TableTennisDetPMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+ kwargs={
+ "name": "alr_envs:TableTennis4DCtxt-v0",
+ "wrappers": [mujoco.table_tennis.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 2,
+ "n_zero_bases": 2,
+ "duration": 1.25,
+ "post_traj_time": 4.5,
+ "width": 0.01,
+ "off": 0.01,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+ "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("TableTennisDetPMP-v0")
\ No newline at end of file
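
For a quick sanity check of the new step-based ids, a minimal rollout could look like the sketch below (a sketch only; it assumes `gym`, `mujoco_py` and this package are importable — the DetPMP ids registered above additionally require `mp_env_api`):

```python
import gym

# step-based BeerPong env registered above ("staged" reward, "simple" difficulty)
env = gym.make("alr_envs:ALRBeerPong-v0")
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()
```
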
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 6744d15..30e1e7c 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,4 +1,6 @@
from .reacher.alr_reacher import ALRReacherEnv
from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
\ No newline at end of file
+from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
+from .table_tennis.tt_gym import TT_Env_Gym
+from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py
index e69de29..989b5a9 100644
--- a/alr_envs/alr/mujoco/beerpong/__init__.py
+++ b/alr_envs/alr/mujoco/beerpong/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
new file mode 100644
index 0000000..e96d2bc
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -0,0 +1,187 @@
+<!-- MuJoCo scene XML (187 lines): Barrett WAM arm without the end-effector cup, plus table, target cup and ball. XML body not preserved in this excerpt. -->
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 4d8f389..a10e54a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -1,3 +1,4 @@
+import mujoco_py.builder
import os
import numpy as np
@@ -5,44 +6,67 @@ from gym import utils
from gym.envs.mujoco import MujocoEnv
-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
- utils.EzPickle.__init__(**locals())
- MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
+class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
+ context: np.ndarray = None, difficulty='simple'):
self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
- "beerpong" + ".xml")
-
- self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
- self.start_vel = np.zeros(7)
-
- self._q_pos = []
- self._q_vel = []
- # self.weight_matrix_scale = 50
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
- self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
- self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
+ "beerpong_wo_cup" + ".xml")
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
- self.context = None
- # alr_mujoco_env.AlrMujocoEnv.__init__(self,
- # self.xml_path,
- # apply_gravity_comp=apply_gravity_comp,
- # n_substeps=n_substeps)
+ self.context = context
+ self.apply_gravity_comp = apply_gravity_comp
+ self.add_noise = noisy
- self.sim_time = 8 # seconds
- self.sim_steps = int(self.sim_time / self.dt)
- if reward_function is None:
- from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward
- reward_function = BeerpongReward
- self.reward_function = reward_function(self.sim, self.sim_steps)
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+ self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
+ self._start_vel = np.zeros(7)
+
+ self.ball_site_id = 0
+ self.ball_id = 11
+
+ self._release_step = 100 # time step of ball release
+
+ self.sim_time = 4 # seconds
+        self.ep_length = 600  # based on 5 seconds with dt = 0.005; replaces int(self.sim_time / self.dt)
+ self.cup_table_id = 10
+
+ if noisy:
+ self.noise_std = 0.01
+ else:
+ self.noise_std = 0
+
+ if difficulty == 'simple':
+ self.cup_goal_pos = np.array([0, -1.7, 0.840])
+ elif difficulty == 'intermediate':
+ self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
+ elif difficulty == 'hard':
+ self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
+ elif difficulty == 'hardest':
+ self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
+
+ if reward_type == "no_context":
+ from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
+ reward_function = BeerPongReward
+ elif reward_type == "staged":
+ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+ reward_function = BeerPongReward
+ else:
+ raise ValueError("Unknown reward type: {}".format(reward_type))
+ self.reward_function = reward_function()
+
+ MujocoEnv.__init__(self, self.xml_path, frame_skip)
+ utils.EzPickle.__init__(self)
+
+ @property
+ def start_pos(self):
+ return self._start_pos
+
+ @property
+ def start_vel(self):
+ return self._start_vel
@property
def current_pos(self):
@@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
def current_vel(self):
return self.sim.data.qvel[0:7].copy()
- def configure(self, context):
- self.context = context
- self.reward_function.reset(context)
+ def reset(self):
+ self.reward_function.reset(self.add_noise)
+ return super().reset()
def reset_model(self):
init_pos_all = self.init_qpos.copy()
@@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
init_vel = np.zeros_like(init_pos_all)
self._steps = 0
- self._q_pos = []
- self._q_vel = []
start_pos = init_pos_all
start_pos[0:7] = init_pos_robot
- # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
self.set_state(start_pos, init_vel)
-
- ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
- self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
- self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
-
+ self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
+ start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+ self.set_state(start_pos, init_vel)
return self._get_obs()
def step(self, a):
@@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
angular_vel = 0.0
reward_ctrl = - np.square(a).sum()
- crash = self.do_simulation(a)
- joint_cons_viol = self.check_traj_in_joint_limits()
-
- self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
- self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
+ if self.apply_gravity_comp:
+ a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+ try:
+ self.do_simulation(a, self.frame_skip)
+ if self._steps < self._release_step:
+ self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+ self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
+ elif self._steps == self._release_step and self.add_noise:
+ self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+ crash = False
+ except mujoco_py.builder.MujocoException:
+ crash = True
+ # joint_cons_viol = self.check_traj_in_joint_limits()
ob = self._get_obs()
- if not crash and not joint_cons_viol:
- reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
- done = success or self._steps == self.sim_steps - 1 or stop_sim
+ if not crash:
+ reward, reward_infos = self.reward_function.compute_reward(self, a)
+ success = reward_infos['success']
+ is_collided = reward_infos['is_collided']
+ ball_pos = reward_infos['ball_pos']
+ ball_vel = reward_infos['ball_vel']
+ done = is_collided or self._steps == self.ep_length - 1
self._steps += 1
else:
- reward = -1000
+ reward = -30
success = False
+ is_collided = False
done = True
+ ball_pos = np.zeros(3)
+ ball_vel = np.zeros(3)
+
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl,
+ reward=reward,
velocity=angular_vel,
- traj=self._q_pos, is_success=success,
- is_collided=crash or joint_cons_viol)
+ # traj=self._q_pos,
+ action=a,
+ q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+ q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+ ball_pos=ball_pos,
+ ball_vel=ball_vel,
+ is_success=success,
+ is_collided=is_collided, sim_crash=crash)
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
- # TODO
+ # TODO: extend observation space
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
@@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
])
# TODO
+ @property
def active_obs(self):
- pass
-
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ # [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
if __name__ == "__main__":
- env = ALRBeerpongEnv()
- ctxt = np.array([0, -2, 0.840]) # initial
+ env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')
- env.configure(ctxt)
+ # env.configure(ctxt)
env.reset()
- env.render()
- for i in range(16000):
- # test with random actions
- ac = 0.0 * env.action_space.sample()[0:7]
- ac[1] = -0.8
- ac[3] = - 0.3
- ac[5] = -0.2
- # ac = env.start_pos
- # ac[0] += np.pi/2
+ env.render("human")
+ for i in range(800):
+ ac = 10 * env.action_space.sample()[0:7]
obs, rew, d, info = env.step(ac)
- env.render()
+ env.render("human")
print(rew)
@@ -147,4 +187,3 @@ if __name__ == "__main__":
break
env.close()
-
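
The gravity-compensation branch in `step` rescales the MuJoCo bias forces to actuator units before adding them to the commanded action. A standalone sketch of that computation (assuming a `mujoco_py` sim/model pair whose first `len(a)` actuators drive the first `len(a)` joints):

```python
import numpy as np

def gravity_compensated_action(sim, model, a):
    # bias forces (gravity + Coriolis) acting on the actuated joints ...
    bias = sim.data.qfrc_bias[:len(a)].copy()
    # ... divided by the gear ratio to convert joint-space torques into actuator commands
    gear = model.actuator_gear[:len(a), 0]
    return np.asarray(a) + bias / gear
```
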
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
index c3ca3c6..3896e82 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
@@ -1,124 +1,169 @@
import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-class BeerpongReward(alr_reward_fct.AlrReward):
- def __init__(self, sim, sim_time):
+class BeerPongReward:
+ def __init__(self):
- self.sim = sim
- self.sim_time = sim_time
+ self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+ "wrist_pitch_link_convex_decomposition_p1_geom",
+ "wrist_pitch_link_convex_decomposition_p2_geom",
+ "wrist_pitch_link_convex_decomposition_p3_geom",
+ "wrist_yaw_link_convex_decomposition_p1_geom",
+ "wrist_yaw_link_convex_decomposition_p2_geom",
+ "forearm_link_convex_decomposition_p1_geom",
+ "forearm_link_convex_decomposition_p2_geom",
+ "upper_arm_link_convex_decomposition_p1_geom",
+ "upper_arm_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p1_geom",
+ "shoulder_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p3_geom",
+ "base_link_convex_geom", "table_contact_geom"]
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
+ self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+ "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+ # "cup_base_table", "cup_base_table_contact",
+ "cup_geom_table15",
+ "cup_geom_table16",
+ "cup_geom_table17", "cup_geom1_table8",
+ # "cup_base_table_contact",
+ # "cup_base_table"
+ ]
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
self.ball_traj = None
self.dists = None
- self.dists_ctxt = None
self.dists_final = None
self.costs = None
-
+ self.action_costs = None
+ self.angle_rewards = None
+ self.cup_angles = None
+ self.cup_z_axes = None
+ self.collision_penalty = 500
self.reset(None)
def reset(self, context):
- self.ball_traj = np.zeros(shape=(self.sim_time, 3))
+ self.ball_traj = []
self.dists = []
- self.dists_ctxt = []
self.dists_final = []
self.costs = []
- self.context = context
- self.ball_in_cup = False
- self.dist_ctxt = 5
+ self.action_costs = []
+ self.angle_rewards = []
+ self.cup_angles = []
+ self.cup_z_axes = []
+ self.ball_ground_contact = False
+ self.ball_table_contact = False
+ self.ball_wall_contact = False
+ self.ball_cup_contact = False
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
- self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+ def compute_reward(self, env, action):
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+ self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+ self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+ self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+ self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+ self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+ self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
- def compute_reward(self, action, sim, step):
- action_cost = np.sum(np.square(action))
-
- stop_sim = False
- success = False
-
- if self.check_collision(sim):
- reward = - 1e-4 * action_cost - 1000
- stop_sim = True
- return reward, success, stop_sim
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+ goal_pos = env.sim.data.site_xpos[self.goal_id]
+ ball_pos = env.sim.data.body_xpos[self.ball_id]
+ goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
- self.ball_traj[step, :] = ball_pos
- # Determine the first time when ball is in cup
- if not self.ball_in_cup:
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
- self.ball_in_cup = ball_in_cup
- if ball_in_cup:
- dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
- self.dist_ctxt = dist_to_ctxt
+ action_cost = np.sum(np.square(action))
+ self.action_costs.append(action_cost)
+
+ ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.table_collision_id)
+
+ if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con
+ self.ball_table_contact = True
+
+ ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+ self.cup_collision_ids)
+ if ball_cup_cont:
+ self.ball_cup_contact = True
+
+ ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
+ if ball_wall_cont and not self.ball_table_contact:
+ self.ball_wall_contact = True
+
+ ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.ground_collision_id)
+ if ball_ground_contact and not self.ball_table_contact:
+ self.ball_ground_contact = True
+
+ self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ if env._steps == env.ep_length - 1 or self._is_collided:
- if step == self.sim_time - 1:
min_dist = np.min(self.dists)
- dist_final = self.dists_final[-1]
- # dist_ctxt = self.dists_ctxt[-1]
- # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
- cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
- reward = np.exp(-1 * cost) - 1e-4 * action_cost
- success = dist_final < 0.05 and self.dist_ctxt < 0.05
+ ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
+
+ cost_offset = 0
+
+ if self.ball_ground_contact: # or self.ball_wall_contact:
+ cost_offset += 2
+
+ if not self.ball_table_contact:
+ cost_offset += 2
+
+ if not ball_in_cup:
+ cost_offset += 2
+ cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2
+ else:
+ if self.ball_cup_contact:
+ cost_offset += 1
+ cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
+
+ reward = - 1*cost - self.collision_penalty * int(self._is_collided)
+ success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
else:
reward = - 1e-4 * action_cost
success = False
- return reward, success, stop_sim
+ infos = {}
+ infos["success"] = success
+ infos["is_collided"] = self._is_collided
+ infos["ball_pos"] = ball_pos.copy()
+ infos["action_cost"] = 5e-4 * action_cost
- def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
- if not ball_in_cup:
- cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
- else:
- cost = 2 * dist_to_ctxt ** 2
- print('Context Distance:', dist_to_ctxt)
- return cost
+ return reward, infos
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
+ def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
if collision or collision_trans:
return True
return False
- def check_collision(self, sim):
+ def _check_collision_with_itself(self, sim, collision_ids):
+ col_1, col_2 = False, False
+ for j, id in enumerate(collision_ids):
+ col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+ if j != len(collision_ids) - 1:
+ col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+ else:
+ col_2 = False
+ collision = True if col_1 or col_2 else False
+ return collision
+
+ def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
+ collision = con.geom1 in id_list and con.geom2 == id_1
+ collision_trans = con.geom1 == id_1 and con.geom2 in id_list
if collision or collision_trans:
return True
- return False
+ return False
\ No newline at end of file
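
The collision helpers above all walk the `sim.data.ncon` active contacts and test the geom pair in either order. The same pattern as a small name-based utility (a sketch, assuming a `mujoco_py` `MjSim`):

```python
def geoms_in_contact(sim, geom_name_1, geom_name_2):
    # resolve geom names to ids once, then scan the currently active contacts
    id_1 = sim.model.geom_name2id(geom_name_1)
    id_2 = sim.model.geom_name2id(geom_name_2)
    for i in range(sim.data.ncon):
        con = sim.data.contact[i]
        if (con.geom1, con.geom2) in ((id_1, id_2), (id_2, id_1)):
            return True
    return False
```
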
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
new file mode 100644
index 0000000..9d1d878
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -0,0 +1,169 @@
+import numpy as np
+
+
+class BeerPongReward:
+ def __init__(self):
+
+ self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+ "wrist_pitch_link_convex_decomposition_p1_geom",
+ "wrist_pitch_link_convex_decomposition_p2_geom",
+ "wrist_pitch_link_convex_decomposition_p3_geom",
+ "wrist_yaw_link_convex_decomposition_p1_geom",
+ "wrist_yaw_link_convex_decomposition_p2_geom",
+ "forearm_link_convex_decomposition_p1_geom",
+ "forearm_link_convex_decomposition_p2_geom",
+ "upper_arm_link_convex_decomposition_p1_geom",
+ "upper_arm_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p1_geom",
+ "shoulder_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p3_geom",
+ "base_link_convex_geom", "table_contact_geom"]
+
+ self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+ "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+ # "cup_base_table", "cup_base_table_contact",
+ "cup_geom_table15",
+ "cup_geom_table16",
+ "cup_geom_table17", "cup_geom1_table8",
+ # "cup_base_table_contact",
+ # "cup_base_table"
+ ]
+
+
+ self.ball_traj = None
+ self.dists = None
+ self.dists_final = None
+ self.costs = None
+ self.action_costs = None
+ self.angle_rewards = None
+ self.cup_angles = None
+ self.cup_z_axes = None
+ self.collision_penalty = 500
+ self.reset(None)
+
+ def reset(self, noisy):
+ self.ball_traj = []
+ self.dists = []
+ self.dists_final = []
+ self.costs = []
+ self.action_costs = []
+ self.angle_rewards = []
+ self.cup_angles = []
+ self.cup_z_axes = []
+ self.ball_ground_contact = False
+ self.ball_table_contact = False
+ self.ball_wall_contact = False
+ self.ball_cup_contact = False
+ self.noisy_bp = noisy
+ self._t_min_final_dist = -1
+
+ def compute_reward(self, env, action):
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+ self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+ self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+ self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+ self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+ self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+ self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+
+ goal_pos = env.sim.data.site_xpos[self.goal_id]
+ ball_pos = env.sim.data.body_xpos[self.ball_id]
+ ball_vel = env.sim.data.body_xvelp[self.ball_id]
+ goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+ self.dists.append(np.linalg.norm(goal_pos - ball_pos))
+ self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
+
+ action_cost = np.sum(np.square(action))
+ self.action_costs.append(action_cost)
+
+ self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ if env._steps == env.ep_length - 1 or self._is_collided:
+
+ min_dist = np.min(self.dists)
+ ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.table_collision_id)
+ ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+ self.cup_collision_ids)
+ ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.wall_collision_id)
+ ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.cup_table_collision_id)
+ if not ball_in_cup:
+ cost_offset = 2
+ if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
+ cost_offset += 2
+ cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
+ else:
+ cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
+
+ reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+ success = ball_in_cup
+ crash = self._is_collided
+ else:
+ reward = - 1e-7 * action_cost
+ cost = 0
+ success = False
+ crash = False
+
+ infos = {}
+ infos["success"] = success
+ infos["is_collided"] = self._is_collided
+ infos["ball_pos"] = ball_pos.copy()
+ infos["ball_vel"] = ball_vel.copy()
+ infos["action_cost"] = 5e-4 * action_cost
+ infos["task_cost"] = cost
+
+ return reward, infos
+
+ def get_cost_offset(self):
+ if self.ball_ground_contact:
+ return 200
+
+ if not self.ball_table_contact:
+ return 100
+
+ if not self.ball_in_cup:
+ return 50
+
+ if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
+ return 10
+
+ return 0
+
+ def _check_collision_single_objects(self, sim, id_1, id_2):
+ for coni in range(0, sim.data.ncon):
+ con = sim.data.contact[coni]
+
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+
+ if collision or collision_trans:
+ return True
+ return False
+
+ def _check_collision_with_itself(self, sim, collision_ids):
+ col_1, col_2 = False, False
+ for j, id in enumerate(collision_ids):
+ col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+ if j != len(collision_ids) - 1:
+ col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+ else:
+ col_2 = False
+ collision = True if col_1 or col_2 else False
+ return collision
+
+ def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
+ for coni in range(0, sim.data.ncon):
+ con = sim.data.contact[coni]
+
+ collision = con.geom1 in id_list and con.geom2 == id_1
+ collision_trans = con.geom1 == id_1 and con.geom2 in id_list
+
+ if collision or collision_trans:
+ return True
+ return False
\ No newline at end of file
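
For readability, the terminal cost assembled in `compute_reward` above can be restated compactly as follows (a sketch with the same constants; `had_any_contact` stands for the ball having touched the cup, the table or the wall at least once):

```python
def staged_terminal_cost(ball_in_cup, had_any_contact, min_dist, final_dist, action_cost):
    if not ball_in_cup:
        offset = 2.0 + (0.0 if had_any_contact else 2.0)
        return offset + min_dist ** 2 + 0.5 * final_dist ** 2 + 1e-7 * action_cost
    return final_dist ** 2 + 1.5 * action_cost * 1e-7
```
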
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
new file mode 100644
index 0000000..87c6b7e
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -0,0 +1,39 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ # TODO: @Max Filter observations correctly
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ # [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
+ @property
+ def start_pos(self):
+ return self._start_pos
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qpos[0:7].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qvel[0:7].copy()
+
+ @property
+ def goal_pos(self):
+ # TODO: @Max I think the default value of returning to the start is reasonable here
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
new file mode 100644
index 0000000..3b05c27
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
new file mode 100644
index 0000000..5ff94a2
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
new file mode 100644
index 0000000..c548448
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
new file mode 100644
index 0000000..495160d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..b4bb322
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..7b2f001
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..f05174e
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
new file mode 100644
index 0000000..eb252d9
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
new file mode 100644
index 0000000..0a986fa
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c039f0d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..250acaf
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..993d0f7
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
new file mode 100644
index 0000000..8448a3f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c36f88f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..d00cac1
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
new file mode 100644
index 0000000..13d2f73
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
new file mode 100644
index 0000000..0d95239
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
new file mode 100644
index 0000000..86d5a00
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@@ -0,0 +1,38 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ # TODO: @Max Filter observations correctly
+ return np.hstack([
+ [True] * 7, # Joint Pos
+ [True] * 3, # Ball pos
+ [True] * 3 # goal pos
+ ])
+
+ @property
+ def start_pos(self):
+        return self.env.init_qpos_tt
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qpos[:7].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qvel[:7].copy()
+
+ @property
+ def goal_pos(self):
+ # TODO: @Max I think the default value of returning to the start is reasonable here
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
new file mode 100644
index 0000000..635d49d
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -0,0 +1,168 @@
+import os
+
+import numpy as np
+import mujoco_py
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+
+from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
+from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
+
+# TODO: check simulation stability; make sure the code keeps running even if the sim crashes
+
+MAX_EPISODE_STEPS = 1375
+BALL_NAME_CONTACT = "target_ball_contact"
+BALL_NAME = "target_ball"
+TABLE_NAME = 'table_tennis_table'
+FLOOR_NAME = 'floor'
+PADDLE_CONTACT_1_NAME = 'bat'
+PADDLE_CONTACT_2_NAME = 'bat_back'
+RACKET_NAME = 'bat'
+# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
+CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
+CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
+
+class TT_Env_Gym(MujocoEnv, utils.EzPickle):
+
+ def __init__(self, ctxt_dim=2):
+ model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
+
+ self.ctxt_dim = ctxt_dim
+ if ctxt_dim == 2:
+ self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
+            self.goal = np.zeros(3)  # x-y target plus z height
+ elif ctxt_dim == 4:
+ self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
+ self.goal = np.zeros(3)
+ else:
+            raise ValueError("Only 2- or 4-dimensional contexts are supported.")
+
+ action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+ action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+ self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+
+ self.time_steps = 0
+ self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
+ self.init_qvel_tt = np.zeros(10)
+
+ self.reward_func = TT_Reward(self.ctxt_dim)
+ self.ball_landing_pos = None
+ self.hited_ball = False
+ self.ball_contact_after_hit = False
+ self._ids_set = False
+ super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # TODO: use a public lookup instead of the protected _body_name2id
+ self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+ self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+ self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+ self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
+ self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
+ self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+
+ def _set_ids(self):
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # TODO: use a public lookup instead of the protected _body_name2id
+ self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+ self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+ self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
+ self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
+ self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+ self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+ self._ids_set = True
+
+ def _get_obs(self):
+ ball_pos = self.sim.data.body_xpos[self.ball_id]
+ obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
+ ball_pos,
+ self.goal.copy()])
+ return obs
+
+ def sample_context(self):
+ return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+
+ def reset_model(self):
+ self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state
+ self.time_steps = 0
+ self.ball_landing_pos = None
+ self.hited_ball = False
+ self.ball_contact_after_hit = False
+ self.goal = self.sample_context()[:2]
+ if self.ctxt_dim == 2:
+ initial_ball_state = ball_init(random=False) # fixed velocity, fixed position
+ elif self.ctxt_dim == 4:
+            initial_ball_state = ball_init(random=False)  # was: raise NotImplementedError
+
+ self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
+ self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
+ self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
+
+ self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
+ self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
+ self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
+
+ z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
+ self.goal = z_extended_goal_pos
+ self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow
+ self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green
+ self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red
+ self.sim.forward()
+
+ self.reward_func.reset(self.goal) # reset the reward function
+ return self._get_obs()
+
+ def _contact_checker(self, id_1, id_2):
+ for coni in range(0, self.sim.data.ncon):
+ con = self.sim.data.contact[coni]
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+ if collision or collision_trans:
+ return True
+ return False
+
+ def step(self, action):
+ if not self._ids_set:
+ self._set_ids()
+ done = False
+        episode_end = False if self.time_steps + 1 < MAX_EPISODE_STEPS else True
+        # (remainder of step() and the rest of tt_gym.py were not preserved in this excerpt)
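
The `step` body is cut off above, but assuming it returns the usual gym `(obs, reward, done, info)` tuple, a minimal smoke test of the new table-tennis ids would be:

```python
import gym

env = gym.make("alr_envs:TableTennis2DCtxt-v0")   # or "alr_envs:TableTennis4DCtxt-v0"
obs = env.reset()                                  # samples a new goal via sample_context()
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()
```
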
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
new file mode 100644
index 0000000..7e04c28
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
@@ -0,0 +1,103 @@
+<!-- MuJoCo include (103 lines): right Barrett WAM 7-DoF arm bodies and geoms. XML body not preserved in this excerpt. -->
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
new file mode 100644
index 0000000..c313489
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
@@ -0,0 +1,30 @@
+<!-- MuJoCo include (30 lines): table tennis table geometry. XML body not preserved in this excerpt. -->
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
new file mode 100644
index 0000000..19543e2
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
@@ -0,0 +1,10 @@
+<!-- MuJoCo include (10 lines): free-floating target ball with tar:x/y/z joints. XML body not preserved in this excerpt. -->
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
new file mode 100644
index 0000000..25d0366
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
@@ -0,0 +1,47 @@
+<!-- MuJoCo include (47 lines): actuators for the seven arm joints. XML body not preserved in this excerpt. -->
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
new file mode 100644
index 0000000..e349992
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
@@ -0,0 +1,46 @@
+<!-- MuJoCo include (46 lines): shared assets and defaults. XML body not preserved in this excerpt. -->
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
new file mode 100644
index 0000000..9609a6e
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
@@ -0,0 +1,18 @@
+<!-- MuJoCo main model (18 lines): includes the shared settings, table, target ball and WAM arm files above. XML body not preserved in this excerpt. -->
\ No newline at end of file