diff --git a/README.md b/README.md
index 2232efa..edd1aac 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,25 @@
## ALR Robotics Control Environments
-This project offers a large verity of reinforcement learning environments under a unifying interface base on OpenAI gym.
-Besides, some custom environments we also provide support for the benchmark suites
-[OpenAI gym](https://gym.openai.com/),
+This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
+In addition to our custom environments, we also provide support (under the OpenAI gym interface) for the benchmark suites
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
-(DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according
+(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
-further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
+additionally support controlling agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
we only consider the mean usually).
## Motion Primitive Environments (Episodic environments)
Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box
-optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full
+optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
-velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
+velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
-framework we support the above setting for the contextual setting, for which we expose all changing substates of the
+framework we support all of the above in the contextual setting, where we expose all changing substates of the
task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each
-trajectory. All environments provide the next to the cumulative episode reward also all collected information from each
+trajectory. In addition to the cumulative episode reward, all environments provide the information collected in each
step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.
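+
+A minimal usage sketch of an MP environment (assuming the package is installed and that `alr_envs.make` and an MP environment id such as `BeerpongProMP-v0` are available in your version):
+
+```python
+import alr_envs
+
+# One step of an MP environment executes the full trajectory generated from the MP parameters.
+env = alr_envs.make("BeerpongProMP-v0", seed=1)
+obs = env.reset()
+mp_params = env.action_space.sample()  # parametrization of the motion primitive
+obs, episode_reward, done, info = env.step(mp_params)  # info contains the keys listed below
+```
+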
|Key| Description|
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 6853f0d..f835328 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -1,3 +1,4 @@
+import numpy as np
from gym import register
from . import classic_control, mujoco
@@ -9,6 +10,8 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .mujoco.reacher.alr_reacher import ALRReacherEnv
from .mujoco.reacher.balancing import BalancingEnv
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
# Classic Control
@@ -193,6 +196,31 @@ register(
}
)
+## Table Tennis
+register(
+    id='TableTennis2DCtxt-v0',
+    entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+    max_episode_steps=MAX_EPISODE_STEPS,
+    kwargs={'ctxt_dim': 2}
+)
+
+register(
+    id='TableTennis4DCtxt-v0',
+    entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+    max_episode_steps=MAX_EPISODE_STEPS,
+    kwargs={'ctxt_dim': 4}
+)
+
+## BeerPong
+difficulties = ["simple", "intermediate", "hard", "hardest"]
+
+for v, difficulty in enumerate(difficulties):
+ register(
+ id='ALRBeerPong-v{}'.format(v),
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=600,
+ kwargs={
+ "difficulty": difficulty,
+ "reward_type": "staged",
+ }
+ )
+
# Motion Primitive Environments
## Simple Reacher
@@ -330,3 +358,59 @@ for _v in _versions:
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+## Beerpong
+register(
+ id='BeerpongProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRBeerPong-v0",
+ "wrappers": [mujoco.beerpong.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 2,
+ "n_zero_bases": 2,
+ "duration": 0.5,
+ "post_traj_time": 2.5,
+ # "width": 0.01,
+ # "off": 0.01,
+ "policy_type": "motor",
+ "weights_scale": 0.08,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+ "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0")
+
+## Table Tennis
+register(
+ id='TableTennisProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:TableTennis4DCtxt-v0",
+ "wrappers": [mujoco.table_tennis.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 2,
+ "n_zero_bases": 2,
+ "duration": 1.25,
+ "post_traj_time": 4.5,
+ # "width": 0.01,
+ # "off": 0.01,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+ "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v0")
diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
index dd7321a..208f005 100644
--- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
+++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
@@ -106,7 +106,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
# self._tmp_hole_depth,
self.end_effector - self._goal,
self._steps
- ])
+ ]).astype(np.float32)
def _get_line_points(self, num_points_per_link=1):
theta = self._joint_angles[:, None]
diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
index 758f824..dac06a3 100644
--- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
+++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
@@ -73,7 +73,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
self._angle_velocity,
self.end_effector - self._goal,
self._steps
- ])
+ ]).astype(np.float32)
def _generate_goal(self):
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
index 3a1f0e5..292e40a 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -110,7 +110,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
self.end_effector - self._via_point,
self.end_effector - self._goal,
self._steps
- ])
+ ]).astype(np.float32)
def _check_collisions(self) -> bool:
return self._check_self_collision()
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 6744d15..30e1e7c 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,4 +1,6 @@
from .reacher.alr_reacher import ALRReacherEnv
from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
\ No newline at end of file
+from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
+from .table_tennis.tt_gym import TT_Env_Gym
+from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py
index e69de29..989b5a9 100644
--- a/alr_envs/alr/mujoco/beerpong/__init__.py
+++ b/alr_envs/alr/mujoco/beerpong/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
new file mode 100644
index 0000000..e96d2bc
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -0,0 +1,187 @@
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 4d8f389..a10e54a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -1,3 +1,4 @@
+import mujoco_py.builder
import os
import numpy as np
@@ -5,44 +6,67 @@ from gym import utils
from gym.envs.mujoco import MujocoEnv
-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
- utils.EzPickle.__init__(**locals())
- MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
+class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
+ context: np.ndarray = None, difficulty='simple'):
self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
- "beerpong" + ".xml")
-
- self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
- self.start_vel = np.zeros(7)
-
- self._q_pos = []
- self._q_vel = []
- # self.weight_matrix_scale = 50
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
- self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
- self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
+ "beerpong_wo_cup" + ".xml")
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
- self.context = None
- # alr_mujoco_env.AlrMujocoEnv.__init__(self,
- # self.xml_path,
- # apply_gravity_comp=apply_gravity_comp,
- # n_substeps=n_substeps)
+ self.context = context
+ self.apply_gravity_comp = apply_gravity_comp
+ self.add_noise = noisy
- self.sim_time = 8 # seconds
- self.sim_steps = int(self.sim_time / self.dt)
- if reward_function is None:
- from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward
- reward_function = BeerpongReward
- self.reward_function = reward_function(self.sim, self.sim_steps)
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+ self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
+ self._start_vel = np.zeros(7)
+
+ self.ball_site_id = 0
+ self.ball_id = 11
+
+ self._release_step = 100 # time step of ball release
+
+ self.sim_time = 4 # seconds
+ self.ep_length = 600 # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt)
+ self.cup_table_id = 10
+
+ if noisy:
+ self.noise_std = 0.01
+ else:
+ self.noise_std = 0
+
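+        # goal position (x, y, z) of the cup on the table; applied to the cup_table body in reset_model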
+ if difficulty == 'simple':
+ self.cup_goal_pos = np.array([0, -1.7, 0.840])
+ elif difficulty == 'intermediate':
+ self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
+ elif difficulty == 'hard':
+ self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
+        elif difficulty == 'hardest':
+            self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
+        else:
+            raise ValueError("Unknown difficulty: {}".format(difficulty))
+
+ if reward_type == "no_context":
+ from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
+ reward_function = BeerPongReward
+ elif reward_type == "staged":
+ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+ reward_function = BeerPongReward
+ else:
+ raise ValueError("Unknown reward type: {}".format(reward_type))
+ self.reward_function = reward_function()
+
+ MujocoEnv.__init__(self, self.xml_path, frame_skip)
+ utils.EzPickle.__init__(self)
+
+ @property
+ def start_pos(self):
+ return self._start_pos
+
+ @property
+ def start_vel(self):
+ return self._start_vel
@property
def current_pos(self):
@@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
def current_vel(self):
return self.sim.data.qvel[0:7].copy()
- def configure(self, context):
- self.context = context
- self.reward_function.reset(context)
+ def reset(self):
+ self.reward_function.reset(self.add_noise)
+ return super().reset()
def reset_model(self):
init_pos_all = self.init_qpos.copy()
@@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
init_vel = np.zeros_like(init_pos_all)
self._steps = 0
- self._q_pos = []
- self._q_vel = []
start_pos = init_pos_all
start_pos[0:7] = init_pos_robot
- # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
self.set_state(start_pos, init_vel)
-
- ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
- self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
- self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
-
+ self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
+ start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+ self.set_state(start_pos, init_vel)
return self._get_obs()
def step(self, a):
@@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
angular_vel = 0.0
reward_ctrl = - np.square(a).sum()
- crash = self.do_simulation(a)
- joint_cons_viol = self.check_traj_in_joint_limits()
-
- self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
- self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
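+        # gravity compensation: add the bias (gravity/Coriolis) torques, converted to actuator units via the gear ratios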
+ if self.apply_gravity_comp:
+ a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+ try:
+ self.do_simulation(a, self.frame_skip)
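+            # keep the ball at the ball site until the release step; at release, optionally add Gaussian noise to its velocity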
+ if self._steps < self._release_step:
+ self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+ self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
+ elif self._steps == self._release_step and self.add_noise:
+ self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+ crash = False
+ except mujoco_py.builder.MujocoException:
+ crash = True
+ # joint_cons_viol = self.check_traj_in_joint_limits()
ob = self._get_obs()
- if not crash and not joint_cons_viol:
- reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
- done = success or self._steps == self.sim_steps - 1 or stop_sim
+ if not crash:
+ reward, reward_infos = self.reward_function.compute_reward(self, a)
+ success = reward_infos['success']
+ is_collided = reward_infos['is_collided']
+ ball_pos = reward_infos['ball_pos']
+ ball_vel = reward_infos['ball_vel']
+ done = is_collided or self._steps == self.ep_length - 1
self._steps += 1
else:
- reward = -1000
+ reward = -30
success = False
+ is_collided = False
done = True
+ ball_pos = np.zeros(3)
+ ball_vel = np.zeros(3)
+
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl,
+ reward=reward,
velocity=angular_vel,
- traj=self._q_pos, is_success=success,
- is_collided=crash or joint_cons_viol)
+ # traj=self._q_pos,
+ action=a,
+ q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+ q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+ ball_pos=ball_pos,
+ ball_vel=ball_vel,
+ is_success=success,
+ is_collided=is_collided, sim_crash=crash)
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
- # TODO
+ # TODO: extend observation space
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
@@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
])
# TODO
+ @property
def active_obs(self):
- pass
-
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ # [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
if __name__ == "__main__":
- env = ALRBeerpongEnv()
- ctxt = np.array([0, -2, 0.840]) # initial
+ env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')
- env.configure(ctxt)
+ # env.configure(ctxt)
env.reset()
- env.render()
- for i in range(16000):
- # test with random actions
- ac = 0.0 * env.action_space.sample()[0:7]
- ac[1] = -0.8
- ac[3] = - 0.3
- ac[5] = -0.2
- # ac = env.start_pos
- # ac[0] += np.pi/2
+ env.render("human")
+ for i in range(800):
+ ac = 10 * env.action_space.sample()[0:7]
obs, rew, d, info = env.step(ac)
- env.render()
+ env.render("human")
print(rew)
@@ -147,4 +187,3 @@ if __name__ == "__main__":
break
env.close()
-
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
index c3ca3c6..3896e82 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
@@ -1,124 +1,169 @@
import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-class BeerpongReward(alr_reward_fct.AlrReward):
- def __init__(self, sim, sim_time):
+class BeerPongReward:
+ def __init__(self):
- self.sim = sim
- self.sim_time = sim_time
+ self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+ "wrist_pitch_link_convex_decomposition_p1_geom",
+ "wrist_pitch_link_convex_decomposition_p2_geom",
+ "wrist_pitch_link_convex_decomposition_p3_geom",
+ "wrist_yaw_link_convex_decomposition_p1_geom",
+ "wrist_yaw_link_convex_decomposition_p2_geom",
+ "forearm_link_convex_decomposition_p1_geom",
+ "forearm_link_convex_decomposition_p2_geom",
+ "upper_arm_link_convex_decomposition_p1_geom",
+ "upper_arm_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p1_geom",
+ "shoulder_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p3_geom",
+ "base_link_convex_geom", "table_contact_geom"]
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
+ self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+ "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+ # "cup_base_table", "cup_base_table_contact",
+ "cup_geom_table15",
+ "cup_geom_table16",
+ "cup_geom_table17", "cup_geom1_table8",
+ # "cup_base_table_contact",
+ # "cup_base_table"
+ ]
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
self.ball_traj = None
self.dists = None
- self.dists_ctxt = None
self.dists_final = None
self.costs = None
-
+ self.action_costs = None
+ self.angle_rewards = None
+ self.cup_angles = None
+ self.cup_z_axes = None
+ self.collision_penalty = 500
self.reset(None)
def reset(self, context):
- self.ball_traj = np.zeros(shape=(self.sim_time, 3))
+ self.ball_traj = []
self.dists = []
- self.dists_ctxt = []
self.dists_final = []
self.costs = []
- self.context = context
- self.ball_in_cup = False
- self.dist_ctxt = 5
+ self.action_costs = []
+ self.angle_rewards = []
+ self.cup_angles = []
+ self.cup_z_axes = []
+ self.ball_ground_contact = False
+ self.ball_table_contact = False
+ self.ball_wall_contact = False
+ self.ball_cup_contact = False
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
- self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+ def compute_reward(self, env, action):
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+ self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+ self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+ self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+ self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+ self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+ self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
- def compute_reward(self, action, sim, step):
- action_cost = np.sum(np.square(action))
-
- stop_sim = False
- success = False
-
- if self.check_collision(sim):
- reward = - 1e-4 * action_cost - 1000
- stop_sim = True
- return reward, success, stop_sim
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+ goal_pos = env.sim.data.site_xpos[self.goal_id]
+ ball_pos = env.sim.data.body_xpos[self.ball_id]
+ goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
- self.ball_traj[step, :] = ball_pos
- # Determine the first time when ball is in cup
- if not self.ball_in_cup:
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
- self.ball_in_cup = ball_in_cup
- if ball_in_cup:
- dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
- self.dist_ctxt = dist_to_ctxt
+ action_cost = np.sum(np.square(action))
+ self.action_costs.append(action_cost)
+
+ ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.table_collision_id)
+
+ if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con
+ self.ball_table_contact = True
+
+ ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+ self.cup_collision_ids)
+ if ball_cup_cont:
+ self.ball_cup_contact = True
+
+ ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
+ if ball_wall_cont and not self.ball_table_contact:
+ self.ball_wall_contact = True
+
+ ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.ground_collision_id)
+ if ball_ground_contact and not self.ball_table_contact:
+ self.ball_ground_contact = True
+
+ self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ if env._steps == env.ep_length - 1 or self._is_collided:
- if step == self.sim_time - 1:
min_dist = np.min(self.dists)
- dist_final = self.dists_final[-1]
- # dist_ctxt = self.dists_ctxt[-1]
- # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
- cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
- reward = np.exp(-1 * cost) - 1e-4 * action_cost
- success = dist_final < 0.05 and self.dist_ctxt < 0.05
+ ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
+
+ cost_offset = 0
+
+ if self.ball_ground_contact: # or self.ball_wall_contact:
+ cost_offset += 2
+
+ if not self.ball_table_contact:
+ cost_offset += 2
+
+ if not ball_in_cup:
+ cost_offset += 2
+ cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2
+ else:
+ if self.ball_cup_contact:
+ cost_offset += 1
+ cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
+
+ reward = - 1*cost - self.collision_penalty * int(self._is_collided)
+ success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
else:
reward = - 1e-4 * action_cost
success = False
- return reward, success, stop_sim
+ infos = {}
+ infos["success"] = success
+ infos["is_collided"] = self._is_collided
+ infos["ball_pos"] = ball_pos.copy()
+ infos["action_cost"] = 5e-4 * action_cost
- def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
- if not ball_in_cup:
- cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
- else:
- cost = 2 * dist_to_ctxt ** 2
- print('Context Distance:', dist_to_ctxt)
- return cost
+ return reward, infos
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
+ def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
if collision or collision_trans:
return True
return False
- def check_collision(self, sim):
+ def _check_collision_with_itself(self, sim, collision_ids):
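+        # returns True if any two distinct geoms from collision_ids are currently in contact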
+ col_1, col_2 = False, False
+ for j, id in enumerate(collision_ids):
+ col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+ if j != len(collision_ids) - 1:
+ col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+ else:
+ col_2 = False
+        collision = col_1 or col_2
+ return collision
+
+ def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
+ collision = con.geom1 in id_list and con.geom2 == id_1
+ collision_trans = con.geom1 == id_1 and con.geom2 in id_list
if collision or collision_trans:
return True
- return False
+ return False
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
new file mode 100644
index 0000000..9d1d878
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -0,0 +1,169 @@
+import numpy as np
+
+
+class BeerPongReward:
+ def __init__(self):
+
+ self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+ "wrist_pitch_link_convex_decomposition_p1_geom",
+ "wrist_pitch_link_convex_decomposition_p2_geom",
+ "wrist_pitch_link_convex_decomposition_p3_geom",
+ "wrist_yaw_link_convex_decomposition_p1_geom",
+ "wrist_yaw_link_convex_decomposition_p2_geom",
+ "forearm_link_convex_decomposition_p1_geom",
+ "forearm_link_convex_decomposition_p2_geom",
+ "upper_arm_link_convex_decomposition_p1_geom",
+ "upper_arm_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p1_geom",
+ "shoulder_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p3_geom",
+ "base_link_convex_geom", "table_contact_geom"]
+
+ self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+ "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+ # "cup_base_table", "cup_base_table_contact",
+ "cup_geom_table15",
+ "cup_geom_table16",
+ "cup_geom_table17", "cup_geom1_table8",
+ # "cup_base_table_contact",
+ # "cup_base_table"
+ ]
+
+
+ self.ball_traj = None
+ self.dists = None
+ self.dists_final = None
+ self.costs = None
+ self.action_costs = None
+ self.angle_rewards = None
+ self.cup_angles = None
+ self.cup_z_axes = None
+ self.collision_penalty = 500
+ self.reset(None)
+
+ def reset(self, noisy):
+ self.ball_traj = []
+ self.dists = []
+ self.dists_final = []
+ self.costs = []
+ self.action_costs = []
+ self.angle_rewards = []
+ self.cup_angles = []
+ self.cup_z_axes = []
+ self.ball_ground_contact = False
+ self.ball_table_contact = False
+ self.ball_wall_contact = False
+ self.ball_cup_contact = False
+ self.noisy_bp = noisy
+ self._t_min_final_dist = -1
+
+ def compute_reward(self, env, action):
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+ self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+ self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+ self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+ self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+ self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+ self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+
+ goal_pos = env.sim.data.site_xpos[self.goal_id]
+ ball_pos = env.sim.data.body_xpos[self.ball_id]
+ ball_vel = env.sim.data.body_xvelp[self.ball_id]
+ goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+ self.dists.append(np.linalg.norm(goal_pos - ball_pos))
+ self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
+
+ action_cost = np.sum(np.square(action))
+ self.action_costs.append(action_cost)
+
+ self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ if env._steps == env.ep_length - 1 or self._is_collided:
+
+ min_dist = np.min(self.dists)
+ ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.table_collision_id)
+ ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+ self.cup_collision_ids)
+ ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.wall_collision_id)
+ ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.cup_table_collision_id)
+ if not ball_in_cup:
+ cost_offset = 2
+ if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
+ cost_offset += 2
+ cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
+ else:
+ cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
+
+ reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+ success = ball_in_cup
+ crash = self._is_collided
+ else:
+ reward = - 1e-7 * action_cost
+ cost = 0
+ success = False
+ crash = False
+
+ infos = {}
+ infos["success"] = success
+ infos["is_collided"] = self._is_collided
+ infos["ball_pos"] = ball_pos.copy()
+ infos["ball_vel"] = ball_vel.copy()
+ infos["action_cost"] = 5e-4 * action_cost
+ infos["task_cost"] = cost
+
+ return reward, infos
+
+ def get_cost_offset(self):
+ if self.ball_ground_contact:
+ return 200
+
+ if not self.ball_table_contact:
+ return 100
+
+ if not self.ball_in_cup:
+ return 50
+
+ if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
+ return 10
+
+ return 0
+
+ def _check_collision_single_objects(self, sim, id_1, id_2):
+ for coni in range(0, sim.data.ncon):
+ con = sim.data.contact[coni]
+
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+
+ if collision or collision_trans:
+ return True
+ return False
+
+ def _check_collision_with_itself(self, sim, collision_ids):
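+        # returns True if any two distinct geoms from collision_ids are currently in contact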
+ col_1, col_2 = False, False
+ for j, id in enumerate(collision_ids):
+ col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+ if j != len(collision_ids) - 1:
+ col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+ else:
+ col_2 = False
+        collision = col_1 or col_2
+ return collision
+
+ def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
+ for coni in range(0, sim.data.ncon):
+ con = sim.data.contact[coni]
+
+ collision = con.geom1 in id_list and con.geom2 == id_1
+ collision_trans = con.geom1 == id_1 and con.geom2 in id_list
+
+ if collision or collision_trans:
+ return True
+ return False
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
new file mode 100644
index 0000000..87c6b7e
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -0,0 +1,39 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ # TODO: @Max Filter observations correctly
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ # [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
+ @property
+ def start_pos(self):
+ return self._start_pos
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qpos[0:7].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qvel[0:7].copy()
+
+ @property
+ def goal_pos(self):
+ # TODO: @Max I think the default value of returning to the start is reasonable here
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
new file mode 100644
index 0000000..3b05c27
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
new file mode 100644
index 0000000..5ff94a2
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
new file mode 100644
index 0000000..c548448
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
new file mode 100644
index 0000000..495160d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..b4bb322
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..7b2f001
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..f05174e
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
new file mode 100644
index 0000000..eb252d9
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
new file mode 100644
index 0000000..0a986fa
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c039f0d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..250acaf
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..993d0f7
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
new file mode 100644
index 0000000..8448a3f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c36f88f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..d00cac1
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
new file mode 100644
index 0000000..13d2f73
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
new file mode 100644
index 0000000..0d95239
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
new file mode 100644
index 0000000..86d5a00
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@@ -0,0 +1,38 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ # TODO: @Max Filter observations correctly
+ return np.hstack([
+ [True] * 7, # Joint Pos
+ [True] * 3, # Ball pos
+ [True] * 3 # goal pos
+ ])
+
+ @property
+ def start_pos(self):
+        return self.init_qpos_tt
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qpos[:7].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qvel[:7].copy()
+
+ @property
+ def goal_pos(self):
+ # TODO: @Max I think the default value of returning to the start is reasonable here
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
new file mode 100644
index 0000000..635d49d
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -0,0 +1,168 @@
+import os
+
+import numpy as np
+import mujoco_py
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+
+from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
+from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
+
+#TODO: Check for simulation stability. Make sure the code runs even for sim crash
+
+MAX_EPISODE_STEPS = 1375
+BALL_NAME_CONTACT = "target_ball_contact"
+BALL_NAME = "target_ball"
+TABLE_NAME = 'table_tennis_table'
+FLOOR_NAME = 'floor'
+PADDLE_CONTACT_1_NAME = 'bat'
+PADDLE_CONTACT_2_NAME = 'bat_back'
+RACKET_NAME = 'bat'
+# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
+CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
+CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
+
+class TT_Env_Gym(MujocoEnv, utils.EzPickle):
+
+ def __init__(self, ctxt_dim=2):
+ model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
+
+ self.ctxt_dim = ctxt_dim
+ if ctxt_dim == 2:
+ self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
+ self.goal = np.zeros(3) # 2 x,y + 1z
+ elif ctxt_dim == 4:
+ self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
+ self.goal = np.zeros(3)
+ else:
+ raise ValueError("either 2 or 4 dimensional Contexts available")
+
+ action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+ action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+ self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+
+ self.time_steps = 0
+ self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
+ self.init_qvel_tt = np.zeros(10)
+
+ self.reward_func = TT_Reward(self.ctxt_dim)
+ self.ball_landing_pos = None
+ self.hited_ball = False
+ self.ball_contact_after_hit = False
+ self._ids_set = False
+ super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+ self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
+ self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+ self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+ self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+ self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
+ self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
+ self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+
+ def _set_ids(self):
+ self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
+ self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+ self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+ self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
+ self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
+ self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+ self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+ self._ids_set = True
+
+ def _get_obs(self):
+ ball_pos = self.sim.data.body_xpos[self.ball_id]
+ obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
+ ball_pos,
+ self.goal.copy()])
+ return obs
+
+ def sample_context(self):
+ return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+
+ def reset_model(self):
+ self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state
+ self.time_steps = 0
+ self.ball_landing_pos = None
+ self.hited_ball = False
+ self.ball_contact_after_hit = False
+ self.goal = self.sample_context()[:2]
+ if self.ctxt_dim == 2:
+ initial_ball_state = ball_init(random=False) # fixed velocity, fixed position
+ elif self.ctxt_dim == 4:
+            initial_ball_state = ball_init(random=False)  # TODO: dedicated 4D-context initialization (was: raise NotImplementedError)
+
+ self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
+ self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
+ self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
+
+ self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
+ self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
+ self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
+
+ z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
+ self.goal = z_extended_goal_pos
+ self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow
+ self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green
+ self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red
+ self.sim.forward()
+
+ self.reward_func.reset(self.goal) # reset the reward function
+ return self._get_obs()
+
+ def _contact_checker(self, id_1, id_2):
+ for coni in range(0, self.sim.data.ncon):
+ con = self.sim.data.contact[coni]
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+ if collision or collision_trans:
+ return True
+ return False
+
+ def step(self, action):
+ if not self._ids_set:
+ self._set_ids()
+ done = False
+        episode_end = False if self.time_steps + 1 < MAX_EPISODE_STEPS else True
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
new file mode 100644
index 0000000..7e04c28
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
@@ -0,0 +1,103 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
new file mode 100644
index 0000000..c313489
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
@@ -0,0 +1,30 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
new file mode 100644
index 0000000..19543e2
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
@@ -0,0 +1,10 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
new file mode 100644
index 0000000..25d0366
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
@@ -0,0 +1,47 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
new file mode 100644
index 0000000..e349992
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
@@ -0,0 +1,46 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
new file mode 100644
index 0000000..9609a6e
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
@@ -0,0 +1,18 @@
\ No newline at end of file
diff --git a/alr_envs/dmc/dmc_wrapper.py b/alr_envs/dmc/dmc_wrapper.py
index 10f1af9..aa6c7aa 100644
--- a/alr_envs/dmc/dmc_wrapper.py
+++ b/alr_envs/dmc/dmc_wrapper.py
@@ -15,10 +15,10 @@ def _spec_to_box(spec):
assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
dim = int(np.prod(s.shape))
if type(s) == specs.Array:
- bound = np.inf * np.ones(dim, dtype=np.float32)
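+            # keep the spec's own dtype instead of forcing float32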
+ bound = np.inf * np.ones(dim, dtype=s.dtype)
return -bound, bound
elif type(s) == specs.BoundedArray:
- zeros = np.zeros(dim, dtype=np.float32)
+ zeros = np.zeros(dim, dtype=s.dtype)
return s.minimum + zeros, s.maximum + zeros
mins, maxs = [], []
@@ -29,7 +29,7 @@ def _spec_to_box(spec):
low = np.concatenate(mins, axis=0)
high = np.concatenate(maxs, axis=0)
assert low.shape == high.shape
- return spaces.Box(low, high, dtype=np.float32)
+ return spaces.Box(low, high, dtype=s.dtype)
def _flatten_obs(obs: collections.MutableMapping):
@@ -113,7 +113,7 @@ class DMCWrapper(core.Env):
if self._channels_first:
obs = obs.transpose(2, 0, 1).copy()
else:
- obs = _flatten_obs(time_step.observation)
+ obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
return obs
@property