Merge branch 'master' into deprecate_detpmp

# Conflicts: # README.md # alr_envs/alr/__init__.py
2021-12-01 16:34:18 +01:00 · 2021-12-01 16:34:18 +01:00 · 083e937e17
commit 083e937e17
parent 8b88ce3476 22ac9a0317
42 changed files with 1277 additions and 165 deletions
--- a/README.md
+++ b/README.md
@ -1,26 +1,25 @@
 ## ALR Robotics Control Environments

-This project offers a large verity of reinforcement learning environments under a unifying interface base on OpenAI gym.
-Besides, some custom environments we also provide support for the benchmark suites
-[OpenAI gym](https://gym.openai.com/),
+This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
+Besides, we also provide support (under the OpenAI interface) for the benchmark suites
 [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
-(DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according
+(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
 to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
-further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
+additionally support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
 we only consider the mean usually).

 ## Motion Primitive Environments (Episodic environments)

 Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box
-optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full
+optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
 trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
 generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
 however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
-velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
+velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
 represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
-framework we support the above setting for the contextual setting, for which we expose all changing substates of the
+framework we support all of this also for the contextual setting, for which we expose all changing substates of the
 task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each
-trajectory. All environments provide the next to the cumulative episode reward also all collected information from each
+trajectory. All environments provide next to the cumulative episode reward all collected information from each
 step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.

 |Key| Description|
--- a/alr_envs/alr/init.py
+++ b/alr_envs/alr/init.py
@ -1,3 +1,4 @@
+import numpy as np
 from gym import register

 from . import classic_control, mujoco
@ -9,6 +10,8 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .mujoco.reacher.alr_reacher import ALRReacherEnv
 from .mujoco.reacher.balancing import BalancingEnv

+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}

 # Classic Control
@ -193,6 +196,31 @@ register(
    }
 )

+## Table Tennis
+register(id='TableTennis2DCtxt-v0',
+         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim':2})
+
+register(id='TableTennis4DCtxt-v0',
+         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim':4})
+
+## BeerPong
+difficulties = ["simple", "intermediate", "hard", "hardest"]
+
+for v, difficulty in enumerate(difficulties):
+    register(
+        id='ALRBeerPong-v{}'.format(v),
+        entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+        max_episode_steps=600,
+        kwargs={
+            "difficulty": difficulty,
+            "reward_type": "staged",
+        }
+    )
+
 # Motion Primitive Environments

 ## Simple Reacher
@ -330,3 +358,59 @@ for _v in _versions:
        }
    )
    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+## Beerpong
+register(
+    id='BeerpongProMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    kwargs={
+        "name": "alr_envs:ALRBeerPong-v0",
+        "wrappers": [mujoco.beerpong.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 7,
+            "num_basis": 2,
+            "n_zero_bases": 2,
+            "duration": 0.5,
+            "post_traj_time": 2.5,
+            # "width": 0.01,
+            # "off": 0.01,
+            "policy_type": "motor",
+            "weights_scale": 0.08,
+            "zero_start": True,
+            "zero_goal": False,
+            "policy_kwargs": {
+                "p_gains": np.array([       1.5,   5,   2.55,    3,   2.,    2,   1.25]),
+                "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+            }
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0")
+
+## Table Tennis
+register(
+    id='TableTennisProMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    kwargs={
+        "name": "alr_envs:TableTennis4DCtxt-v0",
+        "wrappers": [mujoco.table_tennis.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 7,
+            "num_basis": 2,
+            "n_zero_bases": 2,
+            "duration": 1.25,
+            "post_traj_time": 4.5,
+            # "width": 0.01,
+            # "off": 0.01,
+            "policy_type": "motor",
+            "weights_scale": 1.0,
+            "zero_start": True,
+            "zero_goal": False,
+            "policy_kwargs": {
+                "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+                "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+            }
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v0")
--- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
+++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
@ -106,7 +106,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
            # self._tmp_hole_depth,
            self.end_effector - self._goal,
            self._steps
-        ])
+        ]).astype(np.float32)

    def _get_line_points(self, num_points_per_link=1):
        theta = self._joint_angles[:, None]
--- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
+++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
@ -73,7 +73,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
            self._angle_velocity,
            self.end_effector - self._goal,
            self._steps
-        ])
+        ]).astype(np.float32)

    def _generate_goal(self):

--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
@ -110,7 +110,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
            self.end_effector - self._via_point,
            self.end_effector - self._goal,
            self._steps
-        ])
+        ]).astype(np.float32)

    def _check_collisions(self) -> bool:
        return self._check_self_collision()
--- a/alr_envs/alr/mujoco/init.py
+++ b/alr_envs/alr/mujoco/init.py
@ -2,3 +2,5 @@ from .reacher.alr_reacher import ALRReacherEnv
 from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
+from .table_tennis.tt_gym import TT_Env_Gym
+from .beerpong.beerpong import ALRBeerBongEnv
--- a/alr_envs/alr/mujoco/beerpong/init.py
+++ b/alr_envs/alr/mujoco/beerpong/init.py
@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@ -0,0 +1,187 @@
+<mujoco model="wam(v1.31)">
+    <compiler angle="radian" meshdir="../../meshes/wam/" />
+    <option timestep="0.005" integrator="Euler" />
+    <size njmax="500" nconmax="100" />
+    <default class="main">
+        <joint limited="true" frictionloss="0.001" damping="0.07"/>
+        <default class="viz">
+            <geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
+        </default>
+        <default class="col">
+            <geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
+        </default>
+    </default>
+    <asset>
+        <texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
+        <material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
+        <mesh name="base_link_fine" file="base_link_fine.stl" />
+        <mesh name="base_link_convex" file="base_link_convex.stl" />
+        <mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
+        <mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
+        <mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
+        <mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
+        <mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
+        <mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
+        <mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
+        <mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
+        <mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
+        <mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
+        <mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
+        <mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
+        <mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
+        <mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
+        <mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
+        <mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
+        <mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
+        <mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
+        <mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
+        <mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
+        <mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
+        <mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
+        <mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
+        <mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
+    </asset>
+
+    <worldbody>
+        <geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
+        <light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
+
+        <body name="wam/base_link" pos="0 0 0.6">
+            <inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
+            <geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
+            <geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
+            <body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
+                <inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
+                <joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
+                <geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
+                <geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
+                <geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
+                <geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
+                <body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
+                    <inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
+                    <joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
+                    <geom class="viz" mesh="shoulder_pitch_link_fine" />
+                    <geom class="col" mesh="shoulder_pitch_link_convex" />
+                    <body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
+                        <inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
+                        <joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
+                        <geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
+                        <geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
+                        <geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
+                        <body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
+                            <inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
+                            <joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
+                            <geom class="viz" mesh="elbow_link_fine" />
+                            <geom class="col" mesh="elbow_link_convex" />
+                            <geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
+                            <geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
+                            <geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
+                            <body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
+                                <inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
+                                <joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
+                                <geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
+                                <geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
+                                <geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
+                                <body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
+                                    <inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
+                                    <joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
+                                    <geom class="viz" mesh="wrist_pitch_link_fine" />
+                                    <geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
+                                    <geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
+                                    <geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
+                                    <body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
+                                        <inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
+                                        <joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
+                                        <geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
+                                        <geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
+                                        <site name="init_ball_pos_site" size="0.025 0.025 0.025" pos="0.0 0.0 0.035" rgba="0 1 0 1"/>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+        <body name="table_body" pos="0 -1.85 0.4025">
+            <geom name="table" type="box" size="0.4 0.6 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+            <geom name="table_contact_geom" type="box" size="0.4 0.6 0.1" pos="0 0 0.31" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+        </body>
+        <geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+        <geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+
+        <body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">
+            <inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
+            <geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
+            <geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
+            <geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.07 0.01" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
+            <geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
+            <geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
+            <site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
+            <site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
+        </body>
+        <body name="ball" pos="0 0 0">
+            <joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
+            <joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
+            <joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
+            <geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
+                  friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
+            <site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
+        </body>
+<!--        <camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>-->
+<!--        <camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />-->
+        <camera name="visualization" mode="fixed" euler="2.35 2.0 -0.75" pos="2.0 -0.0 1.85"/>
+    </worldbody>
+     <actuator>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="wam/shoulder_pitch_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="50.0" joint="wam/shoulder_yaw_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
+    </actuator>
+
+</mujoco>
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@ -1,3 +1,4 @@
+import mujoco_py.builder
 import os

 import numpy as np
@ -5,44 +6,67 @@ from gym import utils
 from gym.envs.mujoco import MujocoEnv


-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
-    def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
-        utils.EzPickle.__init__(**locals())
-        MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
+class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
+                 context: np.ndarray = None, difficulty='simple'):
        self._steps = 0

        self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
-                                     "beerpong" + ".xml")
-
-        self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
-        self.start_vel = np.zeros(7)
-
-        self._q_pos = []
-        self._q_vel = []
-        # self.weight_matrix_scale = 50
-        self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-        self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
-        self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
+                                     "beerpong_wo_cup" + ".xml")

        self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
        self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])

-        self.context = None
-        # alr_mujoco_env.AlrMujocoEnv.__init__(self,
-        #                                      self.xml_path,
-        #                                      apply_gravity_comp=apply_gravity_comp,
-        #                                      n_substeps=n_substeps)
+        self.context = context
+        self.apply_gravity_comp = apply_gravity_comp
+        self.add_noise = noisy

-        self.sim_time = 8  # seconds
-        self.sim_steps = int(self.sim_time / self.dt)
-        if reward_function is None:
-            from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward
-            reward_function = BeerpongReward
-        self.reward_function = reward_function(self.sim, self.sim_steps)
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+        self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
+        self._start_vel = np.zeros(7)
+
+        self.ball_site_id = 0
+        self.ball_id = 11
+
+        self._release_step = 100  # time step of ball release
+
+        self.sim_time = 4  # seconds
+        self.ep_length = 600  # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt)
+        self.cup_table_id = 10
+
+        if noisy:
+            self.noise_std = 0.01
+        else:
+            self.noise_std = 0
+
+        if difficulty == 'simple':
+            self.cup_goal_pos = np.array([0, -1.7, 0.840])
+        elif difficulty == 'intermediate':
+            self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
+        elif difficulty == 'hard':
+            self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
+        elif difficulty == 'hardest':
+            self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
+
+        if reward_type == "no_context":
+            from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
+            reward_function = BeerPongReward
+        elif reward_type == "staged":
+            from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+            reward_function = BeerPongReward
+        else:
+            raise ValueError("Unknown reward type: {}".format(reward_type))
+        self.reward_function = reward_function()
+
+        MujocoEnv.__init__(self, self.xml_path, frame_skip)
+        utils.EzPickle.__init__(self)
+
+    @property
+    def start_pos(self):
+        return self._start_pos
+
+    @property
+    def start_vel(self):
+        return self._start_vel

    @property
    def current_pos(self):
@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
    def current_vel(self):
        return self.sim.data.qvel[0:7].copy()

-    def configure(self, context):
-        self.context = context
-        self.reward_function.reset(context)
+    def reset(self):
+        self.reward_function.reset(self.add_noise)
+        return super().reset()

    def reset_model(self):
        init_pos_all = self.init_qpos.copy()
@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
        init_vel = np.zeros_like(init_pos_all)

        self._steps = 0
-        self._q_pos = []
-        self._q_vel = []

        start_pos = init_pos_all
        start_pos[0:7] = init_pos_robot
-        # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])

        self.set_state(start_pos, init_vel)
-
-        ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
-        self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
-        self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
-
+        self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
+        start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+        self.set_state(start_pos, init_vel)
        return self._get_obs()

    def step(self, a):
@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
        angular_vel = 0.0
        reward_ctrl = - np.square(a).sum()

-        crash = self.do_simulation(a)
-        joint_cons_viol = self.check_traj_in_joint_limits()
-
-        self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
-        self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
+        if self.apply_gravity_comp:
+            a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+        try:
+            self.do_simulation(a, self.frame_skip)
+            if self._steps < self._release_step:
+                self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+                self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
+            elif self._steps == self._release_step and self.add_noise:
+                 self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+            crash = False
+        except mujoco_py.builder.MujocoException:
+            crash = True
+        # joint_cons_viol = self.check_traj_in_joint_limits()

        ob = self._get_obs()

-        if not crash and not joint_cons_viol:
-            reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
-            done = success or self._steps == self.sim_steps - 1 or stop_sim
+        if not crash:
+            reward, reward_infos = self.reward_function.compute_reward(self, a)
+            success = reward_infos['success']
+            is_collided = reward_infos['is_collided']
+            ball_pos = reward_infos['ball_pos']
+            ball_vel = reward_infos['ball_vel']
+            done = is_collided or self._steps == self.ep_length - 1
            self._steps += 1
        else:
-            reward = -1000
+            reward = -30
            success = False
+            is_collided = False
            done = True
+            ball_pos = np.zeros(3)
+            ball_vel = np.zeros(3)
+
        return ob, reward, done, dict(reward_dist=reward_dist,
                                      reward_ctrl=reward_ctrl,
+                                      reward=reward,
                                      velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
-                                      is_collided=crash or joint_cons_viol)
+                                      # traj=self._q_pos,
+                                      action=a,
+                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                                      ball_pos=ball_pos,
+                                      ball_vel=ball_vel,
+                                      is_success=success,
+                                      is_collided=is_collided, sim_crash=crash)

    def check_traj_in_joint_limits(self):
        return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)

-    # TODO
+    # TODO: extend observation space
    def _get_obs(self):
        theta = self.sim.data.qpos.flat[:7]
        return np.concatenate([
@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
        ])

    # TODO
+    @property
    def active_obs(self):
-        pass
-
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])


 if __name__ == "__main__":
-    env = ALRBeerpongEnv()
-    ctxt = np.array([0, -2, 0.840])    # initial
+    env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')

-    env.configure(ctxt)
+    # env.configure(ctxt)
    env.reset()
-    env.render()
-    for i in range(16000):
-        # test with random actions
-        ac = 0.0 * env.action_space.sample()[0:7]
-        ac[1] = -0.8
-        ac[3] = - 0.3
-        ac[5] = -0.2
-        # ac = env.start_pos
-        # ac[0] += np.pi/2
+    env.render("human")
+    for i in range(800):
+        ac = 10 * env.action_space.sample()[0:7]
        obs, rew, d, info = env.step(ac)
-        env.render()
+        env.render("human")

        print(rew)

@ -147,4 +187,3 @@ if __name__ == "__main__":
            break

    env.close()
-
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
@ -1,123 +1,168 @@
 import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct


-class BeerpongReward(alr_reward_fct.AlrReward):
-    def __init__(self, sim, sim_time):
+class BeerPongReward:
+    def __init__(self):

-        self.sim = sim
-        self.sim_time = sim_time
+        self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+                                        "wrist_pitch_link_convex_decomposition_p1_geom",
+                                        "wrist_pitch_link_convex_decomposition_p2_geom",
+                                        "wrist_pitch_link_convex_decomposition_p3_geom",
+                                        "wrist_yaw_link_convex_decomposition_p1_geom",
+                                        "wrist_yaw_link_convex_decomposition_p2_geom",
+                                        "forearm_link_convex_decomposition_p1_geom",
+                                        "forearm_link_convex_decomposition_p2_geom",
+                                        "upper_arm_link_convex_decomposition_p1_geom",
+                                        "upper_arm_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p1_geom",
+                                        "shoulder_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p3_geom",
+                                        "base_link_convex_geom", "table_contact_geom"]

-        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
-                                  "wrist_pitch_link_convex_decomposition_p1_geom",
-                                  "wrist_pitch_link_convex_decomposition_p2_geom",
-                                  "wrist_pitch_link_convex_decomposition_p3_geom",
-                                  "wrist_yaw_link_convex_decomposition_p1_geom",
-                                  "wrist_yaw_link_convex_decomposition_p2_geom",
-                                  "forearm_link_convex_decomposition_p1_geom",
-                                  "forearm_link_convex_decomposition_p2_geom"]
+        self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+                                      "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+                                      # "cup_base_table", "cup_base_table_contact",
+                                      "cup_geom_table15",
+                                      "cup_geom_table16",
+                                      "cup_geom_table17", "cup_geom1_table8",
+                                      # "cup_base_table_contact",
+                                      # "cup_base_table"
+                                      ]

-        self.ball_id = None
-        self.ball_collision_id = None
-        self.goal_id = None
-        self.goal_final_id = None
-        self.collision_ids = None

        self.ball_traj = None
        self.dists = None
-        self.dists_ctxt = None
        self.dists_final = None
        self.costs = None
-
+        self.action_costs = None
+        self.angle_rewards = None
+        self.cup_angles = None
+        self.cup_z_axes = None
+        self.collision_penalty = 500
        self.reset(None)

    def reset(self, context):
-        self.ball_traj = np.zeros(shape=(self.sim_time, 3))
+        self.ball_traj = []
        self.dists = []
-        self.dists_ctxt = []
        self.dists_final = []
        self.costs = []
-        self.context = context
-        self.ball_in_cup = False
-        self.dist_ctxt = 5
+        self.action_costs = []
+        self.angle_rewards = []
+        self.cup_angles = []
+        self.cup_z_axes = []
+        self.ball_ground_contact = False
+        self.ball_table_contact = False
+        self.ball_wall_contact = False
+        self.ball_cup_contact = False

-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
-        self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
-        self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+    def compute_reward(self, env, action):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+        self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+        self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+        self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+        self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+        self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+        self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+        self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+        self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]

-    def compute_reward(self, action, sim, step):
-        action_cost = np.sum(np.square(action))
-
-        stop_sim = False
-        success = False
-
-        if self.check_collision(sim):
-            reward = - 1e-4 * action_cost - 1000
-            stop_sim = True
-            return reward, success, stop_sim
-
-        # Compute the current distance from the ball to the inner part of the cup
-        goal_pos = sim.data.site_xpos[self.goal_id]
-        ball_pos = sim.data.body_xpos[self.ball_id]
-        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-        self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
-        self.ball_traj[step, :] = ball_pos

-        # Determine the first time when ball is in cup
-        if not self.ball_in_cup:
-            ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
-            self.ball_in_cup = ball_in_cup
-            if ball_in_cup:
-                dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
-                self.dist_ctxt = dist_to_ctxt
+        action_cost = np.sum(np.square(action))
+        self.action_costs.append(action_cost)
+
+        ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                 self.table_collision_id)
+
+        if ball_table_bounce:  # or ball_cup_table_cont or ball_wall_con
+            self.ball_table_contact = True
+
+        ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+                                                                        self.cup_collision_ids)
+        if ball_cup_cont:
+            self.ball_cup_contact = True
+
+        ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
+        if ball_wall_cont and not self.ball_table_contact:
+            self.ball_wall_contact = True
+
+        ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                   self.ground_collision_id)
+        if ball_ground_contact and not self.ball_table_contact:
+            self.ball_ground_contact = True
+
+        self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        if env._steps == env.ep_length - 1 or self._is_collided:

-        if step == self.sim_time - 1:
            min_dist = np.min(self.dists)
-            dist_final = self.dists_final[-1]
-            # dist_ctxt = self.dists_ctxt[-1]

-            # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
-            cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
-            reward = np.exp(-1 * cost) - 1e-4 * action_cost
-            success = dist_final < 0.05 and self.dist_ctxt < 0.05
+            ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
+
+            cost_offset = 0
+
+            if self.ball_ground_contact:  #  or self.ball_wall_contact:
+                cost_offset += 2
+
+            if not self.ball_table_contact:
+                cost_offset += 2
+            
+            if not ball_in_cup:
+                cost_offset += 2
+                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost  # + min_dist ** 2
+            else:
+                if self.ball_cup_contact:
+                    cost_offset += 1
+                cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
+
+            reward = - 1*cost - self.collision_penalty * int(self._is_collided)
+            success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
        else:
            reward = - 1e-4 * action_cost
            success = False

-        return reward, success, stop_sim
+        infos = {}
+        infos["success"] = success
+        infos["is_collided"] = self._is_collided
+        infos["ball_pos"] = ball_pos.copy()
+        infos["action_cost"] = 5e-4 * action_cost

-    def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
-        if not ball_in_cup:
-            cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
-        else:
-            cost = 2 * dist_to_ctxt ** 2
-            print('Context Distance:', dist_to_ctxt)
-        return cost
+        return reward, infos

-    def check_ball_in_cup(self, sim, ball_collision_id):
-        cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
+    def _check_collision_single_objects(self, sim, id_1, id_2):
        for coni in range(0, sim.data.ncon):
            con = sim.data.contact[coni]

-            collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
-            collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1

            if collision or collision_trans:
                return True
        return False

-    def check_collision(self, sim):
+    def _check_collision_with_itself(self, sim, collision_ids):
+        col_1, col_2 = False, False
+        for j, id in enumerate(collision_ids):
+            col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+            if j != len(collision_ids) - 1:
+                col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+            else:
+                col_2 = False
+        collision = True if col_1 or col_2 else False
+        return collision
+
+    def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
        for coni in range(0, sim.data.ncon):
            con = sim.data.contact[coni]

-            collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
-            collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
+            collision = con.geom1 in id_list and con.geom2 == id_1
+            collision_trans = con.geom1 == id_1 and con.geom2 in id_list

            if collision or collision_trans:
                return True
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@ -0,0 +1,169 @@
+import numpy as np
+
+
+class BeerPongReward:
+    def __init__(self):
+
+        self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+                                        "wrist_pitch_link_convex_decomposition_p1_geom",
+                                        "wrist_pitch_link_convex_decomposition_p2_geom",
+                                        "wrist_pitch_link_convex_decomposition_p3_geom",
+                                        "wrist_yaw_link_convex_decomposition_p1_geom",
+                                        "wrist_yaw_link_convex_decomposition_p2_geom",
+                                        "forearm_link_convex_decomposition_p1_geom",
+                                        "forearm_link_convex_decomposition_p2_geom",
+                                        "upper_arm_link_convex_decomposition_p1_geom",
+                                        "upper_arm_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p1_geom",
+                                        "shoulder_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p3_geom",
+                                        "base_link_convex_geom", "table_contact_geom"]
+
+        self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+                                      "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+                                      # "cup_base_table", "cup_base_table_contact",
+                                      "cup_geom_table15",
+                                      "cup_geom_table16",
+                                      "cup_geom_table17", "cup_geom1_table8",
+                                      # "cup_base_table_contact",
+                                      # "cup_base_table"
+                                      ]
+
+
+        self.ball_traj = None
+        self.dists = None
+        self.dists_final = None
+        self.costs = None
+        self.action_costs = None
+        self.angle_rewards = None
+        self.cup_angles = None
+        self.cup_z_axes = None
+        self.collision_penalty = 500
+        self.reset(None)
+
+    def reset(self, noisy):
+        self.ball_traj = []
+        self.dists = []
+        self.dists_final = []
+        self.costs = []
+        self.action_costs = []
+        self.angle_rewards = []
+        self.cup_angles = []
+        self.cup_z_axes = []
+        self.ball_ground_contact = False
+        self.ball_table_contact = False
+        self.ball_wall_contact = False
+        self.ball_cup_contact = False
+        self.noisy_bp = noisy
+        self._t_min_final_dist = -1
+
+    def compute_reward(self, env, action):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+        self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+        self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+        self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+        self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+        self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+        self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+        self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+        self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        ball_vel = env.sim.data.body_xvelp[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
+        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
+
+        action_cost = np.sum(np.square(action))
+        self.action_costs.append(action_cost)
+
+        self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        if env._steps == env.ep_length - 1 or self._is_collided:
+
+            min_dist = np.min(self.dists)
+            ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                     self.table_collision_id)
+            ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+                                                                            self.cup_collision_ids)
+            ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                  self.wall_collision_id)
+            ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                               self.cup_table_collision_id)
+            if not ball_in_cup:
+                cost_offset = 2
+                if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
+                    cost_offset += 2
+                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
+            else:
+                cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
+
+            reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+            success = ball_in_cup
+            crash = self._is_collided
+        else:
+            reward = - 1e-7 * action_cost
+            cost = 0
+            success = False
+            crash = False
+
+        infos = {}
+        infos["success"] = success
+        infos["is_collided"] = self._is_collided
+        infos["ball_pos"] = ball_pos.copy()
+        infos["ball_vel"] = ball_vel.copy()
+        infos["action_cost"] = 5e-4 * action_cost
+        infos["task_cost"] = cost
+
+        return reward, infos
+
+    def get_cost_offset(self):
+        if self.ball_ground_contact:
+            return 200
+
+        if not self.ball_table_contact:
+            return 100
+
+        if not self.ball_in_cup:
+            return 50
+
+        if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
+            return 10
+
+        return 0
+
+    def _check_collision_single_objects(self, sim, id_1, id_2):
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+
+            if collision or collision_trans:
+                return True
+        return False
+
+    def _check_collision_with_itself(self, sim, collision_ids):
+        col_1, col_2 = False, False
+        for j, id in enumerate(collision_ids):
+            col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+            if j != len(collision_ids) - 1:
+                col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+            else:
+                col_2 = False
+        collision = True if col_1 or col_2 else False
+        return collision
+
+    def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 in id_list and con.geom2 == id_1
+            collision_trans = con.geom1 == id_1 and con.geom2 in id_list
+
+            if collision or collision_trans:
+                return True
+        return False
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@ -0,0 +1,39 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # TODO: @Max Filter observations correctly
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
+    @property
+    def start_pos(self):
+        return self._start_pos
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[0:7].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qvel[0:7].copy()
+
+    @property
+    def goal_pos(self):
+        # TODO: @Max I think the default value of returning to the start is reasonable here
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
--- a/alr_envs/alr/mujoco/table_tennis/init.py
+++ b/alr_envs/alr/mujoco/table_tennis/init.py
@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@ -0,0 +1,38 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # TODO: @Max Filter observations correctly
+        return np.hstack([
+            [True] * 7,  # Joint Pos
+            [True] * 3, # Ball pos
+            [True] * 3  # goal pos
+        ])
+
+    @property
+    def start_pos(self):
+        return self.self.init_qpos_tt
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[:7].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qvel[:7].copy()
+
+    @property
+    def goal_pos(self):
+        # TODO: @Max I think the default value of returning to the start is reasonable here
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@ -0,0 +1,168 @@
+import os
+
+import numpy as np
+import mujoco_py
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+
+from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
+from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
+
+#TODO: Check for simulation stability. Make sure the code runs even for sim crash
+
+MAX_EPISODE_STEPS = 1375
+BALL_NAME_CONTACT = "target_ball_contact"
+BALL_NAME = "target_ball"
+TABLE_NAME = 'table_tennis_table'
+FLOOR_NAME = 'floor'
+PADDLE_CONTACT_1_NAME = 'bat'
+PADDLE_CONTACT_2_NAME = 'bat_back'
+RACKET_NAME = 'bat'
+# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
+CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
+CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
+
+class TT_Env_Gym(MujocoEnv, utils.EzPickle):
+
+    def __init__(self, ctxt_dim=2):
+        model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
+
+        self.ctxt_dim = ctxt_dim
+        if ctxt_dim == 2:
+            self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
+            self.goal = np.zeros(3)  # 2 x,y + 1z
+        elif ctxt_dim == 4:
+            self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
+            self.goal = np.zeros(3)
+        else:
+            raise ValueError("either 2 or 4 dimensional Contexts available")
+
+        action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+        action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+        self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+
+        self.time_steps = 0
+        self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
+        self.init_qvel_tt = np.zeros(10)
+
+        self.reward_func = TT_Reward(self.ctxt_dim)
+        self.ball_landing_pos = None
+        self.hited_ball = False
+        self.ball_contact_after_hit = False
+        self._ids_set = False
+        super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # find the proper -> not protected func.
+        self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+        self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+        self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+        self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME]  # check if we need both or only this
+        self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME]  # check if we need both or only this
+        self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+
+    def _set_ids(self):
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # find the proper -> not protected func.
+        self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+        self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+        self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME]  # check if we need both or only this
+        self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME]  # check if we need both or only this
+        self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+        self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+        self._ids_set = True
+
+    def _get_obs(self):
+        ball_pos = self.sim.data.body_xpos[self.ball_id]
+        obs = np.concatenate([self.sim.data.qpos[:7].copy(),  # 7 joint positions
+                              ball_pos,
+                              self.goal.copy()])
+        return obs
+
+    def sample_context(self):
+        return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+
+    def reset_model(self):
+        self.set_state(self.init_qpos_tt, self.init_qvel_tt)    # reset to initial sim state
+        self.time_steps = 0
+        self.ball_landing_pos = None
+        self.hited_ball = False
+        self.ball_contact_after_hit = False
+        self.goal = self.sample_context()[:2]
+        if self.ctxt_dim == 2:
+            initial_ball_state = ball_init(random=False)  # fixed velocity, fixed position
+        elif self.ctxt_dim == 4:
+            initial_ball_state = ball_init(random=False)#raise NotImplementedError
+
+        self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
+        self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
+        self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
+
+        self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
+        self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
+        self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
+
+        z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
+        self.goal = z_extended_goal_pos
+        self.sim.model.body_pos[5] = self.goal[:3]          # Desired Landing Position, Yellow
+        self.sim.model.body_pos[3] = np.array([0, 0, 0.5])  # Outgoing Ball Landing Position, Green
+        self.sim.model.body_pos[4] = np.array([0, 0, 0.5])  # Incoming Ball Landing Position, Red
+        self.sim.forward()
+
+        self.reward_func.reset(self.goal)                   # reset the reward function
+        return self._get_obs()
+
+    def _contact_checker(self, id_1, id_2):
+        for coni in range(0, self.sim.data.ncon):
+            con = self.sim.data.contact[coni]
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+            if collision or collision_trans:
+                return True
+        return False
+
+    def step(self, action):
+        if not self._ids_set:
+            self._set_ids()
+        done = False
+        episode_end = False if self.time_steps+1<MAX_EPISODE_STEPS else True
+        if not self.hited_ball:
+            self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_1) # check for one side
+            if not self.hited_ball:
+                self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_2) # check for other side
+        if self.hited_ball:
+            if not self.ball_contact_after_hit:
+                if self._contact_checker(self.ball_contact_id, self.floor_contact_id):  # first check contact with floor
+                    self.ball_contact_after_hit = True
+                    self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
+                elif self._contact_checker(self.ball_contact_id, self.table_contact_id): # second check contact with table
+                    self.ball_contact_after_hit = True
+                    self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
+        c_ball_pos = self.sim.data.body_xpos[self.ball_id]
+        racket_pos = self.sim.data.geom_xpos[self.racket_id]        # TODO: use this to reach out the position of the paddle?
+        if self.ball_landing_pos is not None:
+            done = True
+            episode_end =True
+        reward = self.reward_func.get_reward(episode_end, c_ball_pos, racket_pos, self.hited_ball, self.ball_landing_pos)
+        self.time_steps += 1
+        # gravity compensation on joints:
+        #action += self.sim.data.qfrc_bias[:7].copy()
+        try:
+            self.do_simulation(action, self.frame_skip)
+        except mujoco_py.MujocoException as e:
+            print('Simulation got unstable returning')
+            done = True
+            reward = -25
+        ob = self._get_obs()
+        return ob, reward, done, {"hit_ball":self.hited_ball}# might add some information here ....
+
+    def set_context(self, context):
+        old_state = self.sim.get_state()
+        qpos = old_state.qpos.copy()
+        qvel = old_state.qvel.copy()
+        self.set_state(qpos, qvel)
+        self.goal = context
+        z_extended_goal_pos = np.concatenate((self.goal[:self.ctxt_dim], [0.77]))
+        if self.ctxt_dim == 4:
+            z_extended_goal_pos = np.concatenate((z_extended_goal_pos, [0.77]))
+        self.goal = z_extended_goal_pos
+        self.sim.model.body_pos[5] = self.goal[:3]      # TODO: Missing: Setting the desired incomoing landing position
+        self.sim.forward()
+        return self._get_obs()
--- a/alr_envs/alr/mujoco/table_tennis/tt_reward.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
@ -0,0 +1,48 @@
+import numpy as np
+
+
+class TT_Reward:
+
+    def __init__(self, ctxt_dim):
+        self.ctxt_dim = ctxt_dim
+        self.c_goal = None          # current desired landing point
+        self.c_ball_traj = []
+        self.c_racket_traj = []
+        self.constant = 8
+
+    def get_reward(self, episode_end, ball_position, racket_pos, hited_ball, ball_landing_pos):
+        self.c_ball_traj.append(ball_position.copy())
+        self.c_racket_traj.append(racket_pos.copy())
+        if not episode_end:
+            return 0
+        else:
+            # # seems to work for episodic case
+            min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
+            if not hited_ball:
+                return 0.2 * (1- np.tanh(min_r_b_dist**2))
+            else:
+                if ball_landing_pos is None:
+                    min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
+                    return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2))
+                else:
+                    min_b_des_b_land_dist = np.linalg.norm(self.c_goal[:2] - ball_landing_pos[:2])
+                    over_net_bonus = int(ball_landing_pos[0] < 0)
+                    return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
+
+
+            # if not hited_ball:
+            #     min_r_b_dist = 1 + np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
+            #     return -min_r_b_dist
+            # else:
+            #     if ball_landing_pos is None:
+            #         dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_position), 0.75)/self.constant
+            #     else:
+            #         dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_landing_pos), 0.75)/self.constant
+            #     if dist_to_des_pos < -0.2:
+            #         dist_to_des_pos = -0.2
+            #     return -dist_to_des_pos
+
+    def reset(self, goal):
+        self.c_goal = goal.copy()
+        self.c_ball_traj = []
+        self.c_racket_traj = []
--- a/alr_envs/alr/mujoco/table_tennis/tt_utils.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_utils.py
@ -0,0 +1,26 @@
+import numpy as np
+
+
+def ball_init(random=False, context_range=None):
+    if random:
+        dx = np.random.uniform(-0.1, 0.1)           # TODO: clarify these numbers?
+        dy = np.random.uniform(-0.1, 0.1)           # TODO: clarify these numbers?
+        dz = np.random.uniform(-0.1, 0.1)           # TODO: clarify these numbers?
+
+        v_x = np.random.uniform(1.7, 1.8)
+        v_y = np.random.uniform(0.7, 0.8)
+        v_z = np.random.uniform(0.1, 0.2)
+    else:
+        dx = 1
+        dy = 0
+        dz = 0.05
+
+        v_x = 2.5
+        v_y = 2
+        v_z = 0.5
+
+    initial_x = 0 + dx - 1.2
+    initial_y = -0.2 + dy - 0.6
+    initial_z = 0.3 + dz + 1.5
+    initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
+    return initial_ball_state
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
@ -0,0 +1,12 @@
+<mujocoinclude>
+    <actuator>
+        <motor name="wam/base_motor" joint="wam/base_yaw_joint_right" ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0"/>
+        <motor name="wam/shoulder_pitch_motor" joint='wam/shoulder_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0"/>
+        <motor name="wam/shoulder_yaw_motor" joint='wam/shoulder_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0"/>
+        <motor name="wam/elbow_pitch_motor" joint='wam/elbow_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0"/>
+        <motor name="wam/wrist_yaw_motor" joint='wam/wrist_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
+        <motor name="wam/wrist_pitch_motor" joint='wam/wrist_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
+        <motor name="wam/palm_yaw_motor" joint='wam/palm_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0"/>
+    </actuator>
+</mujocoinclude>
+
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
@ -0,0 +1,103 @@
+<mujocoinclue>
+    <body name="wam/base_link_right" pos="2.1 0 2.0" quat="0 0 1 0" childclass="wam" >
+            <inertial pos="0 0 0" mass="1" diaginertia="0.1 0.1 0.1"/>
+            <geom name="base_link_fine" class="viz" mesh="base_link_fine" rgba="0.5 0.5 0.5 0"/>
+            <geom name="base_link_convex" class="col" mesh="base_link_convex" rgba="0.5 0.5 0.5 1"/>
+            <body name="wam/shoulder_yaw_link_right" pos="0 0 0.346">
+                <inertial pos="-0.00443422 -0.00066489 -0.128904" quat="0.69566 0.716713 -0.0354863 0.0334839" mass="5"
+                          diaginertia="0.135089 0.113095 0.0904426"/>
+<!--                control 0: 1.6-->
+                <joint name="wam/base_yaw_joint_right" range="-2.6 2.6" damping="1.98"/>
+                <geom name="shoulder_link_fine" class="viz" mesh="shoulder_link_fine" rgba="1 1 1 0"/>
+                <geom name="shoulder_link_convex_decomposition_p1" class="col"
+                      mesh="shoulder_link_convex_decomposition_p1"/>
+                <geom name="shoulder_link_convex_decomposition_p2" class="col"
+                      mesh="shoulder_link_convex_decomposition_p2"/>
+                <geom name="shoulder_link_convex_decomposition_p3" class="col"
+                      mesh="shoulder_link_convex_decomposition_p3"/>
+                <body name="wam/shoulder_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
+                    <inertial pos="-0.00236981 -0.0154211 0.0310561" quat="0.961794 0.273112 -0.0169316 0.00866592"
+                              mass="3.87494" diaginertia="0.0214195 0.0167127 0.0126452"/> <!--seems off-->
+<!--                    control 1: 0-->
+                    <joint name="wam/shoulder_pitch_joint_right" range="-2 2" damping="0.55"/>
+                    <geom name="shoulder_pitch_link_fine" class="viz" mesh="shoulder_pitch_link_fine" rgba="1 1 1 0"/>
+                    <geom name="shoulder_pitch_link_convex" class="col" mesh="shoulder_pitch_link_convex"/>
+                    <body name="wam/upper_arm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
+                        <inertial pos="0.00683259 3.309e-005 0.392492" quat="0.647136 0.0170822 0.0143038 0.762049"
+                                  mass="2.20228" diaginertia="0.0592718 0.0592207 0.00313419"/>
+<!--                        control 2: 0-->
+                        <joint name="wam/shoulder_yaw_joint_right" range="-2.8 2.8" damping="1.65"/>
+                        <geom name="upper_arm_link_fine" class="viz" mesh="upper_arm_link_fine" rgba="1 1 1 0"/>
+                        <geom name="upper_arm_link_convex_decomposition_p1" class="col"
+                              mesh="upper_arm_link_convex_decomposition_p1" rgba="0.094 0.48 0.804 1"/>
+                        <geom name="upper_arm_link_convex_decomposition_p2" class="col"
+                              mesh="upper_arm_link_convex_decomposition_p2" rgba="0.094 0.48 0.804 1"/>
+                        <body name="wam/forearm_link_right" pos="0.045 0 0.55" quat="0.707107 -0.707107 0 0">
+                            <inertial pos="-0.0400149 -0.142717 -0.00022942"
+                                      quat="0.704281 0.706326 0.0180333 0.0690353" mass="0.500168"
+                                      diaginertia="0.0151047 0.0148285 0.00275805"/>
+<!--                            control 3: 2.4-->
+                            <joint name="wam/elbow_pitch_joint_right" range="-0.9 3.1" damping="0.88"/>
+                            <geom name="elbow_link_fine" class="viz" mesh="elbow_link_fine" rgba="1 1 1 0"/>
+                            <geom name="elbow_link_convex" class="col" mesh="elbow_link_convex"/>
+                            <geom name="forearm_link_fine" class="viz" mesh="forearm_link_fine" pos="-.045 -0.0730 0"
+                                  euler="1.57 0 0" rgba="1 1 1 0"/>
+                            <geom name="forearm_link_convex_decomposition_p1" class="col"
+                                  mesh="forearm_link_convex_decomposition_p1" pos="-0.045 -0.0730 0"
+                                  euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
+                            <geom name="forearm_link_convex_decomposition_p2" class="col"
+                                  mesh="forearm_link_convex_decomposition_p2" pos="-.045 -0.0730 0"
+                                  euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
+                            <body name="wam/wrist_yaw_link_right" pos="-0.045 -0.3 0" quat="0.707107 0.707107 0 0">
+                                <inertial pos="8.921e-005 0.00435824 -0.00511217"
+                                          quat="0.630602 0.776093 0.00401969 -0.002372" mass="1.05376"
+                                          diaginertia="0.000555168 0.00046317 0.000234072"/> <!--this is an approximation-->
+<!--                                control 4: 0-->
+                                <joint name="wam/wrist_yaw_joint_right" range="-4.8 1.3" damping="0.55"/>
+                                <geom name="wrist_yaw_link_fine" class="viz" mesh="wrist_yaw_link_fine" rgba="1 1 1 0"/>
+                                <geom name="wrist_yaw_link_convex_decomposition_p1" class="col"
+                                      mesh="wrist_yaw_link_convex_decomposition_p1"/>
+                                <geom name="wrist_yaw_link_convex_decomposition_p2" class="col"
+                                      mesh="wrist_yaw_link_convex_decomposition_p2"/>
+                                <body name="wam/wrist_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
+                                    <inertial pos="-0.00012262 -0.0246834 -0.0170319"
+                                              quat="0.630602 0.776093 0.00401969 -0.002372" mass="0.517974"
+                                              diaginertia="0.000555168 0.00046317 0.000234072"/>
+<!--                                    control 5: 0-->
+                                    <joint name="wam/wrist_pitch_joint_right" range="-1.6 1.6" damping="0.11"/>
+                                    <geom name="wrist_pitch_link_fine" class="viz" mesh="wrist_pitch_link_fine"
+                                          rgba="1 1 1 0"/>
+                                    <geom name="wrist_pitch_link_convex_decomposition_p1" rgba="1 0.5 0.313 1"
+                                          class="col" mesh="wrist_pitch_link_convex_decomposition_p1"/>
+                                    <geom name="wrist_pitch_link_convex_decomposition_p2" rgba="1 0.5 0.313 1"
+                                          class="col" mesh="wrist_pitch_link_convex_decomposition_p2"/>
+                                    <geom name="wrist_pitch_link_convex_decomposition_p3" rgba="1 0.5 0.313 1"
+                                          class="col" mesh="wrist_pitch_link_convex_decomposition_p3"/>
+                                    <body name="wam/wrist_palm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
+                                        <inertial pos="0 0 0.055" quat="0.707107 0 0 0.707107" mass="0.0828613"
+                                                  diaginertia="0.00020683 0.00010859 0.00010851"/>
+<!--                                        control 6: 1.8-->
+                                        <joint name="wam/palm_yaw_joint_right" range="-2.2 2.2" damping="0.11"/>
+                                        <geom name="wrist_palm_link_fine" class="viz" mesh="wrist_palm_link_fine"
+                                              rgba="1 1 1 0"/>
+                                        <geom name="wrist_palm_link_convex" class="col" mesh="wrist_palm_link_convex"/>
+<!--                                        EE=wam/paddle, configure name to the end effector name-->
+                                        <body name="EE" pos="0 0 0.26" childclass="contact_geom">
+                                            <geom name="bat" type="cylinder" size="0.075 0.005" rgba="1 0 0 1"
+                                                  quat="0.71 0 0.71 0"/>
+                                            <geom name="bat_back" type="cylinder" size="0.0749 0.0025" rgba="0 1 0 1"
+                                                  quat="0.71 0 0.71 0" pos="-0.0026 0 0"/>
+                                            <geom name="wam/paddle_handle" type="box" size="0.005 0.01 0.05" pos="0 0 -0.08"
+                                                  rgba="1 1 1 1"/>
+                                            <!--                                            Extract information for sampling goals.-->
+                                            <site name="wam/paddle_center" pos="0 0 0" rgba="1 1 1 1" size="0.00001"/>
+                                        </body>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+</mujocoinclue>
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
@ -0,0 +1,30 @@
+<mujocoinclude>
+    <body name="table_tennis_table" pos="0 0 0">
+        <geom class="viz" name="table_base_1" pos="1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <geom class="viz" name="table_base_2" pos="1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <geom class="viz" name="table_base_3" pos="-1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <geom class="viz" name="table_base_4" pos="-1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <body name="table_top" pos="0 0 0.76">
+            <geom class="contact_geom" name="table_tennis_table" pos="0 0 0" rgba="0 0 0.5 1" size="1.37 .7625 .01" type="box" />
+            <site name="left_up_corner" pos="-1.37 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <site name="middle_up_corner" pos="0 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <site name="left_down_corner" pos="-1.37 -0.7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <site name="middle_down_corner" pos="0 -.7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <geom class="contact_geom" material="floor_plane" name="table_te_context_spacennis_net" pos="0 0 0.08625" rgba="0 0 1 0.5" size="0.01 0.915 0.07625" type="box" />
+            <geom class="viz" name="left_while_line" pos="0 -0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
+            <geom class="viz" name="center_while_line" pos="0 0 0.01" rgba="1 1 1 1" size="1.37 .01 .001" type="box" />
+            <geom class="viz" name="right_while_line" pos="0 0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
+            <geom class="viz" name="right_side_line" pos="1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
+            <geom class="viz" name="left_side_line" pos="-1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
+        </body>
+        <body name="achieved_pos" pos="0 0 0.5">
+            <geom class="viz" name="achieved_point_geom" pos="0 0 0" rgba="0 1 0 1" size="0.02 0.001" type="cylinder" />
+        </body>
+        <body name="right_achieved_pos" pos="0 0 0.5">
+            <geom class="viz" name="hitting_achieved_point_geom" pos="0 0 0" rgba="1 0 0 1" size="0.02 0.001" type="cylinder" />
+        </body>
+        <body name="target_point" pos="0 0 0.5">
+            <geom class="viz" name="target_point_geom" pos="0 0 0" rgba="1 1 0 1" size="0.02 0.001" type="cylinder" />
+        </body>
+    </body>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
@ -0,0 +1,10 @@
+<mujocoinclude>
+    <body name="target_ball" pos="0 0 0">
+        <joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
+        <joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
+        <joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
+        <geom size="0.025 0.025 0.025" type="sphere" condim="4" name="target_ball_contact" rgba="1 1 0 1" mass="0.1"
+              friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="0.1 0.03" priority="1"/>
+        <site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
+    </body>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
@ -0,0 +1,47 @@
+<mujocoinclude>
+    <actuator>
+
+<!--                        <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" />-->
+<!--                        <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="162.0" />-->
+<!--                        <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="100.0" />-->
+<!--                        <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="122.0" />-->
+<!--                        <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" />-->
+<!--                        <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="102.0" />-->
+<!--                        <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" />-->
+
+<!--                <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="151.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="125.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="121.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+
+<!--                        <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="600.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="500.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+
+                <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>
+                <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="1000.0" ctrllimited="true"/>
+                <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>
+
+
+<!--        <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="1600.0" ctrllimited="true"/>-->
+<!--&lt;!&ndash;        <velocity ctrlrange="-50 50" joint="wam/base_yaw_joint_right" kv="100" ctrllimited="true"/>&ndash;&gt;-->
+
+<!--        <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="1200.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
+    </actuator>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
@ -0,0 +1,46 @@
+<mujocoinclude>
+    <default>
+        <default class="wam">
+            <joint type="hinge" limited="true" pos="0 0 0" axis="0 0 1"/>
+        </default>
+        <default class="viz">
+            <geom type="mesh" contype="0" conaffinity="0" group="1" rgba="1 1 1 1"/>
+        </default>
+        <default class="col">
+            <geom type="mesh" contype="0" conaffinity="1" group="0" rgba="1 1 1 1"/>
+        </default>
+        <default class="contact_geom">
+            <geom condim="4" friction="0.1 0.1 0.1" margin="0" solimp="1 1 0" solref="0.1 0.03"/>
+        </default>
+    </default>
+    <asset>
+        <mesh file="base_link_fine.stl"/>
+        <mesh file="base_link_convex.stl"/>
+        <mesh file="shoulder_link_fine.stl"/>
+        <mesh file="shoulder_link_convex_decomposition_p1.stl"/>
+        <mesh file="shoulder_link_convex_decomposition_p2.stl"/>
+        <mesh file="shoulder_link_convex_decomposition_p3.stl"/>
+        <mesh file="shoulder_pitch_link_fine.stl"/>
+        <mesh file="shoulder_pitch_link_convex.stl"/>
+        <mesh file="upper_arm_link_fine.stl"/>
+        <mesh file="upper_arm_link_convex_decomposition_p1.stl"/>
+        <mesh file="upper_arm_link_convex_decomposition_p2.stl"/>
+        <mesh file="elbow_link_fine.stl"/>
+        <mesh file="elbow_link_convex.stl"/>
+        <mesh file="forearm_link_fine.stl"/>
+        <mesh file="forearm_link_convex_decomposition_p1.stl"/>
+        <mesh file="forearm_link_convex_decomposition_p2.stl"/>
+        <mesh file="wrist_yaw_link_fine.stl"/>
+        <mesh file="wrist_yaw_link_convex_decomposition_p1.stl"/>
+        <mesh file="wrist_yaw_link_convex_decomposition_p2.stl"/>
+        <mesh file="wrist_pitch_link_fine.stl"/>
+        <mesh file="wrist_pitch_link_convex_decomposition_p1.stl"/>
+        <mesh file="wrist_pitch_link_convex_decomposition_p2.stl"/>
+        <mesh file="wrist_pitch_link_convex_decomposition_p3.stl"/>
+        <mesh file="wrist_palm_link_fine.stl"/>
+        <mesh file="wrist_palm_link_convex.stl"/>
+        <texture builtin="checker" height="512" name="texplane" rgb1=".2 .3 .4" rgb2=".1 0.15 0.2" type="2d"
+                 width="512"/>
+        <material name="floor_plane" reflectance="0.5" texrepeat="1 1" texture="texplane" texuniform="true"/>
+    </asset>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
@ -0,0 +1,18 @@
+<mujoco model="table_tennis(v0.1)">
+    <compiler angle="radian" coordinate="local" meshdir="../../meshes/wam" />
+    <option gravity="0 0 -9.81" timestep="0.002">
+        <flag warmstart="enable" />
+    </option>
+    <custom>
+        <numeric data="0 0 0 0 0 0 0" name="START_ANGLES" />
+    </custom>
+    <include file="shared.xml" />
+    <worldbody>
+        <light cutoff="60" diffuse="1 1 1" dir="-.1 -.2 -1.3" directional="true" exponent="1" pos=".1 .2 1.3" specular=".1 .1 .1" />
+        <geom conaffinity="1" contype="1" material="floor_plane" name="floor" pos="0 0 0" size="10 5 1" type="plane" />
+        <include file="include_table.xml" />
+        <include file="include_barrett_wam_7dof_right.xml" />
+        <include file="include_target_ball.xml" />
+    </worldbody>
+    <include file="include_7_motor_actuator.xml" />
+</mujoco>
--- a/alr_envs/dmc/dmc_wrapper.py
+++ b/alr_envs/dmc/dmc_wrapper.py
@ -15,10 +15,10 @@ def _spec_to_box(spec):
        assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
        dim = int(np.prod(s.shape))
        if type(s) == specs.Array:
-            bound = np.inf * np.ones(dim, dtype=np.float32)
+            bound = np.inf * np.ones(dim, dtype=s.dtype)
            return -bound, bound
        elif type(s) == specs.BoundedArray:
-            zeros = np.zeros(dim, dtype=np.float32)
+            zeros = np.zeros(dim, dtype=s.dtype)
            return s.minimum + zeros, s.maximum + zeros

    mins, maxs = [], []
@ -29,7 +29,7 @@ def _spec_to_box(spec):
    low = np.concatenate(mins, axis=0)
    high = np.concatenate(maxs, axis=0)
    assert low.shape == high.shape
-    return spaces.Box(low, high, dtype=np.float32)
+    return spaces.Box(low, high, dtype=s.dtype)


 def _flatten_obs(obs: collections.MutableMapping):
@ -113,7 +113,7 @@ class DMCWrapper(core.Env):
            if self._channels_first:
                obs = obs.transpose(2, 0, 1).copy()
        else:
-            obs = _flatten_obs(time_step.observation)
+            obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
        return obs

    @property