diff --git a/README.md b/README.md
index 2232efa..edd1aac 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,25 @@
## ALR Robotics Control Environments
-This project offers a large verity of reinforcement learning environments under a unifying interface base on OpenAI gym.
-Besides, some custom environments we also provide support for the benchmark suites
-[OpenAI gym](https://gym.openai.com/),
+This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
+In addition to our custom environments, we also provide support (under the OpenAI gym interface) for the benchmark suites
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
-(DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according
+(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we
-further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
+additionally support controlling agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP,
we only consider the mean usually).
## Motion Primitive Environments (Episodic environments)
Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box
-optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full
+optimization, and methods that are often used in robotics. MP environments are trajectory-based and always execute a full
trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The
generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is,
however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position,
-velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
+velocity, and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action
represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this
-framework we support the above setting for the contextual setting, for which we expose all changing substates of the
+framework we support all of the above in the contextual setting, where we expose all changing substates of the
task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each
-trajectory. All environments provide the next to the cumulative episode reward also all collected information from each
+trajectory. In addition to the cumulative episode reward, all environments provide the information collected in each
step as part of the info dictionary. This information should, however, mainly be used for debugging and logging.
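+
+A minimal usage sketch of an MP environment (assuming the package is installed and that `alr_envs.make` and an MP environment id such as `BeerpongProMP-v0` are available in your version):
+
+```python
+import alr_envs
+
+# One step of an MP environment executes the full trajectory generated from the MP parameters.
+env = alr_envs.make("BeerpongProMP-v0", seed=1)
+obs = env.reset()
+mp_params = env.action_space.sample()  # parametrization of the motion primitive
+obs, episode_reward, done, info = env.step(mp_params)  # info contains the keys listed below
+```
+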
|Key| Description|
diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 6853f0d..f835328 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -1,3 +1,4 @@
+import numpy as np
from gym import register
from . import classic_control, mujoco
@@ -9,6 +10,8 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .mujoco.reacher.alr_reacher import ALRReacherEnv
from .mujoco.reacher.balancing import BalancingEnv
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
# Classic Control
@@ -193,6 +196,31 @@ register(
}
)
+## Table Tennis
+register(
+    id='TableTennis2DCtxt-v0',
+    entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+    max_episode_steps=MAX_EPISODE_STEPS,
+    kwargs={'ctxt_dim': 2}
+)
+
+register(
+    id='TableTennis4DCtxt-v0',
+    entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+    max_episode_steps=MAX_EPISODE_STEPS,
+    kwargs={'ctxt_dim': 4}
+)
+
+## BeerPong
+difficulties = ["simple", "intermediate", "hard", "hardest"]
+
+for v, difficulty in enumerate(difficulties):
+ register(
+ id='ALRBeerPong-v{}'.format(v),
+ entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+ max_episode_steps=600,
+ kwargs={
+ "difficulty": difficulty,
+ "reward_type": "staged",
+ }
+ )
+
# Motion Primitive Environments
## Simple Reacher
@@ -330,3 +358,59 @@ for _v in _versions:
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+## Beerpong
+register(
+ id='BeerpongProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:ALRBeerPong-v0",
+ "wrappers": [mujoco.beerpong.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 2,
+ "n_zero_bases": 2,
+ "duration": 0.5,
+ "post_traj_time": 2.5,
+ # "width": 0.01,
+ # "off": 0.01,
+ "policy_type": "motor",
+ "weights_scale": 0.08,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+ "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0")
+
+## Table Tennis
+register(
+ id='TableTennisProMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+ kwargs={
+ "name": "alr_envs:TableTennis4DCtxt-v0",
+ "wrappers": [mujoco.table_tennis.MPWrapper],
+ "mp_kwargs": {
+ "num_dof": 7,
+ "num_basis": 2,
+ "n_zero_bases": 2,
+ "duration": 1.25,
+ "post_traj_time": 4.5,
+ # "width": 0.01,
+ # "off": 0.01,
+ "policy_type": "motor",
+ "weights_scale": 1.0,
+ "zero_start": True,
+ "zero_goal": False,
+ "policy_kwargs": {
+ "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+ "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+ }
+ }
+ }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v0")
diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
index dd7321a..208f005 100644
--- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
+++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
@@ -106,7 +106,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
# self._tmp_hole_depth,
self.end_effector - self._goal,
self._steps
- ])
+ ]).astype(np.float32)
def _get_line_points(self, num_points_per_link=1):
theta = self._joint_angles[:, None]
diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
index 758f824..dac06a3 100644
--- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
+++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
@@ -73,7 +73,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
self._angle_velocity,
self.end_effector - self._goal,
self._steps
- ])
+ ]).astype(np.float32)
def _generate_goal(self):
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
index 3a1f0e5..292e40a 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -110,7 +110,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
self.end_effector - self._via_point,
self.end_effector - self._goal,
self._steps
- ])
+ ]).astype(np.float32)
def _check_collisions(self) -> bool:
return self._check_self_collision()
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 6744d15..30e1e7c 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -1,4 +1,6 @@
from .reacher.alr_reacher import ALRReacherEnv
from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
\ No newline at end of file
+from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
+from .table_tennis.tt_gym import TT_Env_Gym
+from .beerpong.beerpong import ALRBeerBongEnv
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py
index e69de29..989b5a9 100644
--- a/alr_envs/alr/mujoco/beerpong/__init__.py
+++ b/alr_envs/alr/mujoco/beerpong/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
new file mode 100644
index 0000000..e96d2bc
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@@ -0,0 +1,187 @@
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 4d8f389..a10e54a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -1,3 +1,4 @@
+import mujoco_py.builder
import os
import numpy as np
@@ -5,44 +6,67 @@ from gym import utils
from gym.envs.mujoco import MujocoEnv
-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
- def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
- utils.EzPickle.__init__(**locals())
- MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
+class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
+ def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
+ context: np.ndarray = None, difficulty='simple'):
self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
- "beerpong" + ".xml")
-
- self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
- self.start_vel = np.zeros(7)
-
- self._q_pos = []
- self._q_vel = []
- # self.weight_matrix_scale = 50
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
- self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
- self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
+ "beerpong_wo_cup" + ".xml")
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
- self.context = None
- # alr_mujoco_env.AlrMujocoEnv.__init__(self,
- # self.xml_path,
- # apply_gravity_comp=apply_gravity_comp,
- # n_substeps=n_substeps)
+ self.context = context
+ self.apply_gravity_comp = apply_gravity_comp
+ self.add_noise = noisy
- self.sim_time = 8 # seconds
- self.sim_steps = int(self.sim_time / self.dt)
- if reward_function is None:
- from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward
- reward_function = BeerpongReward
- self.reward_function = reward_function(self.sim, self.sim_steps)
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+ self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
+ self._start_vel = np.zeros(7)
+
+ self.ball_site_id = 0
+ self.ball_id = 11
+
+ self._release_step = 100 # time step of ball release
+
+ self.sim_time = 4 # seconds
+ self.ep_length = 600 # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt)
+ self.cup_table_id = 10
+
+ if noisy:
+ self.noise_std = 0.01
+ else:
+ self.noise_std = 0
+
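+        # goal position (x, y, z) of the cup on the table; applied to the cup_table body in reset_model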
+ if difficulty == 'simple':
+ self.cup_goal_pos = np.array([0, -1.7, 0.840])
+ elif difficulty == 'intermediate':
+ self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
+ elif difficulty == 'hard':
+ self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
+        elif difficulty == 'hardest':
+            self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
+        else:
+            raise ValueError("Unknown difficulty: {}".format(difficulty))
+
+ if reward_type == "no_context":
+ from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
+ reward_function = BeerPongReward
+ elif reward_type == "staged":
+ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+ reward_function = BeerPongReward
+ else:
+ raise ValueError("Unknown reward type: {}".format(reward_type))
+ self.reward_function = reward_function()
+
+ MujocoEnv.__init__(self, self.xml_path, frame_skip)
+ utils.EzPickle.__init__(self)
+
+ @property
+ def start_pos(self):
+ return self._start_pos
+
+ @property
+ def start_vel(self):
+ return self._start_vel
@property
def current_pos(self):
@@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
def current_vel(self):
return self.sim.data.qvel[0:7].copy()
- def configure(self, context):
- self.context = context
- self.reward_function.reset(context)
+ def reset(self):
+ self.reward_function.reset(self.add_noise)
+ return super().reset()
def reset_model(self):
init_pos_all = self.init_qpos.copy()
@@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
init_vel = np.zeros_like(init_pos_all)
self._steps = 0
- self._q_pos = []
- self._q_vel = []
start_pos = init_pos_all
start_pos[0:7] = init_pos_robot
- # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
self.set_state(start_pos, init_vel)
-
- ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
- self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
- self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
-
+ self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
+ start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+ self.set_state(start_pos, init_vel)
return self._get_obs()
def step(self, a):
@@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
angular_vel = 0.0
reward_ctrl = - np.square(a).sum()
- crash = self.do_simulation(a)
- joint_cons_viol = self.check_traj_in_joint_limits()
-
- self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
- self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
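+        # gravity compensation: add the bias (gravity/Coriolis) torques, converted to actuator units via the gear ratios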
+ if self.apply_gravity_comp:
+ a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+ try:
+ self.do_simulation(a, self.frame_skip)
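+            # keep the ball at the ball site until the release step; at release, optionally add Gaussian noise to its velocity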
+ if self._steps < self._release_step:
+ self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+ self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
+ elif self._steps == self._release_step and self.add_noise:
+ self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+ crash = False
+ except mujoco_py.builder.MujocoException:
+ crash = True
+ # joint_cons_viol = self.check_traj_in_joint_limits()
ob = self._get_obs()
- if not crash and not joint_cons_viol:
- reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
- done = success or self._steps == self.sim_steps - 1 or stop_sim
+ if not crash:
+ reward, reward_infos = self.reward_function.compute_reward(self, a)
+ success = reward_infos['success']
+ is_collided = reward_infos['is_collided']
+ ball_pos = reward_infos['ball_pos']
+ ball_vel = reward_infos['ball_vel']
+ done = is_collided or self._steps == self.ep_length - 1
self._steps += 1
else:
- reward = -1000
+ reward = -30
success = False
+ is_collided = False
done = True
+ ball_pos = np.zeros(3)
+ ball_vel = np.zeros(3)
+
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl,
+ reward=reward,
velocity=angular_vel,
- traj=self._q_pos, is_success=success,
- is_collided=crash or joint_cons_viol)
+ # traj=self._q_pos,
+ action=a,
+ q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+ q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+ ball_pos=ball_pos,
+ ball_vel=ball_vel,
+ is_success=success,
+ is_collided=is_collided, sim_crash=crash)
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
- # TODO
+ # TODO: extend observation space
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
@@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
])
# TODO
+ @property
def active_obs(self):
- pass
-
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ # [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
if __name__ == "__main__":
- env = ALRBeerpongEnv()
- ctxt = np.array([0, -2, 0.840]) # initial
+ env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')
- env.configure(ctxt)
+ # env.configure(ctxt)
env.reset()
- env.render()
- for i in range(16000):
- # test with random actions
- ac = 0.0 * env.action_space.sample()[0:7]
- ac[1] = -0.8
- ac[3] = - 0.3
- ac[5] = -0.2
- # ac = env.start_pos
- # ac[0] += np.pi/2
+ env.render("human")
+ for i in range(800):
+ ac = 10 * env.action_space.sample()[0:7]
obs, rew, d, info = env.step(ac)
- env.render()
+ env.render("human")
print(rew)
@@ -147,4 +187,3 @@ if __name__ == "__main__":
break
env.close()
-
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
index c3ca3c6..3896e82 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
@@ -1,124 +1,169 @@
import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct
-class BeerpongReward(alr_reward_fct.AlrReward):
- def __init__(self, sim, sim_time):
+class BeerPongReward:
+ def __init__(self):
- self.sim = sim
- self.sim_time = sim_time
+ self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+ "wrist_pitch_link_convex_decomposition_p1_geom",
+ "wrist_pitch_link_convex_decomposition_p2_geom",
+ "wrist_pitch_link_convex_decomposition_p3_geom",
+ "wrist_yaw_link_convex_decomposition_p1_geom",
+ "wrist_yaw_link_convex_decomposition_p2_geom",
+ "forearm_link_convex_decomposition_p1_geom",
+ "forearm_link_convex_decomposition_p2_geom",
+ "upper_arm_link_convex_decomposition_p1_geom",
+ "upper_arm_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p1_geom",
+ "shoulder_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p3_geom",
+ "base_link_convex_geom", "table_contact_geom"]
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
+ self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+ "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+ # "cup_base_table", "cup_base_table_contact",
+ "cup_geom_table15",
+ "cup_geom_table16",
+ "cup_geom_table17", "cup_geom1_table8",
+ # "cup_base_table_contact",
+ # "cup_base_table"
+ ]
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
self.ball_traj = None
self.dists = None
- self.dists_ctxt = None
self.dists_final = None
self.costs = None
-
+ self.action_costs = None
+ self.angle_rewards = None
+ self.cup_angles = None
+ self.cup_z_axes = None
+ self.collision_penalty = 500
self.reset(None)
def reset(self, context):
- self.ball_traj = np.zeros(shape=(self.sim_time, 3))
+ self.ball_traj = []
self.dists = []
- self.dists_ctxt = []
self.dists_final = []
self.costs = []
- self.context = context
- self.ball_in_cup = False
- self.dist_ctxt = 5
+ self.action_costs = []
+ self.angle_rewards = []
+ self.cup_angles = []
+ self.cup_z_axes = []
+ self.ball_ground_contact = False
+ self.ball_table_contact = False
+ self.ball_wall_contact = False
+ self.ball_cup_contact = False
- self.ball_id = self.sim.model._body_name2id["ball"]
- self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
- self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
- self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
- self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
- self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
- self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+ def compute_reward(self, env, action):
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+ self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+ self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+ self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+ self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+ self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+ self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
- def compute_reward(self, action, sim, step):
- action_cost = np.sum(np.square(action))
-
- stop_sim = False
- success = False
-
- if self.check_collision(sim):
- reward = - 1e-4 * action_cost - 1000
- stop_sim = True
- return reward, success, stop_sim
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+ goal_pos = env.sim.data.site_xpos[self.goal_id]
+ ball_pos = env.sim.data.body_xpos[self.ball_id]
+ goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
- self.ball_traj[step, :] = ball_pos
- # Determine the first time when ball is in cup
- if not self.ball_in_cup:
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
- self.ball_in_cup = ball_in_cup
- if ball_in_cup:
- dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
- self.dist_ctxt = dist_to_ctxt
+ action_cost = np.sum(np.square(action))
+ self.action_costs.append(action_cost)
+
+ ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.table_collision_id)
+
+ if ball_table_bounce: # or ball_cup_table_cont or ball_wall_con
+ self.ball_table_contact = True
+
+ ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+ self.cup_collision_ids)
+ if ball_cup_cont:
+ self.ball_cup_contact = True
+
+ ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
+ if ball_wall_cont and not self.ball_table_contact:
+ self.ball_wall_contact = True
+
+ ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.ground_collision_id)
+ if ball_ground_contact and not self.ball_table_contact:
+ self.ball_ground_contact = True
+
+ self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ if env._steps == env.ep_length - 1 or self._is_collided:
- if step == self.sim_time - 1:
min_dist = np.min(self.dists)
- dist_final = self.dists_final[-1]
- # dist_ctxt = self.dists_ctxt[-1]
- # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
- cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
- reward = np.exp(-1 * cost) - 1e-4 * action_cost
- success = dist_final < 0.05 and self.dist_ctxt < 0.05
+ ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
+
+ cost_offset = 0
+
+ if self.ball_ground_contact: # or self.ball_wall_contact:
+ cost_offset += 2
+
+ if not self.ball_table_contact:
+ cost_offset += 2
+
+ if not ball_in_cup:
+ cost_offset += 2
+ cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost # + min_dist ** 2
+ else:
+ if self.ball_cup_contact:
+ cost_offset += 1
+ cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
+
+ reward = - 1*cost - self.collision_penalty * int(self._is_collided)
+ success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
else:
reward = - 1e-4 * action_cost
success = False
- return reward, success, stop_sim
+ infos = {}
+ infos["success"] = success
+ infos["is_collided"] = self._is_collided
+ infos["ball_pos"] = ball_pos.copy()
+ infos["action_cost"] = 5e-4 * action_cost
- def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
- if not ball_in_cup:
- cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
- else:
- cost = 2 * dist_to_ctxt ** 2
- print('Context Distance:', dist_to_ctxt)
- return cost
+ return reward, infos
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
+ def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
if collision or collision_trans:
return True
return False
- def check_collision(self, sim):
+ def _check_collision_with_itself(self, sim, collision_ids):
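+        # returns True if any two distinct geoms from collision_ids are currently in contact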
+ col_1, col_2 = False, False
+ for j, id in enumerate(collision_ids):
+ col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+ if j != len(collision_ids) - 1:
+ col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+ else:
+ col_2 = False
+        collision = col_1 or col_2
+ return collision
+
+ def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
for coni in range(0, sim.data.ncon):
con = sim.data.contact[coni]
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
+ collision = con.geom1 in id_list and con.geom2 == id_1
+ collision_trans = con.geom1 == id_1 and con.geom2 in id_list
if collision or collision_trans:
return True
- return False
+ return False
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
new file mode 100644
index 0000000..9d1d878
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -0,0 +1,169 @@
+import numpy as np
+
+
+class BeerPongReward:
+ def __init__(self):
+
+ self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+ "wrist_pitch_link_convex_decomposition_p1_geom",
+ "wrist_pitch_link_convex_decomposition_p2_geom",
+ "wrist_pitch_link_convex_decomposition_p3_geom",
+ "wrist_yaw_link_convex_decomposition_p1_geom",
+ "wrist_yaw_link_convex_decomposition_p2_geom",
+ "forearm_link_convex_decomposition_p1_geom",
+ "forearm_link_convex_decomposition_p2_geom",
+ "upper_arm_link_convex_decomposition_p1_geom",
+ "upper_arm_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p1_geom",
+ "shoulder_link_convex_decomposition_p2_geom",
+ "shoulder_link_convex_decomposition_p3_geom",
+ "base_link_convex_geom", "table_contact_geom"]
+
+ self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+ "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+ # "cup_base_table", "cup_base_table_contact",
+ "cup_geom_table15",
+ "cup_geom_table16",
+ "cup_geom_table17", "cup_geom1_table8",
+ # "cup_base_table_contact",
+ # "cup_base_table"
+ ]
+
+
+ self.ball_traj = None
+ self.dists = None
+ self.dists_final = None
+ self.costs = None
+ self.action_costs = None
+ self.angle_rewards = None
+ self.cup_angles = None
+ self.cup_z_axes = None
+ self.collision_penalty = 500
+ self.reset(None)
+
+ def reset(self, noisy):
+ self.ball_traj = []
+ self.dists = []
+ self.dists_final = []
+ self.costs = []
+ self.action_costs = []
+ self.angle_rewards = []
+ self.cup_angles = []
+ self.cup_z_axes = []
+ self.ball_ground_contact = False
+ self.ball_table_contact = False
+ self.ball_wall_contact = False
+ self.ball_cup_contact = False
+ self.noisy_bp = noisy
+ self._t_min_final_dist = -1
+
+ def compute_reward(self, env, action):
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+ self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+ self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+ self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+ self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+ self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+ self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+ self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+ self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+
+ goal_pos = env.sim.data.site_xpos[self.goal_id]
+ ball_pos = env.sim.data.body_xpos[self.ball_id]
+ ball_vel = env.sim.data.body_xvelp[self.ball_id]
+ goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+ self.dists.append(np.linalg.norm(goal_pos - ball_pos))
+ self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
+
+ action_cost = np.sum(np.square(action))
+ self.action_costs.append(action_cost)
+
+ self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+ if env._steps == env.ep_length - 1 or self._is_collided:
+
+ min_dist = np.min(self.dists)
+ ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.table_collision_id)
+ ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+ self.cup_collision_ids)
+ ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.wall_collision_id)
+ ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+ self.cup_table_collision_id)
+ if not ball_in_cup:
+ cost_offset = 2
+ if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
+ cost_offset += 2
+ cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
+ else:
+ cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
+
+ reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+ success = ball_in_cup
+ crash = self._is_collided
+ else:
+ reward = - 1e-7 * action_cost
+ cost = 0
+ success = False
+ crash = False
+
+ infos = {}
+ infos["success"] = success
+ infos["is_collided"] = self._is_collided
+ infos["ball_pos"] = ball_pos.copy()
+ infos["ball_vel"] = ball_vel.copy()
+ infos["action_cost"] = 5e-4 * action_cost
+ infos["task_cost"] = cost
+
+ return reward, infos
+
+ def get_cost_offset(self):
+ if self.ball_ground_contact:
+ return 200
+
+ if not self.ball_table_contact:
+ return 100
+
+ if not self.ball_in_cup:
+ return 50
+
+ if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
+ return 10
+
+ return 0
+
+ def _check_collision_single_objects(self, sim, id_1, id_2):
+ for coni in range(0, sim.data.ncon):
+ con = sim.data.contact[coni]
+
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+
+ if collision or collision_trans:
+ return True
+ return False
+
+ def _check_collision_with_itself(self, sim, collision_ids):
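+        # returns True if any two distinct geoms from collision_ids are currently in contact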
+ col_1, col_2 = False, False
+ for j, id in enumerate(collision_ids):
+ col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+ if j != len(collision_ids) - 1:
+ col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+ else:
+ col_2 = False
+        collision = col_1 or col_2
+ return collision
+
+ def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
+ for coni in range(0, sim.data.ncon):
+ con = sim.data.contact[coni]
+
+ collision = con.geom1 in id_list and con.geom2 == id_1
+ collision_trans = con.geom1 == id_1 and con.geom2 in id_list
+
+ if collision or collision_trans:
+ return True
+ return False
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
new file mode 100644
index 0000000..87c6b7e
--- /dev/null
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -0,0 +1,39 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ # TODO: @Max Filter observations correctly
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ # [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
+ @property
+ def start_pos(self):
+ return self._start_pos
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qpos[0:7].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qvel[0:7].copy()
+
+ @property
+ def goal_pos(self):
+ # TODO: @Max I think the default value of returning to the start is reasonable here
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
new file mode 100644
index 0000000..3b05c27
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
new file mode 100644
index 0000000..5ff94a2
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
new file mode 100644
index 0000000..c548448
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
new file mode 100644
index 0000000..495160d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..b4bb322
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..7b2f001
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..f05174e
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
new file mode 100644
index 0000000..eb252d9
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
new file mode 100644
index 0000000..0a986fa
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c039f0d
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..250acaf
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
new file mode 100644
index 0000000..993d0f7
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
new file mode 100644
index 0000000..8448a3f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
new file mode 100644
index 0000000..c36f88f
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
new file mode 100644
index 0000000..d00cac1
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
new file mode 100644
index 0000000..13d2f73
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
new file mode 100644
index 0000000..0d95239
Binary files /dev/null and b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl differ
diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py
new file mode 100644
index 0000000..c5e6d2f
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/__init__.py
@@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
new file mode 100644
index 0000000..86d5a00
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@@ -0,0 +1,38 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+ @property
+ def active_obs(self):
+ # TODO: @Max Filter observations correctly
+ return np.hstack([
+ [True] * 7, # Joint Pos
+ [True] * 3, # Ball pos
+ [True] * 3 # goal pos
+ ])
+
+ @property
+ def start_pos(self):
+        return self.init_qpos_tt
+
+ @property
+ def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qpos[:7].copy()
+
+ @property
+ def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+ return self.sim.data.qvel[:7].copy()
+
+ @property
+ def goal_pos(self):
+ # TODO: @Max I think the default value of returning to the start is reasonable here
+ raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+ @property
+ def dt(self) -> Union[float, int]:
+ return self.env.dt
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
new file mode 100644
index 0000000..635d49d
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -0,0 +1,168 @@
+import os
+
+import numpy as np
+import mujoco_py
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+
+from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
+from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
+
+#TODO: Check for simulation stability. Make sure the code runs even for sim crash
+
+MAX_EPISODE_STEPS = 1375
+BALL_NAME_CONTACT = "target_ball_contact"
+BALL_NAME = "target_ball"
+TABLE_NAME = 'table_tennis_table'
+FLOOR_NAME = 'floor'
+PADDLE_CONTACT_1_NAME = 'bat'
+PADDLE_CONTACT_2_NAME = 'bat_back'
+RACKET_NAME = 'bat'
+# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
+CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
+CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
+
+class TT_Env_Gym(MujocoEnv, utils.EzPickle):
+
+ def __init__(self, ctxt_dim=2):
+ model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
+
+ self.ctxt_dim = ctxt_dim
+ if ctxt_dim == 2:
+ self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
+ self.goal = np.zeros(3) # 2 x,y + 1z
+ elif ctxt_dim == 4:
+ self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
+ self.goal = np.zeros(3)
+ else:
+ raise ValueError("either 2 or 4 dimensional Contexts available")
+
+ action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+ action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+ self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+
+ self.time_steps = 0
+ self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
+ self.init_qvel_tt = np.zeros(10)
+
+ self.reward_func = TT_Reward(self.ctxt_dim)
+ self.ball_landing_pos = None
+ self.hited_ball = False
+ self.ball_contact_after_hit = False
+ self._ids_set = False
+ super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+ self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
+ self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+ self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+ self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+ self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
+ self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
+ self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+
+ def _set_ids(self):
+ self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
+ self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+ self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+ self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
+ self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
+ self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+ self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+ self._ids_set = True
+
+ def _get_obs(self):
+ ball_pos = self.sim.data.body_xpos[self.ball_id]
+ obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
+ ball_pos,
+ self.goal.copy()])
+ return obs
+
+ def sample_context(self):
+ return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+
+ def reset_model(self):
+ self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state
+ self.time_steps = 0
+ self.ball_landing_pos = None
+ self.hited_ball = False
+ self.ball_contact_after_hit = False
+ self.goal = self.sample_context()[:2]
+ if self.ctxt_dim == 2:
+ initial_ball_state = ball_init(random=False) # fixed velocity, fixed position
+ elif self.ctxt_dim == 4:
+            initial_ball_state = ball_init(random=False)  # TODO: dedicated 4D-context initialization (was: raise NotImplementedError)
+
+ self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
+ self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
+ self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
+
+ self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
+ self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
+ self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
+
+ z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
+ self.goal = z_extended_goal_pos
+ self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow
+ self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green
+ self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red
+ self.sim.forward()
+
+ self.reward_func.reset(self.goal) # reset the reward function
+ return self._get_obs()
+
+ def _contact_checker(self, id_1, id_2):
+ for coni in range(0, self.sim.data.ncon):
+ con = self.sim.data.contact[coni]
+ collision = con.geom1 == id_1 and con.geom2 == id_2
+ collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+ if collision or collision_trans:
+ return True
+ return False
+
+ def step(self, action):
+ if not self._ids_set:
+ self._set_ids()
+ done = False
+        episode_end = False if self.time_steps + 1 < MAX_EPISODE_STEPS else True
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
new file mode 100644
index 0000000..7e04c28
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
@@ -0,0 +1,103 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
new file mode 100644
index 0000000..c313489
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
@@ -0,0 +1,30 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
new file mode 100644
index 0000000..19543e2
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
@@ -0,0 +1,10 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
new file mode 100644
index 0000000..25d0366
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
@@ -0,0 +1,47 @@
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
new file mode 100644
index 0000000..e349992
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
@@ -0,0 +1,46 @@
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
new file mode 100644
index 0000000..9609a6e
--- /dev/null
+++ b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
@@ -0,0 +1,18 @@
\ No newline at end of file
diff --git a/alr_envs/dmc/dmc_wrapper.py b/alr_envs/dmc/dmc_wrapper.py
index 10f1af9..aa6c7aa 100644
--- a/alr_envs/dmc/dmc_wrapper.py
+++ b/alr_envs/dmc/dmc_wrapper.py
@@ -15,10 +15,10 @@ def _spec_to_box(spec):
assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
dim = int(np.prod(s.shape))
if type(s) == specs.Array:
- bound = np.inf * np.ones(dim, dtype=np.float32)
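+            # keep the spec's own dtype instead of forcing float32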
+ bound = np.inf * np.ones(dim, dtype=s.dtype)
return -bound, bound
elif type(s) == specs.BoundedArray:
- zeros = np.zeros(dim, dtype=np.float32)
+ zeros = np.zeros(dim, dtype=s.dtype)
return s.minimum + zeros, s.maximum + zeros
mins, maxs = [], []
@@ -29,7 +29,7 @@ def _spec_to_box(spec):
low = np.concatenate(mins, axis=0)
high = np.concatenate(maxs, axis=0)
assert low.shape == high.shape
- return spaces.Box(low, high, dtype=np.float32)
+ return spaces.Box(low, high, dtype=s.dtype)
def _flatten_obs(obs: collections.MutableMapping):
@@ -113,7 +113,7 @@ class DMCWrapper(core.Env):
if self._channels_first:
obs = obs.transpose(2, 0, 1).copy()
else:
- obs = _flatten_obs(time_step.observation)
+ obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
return obs
@property