Working Beerpong and TableTennis with MPWrapper

2021-11-05 11:04:04 +01:00 · 2021-11-05 11:04:04 +01:00 · 598a7d5512
commit 598a7d5512
parent ac969d490a
37 changed files with 1261 additions and 149 deletions
--- a/alr_envs/alr/init.py
+++ b/alr_envs/alr/init.py
@ -1,3 +1,4 @@
+import numpy as np
 from gym import register

 from . import classic_control, mujoco
@ -193,6 +194,32 @@ register(
    }
 )

+## Table Tennis
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
+register(id='TableTennis2DCtxt-v0',
+         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim':2})
+
+register(id='TableTennis4DCtxt-v0',
+         entry_point='alr_envs.alr.mujoco:TT_Env_Gym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim':4})
+
+## BeerPong
+difficulties = ["simple", "intermediate", "hard", "hardest"]
+
+for v, difficulty in enumerate(difficulties):
+    register(
+        id='ALRBeerPong-v{}'.format(v),
+        entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+        max_episode_steps=600,
+        kwargs={
+            "difficulty": difficulty,
+            "reward_type": "staged",
+        }
+    )
+
 # Motion Primitive Environments

 ## Simple Reacher
@ -327,3 +354,59 @@ for _v in _versions:
        }
    )
    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id)
+
+## Beerpong
+register(
+    id='BeerpongDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "alr_envs:ALRBeerPong-v0",
+        "wrappers": [mujoco.beerpong.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 7,
+            "num_basis": 2,
+            "n_zero_bases": 2,
+            "duration": 0.5,
+            "post_traj_time": 2.5,
+            "width": 0.01,
+            "off": 0.01,
+            "policy_type": "motor",
+            "weights_scale": 0.08,
+            "zero_start": True,
+            "zero_goal": False,
+            "policy_kwargs": {
+                "p_gains": np.array([       1.5,   5,   2.55,    3,   2.,    2,   1.25]),
+                "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+            }
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("BeerpongDetPMP-v0")
+
+## Table Tennis
+register(
+    id='TableTennisDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "alr_envs:TableTennis4DCtxt-v0",
+        "wrappers": [mujoco.table_tennis.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 7,
+            "num_basis": 2,
+            "n_zero_bases": 2,
+            "duration": 1.25,
+            "post_traj_time": 4.5,
+            "width": 0.01,
+            "off": 0.01,
+            "policy_type": "motor",
+            "weights_scale": 1.0,
+            "zero_start": True,
+            "zero_goal": False,
+            "policy_kwargs": {
+                "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]),
+                "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
+            }
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("TableTennisDetPMP-v0")
--- a/alr_envs/alr/mujoco/init.py
+++ b/alr_envs/alr/mujoco/init.py
@ -1,4 +1,6 @@
 from .reacher.alr_reacher import ALRReacherEnv
 from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
-from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
+from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
+from .table_tennis.tt_gym import TT_Env_Gym
+from .beerpong.beerpong import ALRBeerBongEnv
--- a/alr_envs/alr/mujoco/beerpong/init.py
+++ b/alr_envs/alr/mujoco/beerpong/init.py
@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
+++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
@ -0,0 +1,187 @@
+<mujoco model="wam(v1.31)">
+    <compiler angle="radian" meshdir="../../meshes/wam/" />
+    <option timestep="0.005" integrator="Euler" />
+    <size njmax="500" nconmax="100" />
+    <default class="main">
+        <joint limited="true" frictionloss="0.001" damping="0.07"/>
+        <default class="viz">
+            <geom type="mesh" contype="0" conaffinity="0" group="1" rgba="0.7 0.7 0.7 1" />
+        </default>
+        <default class="col">
+            <geom type="mesh" contype="0" rgba="0.5 0.6 0.7 1" />
+        </default>
+    </default>
+    <asset>
+        <texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.25 0.26 0.25" rgb2="0.22 0.22 0.22" markrgb="0.3 0.3 0.3" width="100" height="100" />
+        <material name="MatGnd" texture="groundplane" texrepeat="5 5" specular="1" shininess="0.3" reflectance="1e-05" />
+        <mesh name="base_link_fine" file="base_link_fine.stl" />
+        <mesh name="base_link_convex" file="base_link_convex.stl" />
+        <mesh name="shoulder_link_fine" file="shoulder_link_fine.stl" />
+        <mesh name="shoulder_link_convex_decomposition_p1" file="shoulder_link_convex_decomposition_p1.stl" />
+        <mesh name="shoulder_link_convex_decomposition_p2" file="shoulder_link_convex_decomposition_p2.stl" />
+        <mesh name="shoulder_link_convex_decomposition_p3" file="shoulder_link_convex_decomposition_p3.stl" />
+        <mesh name="shoulder_pitch_link_fine" file="shoulder_pitch_link_fine.stl" />
+        <mesh name="shoulder_pitch_link_convex" file="shoulder_pitch_link_convex.stl" />
+        <mesh name="upper_arm_link_fine" file="upper_arm_link_fine.stl" />
+        <mesh name="upper_arm_link_convex_decomposition_p1" file="upper_arm_link_convex_decomposition_p1.stl" />
+        <mesh name="upper_arm_link_convex_decomposition_p2" file="upper_arm_link_convex_decomposition_p2.stl" />
+        <mesh name="elbow_link_fine" file="elbow_link_fine.stl" />
+        <mesh name="elbow_link_convex" file="elbow_link_convex.stl" />
+        <mesh name="forearm_link_fine" file="forearm_link_fine.stl" />
+        <mesh name="forearm_link_convex_decomposition_p1" file="forearm_link_convex_decomposition_p1.stl" />
+        <mesh name="forearm_link_convex_decomposition_p2" file="forearm_link_convex_decomposition_p2.stl" />
+        <mesh name="wrist_yaw_link_fine" file="wrist_yaw_link_fine.stl" />
+        <mesh name="wrist_yaw_link_convex_decomposition_p1" file="wrist_yaw_link_convex_decomposition_p1.stl" />
+        <mesh name="wrist_yaw_link_convex_decomposition_p2" file="wrist_yaw_link_convex_decomposition_p2.stl" />
+        <mesh name="wrist_pitch_link_fine" file="wrist_pitch_link_fine.stl" />
+        <mesh name="wrist_pitch_link_convex_decomposition_p1" file="wrist_pitch_link_convex_decomposition_p1.stl" />
+        <mesh name="wrist_pitch_link_convex_decomposition_p2" file="wrist_pitch_link_convex_decomposition_p2.stl" />
+        <mesh name="wrist_pitch_link_convex_decomposition_p3" file="wrist_pitch_link_convex_decomposition_p3.stl" />
+        <mesh name="wrist_palm_link_fine" file="wrist_palm_link_fine.stl" />
+        <mesh name="wrist_palm_link_convex" file="wrist_palm_link_convex.stl" />
+        <mesh name="cup1" file="cup_split1.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup2" file="cup_split2.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup3" file="cup_split3.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup4" file="cup_split4.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup5" file="cup_split5.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup6" file="cup_split6.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup7" file="cup_split7.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup8" file="cup_split8.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup9" file="cup_split9.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup10" file="cup_split10.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup11" file="cup_split11.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup12" file="cup_split12.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup13" file="cup_split13.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup14" file="cup_split14.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup15" file="cup_split15.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup16" file="cup_split16.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup17" file="cup_split17.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup18" file="cup_split18.stl" scale="0.001 0.001 0.001" />
+        <mesh name="cup3_table" file="cup_split3.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup4_table" file="cup_split4.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup5_table" file="cup_split5.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup6_table" file="cup_split6.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup7_table" file="cup_split7.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup8_table" file="cup_split8.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup9_table" file="cup_split9.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup10_table" file="cup_split10.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup15_table" file="cup_split15.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup16_table" file="cup_split16.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup17_table" file="cup_split17.stl" scale="0.00211 0.00211 0.01" />
+        <mesh name="cup18_table" file="cup_split18.stl" scale="0.00211 0.00211 0.01" />
+    </asset>
+
+    <worldbody>
+        <geom name="ground" size="5 5 1" type="plane" material="MatGnd" />
+        <light pos="0.1 0.2 1.3" dir="-0.0758098 -0.32162 -0.985527" directional="true" cutoff="60" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1" />
+
+        <body name="wam/base_link" pos="0 0 0.6">
+            <inertial pos="6.93764e-06 0.0542887 0.076438" quat="0.496481 0.503509 -0.503703 0.496255" mass="27.5544" diaginertia="0.432537 0.318732 0.219528" />
+            <geom class="viz" quat="0.707107 0 0 -0.707107" mesh="base_link_fine" />
+            <geom class="col" quat="0.707107 0 0 -0.707107" mesh="base_link_convex" name="base_link_convex_geom"/>
+            <body name="wam/shoulder_yaw_link" pos="0 0 0.16" quat="0.707107 0 0 -0.707107">
+                <inertial pos="-0.00443422 -0.00066489 -0.12189" quat="0.999995 0.000984795 0.00270132 0.00136071" mass="10.7677" diaginertia="0.507411 0.462983 0.113271" />
+                <joint name="wam/base_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.6 2.6" />
+                <geom class="viz" pos="0 0 0.186" mesh="shoulder_link_fine" />
+                <geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p1" name="shoulder_link_convex_decomposition_p1_geom"/>
+                <geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p2" name="shoulder_link_convex_decomposition_p2_geom"/>
+                <geom class="col" pos="0 0 0.186" mesh="shoulder_link_convex_decomposition_p3" name="shoulder_link_convex_decomposition_p3_geom"/>
+                <body name="wam/shoulder_pitch_link" pos="0 0 0.184" quat="0.707107 -0.707107 0 0">
+                    <inertial pos="-0.00236983 -0.0154211 0.0310561" quat="0.961781 -0.272983 0.0167269 0.0133385" mass="3.87494" diaginertia="0.0214207 0.0167101 0.0126465" />
+                    <joint name="wam/shoulder_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.985 1.985" />
+                    <geom class="viz" mesh="shoulder_pitch_link_fine" />
+                    <geom class="col" mesh="shoulder_pitch_link_convex" />
+                    <body name="wam/upper_arm_link" pos="0 -0.505 0" quat="0.707107 0.707107 0 0">
+                        <inertial pos="-0.0382586 3.309e-05 -0.207508" quat="0.705455 0.0381914 0.0383402 0.706686" mass="1.80228" diaginertia="0.0665697 0.0634285 0.00622701" />
+                        <joint name="wam/shoulder_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.8 2.8" />
+                        <geom class="viz" pos="0 0 -0.505" mesh="upper_arm_link_fine" />
+                        <geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p1" name="upper_arm_link_convex_decomposition_p1_geom"/>
+                        <geom class="col" pos="0 0 -0.505" mesh="upper_arm_link_convex_decomposition_p2" name="upper_arm_link_convex_decomposition_p2_geom"/>
+                        <body name="wam/forearm_link" pos="0.045 0 0.045" quat="0.707107 -0.707107 0 0">
+                            <inertial pos="0.00498512 -0.132717 -0.00022942" quat="0.546303 0.447151 -0.548676 0.447842" mass="2.40017" diaginertia="0.0196896 0.0152225 0.00749914" />
+                            <joint name="wam/elbow_pitch_joint" pos="0 0 0" axis="0 0 1" range="-0.9 3.14159" />
+                            <geom class="viz" mesh="elbow_link_fine" />
+                            <geom class="col" mesh="elbow_link_convex" />
+                            <geom class="viz" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_fine" />
+                            <geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p1" name="forearm_link_convex_decomposition_p1_geom" />
+                            <geom class="col" pos="-0.045 -0.073 0" quat="0.707388 0.706825 0 0" mesh="forearm_link_convex_decomposition_p2" name="forearm_link_convex_decomposition_p2_geom" />
+                            <body name="wam/wrist_yaw_link" pos="-0.045 0 0" quat="0.707107 0.707107 0 0">
+                                <inertial pos="8.921e-05 0.00435824 -0.00511217" quat="0.708528 -0.000120667 0.000107481 0.705683" mass="0.12376" diaginertia="0.0112011 0.0111887 7.58188e-05" />
+                                <joint name="wam/wrist_yaw_joint" pos="0 0 0" axis="0 0 1" range="-4.55 1.25" />
+                                <geom class="viz" pos="0 0 0.3" mesh="wrist_yaw_link_fine" />
+                                <geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p1" name="wrist_yaw_link_convex_decomposition_p1_geom" />
+                                <geom class="col" pos="0 0 0.3" mesh="wrist_yaw_link_convex_decomposition_p2" name="wrist_yaw_link_convex_decomposition_p2_geom" />
+                                <body name="wam/wrist_pitch_link" pos="0 0 0.3" quat="0.707107 -0.707107 0 0">
+                                    <inertial pos="-0.00012262 -0.0246834 -0.0170319" quat="0.994687 -0.102891 0.000824211 -0.00336105" mass="0.417974" diaginertia="0.000555166 0.000463174 0.00023407" />
+                                    <joint name="wam/wrist_pitch_joint" pos="0 0 0" axis="0 0 1" range="-1.5707 1.5707" />
+                                    <geom class="viz" mesh="wrist_pitch_link_fine" />
+                                    <geom class="col" mesh="wrist_pitch_link_convex_decomposition_p1" name="wrist_pitch_link_convex_decomposition_p1_geom" />
+                                    <geom class="col" mesh="wrist_pitch_link_convex_decomposition_p2" name="wrist_pitch_link_convex_decomposition_p2_geom" />
+                                    <geom class="col" mesh="wrist_pitch_link_convex_decomposition_p3" name="wrist_pitch_link_convex_decomposition_p3_geom" />
+                                    <body name="wam/wrist_palm_link" pos="0 -0.06 0" quat="0.707107 0.707107 0 0">
+                                        <inertial pos="-7.974e-05 -0.00323552 -0.00016313" quat="0.594752 0.382453 0.382453 0.594752" mass="0.0686475" diaginertia="7.408e-05 3.81466e-05 3.76434e-05" />
+                                        <joint name="wam/palm_yaw_joint" pos="0 0 0" axis="0 0 1" range="-2.7 2.7" />
+                                        <geom class="viz" pos="0 0 -0.06" mesh="wrist_palm_link_fine" />
+                                        <geom class="col" pos="0 0 -0.06" mesh="wrist_palm_link_convex" name="wrist_palm_link_convex_geom" />
+                                        <site name="init_ball_pos_site" size="0.025 0.025 0.025" pos="0.0 0.0 0.035" rgba="0 1 0 1"/>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+        <body name="table_body" pos="0 -1.85 0.4025">
+            <geom name="table" type="box" size="0.4 0.6 0.4" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+            <geom name="table_contact_geom" type="box" size="0.4 0.6 0.1" pos="0 0 0.31" rgba="1.4 0.8 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+        </body>
+        <geom name="table_robot" type="box" size="0.1 0.1 0.3" pos="0 0.00 0.3025" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+        <geom name="wall" type="box" quat="1 0 0 0" size="0.4 0.04 1.1" pos="0. -2.45 1.1" rgba="0.8 0.655 0.45 1" solimp="0.999 0.999 0.001"
+                  solref="-10000 -100"/>
+
+        <body name="cup_table" pos="0.32 -1.55 0.84" quat="0.7071068 0.7071068 0 0">
+            <inertial pos="-3.75236e-10 8.27811e-05 0.0947015" quat="0.999945 -0.0104888 0 0" mass="10.132" diaginertia="0.000285643 0.000270485 9.65696e-05" />
+            <geom priority= "1" name="cup_geom_table3" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup3_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table4" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup4_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table5" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup5_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table6" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup6_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table7" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup7_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup8_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table9" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup9_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table10" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -010" type="mesh" mesh="cup10_table" mass="10"/>
+            <geom priority= "1" name="cup_base_table" pos="0 -0.035 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.08 0.045" solref="-10000 -100" mass="10"/>
+            <geom priority= "1" name="cup_base_table_contact" pos="0 0.015 0.1337249" euler="-1.57 0 0" type="cylinder" size="0.07 0.01" solref="-10000 -100" rgba="0 0 255 1" mass="10"/>
+            <geom priority= "1" name="cup_geom_table15" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup15_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table16" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup16_table" mass="10"/>
+            <geom priority= "1" name="cup_geom_table17" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup17_table" mass="10"/>
+            <geom priority= "1" name="cup_geom1_table8" pos="0 0.1 0.001" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup18_table" mass="10"/>
+            <site name="cup_goal_table" pos="0 0.11 0.1337249" rgba="255 0 0 1"/>
+            <site name="cup_goal_final_table" pos="0.0 0.025 0.1337249" rgba="0 255 0 1"/>
+        </body>
+        <body name="ball" pos="0 0 0">
+            <joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
+            <joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
+            <joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0" limited="false"/>
+            <geom priority= "1" size="0.025 0.025 0.025" type="sphere" condim="4" name="ball_geom" rgba="0.8 0.2 0.1 1" mass="0.1"
+                  friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="-10000 -10"/>
+            <site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
+        </body>
+<!--        <camera name="visualization" mode="targetbody" target="wam/wrist_yaw_link" pos="1.5 -0.4 2.2"/>-->
+<!--        <camera name="experiment" mode="fixed" quat="0.44418059 0.41778323 0.54301123 0.57732103" pos="1.5 -0.3 1.33" />-->
+        <camera name="visualization" mode="fixed" euler="2.35 2.0 -0.75" pos="2.0 -0.0 1.85"/>
+    </worldbody>
+     <actuator>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0" joint="wam/base_yaw_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="wam/shoulder_pitch_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="50.0" joint="wam/shoulder_yaw_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0" joint="wam/elbow_pitch_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_yaw_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0" joint="wam/wrist_pitch_joint"/>
+        <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0" joint="wam/palm_yaw_joint"/>
+    </actuator>
+
+</mujoco>
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@ -1,3 +1,4 @@
+import mujoco_py.builder
 import os

 import numpy as np
@ -5,44 +6,67 @@ from gym import utils
 from gym.envs.mujoco import MujocoEnv


-
-class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
-    def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
-        utils.EzPickle.__init__(**locals())
-        MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
+class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, reward_type: str = "staged", noisy=False,
+                 context: np.ndarray = None, difficulty='simple'):
        self._steps = 0

        self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
-                                     "beerpong" + ".xml")
-
-        self.start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
-        self.start_vel = np.zeros(7)
-
-        self._q_pos = []
-        self._q_vel = []
-        # self.weight_matrix_scale = 50
-        self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-        self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
-        self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
+                                     "beerpong_wo_cup" + ".xml")

        self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
        self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])

-        self.context = None
-        # alr_mujoco_env.AlrMujocoEnv.__init__(self,
-        #                                      self.xml_path,
-        #                                      apply_gravity_comp=apply_gravity_comp,
-        #                                      n_substeps=n_substeps)
+        self.context = context
+        self.apply_gravity_comp = apply_gravity_comp
+        self.add_noise = noisy

-        self.sim_time = 8  # seconds
-        self.sim_steps = int(self.sim_time / self.dt)
-        if reward_function is None:
-            from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward
-            reward_function = BeerpongReward
-        self.reward_function = reward_function(self.sim, self.sim_steps)
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+        self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
+        self._start_vel = np.zeros(7)
+
+        self.ball_site_id = 0
+        self.ball_id = 11
+
+        self._release_step = 100  # time step of ball release
+
+        self.sim_time = 4  # seconds
+        self.ep_length = 600  # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt)
+        self.cup_table_id = 10
+
+        if noisy:
+            self.noise_std = 0.01
+        else:
+            self.noise_std = 0
+
+        if difficulty == 'simple':
+            self.cup_goal_pos = np.array([0, -1.7, 0.840])
+        elif difficulty == 'intermediate':
+            self.cup_goal_pos = np.array([0.3, -1.5, 0.840])
+        elif difficulty == 'hard':
+            self.cup_goal_pos = np.array([-0.3, -2.2, 0.840])
+        elif difficulty == 'hardest':
+            self.cup_goal_pos = np.array([-0.3, -1.2, 0.840])
+
+        if reward_type == "no_context":
+            from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerPongReward
+            reward_function = BeerPongReward
+        elif reward_type == "staged":
+            from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
+            reward_function = BeerPongReward
+        else:
+            raise ValueError("Unknown reward type: {}".format(reward_type))
+        self.reward_function = reward_function()
+
+        MujocoEnv.__init__(self, self.xml_path, frame_skip)
+        utils.EzPickle.__init__(self)
+
+    @property
+    def start_pos(self):
+        return self._start_pos
+
+    @property
+    def start_vel(self):
+        return self._start_vel

    @property
    def current_pos(self):
@ -52,9 +76,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
    def current_vel(self):
        return self.sim.data.qvel[0:7].copy()

-    def configure(self, context):
-        self.context = context
-        self.reward_function.reset(context)
+    def reset(self):
+        self.reward_function.reset(self.add_noise)
+        return super().reset()

    def reset_model(self):
        init_pos_all = self.init_qpos.copy()
@ -62,19 +86,14 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
        init_vel = np.zeros_like(init_pos_all)

        self._steps = 0
-        self._q_pos = []
-        self._q_vel = []

        start_pos = init_pos_all
        start_pos[0:7] = init_pos_robot
-        # start_pos[7:] = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])

        self.set_state(start_pos, init_vel)
-
-        ball_pos = np.copy(self.sim.data.site_xpos[self.cup_robot_id, :]) + np.array([0., 0.0, 0.05])
-        self.sim.model.body_pos[self.ball_id] = ball_pos.copy()
-        self.sim.model.body_pos[self.cup_table_id] = self.context.copy()
-
+        self.sim.model.body_pos[self.cup_table_id] = self.cup_goal_pos
+        start_pos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+        self.set_state(start_pos, init_vel)
        return self._get_obs()

    def step(self, a):
@ -82,32 +101,55 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
        angular_vel = 0.0
        reward_ctrl = - np.square(a).sum()

-        crash = self.do_simulation(a)
-        joint_cons_viol = self.check_traj_in_joint_limits()
-
-        self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
-        self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
+        if self.apply_gravity_comp:
+            a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+        try:
+            self.do_simulation(a, self.frame_skip)
+            if self._steps < self._release_step:
+                self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
+                self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
+            elif self._steps == self._release_step and self.add_noise:
+                 self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
+            crash = False
+        except mujoco_py.builder.MujocoException:
+            crash = True
+        # joint_cons_viol = self.check_traj_in_joint_limits()

        ob = self._get_obs()

-        if not crash and not joint_cons_viol:
-            reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
-            done = success or self._steps == self.sim_steps - 1 or stop_sim
+        if not crash:
+            reward, reward_infos = self.reward_function.compute_reward(self, a)
+            success = reward_infos['success']
+            is_collided = reward_infos['is_collided']
+            ball_pos = reward_infos['ball_pos']
+            ball_vel = reward_infos['ball_vel']
+            done = is_collided or self._steps == self.ep_length - 1
            self._steps += 1
        else:
-            reward = -1000
+            reward = -30
            success = False
+            is_collided = False
            done = True
+            ball_pos = np.zeros(3)
+            ball_vel = np.zeros(3)
+
        return ob, reward, done, dict(reward_dist=reward_dist,
                                      reward_ctrl=reward_ctrl,
+                                      reward=reward,
                                      velocity=angular_vel,
-                                      traj=self._q_pos, is_success=success,
-                                      is_collided=crash or joint_cons_viol)
+                                      # traj=self._q_pos,
+                                      action=a,
+                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                                      ball_pos=ball_pos,
+                                      ball_vel=ball_vel,
+                                      is_success=success,
+                                      is_collided=is_collided, sim_crash=crash)

    def check_traj_in_joint_limits(self):
        return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)

-    # TODO
+    # TODO: extend observation space
    def _get_obs(self):
        theta = self.sim.data.qpos.flat[:7]
        return np.concatenate([
@ -118,28 +160,26 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
        ])

    # TODO
+    @property
    def active_obs(self):
-        pass
-
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])


 if __name__ == "__main__":
-    env = ALRBeerpongEnv()
-    ctxt = np.array([0, -2, 0.840])    # initial
+    env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest')

-    env.configure(ctxt)
+    # env.configure(ctxt)
    env.reset()
-    env.render()
-    for i in range(16000):
-        # test with random actions
-        ac = 0.0 * env.action_space.sample()[0:7]
-        ac[1] = -0.8
-        ac[3] = - 0.3
-        ac[5] = -0.2
-        # ac = env.start_pos
-        # ac[0] += np.pi/2
+    env.render("human")
+    for i in range(800):
+        ac = 10 * env.action_space.sample()[0:7]
        obs, rew, d, info = env.step(ac)
-        env.render()
+        env.render("human")

        print(rew)

@ -147,4 +187,3 @@ if __name__ == "__main__":
            break

    env.close()
-
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py
@ -1,124 +1,169 @@
 import numpy as np
-from alr_envs.alr.mujoco import alr_reward_fct


-class BeerpongReward(alr_reward_fct.AlrReward):
-    def __init__(self, sim, sim_time):
+class BeerPongReward:
+    def __init__(self):

-        self.sim = sim
-        self.sim_time = sim_time
+        self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+                                        "wrist_pitch_link_convex_decomposition_p1_geom",
+                                        "wrist_pitch_link_convex_decomposition_p2_geom",
+                                        "wrist_pitch_link_convex_decomposition_p3_geom",
+                                        "wrist_yaw_link_convex_decomposition_p1_geom",
+                                        "wrist_yaw_link_convex_decomposition_p2_geom",
+                                        "forearm_link_convex_decomposition_p1_geom",
+                                        "forearm_link_convex_decomposition_p2_geom",
+                                        "upper_arm_link_convex_decomposition_p1_geom",
+                                        "upper_arm_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p1_geom",
+                                        "shoulder_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p3_geom",
+                                        "base_link_convex_geom", "table_contact_geom"]

-        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
-                                  "wrist_pitch_link_convex_decomposition_p1_geom",
-                                  "wrist_pitch_link_convex_decomposition_p2_geom",
-                                  "wrist_pitch_link_convex_decomposition_p3_geom",
-                                  "wrist_yaw_link_convex_decomposition_p1_geom",
-                                  "wrist_yaw_link_convex_decomposition_p2_geom",
-                                  "forearm_link_convex_decomposition_p1_geom",
-                                  "forearm_link_convex_decomposition_p2_geom"]
+        self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+                                      "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+                                      # "cup_base_table", "cup_base_table_contact",
+                                      "cup_geom_table15",
+                                      "cup_geom_table16",
+                                      "cup_geom_table17", "cup_geom1_table8",
+                                      # "cup_base_table_contact",
+                                      # "cup_base_table"
+                                      ]

-        self.ball_id = None
-        self.ball_collision_id = None
-        self.goal_id = None
-        self.goal_final_id = None
-        self.collision_ids = None

        self.ball_traj = None
        self.dists = None
-        self.dists_ctxt = None
        self.dists_final = None
        self.costs = None
-
+        self.action_costs = None
+        self.angle_rewards = None
+        self.cup_angles = None
+        self.cup_z_axes = None
+        self.collision_penalty = 500
        self.reset(None)

    def reset(self, context):
-        self.ball_traj = np.zeros(shape=(self.sim_time, 3))
+        self.ball_traj = []
        self.dists = []
-        self.dists_ctxt = []
        self.dists_final = []
        self.costs = []
-        self.context = context
-        self.ball_in_cup = False
-        self.dist_ctxt = 5
+        self.action_costs = []
+        self.angle_rewards = []
+        self.cup_angles = []
+        self.cup_z_axes = []
+        self.ball_ground_contact = False
+        self.ball_table_contact = False
+        self.ball_wall_contact = False
+        self.ball_cup_contact = False

-        self.ball_id = self.sim.model._body_name2id["ball"]
-        self.ball_collision_id = self.sim.model._geom_name2id["ball_geom"]
-        self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"]
-        self.goal_id = self.sim.model._site_name2id["cup_goal_table"]
-        self.goal_final_id = self.sim.model._site_name2id["cup_goal_final_table"]
-        self.collision_ids = [self.sim.model._geom_name2id[name] for name in self.collision_objects]
-        self.cup_table_id = self.sim.model._body_name2id["cup_table"]
+    def compute_reward(self, env, action):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+        self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+        self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+        self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+        self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+        self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+        self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+        self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+        self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]

-    def compute_reward(self, action, sim, step):
-        action_cost = np.sum(np.square(action))
-
-        stop_sim = False
-        success = False
-
-        if self.check_collision(sim):
-            reward = - 1e-4 * action_cost - 1000
-            stop_sim = True
-            return reward, success, stop_sim
-
-        # Compute the current distance from the ball to the inner part of the cup
-        goal_pos = sim.data.site_xpos[self.goal_id]
-        ball_pos = sim.data.body_xpos[self.ball_id]
-        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
-        self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
-        self.ball_traj[step, :] = ball_pos

-        # Determine the first time when ball is in cup
-        if not self.ball_in_cup:
-            ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
-            self.ball_in_cup = ball_in_cup
-            if ball_in_cup:
-                dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
-                self.dist_ctxt = dist_to_ctxt
+        action_cost = np.sum(np.square(action))
+        self.action_costs.append(action_cost)
+
+        ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                 self.table_collision_id)
+
+        if ball_table_bounce:  # or ball_cup_table_cont or ball_wall_con
+            self.ball_table_contact = True
+
+        ball_cup_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+                                                                        self.cup_collision_ids)
+        if ball_cup_cont:
+            self.ball_cup_contact = True
+
+        ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.wall_collision_id)
+        if ball_wall_cont and not self.ball_table_contact:
+            self.ball_wall_contact = True
+
+        ball_ground_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                   self.ground_collision_id)
+        if ball_ground_contact and not self.ball_table_contact:
+            self.ball_ground_contact = True
+
+        self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        if env._steps == env.ep_length - 1 or self._is_collided:

-        if step == self.sim_time - 1:
            min_dist = np.min(self.dists)
-            dist_final = self.dists_final[-1]
-            # dist_ctxt = self.dists_ctxt[-1]

-            # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
-            cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * self.dist_ctxt)
-            reward = np.exp(-1 * cost) - 1e-4 * action_cost
-            success = dist_final < 0.05 and self.dist_ctxt < 0.05
+            ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id)
+
+            cost_offset = 0
+
+            if self.ball_ground_contact:  #  or self.ball_wall_contact:
+                cost_offset += 2
+
+            if not self.ball_table_contact:
+                cost_offset += 2
+            
+            if not ball_in_cup:
+                cost_offset += 2
+                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-4 * action_cost  # + min_dist ** 2
+            else:
+                if self.ball_cup_contact:
+                    cost_offset += 1
+                cost = cost_offset + self.dists_final[-1] ** 2 + 1e-4 * action_cost
+
+            reward = - 1*cost - self.collision_penalty * int(self._is_collided)
+            success = ball_in_cup and not self.ball_ground_contact and not self.ball_wall_contact and not self.ball_cup_contact
        else:
            reward = - 1e-4 * action_cost
            success = False

-        return reward, success, stop_sim
+        infos = {}
+        infos["success"] = success
+        infos["is_collided"] = self._is_collided
+        infos["ball_pos"] = ball_pos.copy()
+        infos["action_cost"] = 5e-4 * action_cost

-    def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
-        if not ball_in_cup:
-            cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
-        else:
-            cost = 2 * dist_to_ctxt ** 2
-            print('Context Distance:', dist_to_ctxt)
-        return cost
+        return reward, infos

-    def check_ball_in_cup(self, sim, ball_collision_id):
-        cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
+    def _check_collision_single_objects(self, sim, id_1, id_2):
        for coni in range(0, sim.data.ncon):
            con = sim.data.contact[coni]

-            collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
-            collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1

            if collision or collision_trans:
                return True
        return False

-    def check_collision(self, sim):
+    def _check_collision_with_itself(self, sim, collision_ids):
+        col_1, col_2 = False, False
+        for j, id in enumerate(collision_ids):
+            col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+            if j != len(collision_ids) - 1:
+                col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+            else:
+                col_2 = False
+        collision = True if col_1 or col_2 else False
+        return collision
+
+    def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
        for coni in range(0, sim.data.ncon):
            con = sim.data.contact[coni]

-            collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
-            collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
+            collision = con.geom1 in id_list and con.geom2 == id_1
+            collision_trans = con.geom1 == id_1 and con.geom2 in id_list

            if collision or collision_trans:
                return True
-        return False
+        return False
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@ -0,0 +1,169 @@
+import numpy as np
+
+
+class BeerPongReward:
+    def __init__(self):
+
+        self.robot_collision_objects = ["wrist_palm_link_convex_geom",
+                                        "wrist_pitch_link_convex_decomposition_p1_geom",
+                                        "wrist_pitch_link_convex_decomposition_p2_geom",
+                                        "wrist_pitch_link_convex_decomposition_p3_geom",
+                                        "wrist_yaw_link_convex_decomposition_p1_geom",
+                                        "wrist_yaw_link_convex_decomposition_p2_geom",
+                                        "forearm_link_convex_decomposition_p1_geom",
+                                        "forearm_link_convex_decomposition_p2_geom",
+                                        "upper_arm_link_convex_decomposition_p1_geom",
+                                        "upper_arm_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p1_geom",
+                                        "shoulder_link_convex_decomposition_p2_geom",
+                                        "shoulder_link_convex_decomposition_p3_geom",
+                                        "base_link_convex_geom", "table_contact_geom"]
+
+        self.cup_collision_objects = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "cup_geom_table6",
+                                      "cup_geom_table7", "cup_geom_table8", "cup_geom_table9", "cup_geom_table10",
+                                      # "cup_base_table", "cup_base_table_contact",
+                                      "cup_geom_table15",
+                                      "cup_geom_table16",
+                                      "cup_geom_table17", "cup_geom1_table8",
+                                      # "cup_base_table_contact",
+                                      # "cup_base_table"
+                                      ]
+
+
+        self.ball_traj = None
+        self.dists = None
+        self.dists_final = None
+        self.costs = None
+        self.action_costs = None
+        self.angle_rewards = None
+        self.cup_angles = None
+        self.cup_z_axes = None
+        self.collision_penalty = 500
+        self.reset(None)
+
+    def reset(self, noisy):
+        self.ball_traj = []
+        self.dists = []
+        self.dists_final = []
+        self.costs = []
+        self.action_costs = []
+        self.angle_rewards = []
+        self.cup_angles = []
+        self.cup_z_axes = []
+        self.ball_ground_contact = False
+        self.ball_table_contact = False
+        self.ball_wall_contact = False
+        self.ball_cup_contact = False
+        self.noisy_bp = noisy
+        self._t_min_final_dist = -1
+
+    def compute_reward(self, env, action):
+        self.ball_id = env.sim.model._body_name2id["ball"]
+        self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+        self.goal_id = env.sim.model._site_name2id["cup_goal_table"]
+        self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"]
+        self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects]
+        self.cup_table_id = env.sim.model._body_name2id["cup_table"]
+        self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"]
+        self.wall_collision_id = env.sim.model._geom_name2id["wall"]
+        self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"]
+        self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"]
+        self.ground_collision_id = env.sim.model._geom_name2id["ground"]
+        self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects]
+
+        goal_pos = env.sim.data.site_xpos[self.goal_id]
+        ball_pos = env.sim.data.body_xpos[self.ball_id]
+        ball_vel = env.sim.data.body_xvelp[self.ball_id]
+        goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
+        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
+        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
+
+        action_cost = np.sum(np.square(action))
+        self.action_costs.append(action_cost)
+
+        self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        if env._steps == env.ep_length - 1 or self._is_collided:
+
+            min_dist = np.min(self.dists)
+            ball_table_bounce = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                     self.table_collision_id)
+            ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id,
+                                                                            self.cup_collision_ids)
+            ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                                  self.wall_collision_id)
+            ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id,
+                                                               self.cup_table_collision_id)
+            if not ball_in_cup:
+                cost_offset = 2
+                if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont:
+                    cost_offset += 2
+                cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost
+            else:
+                cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7
+
+            reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
+            success = ball_in_cup
+            crash = self._is_collided
+        else:
+            reward = - 1e-7 * action_cost
+            cost = 0
+            success = False
+            crash = False
+
+        infos = {}
+        infos["success"] = success
+        infos["is_collided"] = self._is_collided
+        infos["ball_pos"] = ball_pos.copy()
+        infos["ball_vel"] = ball_vel.copy()
+        infos["action_cost"] = 5e-4 * action_cost
+        infos["task_cost"] = cost
+
+        return reward, infos
+
+    def get_cost_offset(self):
+        if self.ball_ground_contact:
+            return 200
+
+        if not self.ball_table_contact:
+            return 100
+
+        if not self.ball_in_cup:
+            return 50
+
+        if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp:
+            return 10
+
+        return 0
+
+    def _check_collision_single_objects(self, sim, id_1, id_2):
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+
+            if collision or collision_trans:
+                return True
+        return False
+
+    def _check_collision_with_itself(self, sim, collision_ids):
+        col_1, col_2 = False, False
+        for j, id in enumerate(collision_ids):
+            col_1 = self._check_collision_with_set_of_objects(sim, id, collision_ids[:j])
+            if j != len(collision_ids) - 1:
+                col_2 = self._check_collision_with_set_of_objects(sim, id, collision_ids[j + 1:])
+            else:
+                col_2 = False
+        collision = True if col_1 or col_2 else False
+        return collision
+
+    def _check_collision_with_set_of_objects(self, sim, id_1, id_list):
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 in id_list and con.geom2 == id_1
+            collision_trans = con.geom1 == id_1 and con.geom2 in id_list
+
+            if collision or collision_trans:
+                return True
+        return False
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@ -0,0 +1,39 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # TODO: @Max Filter observations correctly
+        return np.hstack([
+            [False] * 7,  # cos
+            [False] * 7,  # sin
+            # [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
+    @property
+    def start_pos(self):
+        return self._start_pos
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[0:7].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qvel[0:7].copy()
+
+    @property
+    def goal_pos(self):
+        # TODO: @Max I think the default value of returning to the start is reasonable here
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_convex.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_dist_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_convex.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_med_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_finger_prox_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl
--- a/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_fine.stl
--- a/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
+++ b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_fine.stl
--- a/alr_envs/alr/mujoco/table_tennis/init.py
+++ b/alr_envs/alr/mujoco/table_tennis/init.py
@ -0,0 +1 @@
+from .mp_wrapper import MPWrapper
--- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
@ -0,0 +1,38 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # TODO: @Max Filter observations correctly
+        return np.hstack([
+            [True] * 7,  # Joint Pos
+            [True] * 3, # Ball pos
+            [True] * 3  # goal pos
+        ])
+
+    @property
+    def start_pos(self):
+        return self.self.init_qpos_tt
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[:7].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qvel[:7].copy()
+
+    @property
+    def goal_pos(self):
+        # TODO: @Max I think the default value of returning to the start is reasonable here
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@ -0,0 +1,168 @@
+import os
+
+import numpy as np
+import mujoco_py
+from gym import utils, spaces
+from gym.envs.mujoco import MujocoEnv
+
+from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
+from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
+
+#TODO: Check for simulation stability. Make sure the code runs even for sim crash
+
+MAX_EPISODE_STEPS = 1375
+BALL_NAME_CONTACT = "target_ball_contact"
+BALL_NAME = "target_ball"
+TABLE_NAME = 'table_tennis_table'
+FLOOR_NAME = 'floor'
+PADDLE_CONTACT_1_NAME = 'bat'
+PADDLE_CONTACT_2_NAME = 'bat_back'
+RACKET_NAME = 'bat'
+# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
+CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
+CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
+
+class TT_Env_Gym(MujocoEnv, utils.EzPickle):
+
+    def __init__(self, ctxt_dim=2):
+        model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
+
+        self.ctxt_dim = ctxt_dim
+        if ctxt_dim == 2:
+            self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
+            self.goal = np.zeros(3)  # 2 x,y + 1z
+        elif ctxt_dim == 4:
+            self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
+            self.goal = np.zeros(3)
+        else:
+            raise ValueError("either 2 or 4 dimensional Contexts available")
+
+        action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+        action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+        self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+
+        self.time_steps = 0
+        self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
+        self.init_qvel_tt = np.zeros(10)
+
+        self.reward_func = TT_Reward(self.ctxt_dim)
+        self.ball_landing_pos = None
+        self.hited_ball = False
+        self.ball_contact_after_hit = False
+        self._ids_set = False
+        super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # find the proper -> not protected func.
+        self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+        self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+        self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+        self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME]  # check if we need both or only this
+        self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME]  # check if we need both or only this
+        self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+
+    def _set_ids(self):
+        self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # find the proper -> not protected func.
+        self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
+        self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
+        self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME]  # check if we need both or only this
+        self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME]  # check if we need both or only this
+        self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
+        self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
+        self._ids_set = True
+
+    def _get_obs(self):
+        ball_pos = self.sim.data.body_xpos[self.ball_id]
+        obs = np.concatenate([self.sim.data.qpos[:7].copy(),  # 7 joint positions
+                              ball_pos,
+                              self.goal.copy()])
+        return obs
+
+    def sample_context(self):
+        return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+
+    def reset_model(self):
+        self.set_state(self.init_qpos_tt, self.init_qvel_tt)    # reset to initial sim state
+        self.time_steps = 0
+        self.ball_landing_pos = None
+        self.hited_ball = False
+        self.ball_contact_after_hit = False
+        self.goal = self.sample_context()[:2]
+        if self.ctxt_dim == 2:
+            initial_ball_state = ball_init(random=False)  # fixed velocity, fixed position
+        elif self.ctxt_dim == 4:
+            initial_ball_state = ball_init(random=False)#raise NotImplementedError
+
+        self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
+        self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
+        self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
+
+        self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
+        self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
+        self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
+
+        z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
+        self.goal = z_extended_goal_pos
+        self.sim.model.body_pos[5] = self.goal[:3]          # Desired Landing Position, Yellow
+        self.sim.model.body_pos[3] = np.array([0, 0, 0.5])  # Outgoing Ball Landing Position, Green
+        self.sim.model.body_pos[4] = np.array([0, 0, 0.5])  # Incoming Ball Landing Position, Red
+        self.sim.forward()
+
+        self.reward_func.reset(self.goal)                   # reset the reward function
+        return self._get_obs()
+
+    def _contact_checker(self, id_1, id_2):
+        for coni in range(0, self.sim.data.ncon):
+            con = self.sim.data.contact[coni]
+            collision = con.geom1 == id_1 and con.geom2 == id_2
+            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+            if collision or collision_trans:
+                return True
+        return False
+
+    def step(self, action):
+        if not self._ids_set:
+            self._set_ids()
+        done = False
+        episode_end = False if self.time_steps+1<MAX_EPISODE_STEPS else True
+        if not self.hited_ball:
+            self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_1) # check for one side
+            if not self.hited_ball:
+                self.hited_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_2) # check for other side
+        if self.hited_ball:
+            if not self.ball_contact_after_hit:
+                if self._contact_checker(self.ball_contact_id, self.floor_contact_id):  # first check contact with floor
+                    self.ball_contact_after_hit = True
+                    self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
+                elif self._contact_checker(self.ball_contact_id, self.table_contact_id): # second check contact with table
+                    self.ball_contact_after_hit = True
+                    self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
+        c_ball_pos = self.sim.data.body_xpos[self.ball_id]
+        racket_pos = self.sim.data.geom_xpos[self.racket_id]        # TODO: use this to reach out the position of the paddle?
+        if self.ball_landing_pos is not None:
+            done = True
+            episode_end =True
+        reward = self.reward_func.get_reward(episode_end, c_ball_pos, racket_pos, self.hited_ball, self.ball_landing_pos)
+        self.time_steps += 1
+        # gravity compensation on joints:
+        #action += self.sim.data.qfrc_bias[:7].copy()
+        try:
+            self.do_simulation(action, self.frame_skip)
+        except mujoco_py.MujocoException as e:
+            print('Simulation got unstable returning')
+            done = True
+            reward = -25
+        ob = self._get_obs()
+        return ob, reward, done, {"hit_ball":self.hited_ball}# might add some information here ....
+
+    def set_context(self, context):
+        old_state = self.sim.get_state()
+        qpos = old_state.qpos.copy()
+        qvel = old_state.qvel.copy()
+        self.set_state(qpos, qvel)
+        self.goal = context
+        z_extended_goal_pos = np.concatenate((self.goal[:self.ctxt_dim], [0.77]))
+        if self.ctxt_dim == 4:
+            z_extended_goal_pos = np.concatenate((z_extended_goal_pos, [0.77]))
+        self.goal = z_extended_goal_pos
+        self.sim.model.body_pos[5] = self.goal[:3]      # TODO: Missing: Setting the desired incomoing landing position
+        self.sim.forward()
+        return self._get_obs()
--- a/alr_envs/alr/mujoco/table_tennis/tt_reward.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
@ -0,0 +1,48 @@
+import numpy as np
+
+
+class TT_Reward:
+
+    def __init__(self, ctxt_dim):
+        self.ctxt_dim = ctxt_dim
+        self.c_goal = None          # current desired landing point
+        self.c_ball_traj = []
+        self.c_racket_traj = []
+        self.constant = 8
+
+    def get_reward(self, episode_end, ball_position, racket_pos, hited_ball, ball_landing_pos):
+        self.c_ball_traj.append(ball_position.copy())
+        self.c_racket_traj.append(racket_pos.copy())
+        if not episode_end:
+            return 0
+        else:
+            # # seems to work for episodic case
+            min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
+            if not hited_ball:
+                return 0.2 * (1- np.tanh(min_r_b_dist**2))
+            else:
+                if ball_landing_pos is None:
+                    min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
+                    return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2))
+                else:
+                    min_b_des_b_land_dist = np.linalg.norm(self.c_goal[:2] - ball_landing_pos[:2])
+                    over_net_bonus = int(ball_landing_pos[0] < 0)
+                    return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
+
+
+            # if not hited_ball:
+            #     min_r_b_dist = 1 + np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
+            #     return -min_r_b_dist
+            # else:
+            #     if ball_landing_pos is None:
+            #         dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_position), 0.75)/self.constant
+            #     else:
+            #         dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_landing_pos), 0.75)/self.constant
+            #     if dist_to_des_pos < -0.2:
+            #         dist_to_des_pos = -0.2
+            #     return -dist_to_des_pos
+
+    def reset(self, goal):
+        self.c_goal = goal.copy()
+        self.c_ball_traj = []
+        self.c_racket_traj = []
--- a/alr_envs/alr/mujoco/table_tennis/tt_utils.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_utils.py
@ -0,0 +1,26 @@
+import numpy as np
+
+
+def ball_init(random=False, context_range=None):
+    if random:
+        dx = np.random.uniform(-0.1, 0.1)           # TODO: clarify these numbers?
+        dy = np.random.uniform(-0.1, 0.1)           # TODO: clarify these numbers?
+        dz = np.random.uniform(-0.1, 0.1)           # TODO: clarify these numbers?
+
+        v_x = np.random.uniform(1.7, 1.8)
+        v_y = np.random.uniform(0.7, 0.8)
+        v_z = np.random.uniform(0.1, 0.2)
+    else:
+        dx = 1
+        dy = 0
+        dz = 0.05
+
+        v_x = 2.5
+        v_y = 2
+        v_z = 0.5
+
+    initial_x = 0 + dx - 1.2
+    initial_y = -0.2 + dy - 0.6
+    initial_z = 0.3 + dz + 1.5
+    initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
+    return initial_ball_state
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
@ -0,0 +1,12 @@
+<mujocoinclude>
+    <actuator>
+        <motor name="wam/base_motor" joint="wam/base_yaw_joint_right" ctrllimited="true" ctrlrange="-1.0 1.0" gear="150.0"/>
+        <motor name="wam/shoulder_pitch_motor" joint='wam/shoulder_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="125.0"/>
+        <motor name="wam/shoulder_yaw_motor" joint='wam/shoulder_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="40.0"/>
+        <motor name="wam/elbow_pitch_motor" joint='wam/elbow_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="60.0"/>
+        <motor name="wam/wrist_yaw_motor" joint='wam/wrist_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
+        <motor name="wam/wrist_pitch_motor" joint='wam/wrist_pitch_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="5.0"/>
+        <motor name="wam/palm_yaw_motor" joint='wam/palm_yaw_joint_right' ctrllimited="true" ctrlrange="-1.0 1.0" gear="2.0"/>
+    </actuator>
+</mujocoinclude>
+
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
@ -0,0 +1,103 @@
+<mujocoinclue>
+    <body name="wam/base_link_right" pos="2.1 0 2.0" quat="0 0 1 0" childclass="wam" >
+            <inertial pos="0 0 0" mass="1" diaginertia="0.1 0.1 0.1"/>
+            <geom name="base_link_fine" class="viz" mesh="base_link_fine" rgba="0.5 0.5 0.5 0"/>
+            <geom name="base_link_convex" class="col" mesh="base_link_convex" rgba="0.5 0.5 0.5 1"/>
+            <body name="wam/shoulder_yaw_link_right" pos="0 0 0.346">
+                <inertial pos="-0.00443422 -0.00066489 -0.128904" quat="0.69566 0.716713 -0.0354863 0.0334839" mass="5"
+                          diaginertia="0.135089 0.113095 0.0904426"/>
+<!--                control 0: 1.6-->
+                <joint name="wam/base_yaw_joint_right" range="-2.6 2.6" damping="1.98"/>
+                <geom name="shoulder_link_fine" class="viz" mesh="shoulder_link_fine" rgba="1 1 1 0"/>
+                <geom name="shoulder_link_convex_decomposition_p1" class="col"
+                      mesh="shoulder_link_convex_decomposition_p1"/>
+                <geom name="shoulder_link_convex_decomposition_p2" class="col"
+                      mesh="shoulder_link_convex_decomposition_p2"/>
+                <geom name="shoulder_link_convex_decomposition_p3" class="col"
+                      mesh="shoulder_link_convex_decomposition_p3"/>
+                <body name="wam/shoulder_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
+                    <inertial pos="-0.00236981 -0.0154211 0.0310561" quat="0.961794 0.273112 -0.0169316 0.00866592"
+                              mass="3.87494" diaginertia="0.0214195 0.0167127 0.0126452"/> <!--seems off-->
+<!--                    control 1: 0-->
+                    <joint name="wam/shoulder_pitch_joint_right" range="-2 2" damping="0.55"/>
+                    <geom name="shoulder_pitch_link_fine" class="viz" mesh="shoulder_pitch_link_fine" rgba="1 1 1 0"/>
+                    <geom name="shoulder_pitch_link_convex" class="col" mesh="shoulder_pitch_link_convex"/>
+                    <body name="wam/upper_arm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
+                        <inertial pos="0.00683259 3.309e-005 0.392492" quat="0.647136 0.0170822 0.0143038 0.762049"
+                                  mass="2.20228" diaginertia="0.0592718 0.0592207 0.00313419"/>
+<!--                        control 2: 0-->
+                        <joint name="wam/shoulder_yaw_joint_right" range="-2.8 2.8" damping="1.65"/>
+                        <geom name="upper_arm_link_fine" class="viz" mesh="upper_arm_link_fine" rgba="1 1 1 0"/>
+                        <geom name="upper_arm_link_convex_decomposition_p1" class="col"
+                              mesh="upper_arm_link_convex_decomposition_p1" rgba="0.094 0.48 0.804 1"/>
+                        <geom name="upper_arm_link_convex_decomposition_p2" class="col"
+                              mesh="upper_arm_link_convex_decomposition_p2" rgba="0.094 0.48 0.804 1"/>
+                        <body name="wam/forearm_link_right" pos="0.045 0 0.55" quat="0.707107 -0.707107 0 0">
+                            <inertial pos="-0.0400149 -0.142717 -0.00022942"
+                                      quat="0.704281 0.706326 0.0180333 0.0690353" mass="0.500168"
+                                      diaginertia="0.0151047 0.0148285 0.00275805"/>
+<!--                            control 3: 2.4-->
+                            <joint name="wam/elbow_pitch_joint_right" range="-0.9 3.1" damping="0.88"/>
+                            <geom name="elbow_link_fine" class="viz" mesh="elbow_link_fine" rgba="1 1 1 0"/>
+                            <geom name="elbow_link_convex" class="col" mesh="elbow_link_convex"/>
+                            <geom name="forearm_link_fine" class="viz" mesh="forearm_link_fine" pos="-.045 -0.0730 0"
+                                  euler="1.57 0 0" rgba="1 1 1 0"/>
+                            <geom name="forearm_link_convex_decomposition_p1" class="col"
+                                  mesh="forearm_link_convex_decomposition_p1" pos="-0.045 -0.0730 0"
+                                  euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
+                            <geom name="forearm_link_convex_decomposition_p2" class="col"
+                                  mesh="forearm_link_convex_decomposition_p2" pos="-.045 -0.0730 0"
+                                  euler="1.57 0 0" rgba="0.094 0.48 0.804 1"/>
+                            <body name="wam/wrist_yaw_link_right" pos="-0.045 -0.3 0" quat="0.707107 0.707107 0 0">
+                                <inertial pos="8.921e-005 0.00435824 -0.00511217"
+                                          quat="0.630602 0.776093 0.00401969 -0.002372" mass="1.05376"
+                                          diaginertia="0.000555168 0.00046317 0.000234072"/> <!--this is an approximation-->
+<!--                                control 4: 0-->
+                                <joint name="wam/wrist_yaw_joint_right" range="-4.8 1.3" damping="0.55"/>
+                                <geom name="wrist_yaw_link_fine" class="viz" mesh="wrist_yaw_link_fine" rgba="1 1 1 0"/>
+                                <geom name="wrist_yaw_link_convex_decomposition_p1" class="col"
+                                      mesh="wrist_yaw_link_convex_decomposition_p1"/>
+                                <geom name="wrist_yaw_link_convex_decomposition_p2" class="col"
+                                      mesh="wrist_yaw_link_convex_decomposition_p2"/>
+                                <body name="wam/wrist_pitch_link_right" pos="0 0 0" quat="0.707107 -0.707107 0 0">
+                                    <inertial pos="-0.00012262 -0.0246834 -0.0170319"
+                                              quat="0.630602 0.776093 0.00401969 -0.002372" mass="0.517974"
+                                              diaginertia="0.000555168 0.00046317 0.000234072"/>
+<!--                                    control 5: 0-->
+                                    <joint name="wam/wrist_pitch_joint_right" range="-1.6 1.6" damping="0.11"/>
+                                    <geom name="wrist_pitch_link_fine" class="viz" mesh="wrist_pitch_link_fine"
+                                          rgba="1 1 1 0"/>
+                                    <geom name="wrist_pitch_link_convex_decomposition_p1" rgba="1 0.5 0.313 1"
+                                          class="col" mesh="wrist_pitch_link_convex_decomposition_p1"/>
+                                    <geom name="wrist_pitch_link_convex_decomposition_p2" rgba="1 0.5 0.313 1"
+                                          class="col" mesh="wrist_pitch_link_convex_decomposition_p2"/>
+                                    <geom name="wrist_pitch_link_convex_decomposition_p3" rgba="1 0.5 0.313 1"
+                                          class="col" mesh="wrist_pitch_link_convex_decomposition_p3"/>
+                                    <body name="wam/wrist_palm_link_right" pos="0 0 0" quat="0.707107 0.707107 0 0">
+                                        <inertial pos="0 0 0.055" quat="0.707107 0 0 0.707107" mass="0.0828613"
+                                                  diaginertia="0.00020683 0.00010859 0.00010851"/>
+<!--                                        control 6: 1.8-->
+                                        <joint name="wam/palm_yaw_joint_right" range="-2.2 2.2" damping="0.11"/>
+                                        <geom name="wrist_palm_link_fine" class="viz" mesh="wrist_palm_link_fine"
+                                              rgba="1 1 1 0"/>
+                                        <geom name="wrist_palm_link_convex" class="col" mesh="wrist_palm_link_convex"/>
+<!--                                        EE=wam/paddle, configure name to the end effector name-->
+                                        <body name="EE" pos="0 0 0.26" childclass="contact_geom">
+                                            <geom name="bat" type="cylinder" size="0.075 0.005" rgba="1 0 0 1"
+                                                  quat="0.71 0 0.71 0"/>
+                                            <geom name="bat_back" type="cylinder" size="0.0749 0.0025" rgba="0 1 0 1"
+                                                  quat="0.71 0 0.71 0" pos="-0.0026 0 0"/>
+                                            <geom name="wam/paddle_handle" type="box" size="0.005 0.01 0.05" pos="0 0 -0.08"
+                                                  rgba="1 1 1 1"/>
+                                            <!--                                            Extract information for sampling goals.-->
+                                            <site name="wam/paddle_center" pos="0 0 0" rgba="1 1 1 1" size="0.00001"/>
+                                        </body>
+                                    </body>
+                                </body>
+                            </body>
+                        </body>
+                    </body>
+                </body>
+            </body>
+        </body>
+</mujocoinclue>
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
@ -0,0 +1,30 @@
+<mujocoinclude>
+    <body name="table_tennis_table" pos="0 0 0">
+        <geom class="viz" name="table_base_1" pos="1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <geom class="viz" name="table_base_2" pos="1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <geom class="viz" name="table_base_3" pos="-1 -0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <geom class="viz" name="table_base_4" pos="-1 0.7 0.375" rgba="1 1 1 1" size="0.05 0.05 .375" type="box" />
+        <body name="table_top" pos="0 0 0.76">
+            <geom class="contact_geom" name="table_tennis_table" pos="0 0 0" rgba="0 0 0.5 1" size="1.37 .7625 .01" type="box" />
+            <site name="left_up_corner" pos="-1.37 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <site name="middle_up_corner" pos="0 .7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <site name="left_down_corner" pos="-1.37 -0.7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <site name="middle_down_corner" pos="0 -.7625 0.01" rgba="1 1 1 1" size="0.00001" />
+            <geom class="contact_geom" material="floor_plane" name="table_te_context_spacennis_net" pos="0 0 0.08625" rgba="0 0 1 0.5" size="0.01 0.915 0.07625" type="box" />
+            <geom class="viz" name="left_while_line" pos="0 -0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
+            <geom class="viz" name="center_while_line" pos="0 0 0.01" rgba="1 1 1 1" size="1.37 .01 .001" type="box" />
+            <geom class="viz" name="right_while_line" pos="0 0.7425 0.01" rgba="1 1 1 1" size="1.37 .02 .001" type="box" />
+            <geom class="viz" name="right_side_line" pos="1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
+            <geom class="viz" name="left_side_line" pos="-1.35 0 0.01" rgba="1 1 1 1" size="0.02 .7625 .001" type="box" />
+        </body>
+        <body name="achieved_pos" pos="0 0 0.5">
+            <geom class="viz" name="achieved_point_geom" pos="0 0 0" rgba="0 1 0 1" size="0.02 0.001" type="cylinder" />
+        </body>
+        <body name="right_achieved_pos" pos="0 0 0.5">
+            <geom class="viz" name="hitting_achieved_point_geom" pos="0 0 0" rgba="1 0 0 1" size="0.02 0.001" type="cylinder" />
+        </body>
+        <body name="target_point" pos="0 0 0.5">
+            <geom class="viz" name="target_point_geom" pos="0 0 0" rgba="1 1 0 1" size="0.02 0.001" type="cylinder" />
+        </body>
+    </body>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
@ -0,0 +1,10 @@
+<mujocoinclude>
+    <body name="target_ball" pos="0 0 0">
+        <joint axis="1 0 0" damping="0.0" name="tar:x" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
+        <joint axis="0 1 0" damping="0.0" name="tar:y" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
+        <joint axis="0 0 1" damping="0.0" name="tar:z" pos="0 0 0" stiffness="0" type="slide" frictionloss="0"/>
+        <geom size="0.025 0.025 0.025" type="sphere" condim="4" name="target_ball_contact" rgba="1 1 0 1" mass="0.1"
+              friction="0.1 0.1 0.1" solimp="0.9 0.95 0.001 0.5 2" solref="0.1 0.03" priority="1"/>
+        <site name="target_ball" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
+    </body>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
@ -0,0 +1,47 @@
+<mujocoinclude>
+    <actuator>
+
+<!--                        <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" />-->
+<!--                        <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="162.0" />-->
+<!--                        <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="100.0" />-->
+<!--                        <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="122.0" />-->
+<!--                        <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" />-->
+<!--                        <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="102.0" />-->
+<!--                        <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" />-->
+
+<!--                <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="151.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="125.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="121.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+
+<!--                        <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="600.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="122.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="500.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="103.0" ctrllimited="true"/>-->
+<!--                <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="99.0" ctrllimited="true"/>-->
+
+                <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="800.0" ctrllimited="true"/>
+                <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>
+                <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="1000.0" ctrllimited="true"/>
+                <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>
+
+
+<!--        <position ctrlrange="-2.6 2.6" joint="wam/base_yaw_joint_right" kp="1600.0" ctrllimited="true"/>-->
+<!--&lt;!&ndash;        <velocity ctrlrange="-50 50" joint="wam/base_yaw_joint_right" kv="100" ctrllimited="true"/>&ndash;&gt;-->
+
+<!--        <position ctrlrange="-1.985 1.985" joint="wam/shoulder_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-2.8 2.8" joint="wam/shoulder_yaw_joint_right" kp="800.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-0.9 3.14159" joint="wam/elbow_pitch_joint_right" kp="1200.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-4.55 1.25" joint="wam/wrist_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-1.5707 1.5707" joint="wam/wrist_pitch_joint_right" kp="2000.0" ctrllimited="true"/>-->
+<!--        <position ctrlrange="-3 3" joint="wam/palm_yaw_joint_right" kp="100.0" ctrllimited="true"/>-->
+    </actuator>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
@ -0,0 +1,46 @@
+<mujocoinclude>
+    <default>
+        <default class="wam">
+            <joint type="hinge" limited="true" pos="0 0 0" axis="0 0 1"/>
+        </default>
+        <default class="viz">
+            <geom type="mesh" contype="0" conaffinity="0" group="1" rgba="1 1 1 1"/>
+        </default>
+        <default class="col">
+            <geom type="mesh" contype="0" conaffinity="1" group="0" rgba="1 1 1 1"/>
+        </default>
+        <default class="contact_geom">
+            <geom condim="4" friction="0.1 0.1 0.1" margin="0" solimp="1 1 0" solref="0.1 0.03"/>
+        </default>
+    </default>
+    <asset>
+        <mesh file="base_link_fine.stl"/>
+        <mesh file="base_link_convex.stl"/>
+        <mesh file="shoulder_link_fine.stl"/>
+        <mesh file="shoulder_link_convex_decomposition_p1.stl"/>
+        <mesh file="shoulder_link_convex_decomposition_p2.stl"/>
+        <mesh file="shoulder_link_convex_decomposition_p3.stl"/>
+        <mesh file="shoulder_pitch_link_fine.stl"/>
+        <mesh file="shoulder_pitch_link_convex.stl"/>
+        <mesh file="upper_arm_link_fine.stl"/>
+        <mesh file="upper_arm_link_convex_decomposition_p1.stl"/>
+        <mesh file="upper_arm_link_convex_decomposition_p2.stl"/>
+        <mesh file="elbow_link_fine.stl"/>
+        <mesh file="elbow_link_convex.stl"/>
+        <mesh file="forearm_link_fine.stl"/>
+        <mesh file="forearm_link_convex_decomposition_p1.stl"/>
+        <mesh file="forearm_link_convex_decomposition_p2.stl"/>
+        <mesh file="wrist_yaw_link_fine.stl"/>
+        <mesh file="wrist_yaw_link_convex_decomposition_p1.stl"/>
+        <mesh file="wrist_yaw_link_convex_decomposition_p2.stl"/>
+        <mesh file="wrist_pitch_link_fine.stl"/>
+        <mesh file="wrist_pitch_link_convex_decomposition_p1.stl"/>
+        <mesh file="wrist_pitch_link_convex_decomposition_p2.stl"/>
+        <mesh file="wrist_pitch_link_convex_decomposition_p3.stl"/>
+        <mesh file="wrist_palm_link_fine.stl"/>
+        <mesh file="wrist_palm_link_convex.stl"/>
+        <texture builtin="checker" height="512" name="texplane" rgb1=".2 .3 .4" rgb2=".1 0.15 0.2" type="2d"
+                 width="512"/>
+        <material name="floor_plane" reflectance="0.5" texrepeat="1 1" texture="texplane" texuniform="true"/>
+    </asset>
+</mujocoinclude>
--- a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
+++ b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
@ -0,0 +1,18 @@
+<mujoco model="table_tennis(v0.1)">
+    <compiler angle="radian" coordinate="local" meshdir="../../meshes/wam" />
+    <option gravity="0 0 -9.81" timestep="0.002">
+        <flag warmstart="enable" />
+    </option>
+    <custom>
+        <numeric data="0 0 0 0 0 0 0" name="START_ANGLES" />
+    </custom>
+    <include file="shared.xml" />
+    <worldbody>
+        <light cutoff="60" diffuse="1 1 1" dir="-.1 -.2 -1.3" directional="true" exponent="1" pos=".1 .2 1.3" specular=".1 .1 .1" />
+        <geom conaffinity="1" contype="1" material="floor_plane" name="floor" pos="0 0 0" size="10 5 1" type="plane" />
+        <include file="include_table.xml" />
+        <include file="include_barrett_wam_7dof_right.xml" />
+        <include file="include_target_ball.xml" />
+    </worldbody>
+    <include file="include_7_motor_actuator.xml" />
+</mujoco>