From 8fe6a83271e015c41710cd55861f5344695da982 Mon Sep 17 00:00:00 2001 From: Onur Date: Tue, 28 Jun 2022 20:33:19 +0200 Subject: [PATCH] started cleaning up init. DMP envs are still not transferred. Wrappers for various environments still missing --- alr_envs/alr/__init__.py | 1204 ++++++----------- alr_envs/alr/mujoco/beerpong/beerpong.py | 64 +- .../alr/mujoco/hopper_jump/hopper_jump.py | 70 - 3 files changed, 418 insertions(+), 920 deletions(-) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 09f533a..ec539db 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -21,6 +21,35 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} +DEFAULT_MP_ENV_DICT = { + "name": 'EnvName', + "wrappers": [], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 7 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 1.5, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.ones(7), + "d_gains": np.ones(7) * 0.1, + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 2 + } +} + # Classic Control ## Simple Reacher register( @@ -32,16 +61,6 @@ register( } ) -register( - id='SimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 2, - "random_start": False - } -) - register( id='LongSimpleReacher-v0', entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', @@ -51,16 +70,6 @@ register( } ) -register( - id='LongSimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False - } -) - ## Viapoint Reacher register( @@ -91,38 +100,6 @@ register( } ) -register( - id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": None, - "collision_penalty": 100, - } -) - -register( - id='HoleReacher-v2', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": 2, - "collision_penalty": 1, - } -) - # Mujoco ## Reacher @@ -203,108 +180,39 @@ register( } ) -_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] -for i in _vs: - _env_id = f'ALRReacher{i}-v0' - register( - id=_env_id, - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 0, - "n_links": 5, - "balance": False, - 'ctrl_cost_weight': i - } - ) - - _env_id = f'ALRReacherSparse{i}-v0' - register( - id=_env_id, - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 5, - "balance": False, - 'ctrl_cost_weight': i - } - ) - -# CtxtFree are v0, Contextual are v1 register( id='ALRAntJump-v0', entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, - kwargs={ - "max_episode_steps": 
MAX_EPISODE_STEPS_ANTJUMP, - "context": False - } -) - -# CtxtFree are v0, Contextual are v1 -register( - id='ALRAntJump-v1', - entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP, "context": True } ) -# CtxtFree are v0, Contextual are v1 register( id='ALRHalfCheetahJump-v0', entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHalfCheetahJump-v1', - entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, "context": True } ) -# CtxtFree are v0, Contextual are v1 + register( id='ALRHopperJump-v0', entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, - "context": False, - "healthy_reward": 1.0 - } -) -register( - id='ALRHopperJump-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, "context": True } ) +#### Hopper Jump random joints and des position register( - id='ALRHopperJump-v2', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP - } -) - -register( - id='ALRHopperJump-v3', + id='ALRHopperJumpRndmJointsDesPos-v0', entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ @@ -314,9 +222,9 @@ register( } ) -##### Hopper Jump step based reward +##### Hopper Jump random joints and des position step based reward register( - id='ALRHopperJump-v4', + id='ALRHopperJumpRndmJointsDesPosStepBased-v0', entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ @@ -326,84 +234,40 @@ register( } ) - -# CtxtFree are v0, Contextual are v1 register( id='ALRHopperJumpOnBox-v0', entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperJumpOnBox-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, "context": True } ) -# CtxtFree are v0, Contextual are v1 register( id='ALRHopperThrow-v0', entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperThrow-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, "context": True } ) -# CtxtFree are v0, Contextual are v1 register( id='ALRHopperThrowInBasket-v0', entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - kwargs={ - "max_episode_steps": 
MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
-        "context": False
-    }
-)
-# CtxtFree are v0, Contextual are v1
-register(
-    id='ALRHopperThrowInBasket-v1',
-    entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
-    max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
         "context": True
     }
 )
-# CtxtFree are v0, Contextual are v1
+
 register(
     id='ALRWalker2DJump-v0',
     entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
-    kwargs={
-        "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
-        "context": False
-    }
-)
-# CtxtFree are v0, Contextual are v1
-register(
-    id='ALRWalker2DJump-v1',
-    entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
-    max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
         "context": True
     }
 )
 
@@ -427,76 +291,46 @@ register(id='TableTennis2DCtxt-v0',
          max_episode_steps=MAX_EPISODE_STEPS,
          kwargs={'ctxt_dim': 2})
 
-register(id='TableTennis2DCtxt-v1',
-         entry_point='alr_envs.alr.mujoco:TTEnvGym',
-         max_episode_steps=MAX_EPISODE_STEPS,
-         kwargs={'ctxt_dim': 2, 'fixed_goal': True})
-
 register(id='TableTennis4DCtxt-v0',
          entry_point='alr_envs.alr.mujoco:TTEnvGym',
          max_episode_steps=MAX_EPISODE_STEPS,
          kwargs={'ctxt_dim': 4})
 
-## BeerPong
-# fixed goal cup position
 register(
-    id='ALRBeerPong-v0',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": False,
-        "cup_goal_pos": [0.1, -2.0],
-        "frame_skip": 2
-    }
-    )
+    id='ALRBeerPong-v0',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
-
-# random goal cup position
+# Here we use the same reward as in ALRBeerPong-v0, but after the release we only take
+# one more step, i.e. 
we simulate until the end of the episode within that single step.
 register(
-    id='ALRBeerPong-v1',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
-
-# random goal cup position
-register(
-    id='ALRBeerPong-v2',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
-# Beerpong with episodic reward, but fixed release time step
-register(
-    id='ALRBeerPong-v3',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
+    id='ALRBeerPongStepBased-v0',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 # Beerpong with episodic reward, but fixed release time step
 register(
-    id='ALRBeerPong-v4',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
+    id='ALRBeerPongFixedRelease-v0',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 
 # Motion Primitive Environments
 
@@ -530,25 +364,17 @@ for _v in _versions:
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_simple_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_simple_reacher_promp['wrappers'].append('TODO') # TODO
+    kwargs_dict_simple_reacher_promp['movement_primitives_kwargs']['action_dim'] = 2 if "long" not in _v.lower() else 5
+    kwargs_dict_simple_reacher_promp['phase_generator_kwargs']['tau'] = 2
+    kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
+    kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
+    kwargs_dict_simple_reacher_promp['name'] = f"alr_envs:{_v}"
     register(
         id=_env_id,
         entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-        kwargs={
-            "name": f"alr_envs:{_v}",
-            "wrappers": [classic_control.simple_reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 2 if "long" not in _v.lower() else 5,
-                "num_basis": 5,
-                "duration": 2,
-                "policy_type": "motor",
-                "weights_scale": 1,
-                "zero_start": True,
-                "policy_kwargs": {
-                    "p_gains": .6,
-                    "d_gains": .075
-                }
-            }
-        }
+        kwargs=kwargs_dict_simple_reacher_promp
    )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
@@ -573,28 +399,24 @@ register(
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
 
+kwargs_dict_via_point_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+kwargs_dict_via_point_reacher_promp['wrappers'].append('TODO') # TODO
+kwargs_dict_via_point_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
+kwargs_dict_via_point_reacher_promp['phase_generator_kwargs']['tau'] = 2
+kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
+kwargs_dict_via_point_reacher_promp['name'] = "alr_envs:ViaPointReacher-v0"
 register(
     id="ViaPointReacherProMP-v0",
     entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-    kwargs={
-        "name": f"alr_envs:ViaPointReacher-v0",
-        "wrappers": [classic_control.viapoint_reacher.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 5,
-            "num_basis": 5,
-            
"duration": 2, - "policy_type": "velocity", - "weights_scale": 1, - "zero_start": True - } - } + kwargs=kwargs_dict_via_point_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") ## Hole Reacher -_versions = ["v0", "v1", "v2"] +_versions = ["HoleReacher-v0"] for _v in _versions: - _env_id = f'HoleReacherDMP-{_v}' + _name = _v.split("-") + _env_id = f'{_name[0]}DMP-{_name[1]}' register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', @@ -617,22 +439,19 @@ for _v in _versions: ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - _env_id = f'HoleReacherProMP-{_v}' + _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_hole_reacher_promp = dict(DEFAULT_MP_ENV_DICT) + kwargs_dict_hole_reacher_promp['wrappers'].append('TODO') # TODO + kwargs_dict_hole_reacher_promp['ep_wrapper_kwargs']['weight_scale'] = 2 + kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5 + kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2 + kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' + kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5 + kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}" register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:HoleReacher-{_v}", - "wrappers": [classic_control.hole_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 3, - "duration": 2, - "policy_type": "velocity", - "weights_scale": 5, - "zero_start": True - } - } + kwargs=kwargs_dict_hole_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) @@ -666,30 +485,268 @@ for _v in _versions: ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_alr_reacher_promp = dict(DEFAULT_MP_ENV_DICT) + kwargs_dict_alr_reacher_promp['wrappers'].append('TODO') # TODO + kwargs_dict_alr_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5 if "long" not in _v.lower() else 7 + kwargs_dict_alr_reacher_promp['phase_generator_kwargs']['tau'] = 4 + kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1 + kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1 + kwargs_dict_alr_reacher_promp['name'] = f"alr_envs:{_v}" register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:{_v}", - "wrappers": [mujoco.reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5 if "long" not in _v.lower() else 7, - "num_basis": 2, - "duration": 4, - "policy_type": "motor", - "weights_scale": 5, - "zero_start": True, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } + kwargs=kwargs_dict_alr_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +######################################################################################################################## +## Beerpong ProMP +_versions = ['ALRBeerPong-v0'] +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_bp_promp = dict(DEFAULT_MP_ENV_DICT) + kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper) + kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7 + kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.8 + kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True + kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 
2.55, 3, 2., 2, 1.25])
+    kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+    kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
+    kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_bp_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+### BP with Fixed release
+_versions = ["ALRBeerPongStepBased-v0", "ALRBeerPongFixedRelease-v0"]
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_bp_promp_fixed_release = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_bp_promp_fixed_release['wrappers'].append(mujoco.beerpong.NewMPWrapper)
+    kwargs_dict_bp_promp_fixed_release['movement_primitives_kwargs']['action_dim'] = 7
+    kwargs_dict_bp_promp_fixed_release['phase_generator_kwargs']['tau'] = 0.62
+    kwargs_dict_bp_promp_fixed_release['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
+    kwargs_dict_bp_promp_fixed_release['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+    kwargs_dict_bp_promp_fixed_release['basis_generator_kwargs']['num_basis'] = 2
+    kwargs_dict_bp_promp_fixed_release['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_bp_promp_fixed_release
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+########################################################################################################################
+
+## Table Tennis needs to be fixed according to Zhou's implementation
+
+########################################################################################################################
+
+## AntJump
+_versions = ['ALRAntJump-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_ant_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
+    kwargs_dict_ant_jump_promp['movement_primitives_kwargs']['action_dim'] = 8
+    kwargs_dict_ant_jump_promp['phase_generator_kwargs']['tau'] = 10
+    kwargs_dict_ant_jump_promp['controller_kwargs']['p_gains'] = np.ones(8)
+    kwargs_dict_ant_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(8)
+    kwargs_dict_ant_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_ant_jump_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+## HalfCheetahJump
+_versions = ['ALRHalfCheetahJump-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_halfcheetah_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    # TODO: dedicated HalfCheetahJump wrapper is still missing (see commit message); the AntJump wrapper is reused for now
+    kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
+    kwargs_dict_halfcheetah_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
+    kwargs_dict_halfcheetah_jump_promp['phase_generator_kwargs']['tau'] = 5
+    kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
+    kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+    kwargs_dict_halfcheetah_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs=kwargs_dict_halfcheetah_jump_promp
+    )
+    
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+
+## HopperJump (the list below also covers the Hopper-based throw and jump-on-box tasks)
+_versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJumpRndmJointsDesPosStepBased-v0',
+             'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0']
+
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_hopper_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_hopper_jump_promp['wrappers'].append('TODO') # TODO
+    kwargs_dict_hopper_jump_promp['movement_primitives_kwargs']['action_dim'] = 3
+    kwargs_dict_hopper_jump_promp['phase_generator_kwargs']['tau'] = 2
+    kwargs_dict_hopper_jump_promp['controller_kwargs']['p_gains'] = np.ones(3)
+    kwargs_dict_hopper_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(3)
+    kwargs_dict_hopper_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_hopper_jump_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+
+## Walker2DJump
+_versions = ['ALRWalker2DJump-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_walker2d_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_walker2d_jump_promp['wrappers'].append('TODO') # TODO
+    kwargs_dict_walker2d_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
+    kwargs_dict_walker2d_jump_promp['phase_generator_kwargs']['tau'] = 2.4
+    kwargs_dict_walker2d_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
+    kwargs_dict_walker2d_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+    kwargs_dict_walker2d_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs=kwargs_dict_walker2d_jump_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+### Deprecated: we will not provide non-random starts anymore
+"""
+register(
+    id='SimpleReacher-v1',
+    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 2,
+        "random_start": False
+    }
+)
+
+register(
+    id='LongSimpleReacher-v1',
+    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False
+    }
+)
+register(
+    id='HoleReacher-v1',
+    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False,
+        "allow_self_collision": False,
+        "allow_wall_collision": False,
+        "hole_width": 0.25,
+        "hole_depth": 1,
+        "hole_x": None,
+        "collision_penalty": 100,
+    }
+)
+register(
+    id='HoleReacher-v2',
+    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False,
+        "allow_self_collision": False,
+        "allow_wall_collision": False,
+        "hole_width": 0.25,
+        "hole_depth": 1,
+        "hole_x": 2,
+        "collision_penalty": 1,
+    }
+)
+
+# CtxtFree are v0, Contextual are v1
+register(
+    id='ALRAntJump-v0',
+    entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
+    max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
+    kwargs={
+        "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
+        "context": False
+    }
+)
+# CtxtFree are v0, 
Contextual are v1 +register( + id='ALRHalfCheetahJump-v0', + entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + "context": False + } +) +register( + id='ALRHopperJump-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, + "context": False, + "healthy_reward": 1.0 + } +) + +""" + +### Deprecated used for CorL paper +""" _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] +for i in _vs: + _env_id = f'ALRReacher{i}-v0' + register( + id=_env_id, + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 0, + "n_links": 5, + "balance": False, + 'ctrl_cost_weight': i + } + ) + + _env_id = f'ALRReacherSparse{i}-v0' + register( + id=_env_id, + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": False, + 'ctrl_cost_weight': i + } + ) + _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] for i in _vs: _env_id = f'ALRReacher{i}ProMP-v0' register( @@ -736,543 +793,56 @@ for i in _vs: } } ) - - -# ## Beerpong -# _versions = ["v0", "v1"] -# for _v in _versions: -# _env_id = f'BeerpongProMP-{_v}' -# register( -# id=_env_id, -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRBeerPong-{_v}", -# "wrappers": [mujoco.beerpong.MPWrapper], -# "mp_kwargs": { -# "num_dof": 7, -# "num_basis": 2, -# # "duration": 1, -# "duration": 0.5, -# # "post_traj_time": 2, -# "post_traj_time": 2.5, -# "policy_type": "motor", -# "weights_scale": 0.14, -# # "weights_scale": 1, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]), -# "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Beerpong -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'BeerpongProMP-{_v}' + register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + id='ALRHopperJumpOnBox-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, kwargs={ - "name": f"alr_envs:ALRBeerPong-{_v}", - "wrappers": [mujoco.beerpong.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 7 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 0.8, # initial value - 'learn_tau': True, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), - "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 2, - 'num_basis_zero_start': 2 - } - } + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, + "context": False + } ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Beerpong ProMP fixed release -_env_id = 'BeerpongProMP-v2' -register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + register( + 
id='ALRHopperThrow-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, kwargs={ - "name": "alr_envs:ALRBeerPong-v4", - "wrappers": [mujoco.beerpong.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 7 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 0.62, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), - "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 2, - 'num_basis_zero_start': 2 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Table Tennis -ctxt_dim = [2, 4] -for _v, cd in enumerate(ctxt_dim): - _env_id = f'TableTennisProMP-v{_v}' + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, + "context": False + } + ) register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd), - "wrappers": [mujoco.table_tennis.MPWrapper], - "mp_kwargs": { - "num_dof": 7, - "num_basis": 2, - "duration": 1.25, - "post_traj_time": 1.5, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), - "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## AntJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRAntJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRAntJump-{_v}", - "wrappers": [mujoco.ant_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 8, - "num_basis": 5, - "duration": 10, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(8), - "d_gains": 0.1*np.ones(8) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## AntJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRAntJumpProMP-{_v}' - register( - id= _env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRAntJump-{_v}", - "wrappers": [mujoco.ant_jump.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 8 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 10, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(8), - "d_gains": 0.1*np.ones(8), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 2 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - - - -## HalfCheetahJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHalfCheetahJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHalfCheetahJump-{_v}", - "wrappers": 
[mujoco.half_cheetah_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 6, - "num_basis": 5, - "duration": 5, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(6), - "d_gains": 0.1*np.ones(6) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# ## HopperJump -# _versions = ["v0", "v1"] -# for _v in _versions: -# _env_id = f'ALRHopperJumpProMP-{_v}' -# register( -# id= _env_id, -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRHopperJump-{_v}", -# "wrappers": [mujoco.hopper_jump.MPWrapper], -# "mp_kwargs": { -# "num_dof": 3, -# "num_basis": 5, -# "duration": 2, -# "post_traj_time": 0, -# "policy_type": "motor", -# "weights_scale": 1.0, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.ones(3), -# "d_gains": 0.1*np.ones(3) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# ## HopperJump -# register( -# id= "ALRHopperJumpProMP-v2", -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRHopperJump-v2", -# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper], -# "mp_kwargs": { -# "num_dof": 3, -# "num_basis": 5, -# "duration": 2, -# "post_traj_time": 0, -# "policy_type": "motor", -# "weights_scale": 1.0, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.ones(3), -# "d_gains": 0.1*np.ones(3) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") - -## HopperJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperJumpProMP-{_v}' - register( - id= _env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-{_v}", - "wrappers": [mujoco.hopper_jump.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## HopperJump -register( - id= "ALRHopperJumpProMP-v2", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + id='ALRHopperThrowInBasket-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, kwargs={ - "name": f"alr_envs:ALRHopperJump-v2", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) 
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") - - -## HopperJump -register( - id= "ALRHopperJumpProMP-v3", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, + "context": False + } + ) + register( + id='ALRWalker2DJump-v0', + entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, kwargs={ - "name": f"alr_envs:ALRHopperJump-v3", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3") - - -## HopperJump -register( - id= "ALRHopperJumpProMP-v4", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-v4", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4") - -## HopperJumpOnBox -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperJumpOnBoxProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJumpOnBox-{_v}", - "wrappers": [mujoco.hopper_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } - } + "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, + "context": False + } ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + register(id='TableTennis2DCtxt-v1', + entry_point='alr_envs.alr.mujoco:TTEnvGym', + max_episode_steps=MAX_EPISODE_STEPS, + kwargs={'ctxt_dim': 2, 'fixed_goal': True}) -#HopperThrow -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperThrowProMP-{_v}' register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + id='ALRBeerPong-v0', + entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv', + max_episode_steps=300, kwargs={ - "name": f"alr_envs:ALRHopperThrow-{_v}", - "wrappers": [mujoco.hopper_throw.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": 
np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } + "rndm_goal": False, + "cup_goal_pos": [0.1, -2.0], + "frame_skip": 2 } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## HopperThrowInBasket -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperThrowInBasketProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperThrowInBasket-{_v}", - "wrappers": [mujoco.hopper_throw.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Walker2DJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRWalker2DJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRWalker2DJump-{_v}", - "wrappers": [mujoco.walker_2d_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 6, - "num_basis": 5, - "duration": 2.4, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(6), - "d_gains": 0.1*np.ones(6) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) \ No newline at end of file + ) +""" diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py index b7d376e..64d9e78 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -178,8 +178,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): [self._steps], ]) - def compute_reward(self): - @property def dt(self): return super(ALRBeerBongEnv, self).dt * self.repeat_action @@ -213,37 +211,37 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv): return ob, reward, done, infos -class ALRBeerBongEnvStepBased(ALRBeerBongEnv): - def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): - super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) - self.release_step = 62 # empirically evaluated for frame_skip=2! 
- - def step(self, a): - if self._steps < self.release_step: - return super(ALRBeerBongEnvStepBased, self).step(a) - else: - reward = 0 - done = False - while not done: - sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape)) - if not done or sub_infos['sim_crash']: - reward += sub_reward - else: - ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy() - cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ - self.sim.model._site_name2id["cup_goal_final_table"]].copy()) - cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ - self.sim.model._site_name2id["cup_goal_table"]].copy()) - if sub_infos['success']: - dist_rew = -cup_goal_dist_final ** 2 - else: - dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2 - reward = reward - sub_infos['action_cost'] + dist_rew - infos = sub_infos - ob = sub_ob - ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the - # internal steps and thus, the observation also needs to be set correctly - return ob, reward, done, infos +# class ALRBeerBongEnvStepBased(ALRBeerBongEnv): +# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): +# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) +# self.release_step = 62 # empirically evaluated for frame_skip=2! +# +# def step(self, a): +# if self._steps < self.release_step: +# return super(ALRBeerBongEnvStepBased, self).step(a) +# else: +# reward = 0 +# done = False +# while not done: +# sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape)) +# if not done or sub_infos['sim_crash']: +# reward += sub_reward +# else: +# ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy() +# cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ +# self.sim.model._site_name2id["cup_goal_final_table"]].copy()) +# cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ +# self.sim.model._site_name2id["cup_goal_table"]].copy()) +# if sub_infos['success']: +# dist_rew = -cup_goal_dist_final ** 2 +# else: +# dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2 +# reward = reward - sub_infos['action_cost'] + dist_rew +# infos = sub_infos +# ob = sub_ob +# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the +# # internal steps and thus, the observation also needs to be set correctly +# return ob, reward, done, infos if __name__ == "__main__": diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py index 146b039..5cd234c 100644 --- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py +++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py @@ -298,76 +298,6 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv): return observation, reward, done, info -class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv): - def __init__(self, max_episode_steps=250): - super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False, - reset_noise_scale=5e-1, - max_episode_steps=max_episode_steps) - - def reset_model(self): - self._floor_geom_id = self.model.geom_name2id('floor') - self._foot_geom_id = self.model.geom_name2id('foot_geom') - noise_low = -np.ones(self.model.nq) * self._reset_noise_scale - noise_low[1] = 0 - noise_low[2] = 0 - noise_low[3] = -0.2 - noise_low[4] = 
-0.2 - noise_low[5] = -0.1 - - noise_high = np.ones(self.model.nq) * self._reset_noise_scale - noise_high[1] = 0 - noise_high[2] = 0 - noise_high[3] = 0 - noise_high[4] = 0 - noise_high[5] = 0.1 - - rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) - # rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and - # can not recover - # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3) - qpos = self.init_qpos + rnd_vec - qvel = self.init_qvel - - self.set_state(qpos, qvel) - - observation = self._get_obs() - return observation - - def step(self, action): - - self.current_step += 1 - self.do_simulation(action, self.frame_skip) - - self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \ - self.contact_with_floor else True - - height_after = self.get_body_com("torso")[2] - self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0 - - ctrl_cost = self.control_cost(action) - costs = ctrl_cost - done = False - - if self.current_step >= self.max_episode_steps: - healthy_reward = 0 - height_reward = self._forward_reward_weight * self.max_height # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley - rewards = height_reward + healthy_reward - - else: - # penalty for wrong start direction of first two joints; not needed, could be removed - rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 - - observation = self._get_obs() - reward = rewards - costs - info = { - 'height': height_after, - 'max_height': self.max_height, - 'goal': self.goal - } - - return observation, reward, done, info - - if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" # env = ALRHopperJumpEnv()
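Note on the kwargs-dict pattern introduced in alr_envs/alr/__init__.py: every registration block starts from dict(DEFAULT_MP_ENV_DICT), which is only a shallow copy. The nested config dicts and the shared "wrappers" list are the same objects in every copy, so each loop's .append(...) and nested assignments leak into DEFAULT_MP_ENV_DICT and into all later registrations. Below is a minimal, self-contained sketch of the pitfall and a copy.deepcopy-based alternative; the dict is trimmed to three keys and the wrapper names are purely illustrative, not part of this patch.

import copy

DEFAULT_MP_ENV_DICT = {
    "name": 'EnvName',
    "wrappers": [],
    "phase_generator_kwargs": {'tau': 1.5},
}

# Shallow copy: the top-level dict is new, but its values are shared objects.
shallow = dict(DEFAULT_MP_ENV_DICT)
shallow["wrappers"].append("SomeWrapper")       # also appends to DEFAULT_MP_ENV_DICT["wrappers"]
shallow["phase_generator_kwargs"]["tau"] = 0.8  # also overwrites the shared default tau
assert DEFAULT_MP_ENV_DICT["wrappers"] == ["SomeWrapper"]
assert DEFAULT_MP_ENV_DICT["phase_generator_kwargs"]["tau"] == 0.8

# Deep copy: nested containers are duplicated, so per-env tweaks stay local.
deep = copy.deepcopy(DEFAULT_MP_ENV_DICT)
deep["wrappers"].append("AnotherWrapper")
deep["phase_generator_kwargs"]["tau"] = 10
assert DEFAULT_MP_ENV_DICT["wrappers"] == ["SomeWrapper"]           # unchanged by the deep copy
assert DEFAULT_MP_ENV_DICT["phase_generator_kwargs"]["tau"] == 0.8  # unchanged by the deep copy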
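For reference, the renamed ids follow the f'{_name[0]}ProMP-{_name[1]}' scheme, e.g. ALRBeerPong-v0 gets the movement-primitive variant ALRBeerPongProMP-v0, resolved through the regular gym registry once alr_envs is imported. A usage sketch under the assumptions that the remaining 'TODO' wrapper entries are filled in and that the gym version targeted here predates 0.26 (reset() returns only the observation, step() returns a 4-tuple):

import gym
import alr_envs  # executes the register(...) calls in alr_envs/alr/__init__.py

env = gym.make('ALRBeerPongProMP-v0')
obs = env.reset()
# One step corresponds to rolling out the full ProMP trajectory: the action is the
# movement-primitive parameter vector (basis weights per DoF, presumably plus the
# release time tau, since learn_tau=True for this env).
obs, reward, done, info = env.step(env.action_space.sample())
env.close()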