diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 1101020..9dc31ae 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -252,7 +252,8 @@ register(
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
-        "context": False
+        "context": False,
+        "healthy_reward": 1.0
     }
 )
 register(
@@ -273,6 +274,17 @@ register(
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
     }
 )
+
+register(
+    id='ALRHopperJump-v3',
+    entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
+    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+    kwargs={
+        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+        "context": True,
+        "healthy_reward": 1.0
+    }
+)
 # CtxtFree are v0, Contextual are v1
 register(
     id='ALRHopperJumpOnBox-v0',
@@ -723,6 +735,46 @@ for _v in _versions:
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
+## AntJump
+_versions = ["v0", "v1"]
+for _v in _versions:
+    _env_id = f'ALRAntJumpProMP-{_v}'
+    register(
+        id= _env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs={
+            "name": f"alr_envs:ALRAntJump-{_v}",
+            "wrappers": [mujoco.ant_jump.NewMPWrapper],
+            "ep_wrapper_kwargs": {
+                "weight_scale": 1
+            },
+            "movement_primitives_kwargs": {
+                'movement_primitives_type': 'promp',
+                'action_dim': 8
+            },
+            "phase_generator_kwargs": {
+                'phase_generator_type': 'linear',
+                'delay': 0,
+                'tau': 10, # initial value
+                'learn_tau': False,
+                'learn_delay': False
+            },
+            "controller_kwargs": {
+                'controller_type': 'motor',
+                "p_gains": np.ones(8),
+                "d_gains": 0.1*np.ones(8),
+            },
+            "basis_generator_kwargs": {
+                'basis_generator_type': 'zero_rbf',
+                'num_basis': 5,
+                'num_basis_zero_start': 2
+            }
+        }
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+
+
 ## HalfCheetahJump
 _versions = ["v0", "v1"]
 for _v in _versions:
@@ -877,6 +929,42 @@ register(
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
 
+
+## HopperJump
+register(
+    id= "ALRHopperJumpProMP-v3",
+    entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+    kwargs={
+        "name": f"alr_envs:ALRHopperJump-v3",
+        "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+        "ep_wrapper_kwargs": {
+            "weight_scale": 1
+        },
+        "movement_primitives_kwargs": {
+            'movement_primitives_type': 'promp',
+            'action_dim': 3
+        },
+        "phase_generator_kwargs": {
+            'phase_generator_type': 'linear',
+            'delay': 0,
+            'tau': 2, # initial value
+            'learn_tau': False,
+            'learn_delay': False
+        },
+        "controller_kwargs": {
+            'controller_type': 'motor',
+            "p_gains": np.ones(3),
+            "d_gains": 0.1*np.ones(3),
+        },
+        "basis_generator_kwargs": {
+            'basis_generator_type': 'zero_rbf',
+            'num_basis': 5,
+            'num_basis_zero_start': 2
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3")
+
 ## HopperJumpOnBox
 _versions = ["v0", "v1"]
 for _v in _versions:
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index fb86186..5c04b03 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -5,7 +5,7 @@ from .table_tennis.tt_gym import TTEnvGym
 from .beerpong.beerpong import ALRBeerBongEnv
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from
.hopper_throw.hopper_throw import ALRHopperThrowEnv from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv diff --git a/alr_envs/alr/mujoco/ant_jump/__init__.py b/alr_envs/alr/mujoco/ant_jump/__init__.py index c5e6d2f..5d15867 100644 --- a/alr_envs/alr/mujoco/ant_jump/__init__.py +++ b/alr_envs/alr/mujoco/ant_jump/__init__.py @@ -1 +1,2 @@ from .mp_wrapper import MPWrapper +from .new_mp_wrapper import NewMPWrapper diff --git a/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py new file mode 100644 index 0000000..c33048f --- /dev/null +++ b/alr_envs/alr/mujoco/ant_jump/new_mp_wrapper.py @@ -0,0 +1,21 @@ +from alr_envs.mp.episodic_wrapper import EpisodicWrapper +from typing import Union, Tuple +import numpy as np + + + +class NewMPWrapper(EpisodicWrapper): + + def set_active_obs(self): + return np.hstack([ + [False] * 111, # ant has 111 dimensional observation space !! + [True] # goal height + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.env.sim.data.qpos[7:15].copy() + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.sim.data.qvel[6:14].copy() diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml index 436b36c..78e2c32 100644 --- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml +++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml @@ -144,7 +144,7 @@ solref="-10000 -100"/> - + diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py index 36998b9..017bfcd 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -7,8 +7,12 @@ from gym.envs.mujoco import MujocoEnv from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward -CUP_POS_MIN = np.array([-0.32, -2.2]) -CUP_POS_MAX = np.array([0.32, -1.2]) +CUP_POS_MIN = np.array([-1.42, -4.05]) +CUP_POS_MAX = np.array([1.42, -1.25]) + + +# CUP_POS_MIN = np.array([-0.32, -2.2]) +# CUP_POS_MAX = np.array([0.32, -1.2]) # smaller context space -> Easier task # CUP_POS_MIN = np.array([-0.16, -2.2]) @@ -24,8 +28,10 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): self.cup_goal_pos = np.array(cup_goal_pos) self._steps = 0 + # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", + # "beerpong_wo_cup" + ".xml") self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", - "beerpong_wo_cup" + ".xml") + "beerpong_wo_cup_big_table" + ".xml") self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) @@ -100,10 +106,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): return self._get_obs() def step(self, a): - # if a.shape[0] == 8: # we learn also when to release the ball - # self._release_step = a[-1] - # self._release_step = np.clip(self._release_step, 50, 250) - # self.release_step = 0.5/self.dt reward_dist = 0.0 angular_vel = 0.0 applied_action = a @@ -170,16 +172,11 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): [self._steps], ]) - # TODO - @property - def active_obs(self): - return np.hstack([ - [False] * 7, # cos - [False] * 7, # sin - [True] * 2, # xy position of cup - [False] # env steps - ]) +class ALRBeerPongStepEnv(ALRBeerBongEnv): + def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, + rndm_goal=False, 
cup_goal_pos=None): + super(ALRBeerPongStepEnv, self).__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) if __name__ == "__main__": env = ALRBeerBongEnv(rndm_goal=True) diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py index bb5dd3f..cb75127 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py @@ -40,6 +40,7 @@ class BeerPongReward: self.cup_z_axes = None self.collision_penalty = 500 self.reset(None) + self.is_initialized = False def reset(self, noisy): self.ball_traj = [] @@ -55,113 +56,60 @@ class BeerPongReward: self.ball_wall_contact = False self.ball_cup_contact = False self.ball_in_cup = False + self.dist_ground_cup = -1 # distance floor to cup if first floor contact self.noisy_bp = noisy self._t_min_final_dist = -1 def compute_reward(self, env, action): - self.ball_id = env.sim.model._body_name2id["ball"] - self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"] - self.goal_id = env.sim.model._site_name2id["cup_goal_table"] - self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"] - self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects] - self.cup_table_id = env.sim.model._body_name2id["cup_table"] - self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"] - self.wall_collision_id = env.sim.model._geom_name2id["wall"] - self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"] - self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"] - self.ground_collision_id = env.sim.model._geom_name2id["ground"] - self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects] + + if not self.is_initialized: + self.is_initialized = True + self.ball_id = env.sim.model._body_name2id["ball"] + self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"] + self.goal_id = env.sim.model._site_name2id["cup_goal_table"] + self.goal_final_id = env.sim.model._site_name2id["cup_goal_final_table"] + self.cup_collision_ids = [env.sim.model._geom_name2id[name] for name in self.cup_collision_objects] + self.cup_table_id = env.sim.model._body_name2id["cup_table"] + self.table_collision_id = env.sim.model._geom_name2id["table_contact_geom"] + self.wall_collision_id = env.sim.model._geom_name2id["wall"] + self.cup_table_collision_id = env.sim.model._geom_name2id["cup_base_table_contact"] + self.init_ball_pos_site_id = env.sim.model._site_name2id["init_ball_pos_site"] + self.ground_collision_id = env.sim.model._geom_name2id["ground"] + self.robot_collision_ids = [env.sim.model._geom_name2id[name] for name in self.robot_collision_objects] goal_pos = env.sim.data.site_xpos[self.goal_id] ball_pos = env.sim.data.body_xpos[self.ball_id] ball_vel = env.sim.data.body_xvelp[self.ball_id] goal_final_pos = env.sim.data.site_xpos[self.goal_final_id] + + self._check_contacts(env.sim) self.dists.append(np.linalg.norm(goal_pos - ball_pos)) self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) - + self.dist_ground_cup = np.linalg.norm(ball_pos-goal_pos) \ + if self.ball_ground_contact_first and self.dist_ground_cup == -1 else self.dist_ground_cup action_cost = np.sum(np.square(action)) self.action_costs.append(action_cost) - # ##################### Reward function which forces to bounce once on the table (tanh) ######################## - # if not 
self.ball_table_contact: - # self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id, - # self.table_collision_id) - # - # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) - # if env._steps == env.ep_length - 1 or self._is_collided: - # min_dist = np.min(self.dists) - # final_dist = self.dists_final[-1] - # - # ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, - # self.cup_table_collision_id) - # - # # encourage bounce before falling into cup - # if not ball_in_cup: - # if not self.ball_table_contact: - # reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist)) - # else: - # reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist)) - # else: - # if not self.ball_table_contact: - # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1 - # else: - # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3 - # - # # reward = - 1 * cost - self.collision_penalty * int(self._is_collided) - # success = ball_in_cup - # crash = self._is_collided - # else: - # reward = - 1e-2 * action_cost - # success = False - # crash = False - # ################################################################################################################ - - ##################### Reward function which does not force to bounce once on the table (tanh) ################ - # self._check_contacts(env.sim) - # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) - # if env._steps == env.ep_length - 1 or self._is_collided: - # min_dist = np.min(self.dists) - # final_dist = self.dists_final[-1] - # - # # encourage bounce before falling into cup - # if not self.ball_in_cup: - # if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact: - # min_dist_coeff, final_dist_coeff, rew_offset = 0.2, 0.1, 0 - # # reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist)) - # else: - # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, 0 - # # reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist)) - # else: - # min_dist_coeff, final_dist_coeff, rew_offset = 1, 2, 3 - # # reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3 - # - # reward = final_dist_coeff * (1 - np.tanh(0.5 * final_dist)) + min_dist_coeff * (1 - np.tanh(0.5 * min_dist)) \ - # + rew_offset - # success = self.ball_in_cup - # crash = self._is_collided - # else: - # reward = - 1e-2 * action_cost - # success = False - # crash = False - ################################################################################################################ - # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############ - self._check_contacts(env.sim) + self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) + if env._steps == env.ep_length - 1 or self._is_collided: min_dist = np.min(self.dists) final_dist = self.dists_final[-1] - # if self.ball_ground_contact_first: - # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6 - # else: - if not self.ball_in_cup: - if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact: - min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4 - else: - min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2 + if self.ball_ground_contact_first: + min_dist_coeff, 
final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 2, -4 # relative rew offset when first bounding on ground + # min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -6 # absolute rew offset when first bouncing on ground else: - min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0 + if not self.ball_in_cup: + if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact: + min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -4 + else: + min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 1, 0.5, 0, -2 + else: + min_dist_coeff, final_dist_coeff, ground_contact_dist_coeff, rew_offset = 0, 1, 0 ,0 + # dist_ground_cup = 1 * self.dist_ground_cup reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \ - 1e-4 * np.mean(action_cost) + 1e-4 * np.mean(action_cost) - ground_contact_dist_coeff*self.dist_ground_cup ** 2 # 1e-7*np.mean(action_cost) # release step punishment min_time_bound = 0.1 @@ -172,43 +120,13 @@ class BeerPongReward: reward += release_time_rew success = self.ball_in_cup # print('release time :', release_time) + # print('dist_ground_cup :', dist_ground_cup) else: reward = - 1e-2 * action_cost # reward = - 1e-4 * action_cost # reward = 0 success = False # ################################################################################################################ - - # # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############ - # self._check_contacts(env.sim) - # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) - # if env._steps == env.ep_length - 1 or self._is_collided: - # min_dist = np.min(self.dists) - # final_dist = self.dists_final[-1] - # - # if not self.ball_in_cup: - # if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact: - # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6 - # else: - # if self.ball_ground_contact_first: - # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4 - # else: - # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2 - # else: - # if self.ball_ground_contact_first: - # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, -1 - # else: - # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0 - # reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \ - # 1e-7 * np.mean(action_cost) - # # 1e-4*np.mean(action_cost) - # success = self.ball_in_cup - # else: - # # reward = - 1e-2 * action_cost - # # reward = - 1e-4 * action_cost - # reward = 0 - # success = False - # ################################################################################################################ infos = {} infos["success"] = success infos["is_collided"] = self._is_collided diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py index 7c4ee9d..9683687 100644 --- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py +++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py @@ -44,6 +44,12 @@ class NewMPWrapper(EpisodicWrapper): else: return action, None + def set_context(self, context): + xyz = np.zeros(3) + xyz[:2] = context + xyz[-1] = 0.840 + self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz + return self.get_observation_from_step(self.env.env._get_obs()) # def set_action_space(self): # if self.mp.learn_tau: # min_action_bounds, max_action_bounds = 
self.mp.get_param_bounds() diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml similarity index 92% rename from alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml rename to alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml index f18bc46..3348bab 100644 --- a/alr_envs/alr/mujoco/hopper_jump/assets/hopper.xml +++ b/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml @@ -25,12 +25,16 @@ + + + + diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py index 9a74bb0..979e224 100644 --- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py +++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py @@ -1,5 +1,6 @@ from gym.envs.mujoco.hopper_v3 import HopperEnv import numpy as np +import os MAX_EPISODE_STEPS_HOPPERJUMP = 250 @@ -14,13 +15,13 @@ class ALRHopperJumpEnv(HopperEnv): """ def __init__(self, - xml_file='hopper.xml', + xml_file='hopper_jump.xml', forward_reward_weight=1.0, ctrl_cost_weight=1e-3, healthy_reward=0.0, penalty=0.0, context=True, - terminate_when_unhealthy=True, + terminate_when_unhealthy=False, healthy_state_range=(-100.0, 100.0), healthy_z_range=(0.5, float('inf')), healthy_angle_range=(-float('inf'), float('inf')), @@ -34,6 +35,13 @@ class ALRHopperJumpEnv(HopperEnv): self.goal = 0 self.context = context self.exclude_current_positions_from_observation = exclude_current_positions_from_observation + self._floor_geom_id = None + self._foot_geom_id = None + self.contact_with_floor = False + self.init_floor_contact = False + self.has_left_floor = False + self.contact_dist = None + xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file) super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale, exclude_current_positions_from_observation) @@ -43,28 +51,36 @@ class ALRHopperJumpEnv(HopperEnv): self.current_step += 1 self.do_simulation(action, self.frame_skip) height_after = self.get_body_com("torso")[2] + site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy() self.max_height = max(height_after, self.max_height) ctrl_cost = self.control_cost(action) costs = ctrl_cost done = False - + rewards = 0 if self.current_step >= self.max_episode_steps: hight_goal_distance = -10*np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height - healthy_reward = 0 if self.context else self.healthy_reward * self.current_step + healthy_reward = 0 if self.context else self.healthy_reward * 2 # self.current_step height_reward = self._forward_reward_weight * hight_goal_distance # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley rewards = height_reward + healthy_reward - else: - # penalty for wrong start direction of first two joints; not needed, could be removed - rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 + # else: + # # penalty for wrong start direction of first two joints; not needed, could be removed + # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 observation = self._get_obs() reward = rewards - costs + # info = { + # 'height' : height_after, + # 'max_height': self.max_height, + # 'goal' : self.goal + # } info = { - 'height' : height_after, + 'height': height_after, + 'x_pos': site_pos_after, 'max_height': self.max_height, - 
'goal' : self.goal + 'height_rew': self.max_height, + 'healthy_reward': self.healthy_reward * 2 } return observation, reward, done, info @@ -73,7 +89,7 @@ class ALRHopperJumpEnv(HopperEnv): return np.append(super()._get_obs(), self.goal) def reset(self): - self.goal = np.random.uniform(1.4, 2.3, 1) # 1.3 2.3 + self.goal = np.random.uniform(1.4, 2.16, 1) # 1.3 2.3 self.max_height = 0 self.current_step = 0 return super().reset() @@ -87,14 +103,124 @@ class ALRHopperJumpEnv(HopperEnv): self.set_state(qpos, qvel) observation = self._get_obs() + self.has_left_floor = False + self.contact_with_floor = False + self.init_floor_contact = False + self.contact_dist = None return observation + def _contact_checker(self, id_1, id_2): + for coni in range(0, self.sim.data.ncon): + con = self.sim.data.contact[coni] + collision = con.geom1 == id_1 and con.geom2 == id_2 + collision_trans = con.geom1 == id_2 and con.geom2 == id_1 + if collision or collision_trans: + return True + return False + + +class ALRHopperXYJumpEnv(ALRHopperJumpEnv): + # The goal here is the desired x-Position of the Torso + + def step(self, action): + + self._floor_geom_id = self.model.geom_name2id('floor') + self._foot_geom_id = self.model.geom_name2id('foot_geom') + + self.current_step += 1 + self.do_simulation(action, self.frame_skip) + height_after = self.get_body_com("torso")[2] + site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy() + self.max_height = max(height_after, self.max_height) + + # floor_contact = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not self.contact_with_floor else False + # self.init_floor_contact = floor_contact if not self.init_floor_contact else self.init_floor_contact + # self.has_left_floor = not floor_contact if self.init_floor_contact and not self.has_left_floor else self.has_left_floor + # self.contact_with_floor = floor_contact if not self.contact_with_floor and self.has_left_floor else self.contact_with_floor + + floor_contact = self._contact_checker(self._floor_geom_id, + self._foot_geom_id) if not self.contact_with_floor else False + if not self.init_floor_contact: + self.init_floor_contact = floor_contact + if self.init_floor_contact and not self.has_left_floor: + self.has_left_floor = not floor_contact + if not self.contact_with_floor and self.has_left_floor: + self.contact_with_floor = floor_contact + + if self.contact_dist is None and self.contact_with_floor: + self.contact_dist = np.linalg.norm(self.sim.data.site_xpos[self.model.site_name2id('foot_site')] + - np.array([self.goal, 0, 0], dtype=object))[0] + + ctrl_cost = self.control_cost(action) + costs = ctrl_cost + done = False + goal_dist = 0 + rewards = 0 + if self.current_step >= self.max_episode_steps: + # healthy_reward = 0 if self.context else self.healthy_reward * self.current_step + healthy_reward = self.healthy_reward * 2 #* self.current_step + goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object))[0] + contact_dist = self.contact_dist if self.contact_dist is not None else 5 + dist_reward = self._forward_reward_weight * (-3*goal_dist + 10*self.max_height - 2*contact_dist) + rewards = dist_reward + healthy_reward + + # else: + ## penalty for wrong start direction of first two joints; not needed, could be removed + # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 + + observation = self._get_obs() + reward = rewards - costs + info = { + 'height': height_after, + 'x_pos': site_pos_after, + 'max_height': 
self.max_height, + 'goal': self.goal, + 'dist_rew': goal_dist, + 'height_rew': self.max_height, + 'healthy_reward': self.healthy_reward * 2 + } + + return observation, reward, done, info + + def reset_model(self): + self.init_qpos[1] = 1.5 + self._floor_geom_id = self.model.geom_name2id('floor') + self._foot_geom_id = self.model.geom_name2id('foot_geom') + noise_low = -np.zeros(self.model.nq) + noise_low[3] = -0.5 + noise_low[4] = -0.2 + noise_low[5] = 0 + + noise_high = np.zeros(self.model.nq) + noise_high[3] = 0 + noise_high[4] = 0 + noise_high[5] = 0.785 + + rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) + qpos = self.init_qpos + rnd_vec + qvel = self.init_qvel + + self.set_state(qpos, qvel) + + observation = self._get_obs() + self.has_left_floor = False + self.contact_with_floor = False + self.init_floor_contact = False + self.contact_dist = None + + return observation + + def reset(self): + super().reset() + # self.goal = np.random.uniform(-1.5, 1.5, 1) + self.goal = np.random.uniform(0, 1.5, 1) + # self.goal = np.array([1.5]) + self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object) + return self.reset_model() + class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv): def __init__(self, max_episode_steps=250): - self.contact_with_floor = False - self._floor_geom_id = None - self._foot_geom_id = None super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False, reset_noise_scale=5e-1, max_episode_steps=max_episode_steps) @@ -104,17 +230,17 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv): self._foot_geom_id = self.model.geom_name2id('foot_geom') noise_low = -np.ones(self.model.nq)*self._reset_noise_scale noise_low[1] = 0 - noise_low[2] = -0.3 - noise_low[3] = -0.1 - noise_low[4] = -1.1 - noise_low[5] = -0.785 + noise_low[2] = 0 + noise_low[3] = -0.2 + noise_low[4] = -0.2 + noise_low[5] = -0.1 noise_high = np.ones(self.model.nq)*self._reset_noise_scale noise_high[1] = 0 - noise_high[2] = 0.3 + noise_high[2] = 0 noise_high[3] = 0 noise_high[4] = 0 - noise_high[5] = 0.785 + noise_high[5] = 0.1 rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) # rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and @@ -162,24 +288,18 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv): return observation, reward, done, info - def _contact_checker(self, id_1, id_2): - for coni in range(0, self.sim.data.ncon): - con = self.sim.data.contact[coni] - collision = con.geom1 == id_1 and con.geom2 == id_2 - collision_trans = con.geom1 == id_2 and con.geom2 == id_1 - if collision or collision_trans: - return True - return False + if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" # env = ALRHopperJumpEnv() - env = ALRHopperJumpRndmPosEnv() + env = ALRHopperXYJumpEnv() + # env = ALRHopperJumpRndmPosEnv() obs = env.reset() for k in range(10): obs = env.reset() - print('observation :', obs[:6]) + print('observation :', obs[:]) for i in range(200): # objective.load_result("/tmp/cma") # test with random actions diff --git a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py index 9932403..31be6be 100644 --- a/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py +++ b/alr_envs/alr/mujoco/hopper_jump/new_mp_wrapper.py @@ -12,9 +12,19 @@ class NewMPWrapper(EpisodicWrapper): def current_vel(self) -> Union[float, int, np.ndarray, 
Tuple]:
         return self.env.sim.data.qvel[3:6].copy()
 
+    # # random goal
+    # def set_active_obs(self):
+    #     return np.hstack([
+    #         [False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
+    #         [False] * 6, # velocity
+    #         [True]
+    #     ])
+
+    # Random x goal + random init pos
     def set_active_obs(self):
         return np.hstack([
-            [False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
+            [False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
+            [True] * 3, # set to true if randomize initial pos
             [False] * 6, # velocity
             [True]
         ])
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/episodic_wrapper.py
index 7426c15..21fb035 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -205,7 +205,7 @@ class EpisodicWrapper(gym.Env, ABC):
                 infos['step_actions'] = actions[:t + 1]
                 infos['step_observations'] = observations[:t + 1]
                 infos['step_rewards'] = rewards[:t + 1]
-                infos['trajectory_length'] = t + 1
+                infos['trajectory_length'] = t + 1
                 done = True
         return self.get_observation_from_step(obs), trajectory_return, done, infos
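
# ------------------------------------------------------------------------------------------------
# Usage sketch (not part of the patch): how the environments registered above could be created.
# This assumes `alr_envs` is importable so the register() calls run on import; the ids and the
# episodic-wrapper behaviour (one step() consumes a full parameter vector and returns the episode
# return) are taken from this diff, everything else is illustrative.
import gym
import alr_envs  # noqa: F401  # importing triggers the register() calls above

# step-based contextual x-goal jump task
env = gym.make('alr_envs:ALRHopperJump-v3')
obs = env.reset()
for _ in range(200):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        break

# episodic ProMP variant: the action is a ProMP weight vector, step() rolls out the whole episode
mp_env = gym.make('alr_envs:ALRHopperJumpProMP-v3')
mp_env.reset()
obs, episode_return, done, infos = mp_env.step(mp_env.action_space.sample())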
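
# ------------------------------------------------------------------------------------------------
# Note on the AntJump NewMPWrapper above: current_pos / current_vel slice the 8 actuated leg
# joints out of MuJoCo's generalized coordinates. Minimal sketch of that indexing, assuming the
# standard Ant model (free torso joint plus 8 hinge joints); the arrays here are stand-ins.
import numpy as np

FREE_JOINT_NQ, FREE_JOINT_NV = 7, 6  # torso pose: 3 position + 4 quaternion / 3 linear + 3 angular
qpos = np.zeros(15)                  # ant: 7 free-joint + 8 hinge positions
qvel = np.zeros(14)                  # ant: 6 free-joint + 8 hinge velocities
joint_pos = qpos[FREE_JOINT_NQ:FREE_JOINT_NQ + 8]  # == qpos[7:15], as in NewMPWrapper.current_pos
joint_vel = qvel[FREE_JOINT_NV:FREE_JOINT_NV + 8]  # == qvel[6:14], as in NewMPWrapper.current_vel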
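
# ------------------------------------------------------------------------------------------------
# Reference sketch of the episode-end branch of the updated BeerPongReward. Only the distance
# terms are restated here (the release-time bonus in the diff is omitted); the coefficient tuples
# and the new quadratic ground-contact penalty match the code above, helper names are local to
# this sketch.
def beerpong_final_reward(min_dist, final_dist, dist_ground_cup, mean_action_cost,
                          ball_in_cup, ball_ground_contact_first, any_other_contact):
    """any_other_contact: ball touched table, cup or wall at least once during the episode."""
    if ball_ground_contact_first:
        # first bounce on the ground: relative offset plus an extra penalty on the ball-to-cup
        # distance measured at that first ground contact
        min_c, final_c, ground_c, offset = 1, 0.5, 2, -4
    elif not ball_in_cup:
        min_c, final_c, ground_c, offset = (1, 0.5, 0, -2) if any_other_contact else (1, 0.5, 0, -4)
    else:
        min_c, final_c, ground_c, offset = 0, 1, 0, 0
    return (offset
            - min_c * min_dist ** 2
            - final_c * final_dist ** 2
            - 1e-4 * mean_action_cost
            - ground_c * dist_ground_cup ** 2)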
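
# ------------------------------------------------------------------------------------------------
# Reference sketch of the episode-end reward of ALRHopperXYJumpEnv, restated as a standalone
# function for readability. The coefficients (-3, 10, -2), the doubled healthy reward and the
# contact-distance fallback of 5 come from the diff; the function name and arguments are local
# to this sketch.
def xy_jump_final_reward(goal_dist, max_height, contact_dist, healthy_reward, ctrl_cost,
                         forward_reward_weight=1.0):
    """Reward returned on the last step of an episode.

    goal_dist    -- distance between the foot site and the goal x-position at episode end
    max_height   -- maximum torso height reached during the episode
    contact_dist -- foot-to-goal distance at the first touch-down after leaving the floor,
                    or None if the foot never lands again (penalized with the fallback 5)
    """
    contact_dist = 5 if contact_dist is None else contact_dist
    dist_reward = forward_reward_weight * (-3 * goal_dist + 10 * max_height - 2 * contact_dist)
    return dist_reward + 2 * healthy_reward - ctrl_cost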
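
# ------------------------------------------------------------------------------------------------
# Sketch of what the new hopper set_active_obs() mask above exposes to the movement-primitive
# conditioning. Assumes the standard Hopper observation layout (5 position entries when the
# x-position is excluded, then 6 velocities) plus the goal appended by _get_obs(); the stand-in
# observation below is purely illustrative.
import numpy as np

exclude_x = True  # exclude_current_positions_from_observation
mask = np.hstack([
    [False] * (2 + int(not exclude_x)),  # root z and torso angle (+ root x if it is not excluded)
    [True] * 3,                          # thigh, leg and foot joint angles (randomized at reset)
    [False] * 6,                         # joint velocities
    [True],                              # goal appended by _get_obs()
])

obs = np.arange(mask.size, dtype=float)  # stand-in observation of matching length
context = obs[mask]                      # what the episodic wrapper keeps as active observation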