diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 7e7bca1..38abbf1 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -391,6 +391,7 @@ for _v in _versions:
             "duration": 1,
             "post_traj_time": 2,
             "policy_type": "motor",
+            # "weights_scale": 0.15,
             "weights_scale": 1,
             "zero_start": True,
             "zero_goal": False,
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 886b924..d885e78 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -10,6 +10,10 @@ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
 CUP_POS_MIN = np.array([-0.32, -2.2])
 CUP_POS_MAX = np.array([0.32, -1.2])
+# smaller context space -> Easier task
+# CUP_POS_MIN = np.array([-0.16, -2.2])
+# CUP_POS_MAX = np.array([0.16, -1.7])
+
 
 
 class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
@@ -36,7 +40,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.ball_site_id = 0
         self.ball_id = 11
 
-        self._release_step = 175  # time step of ball release
+        # self._release_step = 175  # time step of ball release
+        self._release_step = 130  # time step of ball release
 
         self.sim_time = 3  # seconds
         self.ep_length = 600  # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index 6edc7ee..c9ed451 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -85,7 +85,7 @@ class BeerPongReward:
         # if not self.ball_table_contact:
         #     self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
         #                                                                    self.table_collision_id)
-
+        #
         # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
         # if env._steps == env.ep_length - 1 or self._is_collided:
         #     min_dist = np.min(self.dists)
@@ -115,7 +115,7 @@ class BeerPongReward:
         #     crash = False
         # ################################################################################################################
 
-        # ##################### Reward function which does not force to bounce once on the table (tanh) ################
+        ##################### Reward function which does not force to bounce once on the table (tanh) ################
         # self._check_contacts(env.sim)
         # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
         # if env._steps == env.ep_length - 1 or self._is_collided:
@@ -142,9 +142,9 @@ class BeerPongReward:
         #     reward = - 1e-2 * action_cost
         #     success = False
         #     crash = False
-        # ################################################################################################################
+        ################################################################################################################
 
-        # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+        # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
         self._check_contacts(env.sim)
         self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
         if env._steps == env.ep_length - 1 or self._is_collided:
@@ -162,12 +162,12 @@ class BeerPongReward:
             reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
                      1e-4*np.mean(action_cost)
+            #          1e-7*np.mean(action_cost)
             success = self.ball_in_cup
-            crash = self._is_collided
         else:
-            reward = - 1e-2 * action_cost
+            # reward = - 1e-2 * action_cost
+            reward = - 1e-4 * action_cost
             success = False
-            crash = False
         # ################################################################################################################
 
         infos = {}
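For reference, here is a minimal standalone sketch of the quadratic-distance reward branch that the last hunk adjusts. The names `rew_offset`, `min_dist_coeff`, and `final_dist_coeff` mirror the patched file, but the function signature and the default coefficient values below are placeholders for illustration, not the repository's actual API or constants.

```python
import numpy as np

def quad_dist_reward(min_dist, final_dist, action_cost, episode_over, is_collided,
                     ball_in_cup, rew_offset=0.0, min_dist_coeff=1.0, final_dist_coeff=1.0):
    """Sketch of the patched branch: at episode end (or on collision) the reward is an
    offset minus quadratic penalties on the minimal and final ball-cup distances plus a
    small action cost; otherwise only the per-step action cost is penalized.
    The patch lowers that per-step penalty from 1e-2 to 1e-4."""
    if episode_over or is_collided:
        reward = (rew_offset
                  - min_dist_coeff * min_dist ** 2
                  - final_dist_coeff * final_dist ** 2
                  - 1e-4 * np.mean(action_cost))  # a 1e-7 variant is left commented out in the source
        success = ball_in_cup
    else:
        reward = -1e-4 * np.mean(action_cost)  # previously -1e-2 * action_cost
        success = False
    return reward, success

# Example: terminal step, ball missed the cup
r, ok = quad_dist_reward(min_dist=0.3, final_dist=0.5, action_cost=np.full(7, 0.01),
                         episode_over=True, is_collided=False, ball_in_cup=False)
```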