diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/classic_control/hole_reacher.py
index eeba84d..be1cd1f 100644
--- a/alr_envs/classic_control/hole_reacher.py
+++ b/alr_envs/classic_control/hole_reacher.py
@@ -35,7 +35,6 @@ class HoleReacher(gym.Env):
         self._angle_velocity = None
         self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
         self.start_vel = np.zeros(self.num_links)
-        self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer
 
         self.dt = 0.01
         self.time_limit = 2
@@ -68,6 +67,9 @@ class HoleReacher(gym.Env):
     def end_effector(self):
         return self._joints[self.num_links].T
 
+    def configure(self, context):
+        pass
+
     def reset(self):
         self._joint_angles = self.start_pos
         self._angle_velocity = self.start_vel
diff --git a/alr_envs/classic_control/utils.py b/alr_envs/classic_control/utils.py
index f276d4a..9da138f 100644
--- a/alr_envs/classic_control/utils.py
+++ b/alr_envs/classic_control/utils.py
@@ -62,7 +62,8 @@ def make_holereacher_env(rank, seed=0):
                                  learn_goal=True,
                                  alpha_phase=2,
                                  start_pos=_env.start_pos,
-                                 policy_type="velocity"
+                                 policy_type="velocity",
+                                 weights_scale=100,
                                  )
         _env.seed(seed + rank)
         return _env
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
index 7c68e35..73dc1c5 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
@@ -71,10 +71,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             dist_ctxt = self.dists_ctxt[-1]
 
             # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
-            cost = 2 * (0.33 * min_dist + 0.33 * dist_final + 0.33 * dist_ctxt)
+            cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)
             reward = np.exp(-1 * cost) - 1e-4 * action_cost
             stop_sim = True
-            success = dist_final < 0.05 and ball_in_cup
+            success = dist_final < 0.05 and dist_ctxt < 0.05
         else:
             reward = - 1e-4 * action_cost
             success = False
diff --git a/alr_envs/utils/dmp_env_wrapper.py b/alr_envs/utils/dmp_env_wrapper.py
index 43bb030..849ac1b 100644
--- a/alr_envs/utils/dmp_env_wrapper.py
+++ b/alr_envs/utils/dmp_env_wrapper.py
@@ -86,8 +86,6 @@ class DmpEnvWrapper(gym.Wrapper):
 
     def rollout(self, params, context=None, render=False):
        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
        goal_pos, weight_matrix = self.goal_and_weights(params)
-        if hasattr(self.env, "weight_matrix_scale"):
-            weight_matrix = weight_matrix * self.env.weight_matrix_scale
        self.dmp.set_weights(weight_matrix, goal_pos)
        trajectory, velocity = self.dmp.reference_trajectory(self.t)
diff --git a/dmp_env_wrapper_example.py b/dmp_env_wrapper_example.py
index e63e11c..b971574 100644
--- a/dmp_env_wrapper_example.py
+++ b/dmp_env_wrapper_example.py
@@ -5,17 +5,19 @@
 
 if __name__ == "__main__":
 
-    n_samples = 10
+    n_samples = 1
     n_cpus = 4
     dim = 25
 
-    env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
-                            n_samples=n_samples)
+    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
+    #                         n_samples=n_samples)
+
+    test_env = make_viapointreacher_env(0)()
 
     params = np.random.randn(n_samples, dim)
     # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
 
-    # env.reset()
-    out = env(params)
+    test_env.rollout(params, render=True)
 
-    print(out)
+    # out = env(params)
+    # print(out)
diff --git a/dmp_pd_control_example.py b/dmp_pd_control_example.py
index 33abe6e..303f979 100644
--- a/dmp_pd_control_example.py
+++ b/dmp_pd_control_example.py
@@ -5,29 +5,23 @@
 
 if __name__ == "__main__":
 
-    dim = 24
+    dim = 15
     n_cpus = 4
 
-    n_samples = 10
+    n_samples = 1
 
     vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
                                 n_samples=n_samples)
 
-    # params = 10 * np.random.randn(n_samples, dim)
-    params = np.array([[ -4.51280364,  24.43701373,  15.73282129, -12.13020392,
-                         -8.57305795,   2.79806606,  -6.38613201,   5.99309385,
-                         -2.05631886,  24.71684748,  14.05989949, -14.60456967,
-                         10.51933419,  -2.43715355,  -6.0767578 ,  13.06498129,
-                          6.18038374,  11.4153859 ,   1.40753639,   5.57082387,
-                          9.81989309,   3.60558787,  -9.66996754,  14.28519904]])
+    params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
 
-    out = vec_env(params)
-    print(out)
+    rewards, infos = vec_env(params)
+    print(rewards)
 
     #
-    non_vec_env = make_simple_env(0, 0)()
+    # non_vec_env = make_simple_env(0, 0)()
     #
     # params = 10 * np.random.randn(dim)
-    out2 = non_vec_env.rollout(params, render=True)
+    # out2 = non_vec_env.rollout(params, render=True)
 
-    print(out2)
+    # print(out2)
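
Note on the weight-scale refactor: the env-side `weight_matrix_scale` attribute and the `hasattr` check inside `DmpEnvWrapper.rollout` are replaced by a `weights_scale` argument passed once when the wrapper is built (see `make_holereacher_env` above). The snippet below is a minimal sketch of that idea, not the actual `DmpEnvWrapper` code; the class name `ScaledDmpWrapperSketch`, the weights-then-goal parameter layout, and the basis count are illustrative assumptions.

```python
import numpy as np


class ScaledDmpWrapperSketch:
    """Minimal sketch of a DMP-style wrapper that applies a fixed weight scale.

    NOT the real DmpEnvWrapper; it only illustrates moving the scaling from an
    env attribute (weight_matrix_scale) into a constructor argument
    (weights_scale), as done for make_holereacher_env in this diff.
    """

    def __init__(self, num_dof, num_basis, weights_scale=1.0, learn_goal=True):
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.weights_scale = weights_scale
        self.learn_goal = learn_goal

    def goal_and_weights(self, params):
        # assumed layout: flat basis weights, optionally followed by a goal position
        params = np.atleast_2d(params)
        if self.learn_goal:
            goal_pos = params[0, -self.num_dof:]
            weights = params[:, :-self.num_dof]
        else:
            goal_pos = None
            weights = params
        weight_matrix = weights.reshape(self.num_dof, self.num_basis)
        # the scale is applied in one place, so rollout() no longer needs a
        # hasattr(self.env, "weight_matrix_scale") special case
        return goal_pos, weight_matrix * self.weights_scale


# usage: a hole-reacher-like setup with 5 DoF, 5 basis functions, scale 100
wrapper = ScaledDmpWrapperSketch(num_dof=5, num_basis=5, weights_scale=100)
params = np.random.randn(5 * 5 + 5)   # 25 weights + 5 goal entries
goal, w = wrapper.goal_and_weights(params)
print(goal.shape, w.shape)            # (5,) (5, 5)
```

With this layout, `weights_scale=100` in `make_holereacher_env` takes over the role of the per-env factor of 50 that `rollout` previously applied implicitly.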