diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py index 73dc1c5..bb7440d 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py @@ -73,7 +73,6 @@ class BallInACupReward(alr_reward_fct.AlrReward): # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt) cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt) reward = np.exp(-1 * cost) - 1e-4 * action_cost - stop_sim = True success = dist_final < 0.05 and dist_ctxt < 0.05 else: reward = - 1e-4 * action_cost diff --git a/alr_envs/mujoco/ball_in_a_cup/utils.py b/alr_envs/mujoco/ball_in_a_cup/utils.py index 8e94670..d2b57f9 100644 --- a/alr_envs/mujoco/ball_in_a_cup/utils.py +++ b/alr_envs/mujoco/ball_in_a_cup/utils.py @@ -28,7 +28,7 @@ def make_env(rank, seed=0): dt=env.dt, weights_scale=0.1, zero_start=True, - zero_goal=False + zero_goal=True ) env.seed(seed + rank) @@ -37,7 +37,6 @@ def make_env(rank, seed=0): return _init - def make_simple_env(rank, seed=0): """ Utility function for multiprocessed env.