From 66be0b1e02a8741b945b5eba488ed50e03d4eb8d Mon Sep 17 00:00:00 2001 From: Onur Date: Tue, 1 Feb 2022 16:02:33 +0100 Subject: [PATCH] adjusted reward function --- alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py index e94b470..40b181b 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py @@ -97,14 +97,14 @@ class BeerPongReward: # encourage bounce before falling into cup if not ball_in_cup: if not self.ball_table_contact: - reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2)) + reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist)) else: - reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2)) + reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist)) else: if not self.ball_table_contact: - reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1 + reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1 else: - reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3 + reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3 # reward = - 1 * cost - self.collision_penalty * int(self._is_collided) success = ball_in_cup