adjusted reward function

2022-02-01 16:02:33 +01:00 · 2022-02-01 16:02:33 +01:00 · 66be0b1e02
commit 66be0b1e02
parent bcebf1077c
1 changed file with 4 additions and 4 deletions
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -97,14 +97,14 @@ class BeerPongReward:
            # encourage bounce before falling into cup
            if not ball_in_cup:
                if not self.ball_table_contact:
-                    reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2))
+                    reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
                else:
-                    reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2))
+                    reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
            else:
                if not self.ball_table_contact:
-                    reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1
+                    reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
                else:
-                    reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3
+                    reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3

            # reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
            success = ball_in_cup