adjusted reward function

This commit is contained in:
Onur 2022-02-01 16:02:33 +01:00
parent bcebf1077c
commit 66be0b1e02

View File

@@ -97,14 +97,14 @@ class BeerPongReward:
 # encourage bounce before falling into cup
 if not ball_in_cup:
     if not self.ball_table_contact:
-        reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2))
+        reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
     else:
-        reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2))
+        reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
 else:
     if not self.ball_table_contact:
-        reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1
+        reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
     else:
-        reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3
+        reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
 # reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
 success = ball_in_cup