adjusted reward function

This commit is contained in:
Onur 2022-02-01 16:02:33 +01:00
parent bcebf1077c
commit 66be0b1e02

View File

@ -97,14 +97,14 @@ class BeerPongReward:
# encourage bounce before falling into cup
if not ball_in_cup:
if not self.ball_table_contact:
reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2))
reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
else:
reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2))
reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
else:
if not self.ball_table_contact:
reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1
reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
else:
reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3
reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
# reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
success = ball_in_cup