adjusted reward function
This commit is contained in:
parent
bcebf1077c
commit
66be0b1e02
@@ -97,14 +97,14 @@ class BeerPongReward:
|
||||
# encourage bounce before falling into cup
|
||||
if not ball_in_cup:
|
||||
if not self.ball_table_contact:
|
||||
- reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2))
+ reward = 0.2 * (1 - np.tanh(0.5*min_dist)) + 0.1 * (1 - np.tanh(0.5*final_dist))
|
||||
else:
|
||||
- reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2))
+ reward = (1 - np.tanh(0.5*min_dist)) + 0.5 * (1 - np.tanh(0.5*final_dist))
|
||||
else:
|
||||
if not self.ball_table_contact:
|
||||
- reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1
+ reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 1
|
||||
else:
|
||||
- reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3
+ reward = 2 * (1 - np.tanh(0.5*final_dist)) + 1 * (1 - np.tanh(0.5*min_dist)) + 3
|
||||
|
||||
# reward = - 1 * cost - self.collision_penalty * int(self._is_collided)
|
||||
success = ball_in_cup
|
||||
|
Loading…
Reference in New Issue
Block a user