biac simple reward function update
This commit is contained in:
parent
744f6eb747
commit
4308607a74
@ -33,6 +33,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
self.dists = []
|
self.dists = []
|
||||||
self.dists_final = []
|
self.dists_final = []
|
||||||
self.costs = []
|
self.costs = []
|
||||||
|
self.action_costs = []
|
||||||
|
|
||||||
def compute_reward(self, action, sim, step, context=None):
|
def compute_reward(self, action, sim, step, context=None):
|
||||||
self.ball_id = sim.model._body_name2id["ball"]
|
self.ball_id = sim.model._body_name2id["ball"]
|
||||||
@ -52,9 +53,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
self.ball_traj[step, :] = ball_pos
|
self.ball_traj[step, :] = ball_pos
|
||||||
|
|
||||||
action_cost = np.sum(np.square(action))
|
action_cost = np.sum(np.square(action))
|
||||||
|
self.action_costs.append(action_cost)
|
||||||
|
|
||||||
if self.check_collision(sim):
|
if self.check_collision(sim):
|
||||||
reward = - 1e-4 * action_cost - 1000
|
reward = - 1000
|
||||||
return reward, False, True
|
return reward, False, True
|
||||||
|
|
||||||
if step == self.sim_time - 1:
|
if step == self.sim_time - 1:
|
||||||
@ -62,10 +64,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
dist_final = self.dists_final[-1]
|
dist_final = self.dists_final[-1]
|
||||||
|
|
||||||
cost = 0.5 * min_dist + 0.5 * dist_final
|
cost = 0.5 * min_dist + 0.5 * dist_final
|
||||||
reward = np.exp(-2 * cost) - 1e-4 * action_cost
|
reward = np.exp(-2 * cost) - 1e-3 * action_cost
|
||||||
success = dist_final < 0.05 and ball_in_cup
|
success = dist_final < 0.05 and ball_in_cup
|
||||||
else:
|
else:
|
||||||
reward = - 1e-4 * action_cost
|
reward = - 1e-3 * action_cost
|
||||||
success = False
|
success = False
|
||||||
|
|
||||||
return reward, success, False
|
return reward, success, False
|
||||||
|
Loading…
Reference in New Issue
Block a user