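Some changes to reward: use negative end-effector distance minus a squared-action control cost, return both terms in the step info, and reduce n_links from 5 to 3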
This commit is contained in:
parent
7f4f52ab10
commit
3cc4d6e667
@ -11,6 +11,6 @@ register(
|
|||||||
entry_point='alr_envs.classic_control:SimpleReacherEnv',
|
entry_point='alr_envs.classic_control:SimpleReacherEnv',
|
||||||
max_episode_steps=200,
|
max_episode_steps=200,
|
||||||
kwargs={
|
kwargs={
|
||||||
"n_links": 5,
|
"n_links": 3,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -61,12 +61,12 @@ class SimpleReacherEnv(gym.Env, utils.EzPickle):
|
|||||||
self._update_joints()
|
self._update_joints()
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
|
|
||||||
reward = self._get_reward(action)
|
reward, info = self._get_reward(action)
|
||||||
|
|
||||||
# done = np.abs(self.end_effector - self._goal_pos) < 0.1
|
# done = np.abs(self.end_effector - self._goal_pos) < 0.1
|
||||||
done = False
|
done = False
|
||||||
|
|
||||||
return self._get_obs().copy(), reward, done, {}
|
return self._get_obs().copy(), reward, done, info
|
||||||
|
|
||||||
def _scale_action(self, action):
|
def _scale_action(self, action):
|
||||||
"""
|
"""
|
||||||
@ -107,15 +107,17 @@ class SimpleReacherEnv(gym.Env, utils.EzPickle):
|
|||||||
|
|
||||||
def _get_reward(self, action):
|
def _get_reward(self, action):
|
||||||
diff = self.end_effector - self._goal_pos
|
diff = self.end_effector - self._goal_pos
|
||||||
distance = 0
|
reward_dist = 0
|
||||||
|
|
||||||
# TODO: Is this the best option
|
# TODO: Is this the best option
|
||||||
if self._steps >= self.steps_before_reward:
|
if self._steps >= self.steps_before_reward:
|
||||||
distance = np.exp(-0.1 * diff ** 2).mean()
|
reward_dist = - np.linalg.norm(diff)
|
||||||
# distance -= (diff ** 2).mean()
|
# reward_dist = np.exp(-0.1 * diff ** 2).mean()
|
||||||
|
# reward_dist = - (diff ** 2).mean()
|
||||||
|
|
||||||
# distance -= action ** 2
|
reward_ctrl = (action ** 2).sum()
|
||||||
return distance
|
reward = reward_dist - reward_ctrl
|
||||||
|
return reward, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user