Update HoleReacher reward computation

This commit is contained in:
Maximilian Huettenrauch 2021-03-22 15:28:50 +01:00
parent a0692b1089
commit 6233c85904
2 changed files with 5 additions and 13 deletions

View File

@@ -94,23 +94,15 @@ class HoleReacher(gym.Env):
# compute reward directly in step function # compute reward directly in step function
dist_reward = 0 reward = 0
if not self._is_collided: if not self._is_collided:
if self._steps == 199: if self._steps == 199:
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole) reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2
else: else:
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
reward = - dist_reward ** 2
reward -= 5e-8 * np.sum(acc**2)
# if self._steps == 180:
# reward -= 0.1 * np.sum(vel**2) ** 2
if self._is_collided:
reward = -self.collision_penalty reward = -self.collision_penalty
reward -= 5e-8 * np.sum(acc ** 2)
info = {"is_collided": self._is_collided} info = {"is_collided": self._is_collided}
self._steps += 1 self._steps += 1

View File

@@ -8,7 +8,7 @@ if __name__ == "__main__":
dim = 15 dim = 15
n_cpus = 4 n_cpus = 4
n_samples = 1 n_samples = 10
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)], vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
n_samples=n_samples) n_samples=n_samples)