update on holereacher reward
This commit is contained in:
parent
a0692b1089
commit
6233c85904
@ -94,23 +94,15 @@ class HoleReacher(gym.Env):
|
|||||||
|
|
||||||
# compute reward directly in step function
|
# compute reward directly in step function
|
||||||
|
|
||||||
dist_reward = 0
|
reward = 0
|
||||||
if not self._is_collided:
|
if not self._is_collided:
|
||||||
if self._steps == 199:
|
if self._steps == 199:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2
|
||||||
else:
|
else:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
|
||||||
|
|
||||||
reward = - dist_reward ** 2
|
|
||||||
|
|
||||||
reward -= 5e-8 * np.sum(acc**2)
|
|
||||||
|
|
||||||
# if self._steps == 180:
|
|
||||||
# reward -= 0.1 * np.sum(vel**2) ** 2
|
|
||||||
|
|
||||||
if self._is_collided:
|
|
||||||
reward = -self.collision_penalty
|
reward = -self.collision_penalty
|
||||||
|
|
||||||
|
reward -= 5e-8 * np.sum(acc ** 2)
|
||||||
|
|
||||||
info = {"is_collided": self._is_collided}
|
info = {"is_collided": self._is_collided}
|
||||||
|
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
|
@ -8,7 +8,7 @@ if __name__ == "__main__":
|
|||||||
dim = 15
|
dim = 15
|
||||||
n_cpus = 4
|
n_cpus = 4
|
||||||
|
|
||||||
n_samples = 1
|
n_samples = 10
|
||||||
|
|
||||||
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
|
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
|
||||||
n_samples=n_samples)
|
n_samples=n_samples)
|
||||||
|
Loading…
Reference in New Issue
Block a user