Fixes in HoleReacher: add a time limit that ends the episode, and drop the action penalty from the step-180 reward

2021-01-15 17:16:52 +01:00 · 2021-01-15 17:16:52 +01:00 · 2d9e7fb3eb
commit 2d9e7fb3eb
parent b7400c477d
1 changed files with 7 additions and 6 deletions
--- a/alr_envs/classic_control/hole_reacher.py
+++ b/alr_envs/classic_control/hole_reacher.py
@ -38,6 +38,7 @@ class HoleReacher(gym.Env):
        self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer

        self._dt = 0.01
+        self.time_limit = 2

        action_bound = np.pi * np.ones((self.num_links,))
        state_bound = np.hstack([
@ -103,9 +104,7 @@ class HoleReacher(gym.Env):
        reward -= 1e-6 * np.sum(acc**2)

        if self._steps == 180:
-            reward -= (0.1 * np.sum(vel**2) ** 2
-                       + 1e-3 * np.sum(action**2)
-                       )
+            reward -= 0.1 * np.sum(vel**2) ** 2

        if self._is_collided:
            reward -= self.collision_penalty
@ -114,7 +113,9 @@ class HoleReacher(gym.Env):

        self._steps += 1

-        return self._get_obs().copy(), reward, self._is_collided, info
+        done = self._steps * self._dt > self.time_limit or self._is_collided
+
+        return self._get_obs().copy(), reward, done, info

    def _update_joints(self):
        """
@ -301,12 +302,12 @@ if __name__ == '__main__':
        # test with random actions
        ac = 2 * env.action_space.sample()
        # ac[0] += np.pi/2
-        obs, rew, done, info = env.step(ac)
+        obs, rew, d, info = env.step(ac)
        env.render(mode=render_mode)

        print(rew)

-        if done:
+        if d:
            break

    env.close()