fixed hole reacher bug

2021-05-18 15:27:08 +02:00 · 2021-05-18 15:27:08 +02:00 · 724b8c6c61
commit 724b8c6c61
parent e0e4d6d41c
3 changed files with 18 additions and 20 deletions
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -172,6 +172,7 @@ register(
    max_episode_steps=200,
    kwargs={
        "n_links": 5,
+        "random_start": True,
        "allow_self_collision": False,
        "allow_wall_collision": False,
        "hole_width": None,
--- a/alr_envs/classic_control/hole_reacher.py
+++ b/alr_envs/classic_control/hole_reacher.py
@@ -14,7 +14,7 @@ class HoleReacherEnv(MPEnv):

    def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
                 hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
-                 allow_wall_collision: bool = False, collision_penalty: bool = 1000):
+                 allow_wall_collision: bool = False, collision_penalty: float = 1000):

        self.n_links = n_links
        self.link_lengths = np.ones((n_links, 1))
@@ -52,7 +52,7 @@ class HoleReacherEnv(MPEnv):
            [np.pi] * self.n_links,  # sin
            [np.inf] * self.n_links,  # velocity
            [np.inf],  # hole width
-            [np.inf],  # hole depth
+            # [np.inf],  # hole depth
            [np.inf] * 2,  # x-y coordinates of target distance
            [np.inf]  # env steps, because reward start after n steps TODO: Maybe
        ])
@@ -138,24 +138,20 @@ class HoleReacherEnv(MPEnv):
        self._is_collided = self_collision or wall_collision

    def _get_reward(self, acc: np.ndarray):
-        success = False
-        reward = -np.inf
-        if not self._is_collided:
-            dist = 0
+        reward = 0
+        # success = False
+
+        if self._steps == 199 or self._is_collided:
            # return reward only in last time step
-            if self._steps == 199:
-                dist = np.linalg.norm(self.end_effector - self._goal)
-                success = dist < 0.005
-        else:
-            # Episode terminates when colliding, hence return reward
+            # Episode also terminates when colliding, hence return reward
            dist = np.linalg.norm(self.end_effector - self._goal)
-            reward = -self.collision_penalty
+            # success = dist < 0.005 and not self._is_collided
+            reward = - dist ** 2 - self.collision_penalty * self._is_collided

-        reward -= dist ** 2
        reward -= 5e-8 * np.sum(acc ** 2)
-        info = {"is_success": success}
+        # info = {"is_success": success}

-        return reward, info
+        return reward, {}  # info

    def _get_obs(self):
        theta = self._joint_angles
@@ -164,7 +160,7 @@ class HoleReacherEnv(MPEnv):
            np.sin(theta),
            self._angle_velocity,
            self._tmp_hole_width,
-            self._tmp_hole_depth,
+            # self._tmp_hole_depth,
            self.end_effector - self._goal,
            self._steps
        ])
@@ -281,7 +277,7 @@ class HoleReacherEnv(MPEnv):
            [self.random_start] * self.n_links,  # sin
            [self.random_start] * self.n_links,  # velocity
            [self._hole_width is None],  # hole width
-            [self._hole_depth is None],  # hole width
+            # [self._hole_depth is None],  # hole depth
            [True] * 2,  # x-y coordinates of target distance
            [False]  # env steps
        ])
--- a/alr_envs/classic_control/utils.py
+++ b/alr_envs/classic_control/utils.py
@@ -2,16 +2,17 @@ def ccw(A, B, C):
    return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12


-# Return true if line segments AB and CD intersect
 def intersect(A, B, C, D):
+    """
+    Checks whether line segments AB and CD intersect
+    """
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)


 def check_self_collision(line_points):
+    "Checks whether any two non-adjacent line segments intersect"
    for i, line1 in enumerate(line_points):
        for line2 in line_points[i + 2:, :, :]:
-            # if line1 != line2:
            if intersect(line1[0], line1[-1], line2[0], line2[-1]):
                return True
    return False
-