diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 0007982..fa73b7a 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -172,6 +172,7 @@ register( max_episode_steps=200, kwargs={ "n_links": 5, + "random_start": True, "allow_self_collision": False, "allow_wall_collision": False, "hole_width": None, diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/classic_control/hole_reacher.py index 0e008aa..064b94b 100644 --- a/alr_envs/classic_control/hole_reacher.py +++ b/alr_envs/classic_control/hole_reacher.py @@ -14,7 +14,7 @@ class HoleReacherEnv(MPEnv): def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, - allow_wall_collision: bool = False, collision_penalty: bool = 1000): + allow_wall_collision: bool = False, collision_penalty: float = 1000): self.n_links = n_links self.link_lengths = np.ones((n_links, 1)) @@ -52,7 +52,7 @@ class HoleReacherEnv(MPEnv): [np.pi] * self.n_links, # sin [np.inf] * self.n_links, # velocity [np.inf], # hole width - [np.inf], # hole depth + # [np.inf], # hole depth [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps TODO: Maybe ]) @@ -138,24 +138,20 @@ class HoleReacherEnv(MPEnv): self._is_collided = self_collision or wall_collision def _get_reward(self, acc: np.ndarray): - success = False - reward = -np.inf - if not self._is_collided: - dist = 0 + reward = 0 + # success = False + + if self._steps == 199 or self._is_collided: # return reward only in last time step - if self._steps == 199: - dist = np.linalg.norm(self.end_effector - self._goal) - success = dist < 0.005 - else: - # Episode terminates when colliding, hence return reward + # Episode also terminates when colliding, hence return reward dist = np.linalg.norm(self.end_effector - self._goal) - reward = -self.collision_penalty + # success = dist < 0.005 and not self._is_collided + reward = - dist ** 2 - self.collision_penalty * self._is_collided - reward -= dist ** 2 reward -= 5e-8 * np.sum(acc ** 2) - info = {"is_success": success} + # info = {"is_success": success} - return reward, info + return reward, {} # info def _get_obs(self): theta = self._joint_angles @@ -164,7 +160,7 @@ class HoleReacherEnv(MPEnv): np.sin(theta), self._angle_velocity, self._tmp_hole_width, - self._tmp_hole_depth, + # self._tmp_hole_depth, self.end_effector - self._goal, self._steps ]) @@ -281,7 +277,7 @@ class HoleReacherEnv(MPEnv): [self.random_start] * self.n_links, # sin [self.random_start] * self.n_links, # velocity [self._hole_width is None], # hole width - [self._hole_depth is None], # hole width + # [self._hole_depth is None], # hole depth [True] * 2, # x-y coordinates of target distance [False] # env steps ]) diff --git a/alr_envs/classic_control/utils.py b/alr_envs/classic_control/utils.py index 3a9bb50..dbaa88e 100644 --- a/alr_envs/classic_control/utils.py +++ b/alr_envs/classic_control/utils.py @@ -2,16 +2,17 @@ def ccw(A, B, C): return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12 -# Return true if line segments AB and CD intersect def intersect(A, B, C, D): + """ + Checks whether line segments AB and CD intersect + """ return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D) def check_self_collision(line_points): + "Checks whether line segments and intersect" for i, line1 in enumerate(line_points): for line2 in line_points[i + 2:, :, :]: - # if line1 != line2: if intersect(line1[0], line1[-1], line2[0], line2[-1]): return True return False -