diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py
index 369ccec..d658412 100644
--- a/mujoco_maze/maze_env.py
+++ b/mujoco_maze/maze_env.py
@@ -264,6 +264,8 @@ class MazeEnv(gym.Env):
         _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
         tree.write(file_path)
+        x, y, _ = map(float, tree.find(".//geom[@name='target']").attrib["pos"].split())
+        self.goal_xy = np.array([x, y])

         self.wrapped_env = self.MODEL_CLASS(*args, file_path=file_path, **kwargs)

     def get_ori(self):
@@ -407,7 +409,7 @@ class MazeEnv(gym.Env):
                 ((x1, y2), (x1, y1)),
             ]
             for seg in struct_segments:
-                segments.append(dict(segment=seg, type=block_type,))
+                segments.append(dict(segment=seg, type=block_type))

         sensor_readings = np.zeros((self._n_bins, 3))  # 3 for wall, drop-off, block
         for ray_idx in range(self._n_bins):
@@ -437,19 +439,16 @@ class MazeEnv(gym.Env):
             # Find out which segment is intersected first.
             first_seg = sorted(ray_segments, key=lambda x: x["distance"])[0]
             seg_type = first_seg["type"]
-            idx = (
-                0
-                if seg_type == 1
-                else 1  # Wall.
-                if seg_type == -1
-                else 2  # Drop-off.
-                if maze_env_utils.can_move(seg_type)
-                else None  # Block.
-            )
-            if first_seg["distance"] <= self._sensor_range:
-                sensor_readings[ray_idx][idx] = (
-                    self._sensor_range - first_seg["distance"]
-                ) / self._sensor_range
+            idx = None
+            if seg_type == 1:
+                idx = 0  # Wall
+            elif seg_type == -1:
+                idx = 1  # Drop-off
+            elif maze_env_utils.can_move(seg_type):
+                idx = 2  # Block
+            sr = self._sensor_range
+            if first_seg["distance"] <= sr:
+                sensor_readings[ray_idx][idx] = (sr - first_seg["distance"]) / sr

         return sensor_readings

@@ -533,16 +532,23 @@ class MazeEnv(gym.Env):
             return True
         return False

+    def _is_in_goal_position(self, pos):
+        return np.linalg.norm(pos - self.goal_xy) <= 0.6
+
     def step(self, action):
         self.t += 1
+        goal_reward = 0.0
         if self._manual_collision:
             old_pos = self.wrapped_env.get_xy()
             inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
             new_pos = self.wrapped_env.get_xy()
             if self._is_in_collision(new_pos):
                 self.wrapped_env.set_xy(old_pos)
+
+            if self._is_in_goal_position(new_pos):
+                goal_reward = 1.0
+                done = True
         else:
             inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
         next_obs = self._get_obs()
-        done = False
-        return next_obs, inner_reward, done, info
+        return next_obs, inner_reward + goal_reward, done, info
diff --git a/mujoco_maze/point.py b/mujoco_maze/point.py
index 09b441d..7deb23a 100644
--- a/mujoco_maze/point.py
+++ b/mujoco_maze/point.py
@@ -27,7 +27,6 @@ class PointEnv(AgentModel):

     def __init__(self, file_path=None, expose_all_qpos=True):
         self._expose_all_qpos = expose_all_qpos
-
         super().__init__(file_path, 1)

     def _step(self, a):
@@ -61,9 +60,10 @@ class PointEnv(AgentModel):
                     self.sim.data.qvel.flat[:3],
                 ]
             )
-        return np.concatenate(
-            [self.sim.data.qpos.flat[2:3], self.sim.data.qvel.flat[:3]]
-        )
+        else:
+            return np.concatenate(
+                [self.sim.data.qpos.flat[2:3], self.sim.data.qvel.flat[:3]]
+            )

     def reset_model(self):
         qpos = self.init_qpos + self.np_random.uniform(
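
Note on the goal lookup added in the first maze_env.py hunk: it assumes the
generated maze XML always contains a geom named "target" with a
three-component "pos" attribute. If that geom can ever be absent,
tree.find() returns None and the attrib access raises an opaque
AttributeError. A minimal defensive variant, as a standalone sketch (the
helper name find_goal_xy is illustrative, not part of this repo):

    import xml.etree.ElementTree as ET
    import numpy as np

    def find_goal_xy(tree: ET.ElementTree) -> np.ndarray:
        # Fail loudly if the target geom is missing from the generated XML.
        target = tree.find(".//geom[@name='target']")
        if target is None:
            raise ValueError("generated maze XML has no geom named 'target'")
        x, y, _ = map(float, target.attrib["pos"].split())
        return np.array([x, y])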
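Note: a quick smoke test for the new goal-reward/termination path. The
environment id below is hypothetical, not taken from this diff; substitute
whichever id mujoco_maze registers. Also note the goal check only fires in
the manual-collision branch of step().

    import gym
    import mujoco_maze  # assumed to register the maze envs on import

    env = gym.make("PointUMaze-v0")  # hypothetical id, for illustration only
    obs = env.reset()
    for _ in range(1000):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            # Goal reached: the agent is within 0.6 of the target geom, and
            # reward includes the +1.0 bonus on this final step (previously
            # done was hard-coded to False).
            break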