Give goal reward

This commit is contained in:
kngwyu 2020-05-29 20:38:48 +09:00
parent b77425efdb
commit 2ee4e78945
2 changed files with 26 additions and 20 deletions

View File

@@ -264,6 +264,8 @@ class MazeEnv(gym.Env):
_, file_path = tempfile.mkstemp(text=True, suffix=".xml") _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
tree.write(file_path) tree.write(file_path)
x, y, _ = map(float, tree.find(".//geom[@name='target']").attrib["pos"].split())
self.goal_xy = np.array([x, y])
self.wrapped_env = self.MODEL_CLASS(*args, file_path=file_path, **kwargs) self.wrapped_env = self.MODEL_CLASS(*args, file_path=file_path, **kwargs)
def get_ori(self): def get_ori(self):
@@ -407,7 +409,7 @@ class MazeEnv(gym.Env):
((x1, y2), (x1, y1)), ((x1, y2), (x1, y1)),
] ]
for seg in struct_segments: for seg in struct_segments:
segments.append(dict(segment=seg, type=block_type,)) segments.append(dict(segment=seg, type=block_type))
sensor_readings = np.zeros((self._n_bins, 3)) # 3 for wall, drop-off, block sensor_readings = np.zeros((self._n_bins, 3)) # 3 for wall, drop-off, block
for ray_idx in range(self._n_bins): for ray_idx in range(self._n_bins):
@@ -437,19 +439,16 @@ class MazeEnv(gym.Env):
# Find out which segment is intersected first. # Find out which segment is intersected first.
first_seg = sorted(ray_segments, key=lambda x: x["distance"])[0] first_seg = sorted(ray_segments, key=lambda x: x["distance"])[0]
seg_type = first_seg["type"] seg_type = first_seg["type"]
idx = ( idx = None
0 if seg_type == 1:
if seg_type == 1 idx = 0 # Wall
else 1 # Wall. elif seg_type == -1:
if seg_type == -1 idx = 1 # Drop-off
else 2 # Drop-off. elif maze_env_utils.can_move(seg_type):
if maze_env_utils.can_move(seg_type) idx = 2 # Block
else None # Block. sr = self._sensor_range
) if first_seg["distance"] <= sr:
if first_seg["distance"] <= self._sensor_range: sensor_readings[ray_idx][idx] = (sr - first_seg["distance"]) / sr
sensor_readings[ray_idx][idx] = (
self._sensor_range - first_seg["distance"]
) / self._sensor_range
return sensor_readings return sensor_readings
@@ -533,16 +532,23 @@ class MazeEnv(gym.Env):
return True return True
return False return False
def _is_in_goal_position(self, pos):
return np.linalg.norm(pos - self.goal_xy) <= 0.6
def step(self, action): def step(self, action):
self.t += 1 self.t += 1
goal_reward = 0.0
if self._manual_collision: if self._manual_collision:
old_pos = self.wrapped_env.get_xy() old_pos = self.wrapped_env.get_xy()
inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action) inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
new_pos = self.wrapped_env.get_xy() new_pos = self.wrapped_env.get_xy()
if self._is_in_collision(new_pos): if self._is_in_collision(new_pos):
self.wrapped_env.set_xy(old_pos) self.wrapped_env.set_xy(old_pos)
if self._is_in_goal_position(new_pos):
goal_reward = 1.0
done = True
else: else:
inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action) inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
next_obs = self._get_obs() next_obs = self._get_obs()
done = False return next_obs, inner_reward + goal_reward, done, info
return next_obs, inner_reward, done, info

View File

@@ -27,7 +27,6 @@ class PointEnv(AgentModel):
def __init__(self, file_path=None, expose_all_qpos=True): def __init__(self, file_path=None, expose_all_qpos=True):
self._expose_all_qpos = expose_all_qpos self._expose_all_qpos = expose_all_qpos
super().__init__(file_path, 1) super().__init__(file_path, 1)
def _step(self, a): def _step(self, a):
@@ -61,6 +60,7 @@ class PointEnv(AgentModel):
self.sim.data.qvel.flat[:3], self.sim.data.qvel.flat[:3],
] ]
) )
else:
return np.concatenate( return np.concatenate(
[self.sim.data.qpos.flat[2:3], self.sim.data.qvel.flat[:3]] [self.sim.data.qpos.flat[2:3], self.sim.data.qvel.flat[:3]]
) )