Do not use goal

2020-05-29 22:56:09 +09:00 · 2020-05-29 22:56:09 +09:00 · 7c20df20d7
commit 7c20df20d7
parent 2ee4e78945
3 changed files with 1 additions and 15 deletions
--- a/mujoco_maze/assets/ant.xml
+++ b/mujoco_maze/assets/ant.xml
@ -66,7 +66,6 @@
        </body>
      </body>
    </body>
  </worldbody>
  <actuator>
    <motor joint="hip_4" ctrlrange="-30.0 30.0" ctrllimited="true" />
--- a/mujoco_maze/assets/point.xml
+++ b/mujoco_maze/assets/point.xml
@ -22,8 +22,6 @@
            <joint name='bally' type='slide' axis='0 1 0' pos='0 0 0' />
            <joint name='rot' type='hinge' axis='0 0 1' pos='0 0 0' limited="false" />
        </body>
        <!-- Goal / target position -->
        <geom name="target" type="sphere" size="0.5" pos="0 8 0" rgba="1. 0. 0. 0.6" />
    </worldbody>
    <actuator>
        <!-- Those are just dummy actuators for providing ranges -->
--- a/mujoco_maze/maze_env.py
+++ b/mujoco_maze/maze_env.py
@ -263,9 +263,6 @@ class MazeEnv(gym.Env):
        _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
        tree.write(file_path)
        x, y, _ = map(float, tree.find(".//geom[@name='target']").attrib["pos"].split())
        self.goal_xy = np.array([x, y])
        self.wrapped_env = self.MODEL_CLASS(*args, file_path=file_path, **kwargs)
    def get_ori(self):
@ -532,23 +529,15 @@ class MazeEnv(gym.Env):
                        return True
        return False
    def _is_in_goal_position(self, pos):
        return np.linalg.norm(pos - self.goal_xy) <= 0.6
    def step(self, action):
        self.t += 1
        goal_reward = 0.0
        if self._manual_collision:
            old_pos = self.wrapped_env.get_xy()
            inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
            new_pos = self.wrapped_env.get_xy()
            if self._is_in_collision(new_pos):
                self.wrapped_env.set_xy(old_pos)
            if self._is_in_goal_position(new_pos):
                goal_reward = 1.0
                done = True
        else:
            inner_next_obs, inner_reward, done, info = self.wrapped_env.step(action)
        next_obs = self._get_obs()
-        return next_obs, inner_reward + goal_reward, done, info
+        return next_obs, inner_reward, False, info