From d2c661d55cb753c53d362ad4660ea9bcad335d48 Mon Sep 17 00:00:00 2001
From: kngwyu
Date: Thu, 24 Sep 2020 23:40:33 +0900
Subject: [PATCH] Swimmer

---
 mujoco_maze/__init__.py        | 23 +++++++++--
 mujoco_maze/agent_model.py     | 11 ++---
 mujoco_maze/ant.py             | 27 ++++++-------
 mujoco_maze/assets/point.xml   |  1 +
 mujoco_maze/assets/swimmer.xml | 39 ++++++++++++++++++
 mujoco_maze/maze_env.py        | 14 ++++---
 mujoco_maze/maze_env_utils.py  |  1 +
 mujoco_maze/maze_task.py       | 40 +++++++++++++++----
 mujoco_maze/swimmer.py         | 73 ++++++++++++++++++++++++++++++++++
 tests/test_envs.py             | 13 ++++++
 10 files changed, 205 insertions(+), 37 deletions(-)
 create mode 100644 mujoco_maze/assets/swimmer.xml
 create mode 100644 mujoco_maze/swimmer.py

diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py
index 9acacab..652f7ee 100644
--- a/mujoco_maze/__init__.py
+++ b/mujoco_maze/__init__.py
@@ -11,9 +11,11 @@ import gym
 from mujoco_maze.ant import AntEnv
 from mujoco_maze.maze_task import TaskRegistry
 from mujoco_maze.point import PointEnv
+from mujoco_maze.swimmer import SwimmerEnv
 
 for maze_id in TaskRegistry.keys():
     for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
+        # Ant
         gym.envs.register(
             id=f"Ant{maze_id}-v{i}",
             entry_point="mujoco_maze.maze_env:MazeEnv",
@@ -26,9 +28,7 @@ for maze_id in TaskRegistry.keys():
             max_episode_steps=1000,
             reward_threshold=task_cls.REWARD_THRESHOLD,
         )
-
-for maze_id in TaskRegistry.keys():
-    for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
+        # Point
         gym.envs.register(
             id=f"Point{maze_id}-v{i}",
             entry_point="mujoco_maze.maze_env:MazeEnv",
@@ -42,5 +42,22 @@ for maze_id in TaskRegistry.keys():
             reward_threshold=task_cls.REWARD_THRESHOLD,
         )
 
+        if "Push" in maze_id or "Fall" in maze_id:
+            continue
+
+        # Swimmer
+        gym.envs.register(
+            id=f"Swimmer{maze_id}-v{i}",
+            entry_point="mujoco_maze.maze_env:MazeEnv",
+            kwargs=dict(
+                model_cls=SwimmerEnv,
+                maze_task=task_cls,
+                maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
+                inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
+            ),
+            max_episode_steps=1000,
+            reward_threshold=task_cls.REWARD_THRESHOLD,
+        )
+
 
 __version__ = "0.1.0"
diff --git a/mujoco_maze/agent_model.py b/mujoco_maze/agent_model.py
index cefaa49..d209d78 100644
--- a/mujoco_maze/agent_model.py
+++ b/mujoco_maze/agent_model.py
@@ -1,6 +1,7 @@
 """Common APIs for defining mujoco robot.
 """
 from abc import ABC, abstractmethod
+from typing import Optional
 
 import numpy as np
 from gym.envs.mujoco.mujoco_env import MujocoEnv
@@ -9,9 +10,9 @@ from gym.utils import EzPickle
 
 class AgentModel(ABC, MujocoEnv, EzPickle):
     FILE: str
-    ORI_IND: int
     MANUAL_COLLISION: bool
-    RADIUS: float
+    ORI_IND: int
+    RADIUS: Optional[float] = None
 
     def __init__(self, file_path: str, frame_skip: int) -> None:
         MujocoEnv.__init__(self, file_path, frame_skip)
@@ -30,18 +31,12 @@ class AgentModel(ABC, MujocoEnv, EzPickle):
         """
         pass
 
-    @abstractmethod
     def get_xy(self) -> np.ndarray:
         """Returns the coordinate of the agent.
         """
         pass
 
-    @abstractmethod
     def set_xy(self, xy: np.ndarray) -> None:
         """Set the coordinate of the agent.
         """
         pass
-
-    @abstractmethod
-    def get_ori(self) -> float:
-        pass
diff --git a/mujoco_maze/ant.py b/mujoco_maze/ant.py
index cc7c441..34356e1 100644
--- a/mujoco_maze/ant.py
+++ b/mujoco_maze/ant.py
@@ -6,7 +6,7 @@ Based on `models`_ and `gym`_ (both ant and ant-v3).
 .. _gym: https://github.com/openai/gym
 """
 
-from typing import Callable, Optional, Tuple
+from typing import Callable, Tuple
 
 import numpy as np
 
@@ -39,14 +39,15 @@ class AntEnv(AgentModel):
     FILE: str = "ant.xml"
     ORI_IND: int = 3
     MANUAL_COLLISION: bool = False
-    RADIUS: float = 0.2
 
     def __init__(
         self,
-        file_path: Optional[str] = None,
-        ctrl_cost_weight: float = 0.0001,
+        file_path: str,
+        forward_reward_weight: float = 1.0,
+        ctrl_cost_weight: float = 1e-4,
         forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
     ) -> None:
+        self._forward_reward_weight = forward_reward_weight
         self._ctrl_cost_weight = ctrl_cost_weight
         self._forward_reward_fn = forward_reward_fn
         super().__init__(file_path, 5)
@@ -63,12 +64,11 @@
         forward_reward = self._forward_reward(xy_pos_before)
         ctrl_cost = self._ctrl_cost_weight * np.square(action).sum()
 
-        ob = self._get_obs()
         return (
-            ob,
-            forward_reward - ctrl_cost,
+            self._get_obs(),
+            self._forward_reward_weight * forward_reward - ctrl_cost,
             False,
-            dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost,),
+            dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
         )
 
     def _get_obs(self):
@@ -82,7 +82,7 @@
 
     def reset_model(self):
         qpos = self.init_qpos + self.np_random.uniform(
-            size=self.model.nq, low=-0.1, high=0.1
+            size=self.model.nq, low=-0.1, high=0.1,
         )
         qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
 
@@ -92,18 +92,17 @@
         self.set_state(qpos, qvel)
         return self._get_obs()
 
-    def get_ori(self):
+    def get_ori(self) -> np.ndarray:
         ori = [0, 1, 0, 0]
-        ori_ind = self.ORI_IND
-        rot = self.sim.data.qpos[ori_ind : ori_ind + 4]  # take the quaternion
+        rot = self.sim.data.qpos[self.ORI_IND : self.ORI_IND + 4]  # take the quaternion
         ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3]  # project onto x-y plane
         ori = np.arctan2(ori[1], ori[0])
         return ori
 
-    def set_xy(self, xy):
+    def set_xy(self, xy: np.ndarray) -> None:
         qpos = self.sim.data.qpos.copy()
         qpos[:2] = xy
         self.set_state(qpos, self.sim.data.qvel)
 
-    def get_xy(self):
+    def get_xy(self) -> np.ndarray:
         return np.copy(self.sim.data.qpos[:2])
diff --git a/mujoco_maze/assets/point.xml b/mujoco_maze/assets/point.xml
index 2d90f75..e0d3302 100755
--- a/mujoco_maze/assets/point.xml
+++ b/mujoco_maze/assets/point.xml
@@ -15,6 +15,7 @@
+
diff --git a/mujoco_maze/assets/swimmer.xml b/mujoco_maze/assets/swimmer.xml
new file mode 100644
index 0000000..1a50dfc
--- /dev/null
+++ b/mujoco_maze/assets/swimmer.xml
@@ -0,0 +1,39 @@
+
+
+
diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py
index 31918f3..8c0febd 100644
--- a/mujoco_maze/maze_env.py
+++ b/mujoco_maze/maze_env.py
@@ -64,9 +64,14 @@
             (x - torso_x, y - torso_y) for x, y in self._find_all_robots()
         ]
 
-        self._collision = maze_env_utils.CollisionDetector(
-            structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
-        )
+        if model_cls.MANUAL_COLLISION:
+            if model_cls.RADIUS is None:
+                raise ValueError("Manual collision needs radius of the model")
+            self._collision = maze_env_utils.CollisionDetector(
+                structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
+            )
+        else:
+            self._collision = None
 
         self._xy_to_rowcol = lambda x, y: (
             2 + (y + size_scaling / 2) / size_scaling,
@@ -226,7 +231,7 @@
         geoms = torso.findall(".//geom")
         for geom in geoms:
             if "name" not in geom.attrib:
-                raise Exception("Every geom of the torso must have a name " "defined")
+                raise Exception("Every geom of the torso must have a name")
 
         # Set goals
asset = tree.find(".//asset") @@ -344,7 +349,6 @@ class MazeEnv(gym.Env): robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2] self._robot_x = robot_x self._robot_y = robot_y - self._robot_ori = self.get_ori() structure = self._maze_structure size_scaling = self._maze_size_scaling diff --git a/mujoco_maze/maze_env_utils.py b/mujoco_maze/maze_env_utils.py index a44830a..98280ef 100644 --- a/mujoco_maze/maze_env_utils.py +++ b/mujoco_maze/maze_env_utils.py @@ -138,6 +138,7 @@ class Collision: class CollisionDetector: """For manual collision detection. """ + EPS: float = 0.05 NEIGHBORS: List[Tuple[int, int]] = [[0, -1], [-1, 0], [0, 1], [1, 0]] diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py index 9d36a1e..b2be5dc 100644 --- a/mujoco_maze/maze_task.py +++ b/mujoco_maze/maze_task.py @@ -46,11 +46,12 @@ class MazeGoal: class Scaling(NamedTuple): ant: float point: float + swimmer: float class MazeTask(ABC): REWARD_THRESHOLD: float - MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0) + MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0) INNER_REWARD_SCALING: float = 0.01 TOP_DOWN_VIEW: bool = False OBSERVE_BLOCKS: bool = False @@ -88,6 +89,32 @@ class DistRewardMixIn: return -self.goals[0].euc_dist(obs) / self.scale +class GoalRewardSimpleRoom(MazeTask): + """ Very easy task. For testing. + """ + REWARD_THRESHOLD: float = 0.9 + + def __init__(self, scale: float) -> None: + super().__init__(scale) + self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))] + + def reward(self, obs: np.ndarray) -> float: + return 1.0 if self.termination(obs) else -0.0001 + + @staticmethod + def create_maze() -> List[List[MazeCell]]: + E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT + return [ + [B, B, B, B, B], + [B, R, E, E, B], + [B, B, B, B, B], + ] + + +class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn): + pass + + class GoalRewardUMaze(MazeTask): REWARD_THRESHOLD: float = 0.9 @@ -163,7 +190,7 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn): class GoalReward2Rooms(MazeTask): REWARD_THRESHOLD: float = 0.9 - MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0) + MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) def __init__(self, scale: float) -> None: super().__init__(scale) @@ -201,7 +228,7 @@ class SubGoal2Rooms(GoalReward2Rooms): class GoalReward4Rooms(MazeTask): REWARD_THRESHOLD: float = 0.9 - MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0) + MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) def __init__(self, scale: float) -> None: super().__init__(scale) @@ -244,12 +271,10 @@ class SubGoal4Rooms(GoalReward4Rooms): class GoalRewardTRoom(MazeTask): REWARD_THRESHOLD: float = 0.9 - MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0) + MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) def __init__( - self, - scale: float, - goals: List[Tuple[float, float]] = [(2.0, -3.0)], + self, scale: float, goals: List[Tuple[float, float]] = [(2.0, -3.0)], ) -> None: super().__init__(scale) self.goals = [] @@ -281,6 +306,7 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn): class TaskRegistry: REGISTRY: Dict[str, List[Type[MazeTask]]] = { + "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom], "UMaze": [DistRewardUMaze, GoalRewardUMaze], "Push": [DistRewardPush, GoalRewardPush], "Fall": [DistRewardFall, GoalRewardFall], diff --git a/mujoco_maze/swimmer.py b/mujoco_maze/swimmer.py new file mode 100644 index 0000000..ef56e89 --- /dev/null +++ b/mujoco_maze/swimmer.py @@ -0,0 +1,73 @@ +""" +Swimmer robot as an explorer in the maze. +Based on `gym`_ (swimmer-v3). + +.. 
+.. _gym: https://github.com/openai/gym
+"""
+
+from typing import Tuple
+
+import numpy as np
+
+from mujoco_maze.agent_model import AgentModel
+from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm
+
+
+class SwimmerEnv(AgentModel):
+    FILE: str = "swimmer.xml"
+    MANUAL_COLLISION: bool = False
+
+    def __init__(
+        self,
+        file_path: str = None,
+        forward_reward_weight: float = 1.0,
+        ctrl_cost_weight: float = 1e-4,
+        forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
+    ) -> None:
+        self._forward_reward_weight = forward_reward_weight
+        self._ctrl_cost_weight = ctrl_cost_weight
+        self._forward_reward_fn = forward_reward_fn
+        super().__init__(file_path, 4)
+
+    def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]:
+        xy_pos_after = self.sim.data.qpos[:2].copy()
+        xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
+        return self._forward_reward_fn(xy_velocity)
+
+    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
+        xy_pos_before = self.sim.data.qpos[:2].copy()
+        self.do_simulation(action, self.frame_skip)
+
+        forward_reward = self._forward_reward(xy_pos_before)
+        ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
+        return (
+            self._get_obs(),
+            self._forward_reward_weight * forward_reward - ctrl_cost,
+            False,
+            dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
+        )
+
+    def _get_obs(self) -> np.ndarray:
+        position = self.sim.data.qpos.flat.copy()
+        velocity = self.sim.data.qvel.flat.copy()
+        observation = np.concatenate([position, velocity]).ravel()
+        return observation
+
+    def reset_model(self) -> np.ndarray:
+        qpos = self.init_qpos + self.np_random.uniform(
+            low=-0.1, high=0.1, size=self.model.nq,
+        )
+        qvel = self.init_qvel + self.np_random.uniform(
+            low=-0.1, high=0.1, size=self.model.nv,
+        )
+
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def set_xy(self, xy: np.ndarray) -> None:
+        qpos = self.sim.data.qpos.copy()
+        qpos[:2] = xy
+        self.set_state(qpos, self.sim.data.qvel)
+
+    def get_xy(self) -> np.ndarray:
+        return np.copy(self.sim.data.qpos[:2])
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 6e925a1..1cf331a 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -26,6 +26,19 @@ def test_point_maze(maze_id):
         assert s.shape == (7,)
 
 
+@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
+def test_swimmer_maze(maze_id):
+    if "Fall" in maze_id or "Push" in maze_id:
+        return
+    for i in range(2):
+        env = gym.make(f"Swimmer{maze_id}-v{i}")
+        s0 = env.reset()
+        s, _, _, _ = env.step(env.action_space.sample())
+        if not env.unwrapped._top_down_view:
+            assert s0.shape == (11,)
+        assert s.shape == (11,)
+
+
 @pytest.mark.parametrize("v", [0, 1])
 def test_maze_args(v):
     env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goals": [(-2.0, -3.0)]})
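
Note (not part of the patch): with the registration loop added in mujoco_maze/__init__.py, the new Swimmer variants are created through gym.make exactly like the existing Ant and Point environments; Push and Fall mazes are skipped for the Swimmer. A minimal usage sketch, assuming mujoco_maze and a working MuJoCo/mujoco-py setup are installed:

    # Smoke test for the Swimmer maze environments registered by this patch.
    # "SwimmerUMaze-v0" is the DistReward variant (index 0 of TaskRegistry["UMaze"]).
    import gym
    import mujoco_maze  # noqa: F401  -- importing the package runs gym.envs.register

    env = gym.make("SwimmerUMaze-v0")
    obs = env.reset()
    print(obs.shape)  # (11,) per tests/test_envs.py when the top-down view is off

    for _ in range(100):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:  # reaching the goal (or the 1000-step limit) ends the episode
            break
    env.close()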