Introduce MazeTask for customizability

kngwyu committed 2020-06-16 13:47:40 +09:00
parent c91a4bc8a7
commit d08cfe5d0e
4 changed files with 176 additions and 172 deletions

mujoco_maze/__init__.py

@@ -1,5 +1,8 @@
 import gym

+from mujoco_maze.maze_task import TaskRegistry
+
 MAZE_IDS = ["Maze", "Push", "Fall"]  # TODO: Block, BlockMaze

@@ -12,36 +15,24 @@ def _get_kwargs(maze_id: str) -> tuple:
 for maze_id in MAZE_IDS:
-    gym.envs.register(
-        id="Ant{}-v0".format(maze_id),
-        entry_point="mujoco_maze.ant_maze_env:AntMazeEnv",
-        kwargs=dict(maze_size_scaling=8.0, **_get_kwargs(maze_id)),
-        max_episode_steps=1000,
-        reward_threshold=-1000,
-    )
-    gym.envs.register(
-        id="Ant{}-v1".format(maze_id),
-        entry_point="mujoco_maze.ant_maze_env:AntMazeEnv",
-        kwargs=dict(maze_size_scaling=8.0, **_get_kwargs(maze_id)),
-        max_episode_steps=1000,
-        reward_threshold=0.9,
-    )
+    for i, task_cls in enumerate(TaskRegistry.REGISTRY[maze_id]):
+        gym.envs.register(
+            id=f"Ant{maze_id}-v{i}",
+            entry_point="mujoco_maze.ant_maze_env:AntMazeEnv",
+            kwargs=dict(maze_task=task_cls, maze_size_scaling=8.0),
+            max_episode_steps=1000,
+            reward_threshold=task_cls.REWARD_THRESHOLD,
+        )

 for maze_id in MAZE_IDS:
-    gym.envs.register(
-        id="Point{}-v0".format(maze_id),
-        entry_point="mujoco_maze.point_maze_env:PointMazeEnv",
-        kwargs=_get_kwargs(maze_id),
-        max_episode_steps=1000,
-        reward_threshold=-1000,
-    )
-    gym.envs.register(
-        id="Point{}-v1".format(maze_id),
-        entry_point="mujoco_maze.point_maze_env:PointMazeEnv",
-        kwargs=dict(**_get_kwargs(maze_id), dense_reward=False),
-        max_episode_steps=1000,
-        reward_threshold=0.9,
-    )
+    for i, task_cls in enumerate(TaskRegistry.REGISTRY[maze_id]):
+        gym.envs.register(
+            id=f"Point{maze_id}-v{i}",
+            entry_point="mujoco_maze.point_maze_env:PointMazeEnv",
+            kwargs=dict(maze_task=task_cls),
+            max_episode_steps=1000,
+            reward_threshold=task_cls.REWARD_THRESHOLD,
+        )

 __version__ = "0.1.0"

mujoco_maze/maze_env.py

@@ -16,17 +16,17 @@
 """Adapted from rllab maze_env.py."""

 import itertools as it
-import math
 import numpy as np
 import gym
 import os
 import tempfile
 import xml.etree.ElementTree as ET

-from typing import Callable, Type, Union
+from typing import Type

 from mujoco_maze.agent_model import AgentModel
 from mujoco_maze import maze_env_utils
+from mujoco_maze import maze_task

 # Directory that contains mujoco xml files.
 MODEL_DIR = os.path.dirname(os.path.abspath(__file__)) + "/assets"
@@ -36,26 +36,23 @@ class MazeEnv(gym.Env):
     MODEL_CLASS: Type[AgentModel] = AgentModel
     MANUAL_COLLISION: bool = False
-    # For preventing the point from going through the wall
-    SIZE_EPS = 0.0001
+    BLOCK_EPS: float = 0.0001

     def __init__(
         self,
-        maze_id=None,
-        n_bins=0,
-        sensor_range=3.0,
-        sensor_span=2 * math.pi,
-        observe_blocks=False,
-        put_spin_near_agent=False,
-        top_down_view=False,
-        dense_reward=True,
+        maze_task: Type[maze_task.MazeTask] = maze_task.SingleGoalSparseEMaze,
+        n_bins: int = 0,
+        sensor_range: float = 3.0,
+        sensor_span: float = 2 * np.pi,
+        observe_blocks: bool = False,
+        put_spin_near_agent: bool = False,
+        top_down_view: bool = False,
         maze_height: float = 0.5,
         maze_size_scaling: float = 4.0,
-        goal_sampler: Union[str, np.ndarray, Callable[[], np.ndarray]] = "default",
         *args,
         **kwargs,
     ) -> None:
-        self._maze_id = maze_id
+        self._task = maze_task()

         xml_path = os.path.join(MODEL_DIR, self.MODEL_CLASS.FILE)
         tree = ET.parse(xml_path)
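
Note: the environment no longer interprets maze_id strings; it receives a task class and instantiates it itself (self._task = maze_task()). Constructing an environment directly now looks like this, a sketch assuming PointMazeEnv still forwards its kwargs to MazeEnv:

    from mujoco_maze.maze_task import SingleGoalDenseEMaze
    from mujoco_maze.point_maze_env import PointMazeEnv

    # Any MazeTask subclass can be passed; the class, not an instance, is expected.
    env = PointMazeEnv(maze_task=SingleGoalDenseEMaze)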
@@ -72,15 +69,11 @@ class MazeEnv(gym.Env):
         self._top_down_view = top_down_view
         self._collision_coef = 0.1

-        self._maze_structure = structure = maze_env_utils.construct_maze(
-            maze_id=self._maze_id
-        )
+        self._maze_structure = structure = self._task.create_maze()
         # Elevate the maze to allow for falling.
         self.elevated = any(maze_env_utils.MazeCell.CHASM in row for row in structure)
         # Are there any movable blocks?
-        self.blocks = any(
-            any(r.can_move() for r in row) for row in structure
-        )
+        self.blocks = any(any(r.can_move() for r in row) for row in structure)

         torso_x, torso_y = self._find_robot()
         self._init_torso_x = torso_x
@@ -117,13 +110,13 @@ class MazeEnv(gym.Env):
             for j in range(len(structure[0])):
                 struct = structure[i][j]
                 if struct.is_robot() and self._put_spin_near_agent:
-                    struct = maze_env_utils.Move.SpinXY
+                    struct = maze_env_utils.MazeCell.SpinXY
                 if self.elevated and not struct.is_chasm():
                     # Create elevated platform.
                     x = j * size_scaling - torso_x
                     y = i * size_scaling - torso_y
                     h = height / 2 * size_scaling
-                    size = 0.5 * size_scaling + self.SIZE_EPS
+                    size = 0.5 * size_scaling + self.BLOCK_EPS
                     ET.SubElement(
                         worldbody,
                         "geom",
@@ -142,7 +135,7 @@ class MazeEnv(gym.Env):
                     x = j * size_scaling - torso_x
                     y = i * size_scaling - torso_y
                     h = height / 2 * size_scaling
-                    size = 0.5 * size_scaling + self.SIZE_EPS
+                    size = 0.5 * size_scaling + self.BLOCK_EPS
                     ET.SubElement(
                         worldbody,
                         "geom",
@@ -172,7 +165,7 @@ class MazeEnv(gym.Env):
                     )
                     y = i * size_scaling - torso_y
                     h = height / 2 * size_scaling * height_shrink
-                    size = 0.5 * size_scaling * shrink + self.SIZE_EPS
+                    size = 0.5 * size_scaling * shrink + self.BLOCK_EPS
                     movable_body = ET.SubElement(
                         worldbody,
                         "body",
@@ -257,29 +250,6 @@ class MazeEnv(gym.Env):
         tree.write(file_path)
         self.wrapped_env = self.MODEL_CLASS(*args, file_path=file_path, **kwargs)

-        # Set reward function
-        self._reward_fn = _reward_fn(maze_id, dense_reward)
-
-        # Set goal sampler
-        if isinstance(goal_sampler, str):
-            if goal_sampler == "random":
-                self._goal_sampler = lambda: np.random.uniform((-4, -4), (20, 20))
-            elif goal_sampler == "default":
-                default_goal = _default_goal(maze_id, size_scaling)
-                self._goal_sampler = lambda: default_goal
-            else:
-                raise NotImplementedError(f"Unknown goal_sampler: {goal_sampler}")
-        elif isinstance(goal_sampler, np.ndarray):
-            self._goal_sampler = lambda: goal_sampler
-        elif callable(goal_sampler):
-            self._goal_sampler = goal_sampler
-        else:
-            raise ValueError(f"Invalid goal_sampler: {goal_sampler}")
-        self.goal = self._goal_sampler()
-
-        # Set goal function
-        self._goal_fn = _goal_fn(maze_id)

     def get_ori(self):
         return self.wrapped_env.get_ori()
@@ -488,7 +458,7 @@ class MazeEnv(gym.Env):
         self.t = 0
         self.wrapped_env.reset()
         # Sample a new goal
-        self.goal = self._goal_sampler()
+        self._task.sample_goals(self._maze_size_scaling)
         if len(self._init_positions) > 1:
             xy = np.random.choice(self._init_positions)
             self.wrapped_env.set_xy(xy)
@@ -540,51 +510,6 @@ class MazeEnv(gym.Env):
         else:
             inner_next_obs, inner_reward, _, info = self.wrapped_env.step(action)
         next_obs = self._get_obs()
-        outer_reward = self._reward_fn(next_obs, self.goal)
-        done = self._goal_fn(next_obs, self.goal)
+        outer_reward = self._task.reward(next_obs)
+        done = self._task.termination(next_obs)
         return next_obs, inner_reward + outer_reward, done, info

-
-def _goal_fn(maze_id: str) -> callable:
-    if maze_id in ["Maze", "Push", "BlockMaze"]:
-        return lambda obs, goal: np.linalg.norm(obs[:2] - goal) <= 0.6
-    elif maze_id == "Fall":
-        return lambda obs, goal: np.linalg.norm(obs[:3] - goal) <= 0.6
-    else:
-        raise NotImplementedError(f"Unknown maze id: {maze_id}")
-
-
-def _reward_fn(maze_id: str, dense: str) -> callable:
-    if dense:
-        if maze_id in ["Maze", "Push", "BlockMaze"]:
-            return lambda obs, goal: -np.sum(np.square(obs[:2] - goal)) ** 0.5
-        elif maze_id == "Fall":
-            return lambda obs, goal: -np.sum(np.square(obs[:3] - goal)) ** 0.5
-        else:
-            raise NotImplementedError(f"Unknown maze id: {maze_id}")
-    else:
-        if maze_id in ["Maze", "Push", "BlockMaze"]:
-            return (
-                lambda obs, goal: 1.0
-                if np.linalg.norm(obs[:2] - goal) <= 0.6
-                else -0.0001
-            )
-        elif maze_id == "Fall":
-            return (
-                lambda obs, goal: 1.0
-                if np.linalg.norm(obs[:3] - goal) <= 0.6
-                else -0.0001
-            )
-        else:
-            raise NotImplementedError(f"Unknown maze id: {maze_id}")
-
-
-def _default_goal(maze_id: str, scale: float) -> np.ndarray:
-    if maze_id == "Maze" or maze_id == "BlockMaze":
-        return np.array([0.0, 2.0 * scale])
-    elif maze_id == "Push":
-        return np.array([0.0, 2.375 * scale])
-    elif maze_id == "Fall":
-        return np.array([0.0, 3.375 * scale, 4.5])
-    else:
-        raise NotImplementedError(f"Unknown maze id: {maze_id}")
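
Note: the per-maze-id dispatch deleted here is reproduced one-to-one by the task classes in the new maze_task module, with goals sampled at the same coordinates _default_goal used. A standalone sketch checking the equivalence for the dense "Maze" case (values chosen for illustration):

    import numpy as np

    from mujoco_maze.maze_task import SingleGoalDenseEMaze

    task = SingleGoalDenseEMaze()
    task.sample_goals(scale=4.0)     # same goal as the old _default_goal("Maze", 4.0)
    obs = np.array([1.0, 3.0, 0.5])  # only obs[:goal_dim] enters the comparison

    goal = np.array([0.0, 2.0 * 4.0])
    assert np.isclose(task.reward(obs), -np.sum(np.square(obs[:2] - goal)) ** 0.5)
    assert task.termination(obs) == (np.linalg.norm(obs[:2] - goal) <= 0.6)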

mujoco_maze/maze_env_utils.py

@@ -77,55 +77,6 @@ class MazeCell(Enum):
         return self.can_move_x() or self.can_move_y() or self.can_move_z()

-
-def construct_maze(maze_id="Maze"):
-    E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT
-    if maze_id == "Maze":
-        structure = [
-            [B, B, B, B, B],
-            [B, R, E, E, B],
-            [B, B, B, E, B],
-            [B, E, E, E, B],
-            [B, B, B, B, B],
-        ]
-    elif maze_id == "Push":
-        structure = [
-            [B, B, B, B, B],
-            [B, E, R, B, B],
-            [B, E, MazeCell.XY, E, B],
-            [B, B, E, B, B],
-            [B, B, B, B, B],
-        ]
-    elif maze_id == "Fall":
-        structure = [
-            [B, B, B, B],
-            [B, R, E, B],
-            [B, E, MazeCell.YZ, B],
-            [B, C, C, B],
-            [B, E, E, B],
-            [B, B, B, B],
-        ]
-    elif maze_id == "Block":
-        structure = [
-            [B, B, B, B, B],
-            [B, R, E, E, B],
-            [B, E, E, E, B],
-            [B, E, E, E, B],
-            [B, B, B, B, B],
-        ]
-    elif maze_id == "BlockMaze":
-        structure = [
-            [B, B, B, B],
-            [B, R, E, B],
-            [B, B, E, B],
-            [B, E, E, B],
-            [B, B, B, B],
-        ]
-    else:
-        raise NotImplementedError("The provided MazeId %s is not recognized" % maze_id)
-    return structure
-

 class Collision:
     """For manual collision detection.
     """

mujoco_maze/maze_task.py (new file, 137 lines)

@@ -0,0 +1,137 @@
+from abc import ABC, abstractmethod
+from typing import Dict, List, Type
+
+import numpy as np
+
+from mujoco_maze.maze_env_utils import MazeCell
+
+
+class MazeGoal:
+    THRESHOLD: float = 0.6
+
+    def __init__(self, goal: np.ndarray, reward_scale: float = 1.0) -> None:
+        self.goal = goal
+        self.goal_dim = goal.shape[0]
+        self.reward_scale = reward_scale
+
+    def neighbor(self, obs: np.ndarray) -> bool:
+        return np.linalg.norm(obs[: self.goal_dim] - self.goal) <= self.THRESHOLD
+
+    def euc_dist(self, obs: np.ndarray) -> float:
+        return np.sum(np.square(obs[: self.goal_dim] - self.goal)) ** 0.5
+
+
+class MazeTask(ABC):
+    REWARD_THRESHOLD: float
+
+    def __init__(self) -> None:
+        self.goals: List[MazeGoal] = []
+
+    @abstractmethod
+    def sample_goals(self, scale: float) -> None:
+        pass
+
+    @abstractmethod
+    def reward(self, obs: np.ndarray) -> float:
+        pass
+
+    @abstractmethod
+    def termination(self, obs: np.ndarray) -> bool:
+        pass
+
+    @staticmethod
+    @abstractmethod
+    def create_maze() -> List[List[MazeCell]]:
+        pass
+
+
+class SingleGoalSparseEMaze(MazeTask):
+    REWARD_THRESHOLD: float = 0.9
+
+    def sample_goals(self, scale: float) -> None:
+        goal = MazeGoal(np.array([0.0, 2.0 * scale]))
+        self.goals = [goal]
+
+    def reward(self, obs: np.ndarray) -> float:
+        if self.goals[0].neighbor(obs):
+            return 1.0
+        else:
+            return -0.0001
+
+    def termination(self, obs: np.ndarray) -> bool:
+        return self.goals[0].neighbor(obs)
+
+    @staticmethod
+    def create_maze() -> List[List[MazeCell]]:
+        E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
+        return [
+            [B, B, B, B, B],
+            [B, R, E, E, B],
+            [B, B, B, E, B],
+            [B, E, E, E, B],
+            [B, B, B, B, B],
+        ]
+
+
+class SingleGoalDenseEMaze(SingleGoalSparseEMaze):
+    REWARD_THRESHOLD: float = -1000.0
+
+    def reward(self, obs: np.ndarray) -> float:
+        return -self.goals[0].euc_dist(obs)
+
+
+class SingleGoalSparsePush(SingleGoalSparseEMaze):
+    def sample_goals(self, scale: float) -> None:
+        goal = MazeGoal(np.array([0.0, 2.375 * scale]))
+        self.goals = [goal]
+
+    @staticmethod
+    def create_maze() -> List[List[MazeCell]]:
+        E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
+        return [
+            [B, B, B, B, B],
+            [B, E, R, B, B],
+            [B, E, MazeCell.XY, E, B],
+            [B, B, E, B, B],
+            [B, B, B, B, B],
+        ]
+
+
+class SingleGoalDensePush(SingleGoalSparsePush):
+    REWARD_THRESHOLD: float = -1000.0
+
+    def reward(self, obs: np.ndarray) -> float:
+        return -self.goals[0].euc_dist(obs)
+
+
+class SingleGoalSparseFall(SingleGoalSparseEMaze):
+    def sample_goals(self, scale: float) -> None:
+        goal = MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))
+        self.goals = [goal]
+
+    @staticmethod
+    def create_maze() -> List[List[MazeCell]]:
+        E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT
+        return [
+            [B, B, B, B],
+            [B, R, E, B],
+            [B, E, MazeCell.YZ, B],
+            [B, C, C, B],
+            [B, E, E, B],
+            [B, B, B, B],
+        ]
+
+
+class SingleGoalDenseFall(SingleGoalSparseFall):
+    REWARD_THRESHOLD: float = -1000.0
+
+    def reward(self, obs: np.ndarray) -> float:
+        return -self.goals[0].euc_dist(obs)
+
+
+class TaskRegistry:
+    REGISTRY: Dict[str, List[Type[MazeTask]]] = {
+        "Maze": [SingleGoalDenseEMaze, SingleGoalSparseEMaze],
+        "Push": [SingleGoalDensePush, SingleGoalSparsePush],
+        "Fall": [SingleGoalDenseFall, SingleGoalSparseFall],
+    }
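
Note: this is what the refactoring buys: a new task is just a subclass plus a registration call, with no changes to MazeEnv. A sketch of a custom task against this API (the class name, subgoal position, and env id are illustrative, not part of this commit):

    import gym
    import numpy as np

    from mujoco_maze.maze_task import MazeGoal, SingleGoalSparseEMaze


    class SubGoalSparseEMaze(SingleGoalSparseEMaze):
        # Hypothetical variant: adds a half-reward subgoal; termination() is
        # inherited, so only the main goal (goals[0]) ends the episode.
        def sample_goals(self, scale: float) -> None:
            self.goals = [
                MazeGoal(np.array([0.0, 2.0 * scale])),
                MazeGoal(np.array([2.0 * scale, 2.0 * scale]), reward_scale=0.5),
            ]

        def reward(self, obs: np.ndarray) -> float:
            for goal in self.goals:
                if goal.neighbor(obs):
                    return goal.reward_scale
            return -0.0001


    # Registered directly, mirroring the loop in __init__.py above.
    gym.envs.register(
        id="PointSubGoalEMaze-v0",
        entry_point="mujoco_maze.point_maze_env:PointMazeEnv",
        kwargs=dict(maze_task=SubGoalSparseEMaze),
        max_episode_steps=1000,
        reward_threshold=SubGoalSparseEMaze.REWARD_THRESHOLD,
    )
    env = gym.make("PointSubGoalEMaze-v0")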