From 720f535682ce77ce72ca284969e7979b4dbd5c48 Mon Sep 17 00:00:00 2001
From: kngwyu
Date: Sat, 26 Sep 2020 18:37:20 +0900
Subject: [PATCH] Block Maze

---
 README.md                      |  4 ++
 mujoco_maze/__init__.py        | 21 ++++-
 mujoco_maze/assets/swimmer.xml |  2 +-
 mujoco_maze/maze_env.py        |  9 +--
 mujoco_maze/maze_task.py       | 85 +++++++++++++++++----------
 tests/test_envs.py             | 39 ++++++++++--
 6 files changed, 115 insertions(+), 45 deletions(-)

diff --git a/README.md b/README.md
index ef097cc..fbae456 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,10 @@ Thankfully, this project is based on the code from [rllab] and [tensorflow/mode
 - PointFall-v0/AntFall-v0 (Distance-based Reward)
 - PointFall-v1/AntFall-v1 (Goal-based Reward)
 
+## Caveats
+This project has some other features (e.g., block mazes and other
+robots), but they are still work in progress.
+
 ## License
 This project is licensed under Apache License, Version 2.0
 ([LICENSE-APACHE](LICENSE) or http://www.apache.org/licenses/LICENSE-2.0).
diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py
index 652f7ee..7eb34c5 100644
--- a/mujoco_maze/__init__.py
+++ b/mujoco_maze/__init__.py
@@ -11,6 +11,7 @@ import gym
 from mujoco_maze.ant import AntEnv
 from mujoco_maze.maze_task import TaskRegistry
 from mujoco_maze.point import PointEnv
+from mujoco_maze.reacher import ReacherEnv
 from mujoco_maze.swimmer import SwimmerEnv
 
 for maze_id in TaskRegistry.keys():
@@ -41,10 +42,28 @@ for maze_id in TaskRegistry.keys():
             max_episode_steps=1000,
             reward_threshold=task_cls.REWARD_THRESHOLD,
         )
+        skip_swimmer = False
+        for inhibited in ["Fall", "Push", "Block"]:
+            if inhibited in maze_id:
+                skip_swimmer = True
 
-        if "Push" in maze_id or "Fall" in maze_id:
+        if skip_swimmer:
             continue
 
+        # Reacher
+        gym.envs.register(
+            id=f"Reacher{maze_id}-v{i}",
+            entry_point="mujoco_maze.maze_env:MazeEnv",
+            kwargs=dict(
+                model_cls=ReacherEnv,
+                maze_task=task_cls,
+                maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
+                inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
+            ),
+            max_episode_steps=1000,
+            reward_threshold=task_cls.REWARD_THRESHOLD,
+        )
+        # Swimmer
         gym.envs.register(
             id=f"Swimmer{maze_id}-v{i}",
diff --git a/mujoco_maze/assets/swimmer.xml b/mujoco_maze/assets/swimmer.xml
index 1a50dfc..b743d85 100644
--- a/mujoco_maze/assets/swimmer.xml
+++ b/mujoco_maze/assets/swimmer.xml
@@ -13,7 +13,7 @@
-
+
diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py
index 8c0febd..248a457 100644
--- a/mujoco_maze/maze_env.py
+++ b/mujoco_maze/maze_env.py
@@ -145,11 +145,8 @@ class MazeEnv(gym.Env):
                     spinning = struct.can_spin()
                     shrink = 0.1 if spinning else 0.99 if falling else 1.0
                     height_shrink = 0.1 if spinning else 1.0
-                    x = (
-                        j * size_scaling - torso_x + 0.25 * size_scaling
-                        if spinning
-                        else 0.0
-                    )
+                    x_offset = 0.25 * size_scaling if spinning else 0.0
+                    x = j * size_scaling - torso_x + x_offset
                     y = i * size_scaling - torso_y
                     h = height / 2 * size_scaling * height_shrink
                     size = 0.5 * size_scaling * shrink
@@ -462,5 +459,5 @@ class MazeEnv(gym.Env):
         info["position"] = self.wrapped_env.get_xy()
         return next_obs, inner_reward + outer_reward, done, info
 
-    def close(self):
+    def close(self) -> None:
         self.wrapped_env.close()
diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py
index b2be5dc..d1a35ad 100644
--- a/mujoco_maze/maze_task.py
+++ b/mujoco_maze/maze_task.py
@@ -2,7 +2,7 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import Dict, List, NamedTuple, Tuple, Type
+from typing import Dict, List, NamedTuple, Optional, Tuple, Type
 
 import numpy as np
 
@@ -51,6 +51,7 @@ class Scaling(NamedTuple):
 
 class MazeTask(ABC):
     REWARD_THRESHOLD: float
+    PENALTY: Optional[float] = None
     MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
     INNER_REWARD_SCALING: float = 0.01
     TOP_DOWN_VIEW: bool = False
@@ -89,41 +90,16 @@ class DistRewardMixIn:
         return -self.goals[0].euc_dist(obs) / self.scale
 
 
-class GoalRewardSimpleRoom(MazeTask):
-    """ Very easy task. For testing.
-    """
-    REWARD_THRESHOLD: float = 0.9
-
-    def __init__(self, scale: float) -> None:
-        super().__init__(scale)
-        self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))]
-
-    def reward(self, obs: np.ndarray) -> float:
-        return 1.0 if self.termination(obs) else -0.0001
-
-    @staticmethod
-    def create_maze() -> List[List[MazeCell]]:
-        E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
-        return [
-            [B, B, B, B, B],
-            [B, R, E, E, B],
-            [B, B, B, B, B],
-        ]
-
-
-class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
-    pass
-
-
 class GoalRewardUMaze(MazeTask):
     REWARD_THRESHOLD: float = 0.9
+    PENALTY: float = -0.0001
 
     def __init__(self, scale: float) -> None:
         super().__init__(scale)
         self.goals = [MazeGoal(np.array([0.0, 2.0 * scale]))]
 
     def reward(self, obs: np.ndarray) -> float:
-        return 1.0 if self.termination(obs) else -0.0001
+        return 1.0 if self.termination(obs) else self.PENALTY
 
     @staticmethod
     def create_maze() -> List[List[MazeCell]]:
@@ -141,6 +117,25 @@ class DistRewardUMaze(GoalRewardUMaze, DistRewardMixIn):
     pass
 
 
+class GoalRewardSimpleRoom(GoalRewardUMaze):
+    def __init__(self, scale: float) -> None:
+        super().__init__(scale)
+        self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))]
+
+    @staticmethod
+    def create_maze() -> List[List[MazeCell]]:
+        E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
+        return [
+            [B, B, B, B, B],
+            [B, R, E, E, B],
+            [B, B, B, B, B],
+        ]
+
+
+class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
+    pass
+
+
 class GoalRewardPush(GoalRewardUMaze):
     TOP_DOWN_VIEW = True
 
@@ -188,8 +183,9 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn):
     pass
 
 
 class GoalReward2Rooms(MazeTask):
     REWARD_THRESHOLD: float = 0.9
+    PENALTY: float = -0.0001
     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
 
     def __init__(self, scale: float) -> None:
@@ -200,7 +196,7 @@ class GoalReward2Rooms(MazeTask):
         for goal in self.goals:
             if goal.neighbor(obs):
                 return goal.reward_scale
-        return -0.0001
+        return self.PENALTY
 
     @staticmethod
     def create_maze() -> List[List[MazeCell]]:
@@ -228,6 +224,7 @@ class SubGoal2Rooms(GoalReward2Rooms):
 
 class GoalReward4Rooms(MazeTask):
     REWARD_THRESHOLD: float = 0.9
+    PENALTY: float = -0.0001
     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
 
     def __init__(self, scale: float) -> None:
@@ -238,7 +235,7 @@ class GoalReward4Rooms(MazeTask):
         for goal in self.goals:
             if goal.neighbor(obs):
                 return goal.reward_scale
-        return -0.0001
+        return self.PENALTY
 
     @staticmethod
     def create_maze() -> List[List[MazeCell]]:
@@ -271,6 +268,7 @@ class SubGoal4Rooms(GoalReward4Rooms):
 
 class GoalRewardTRoom(MazeTask):
     REWARD_THRESHOLD: float = 0.9
+    PENALTY: float = -0.0001
     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
 
     def __init__(
@@ -285,7 +283,7 @@ class GoalRewardTRoom(MazeTask):
         for goal in self.goals:
             if goal.neighbor(obs):
                 return goal.reward_scale
-        return -0.0001
+        return self.PENALTY
 
     @staticmethod
     def create_maze() -> List[List[MazeCell]]:
@@ -304,6 +302,30 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
     pass
 
 
+class GoalRewardBlockMaze(GoalRewardUMaze):
+    OBSERVE_BLOCKS: bool = True
+
+    def __init__(self, scale: float) -> None:
+        super().__init__(scale)
+        self.goals = [MazeGoal(np.array([0.0, 3.0 * scale]))]
+
+    @staticmethod
+    def create_maze() -> List[List[MazeCell]]:
+        E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY
+        return [
+            [B, B, B, B, B],
+            [B, R, E, E, B],
+            [B, B, B, M, B],
+            [B, E, E, E, B],
+            [B, E, E, E, B],
+            [B, B, B, B, B],
+        ]
+
+
+class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
+    pass
+
+
 class TaskRegistry:
     REGISTRY: Dict[str, List[Type[MazeTask]]] = {
         "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
@@ -313,6 +335,7 @@ class TaskRegistry:
         "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
         "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
         "TRoom": [DistRewardTRoom, GoalRewardTRoom],
+        "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
     }
 
     @staticmethod
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 1cf331a..79866a0 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -10,7 +10,7 @@ def test_ant_maze(maze_id):
         env = gym.make(f"Ant{maze_id}-v{i}")
         s0 = env.reset()
         s, _, _, _ = env.step(env.action_space.sample())
-        if not env.unwrapped._top_down_view:
+        if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
             assert s0.shape == (30,)
             assert s.shape == (30,)
 
@@ -20,21 +20,41 @@ def test_point_maze(maze_id):
     for i in range(2):
         env = gym.make(f"Point{maze_id}-v{i}")
         s0 = env.reset()
-        s, _, _, _ = env.step(env.action_space.sample())
-        if not env.unwrapped._top_down_view:
+        s, r, _, _ = env.step(env.action_space.sample())
+        if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
             assert s0.shape == (7,)
             assert s.shape == (7,)
+        if i == 0:
+            assert r != 0.0
+        else:
+            assert r == env.unwrapped._task.PENALTY
+            assert r < 0.0
+
+
+@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
+def test_reacher_maze(maze_id):
+    for inhibited in ["Fall", "Push", "Block"]:
+        if inhibited in maze_id:
+            return
+
+    for i in range(2):
+        env = gym.make(f"Reacher{maze_id}-v{i}")
+        s0 = env.reset()
+        s, _, _, _ = env.step(env.action_space.sample())
+        if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
+            assert s0.shape == (9,)
+            assert s.shape == (9,)
 
 
 @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
 def test_swimmer_maze(maze_id):
-    if "Fall" in maze_id or "Push" in maze_id:
-        return
+    for inhibited in ["Fall", "Push", "Block"]:
+        if inhibited in maze_id:
+            return
     for i in range(2):
         env = gym.make(f"Swimmer{maze_id}-v{i}")
         s0 = env.reset()
         s, _, _, _ = env.step(env.action_space.sample())
-        if not env.unwrapped._top_down_view:
+        if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
             assert s0.shape == (11,)
             assert s.shape == (11,)
 
@@ -45,3 +65,10 @@ def test_maze_args(v):
     assert env.reset().shape == (7,)
     s, _, _, _ = env.step(env.action_space.sample())
     assert s.shape == (7,)
+
+
+def test_getting_movable():
+    env = gym.make("PointBlockMaze-v1")
+    assert env.reset().shape == (7,)
+    s, _, _, _ = env.step(env.action_space.sample())
+    assert s.shape == (7,)
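
Note on the new envs: with this patch applied, the block-maze task is registered for the point and ant robots as PointBlockMaze-v0/v1 and AntBlockMaze-v0/v1 (v0 uses the distance-based reward, v1 the goal-based reward), while the Reacher and Swimmer variants are deliberately skipped. A minimal rollout sketch, illustrative only and not part of the commit (it assumes a working MuJoCo + mujoco-py setup and the old 4-tuple gym step API used here):

import gym

import mujoco_maze  # noqa: F401 -- importing the package registers the envs with gym

# v0 is the dense distance-reward variant (DistRewardBlockMaze);
# v1 is the sparse goal-reward variant (GoalRewardBlockMaze).
env = gym.make("PointBlockMaze-v1")
obs = env.reset()
done = False
while not done:
    # Random actions; the episode ends at the goal or after 1000 steps (TimeLimit).
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()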