Block Maze
This commit is contained in:
		
							parent
							
								
									1c4152654b
								
							
						
					
					
						commit
						720f535682
					
				| @ -32,6 +32,10 @@ Thankfully, this project is based on the code from  [rllab] and [tensorflow/mode | ||||
|   - PointFall-v0/AntFall-v0 (Distance-based Reward) | ||||
|   - PointFall-v1/AntFall-v1 (Goal-based Reward) | ||||
| 
 | ||||
| ## Caveats | ||||
| This project has some other features (e.g., block maze and other | ||||
| robots), but they are still a work in progress. | ||||
| 
 | ||||
| ## License | ||||
| This project is licensed under the Apache License, Version 2.0 | ||||
| ([LICENSE-APACHE](LICENSE) or http://www.apache.org/licenses/LICENSE-2.0). | ||||
|  | ||||
| @ -11,6 +11,7 @@ import gym | ||||
| from mujoco_maze.ant import AntEnv | ||||
| from mujoco_maze.maze_task import TaskRegistry | ||||
| from mujoco_maze.point import PointEnv | ||||
| from mujoco_maze.reacher import ReacherEnv | ||||
| from mujoco_maze.swimmer import SwimmerEnv | ||||
| 
 | ||||
| for maze_id in TaskRegistry.keys(): | ||||
| @ -41,10 +42,28 @@ for maze_id in TaskRegistry.keys(): | ||||
|             max_episode_steps=1000, | ||||
|             reward_threshold=task_cls.REWARD_THRESHOLD, | ||||
|         ) | ||||
|         skip_swimmer = False | ||||
|         for inhibited in ["Fall", "Push", "Block"]: | ||||
|             if inhibited in maze_id: | ||||
|                 skip_swimmer = True | ||||
| 
 | ||||
|         if "Push" in maze_id or "Fall" in maze_id: | ||||
|         if skip_swimmer: | ||||
|             continue | ||||
| 
 | ||||
|         # Reacher | ||||
|         gym.envs.register( | ||||
|             id=f"Reacher{maze_id}-v{i}", | ||||
|             entry_point="mujoco_maze.maze_env:MazeEnv", | ||||
|             kwargs=dict( | ||||
|                 model_cls=ReacherEnv, | ||||
|                 maze_task=task_cls, | ||||
|                 maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer, | ||||
|                 inner_reward_scaling=task_cls.INNER_REWARD_SCALING, | ||||
|             ), | ||||
|             max_episode_steps=1000, | ||||
|             reward_threshold=task_cls.REWARD_THRESHOLD, | ||||
|         ) | ||||
| 
 | ||||
|         # Swimmer | ||||
|         gym.envs.register( | ||||
|             id=f"Swimmer{maze_id}-v{i}", | ||||
|  | ||||
| @ -13,7 +13,7 @@ | ||||
|     <material name='geom' texture="texgeom" texuniform="true" /> | ||||
|   </asset> | ||||
|   <worldbody> | ||||
|     <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" /> | ||||
|     <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" /> | ||||
|     <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" /> | ||||
|     <!--  ================= SWIMMER ================= /--> | ||||
|     <body name="torso" pos="0 0 0"> | ||||
|  | ||||
| @ -145,11 +145,8 @@ class MazeEnv(gym.Env): | ||||
|                     spinning = struct.can_spin() | ||||
|                     shrink = 0.1 if spinning else 0.99 if falling else 1.0 | ||||
|                     height_shrink = 0.1 if spinning else 1.0 | ||||
|                     x = ( | ||||
|                         j * size_scaling - torso_x + 0.25 * size_scaling | ||||
|                         if spinning | ||||
|                         else 0.0 | ||||
|                     ) | ||||
|                     x_offset = 0.25 * size_scaling if spinning else 0.0 | ||||
|                     x = j * size_scaling - torso_x + x_offset | ||||
|                     y = i * size_scaling - torso_y | ||||
|                     h = height / 2 * size_scaling * height_shrink | ||||
|                     size = 0.5 * size_scaling * shrink | ||||
| @ -462,5 +459,5 @@ class MazeEnv(gym.Env): | ||||
|         info["position"] = self.wrapped_env.get_xy() | ||||
|         return next_obs, inner_reward + outer_reward, done, info | ||||
| 
 | ||||
|     def close(self): | ||||
|     def close(self) -> None: | ||||
|         self.wrapped_env.close() | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from abc import ABC, abstractmethod | ||||
| from typing import Dict, List, NamedTuple, Tuple, Type | ||||
| from typing import Dict, List, NamedTuple, Optional, Tuple, Type | ||||
| 
 | ||||
| import numpy as np | ||||
| 
 | ||||
| @ -51,6 +51,7 @@ class Scaling(NamedTuple): | ||||
| 
 | ||||
| class MazeTask(ABC): | ||||
|     REWARD_THRESHOLD: float | ||||
|     PENALTY: Optional[float] = None | ||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0) | ||||
|     INNER_REWARD_SCALING: float = 0.01 | ||||
|     TOP_DOWN_VIEW: bool = False | ||||
| @ -89,41 +90,16 @@ class DistRewardMixIn: | ||||
|         return -self.goals[0].euc_dist(obs) / self.scale | ||||
| 
 | ||||
| 
 | ||||
| class GoalRewardSimpleRoom(MazeTask): | ||||
|     """ Very easy task. For testing. | ||||
|     """ | ||||
|     REWARD_THRESHOLD: float = 0.9 | ||||
| 
 | ||||
|     def __init__(self, scale: float) -> None: | ||||
|         super().__init__(scale) | ||||
|         self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))] | ||||
| 
 | ||||
|     def reward(self, obs: np.ndarray) -> float: | ||||
|         return 1.0 if self.termination(obs) else -0.0001 | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
|         E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT | ||||
|         return [ | ||||
|             [B, B, B, B, B], | ||||
|             [B, R, E, E, B], | ||||
|             [B, B, B, B, B], | ||||
|         ] | ||||
| 
 | ||||
| 
 | ||||
| class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class GoalRewardUMaze(MazeTask): | ||||
|     REWARD_THRESHOLD: float = 0.9 | ||||
|     PENALTY: float = -0.0001 | ||||
| 
 | ||||
|     def __init__(self, scale: float) -> None: | ||||
|         super().__init__(scale) | ||||
|         self.goals = [MazeGoal(np.array([0.0, 2.0 * scale]))] | ||||
| 
 | ||||
|     def reward(self, obs: np.ndarray) -> float: | ||||
|         return 1.0 if self.termination(obs) else -0.0001 | ||||
|         return 1.0 if self.termination(obs) else self.PENALTY | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
| @ -141,6 +117,25 @@ class DistRewardUMaze(GoalRewardUMaze, DistRewardMixIn): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class GoalRewardSimpleRoom(GoalRewardUMaze): | ||||
|     def __init__(self, scale: float) -> None: | ||||
|         super().__init__(scale) | ||||
|         self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
|         E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT | ||||
|         return [ | ||||
|             [B, B, B, B, B], | ||||
|             [B, R, E, E, B], | ||||
|             [B, B, B, B, B], | ||||
|         ] | ||||
| 
 | ||||
| 
 | ||||
| class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class GoalRewardPush(GoalRewardUMaze): | ||||
|     TOP_DOWN_VIEW = True | ||||
| 
 | ||||
| @ -188,8 +183,29 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class GoalRewardFall(GoalRewardUMaze): | ||||
|     TOP_DOWN_VIEW = True | ||||
| 
 | ||||
|     def __init__(self, scale: float) -> None: | ||||
|         super().__init__(scale) | ||||
|         self.goals = [MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
|         E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT | ||||
|         return [ | ||||
|             [B, B, B, B], | ||||
|             [B, R, E, B], | ||||
|             [B, E, MazeCell.YZ, B], | ||||
|             [B, C, C, B], | ||||
|             [B, E, E, B], | ||||
|             [B, B, B, B], | ||||
|         ] | ||||
| 
 | ||||
| 
 | ||||
| class GoalReward2Rooms(MazeTask): | ||||
|     REWARD_THRESHOLD: float = 0.9 | ||||
|     PENALTY: float = -0.0001 | ||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) | ||||
| 
 | ||||
|     def __init__(self, scale: float) -> None: | ||||
| @ -200,7 +216,7 @@ class GoalReward2Rooms(MazeTask): | ||||
|         for goal in self.goals: | ||||
|             if goal.neighbor(obs): | ||||
|                 return goal.reward_scale | ||||
|         return -0.0001 | ||||
|         return self.PENALTY | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
| @ -228,6 +244,7 @@ class SubGoal2Rooms(GoalReward2Rooms): | ||||
| 
 | ||||
| class GoalReward4Rooms(MazeTask): | ||||
|     REWARD_THRESHOLD: float = 0.9 | ||||
|     PENALTY: float = -0.0001 | ||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) | ||||
| 
 | ||||
|     def __init__(self, scale: float) -> None: | ||||
| @ -238,7 +255,7 @@ class GoalReward4Rooms(MazeTask): | ||||
|         for goal in self.goals: | ||||
|             if goal.neighbor(obs): | ||||
|                 return goal.reward_scale | ||||
|         return -0.0001 | ||||
|         return self.PENALTY | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
| @ -271,6 +288,7 @@ class SubGoal4Rooms(GoalReward4Rooms): | ||||
| 
 | ||||
| class GoalRewardTRoom(MazeTask): | ||||
|     REWARD_THRESHOLD: float = 0.9 | ||||
|     PENALTY: float = -0.0001 | ||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) | ||||
| 
 | ||||
|     def __init__( | ||||
| @ -285,7 +303,7 @@ class GoalRewardTRoom(MazeTask): | ||||
|         for goal in self.goals: | ||||
|             if goal.neighbor(obs): | ||||
|                 return goal.reward_scale | ||||
|         return -0.0001 | ||||
|         return self.PENALTY | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
| @ -304,6 +322,30 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class GoalRewardBlockMaze(GoalRewardUMaze): | ||||
|     OBSERVE_BLOCKS: bool = True | ||||
| 
 | ||||
|     def __init__(self, scale: float) -> None: | ||||
|         super().__init__(scale) | ||||
|         self.goals = [MazeGoal(np.array([0.0, 3.0 * scale]))] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def create_maze() -> List[List[MazeCell]]: | ||||
|         E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY | ||||
|         return [ | ||||
|             [B, B, B, B, B], | ||||
|             [B, R, E, E, B], | ||||
|             [B, B, B, M, B], | ||||
|             [B, E, E, E, B], | ||||
|             [B, E, E, E, B], | ||||
|             [B, B, B, B, B], | ||||
|         ] | ||||
| 
 | ||||
| 
 | ||||
| class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class TaskRegistry: | ||||
|     REGISTRY: Dict[str, List[Type[MazeTask]]] = { | ||||
|         "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom], | ||||
| @ -313,6 +355,7 @@ class TaskRegistry: | ||||
|         "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms], | ||||
|         "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms], | ||||
|         "TRoom": [DistRewardTRoom, GoalRewardTRoom], | ||||
|         "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze], | ||||
|     } | ||||
| 
 | ||||
|     @staticmethod | ||||
|  | ||||
| @ -10,7 +10,7 @@ def test_ant_maze(maze_id): | ||||
|         env = gym.make(f"Ant{maze_id}-v{i}") | ||||
|         s0 = env.reset() | ||||
|         s, _, _, _ = env.step(env.action_space.sample()) | ||||
|         if not env.unwrapped._top_down_view: | ||||
|         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||
|             assert s0.shape == (30,) | ||||
|             assert s.shape == (30,) | ||||
| 
 | ||||
| @ -20,21 +20,41 @@ def test_point_maze(maze_id): | ||||
|     for i in range(2): | ||||
|         env = gym.make(f"Point{maze_id}-v{i}") | ||||
|         s0 = env.reset() | ||||
|         s, _, _, _ = env.step(env.action_space.sample()) | ||||
|         if not env.unwrapped._top_down_view: | ||||
|         s, r, _, _ = env.step(env.action_space.sample()) | ||||
|         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||
|             assert s0.shape == (7,) | ||||
|             assert s.shape == (7,) | ||||
|         if i == 0: | ||||
|             assert r != 0.0 | ||||
|         else: | ||||
|             assert r == env.unwrapped._task.PENALTY | ||||
|             assert r < 0.0 | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) | ||||
| def test_reacher_maze(maze_id): | ||||
|     for inhibited in ["Fall", "Push", "Block"]: | ||||
|         if inhibited in maze_id: | ||||
|             return | ||||
|     for i in range(2): | ||||
|         env = gym.make(f"Reacher{maze_id}-v{i}") | ||||
|         s0 = env.reset() | ||||
|         s, _, _, _ = env.step(env.action_space.sample()) | ||||
|         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||
|             assert s0.shape == (9,) | ||||
|             assert s.shape == (9,) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) | ||||
| def test_swimmer_maze(maze_id): | ||||
|     if "Fall" in maze_id or "Push" in maze_id: | ||||
|         return | ||||
|     for inhibited in ["Fall", "Push", "Block"]: | ||||
|         if inhibited in maze_id: | ||||
|             return | ||||
|     for i in range(2): | ||||
|         env = gym.make(f"Swimmer{maze_id}-v{i}") | ||||
|         s0 = env.reset() | ||||
|         s, _, _, _ = env.step(env.action_space.sample()) | ||||
|         if not env.unwrapped._top_down_view: | ||||
|         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||
|             assert s0.shape == (11,) | ||||
|             assert s.shape == (11,) | ||||
| 
 | ||||
| @ -45,3 +65,10 @@ def test_maze_args(v): | ||||
|     assert env.reset().shape == (7,) | ||||
|     s, _, _, _ = env.step(env.action_space.sample()) | ||||
|     assert s.shape == (7,) | ||||
| 
 | ||||
| 
 | ||||
| def test_getting_movable(v): | ||||
|     env = gym.make("PointBlockMaze-v1") | ||||
|     assert env.reset().shape == (7,) | ||||
|     s, _, _, _ = env.step(env.action_space.sample()) | ||||
|     assert s.shape == (7,) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user