Block Maze
This commit is contained in:
		
							parent
							
								
									1c4152654b
								
							
						
					
					
						commit
						720f535682
					
				| @ -32,6 +32,10 @@ Thankfully, this project is based on the code from  [rllab] and [tensorflow/mode | |||||||
|   - PointFall-v0/AntFall-v0 (Distance-based Reward) |   - PointFall-v0/AntFall-v0 (Distance-based Reward) | ||||||
|   - PointFall-v1/AntFall-v1 (Goal-based Reward) |   - PointFall-v1/AntFall-v1 (Goal-based Reward) | ||||||
| 
 | 
 | ||||||
|  | ## Caveats | ||||||
|  | This project has some other features (e.g., block maze and other | ||||||
|  | robots), but they are still a work in progress. | ||||||
|  | 
 | ||||||
| ## License | ## License | ||||||
| This project is licensed under Apache License, Version 2.0 | This project is licensed under Apache License, Version 2.0 | ||||||
| ([LICENSE-APACHE](LICENSE) or http://www.apache.org/licenses/LICENSE-2.0). | ([LICENSE-APACHE](LICENSE) or http://www.apache.org/licenses/LICENSE-2.0). | ||||||
|  | |||||||
| @ -11,6 +11,7 @@ import gym | |||||||
| from mujoco_maze.ant import AntEnv | from mujoco_maze.ant import AntEnv | ||||||
| from mujoco_maze.maze_task import TaskRegistry | from mujoco_maze.maze_task import TaskRegistry | ||||||
| from mujoco_maze.point import PointEnv | from mujoco_maze.point import PointEnv | ||||||
|  | from mujoco_maze.reacher import ReacherEnv | ||||||
| from mujoco_maze.swimmer import SwimmerEnv | from mujoco_maze.swimmer import SwimmerEnv | ||||||
| 
 | 
 | ||||||
| for maze_id in TaskRegistry.keys(): | for maze_id in TaskRegistry.keys(): | ||||||
| @ -41,10 +42,28 @@ for maze_id in TaskRegistry.keys(): | |||||||
|             max_episode_steps=1000, |             max_episode_steps=1000, | ||||||
|             reward_threshold=task_cls.REWARD_THRESHOLD, |             reward_threshold=task_cls.REWARD_THRESHOLD, | ||||||
|         ) |         ) | ||||||
|  |         skip_swimmer = False | ||||||
|  |         for inhibited in ["Fall", "Push", "Block"]: | ||||||
|  |             if inhibited in maze_id: | ||||||
|  |                 skip_swimmer = True | ||||||
| 
 | 
 | ||||||
|         if "Push" in maze_id or "Fall" in maze_id: |         if skip_swimmer: | ||||||
|             continue |             continue | ||||||
| 
 | 
 | ||||||
|  |         # Reacher | ||||||
|  |         gym.envs.register( | ||||||
|  |             id=f"Reacher{maze_id}-v{i}", | ||||||
|  |             entry_point="mujoco_maze.maze_env:MazeEnv", | ||||||
|  |             kwargs=dict( | ||||||
|  |                 model_cls=ReacherEnv, | ||||||
|  |                 maze_task=task_cls, | ||||||
|  |                 maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer, | ||||||
|  |                 inner_reward_scaling=task_cls.INNER_REWARD_SCALING, | ||||||
|  |             ), | ||||||
|  |             max_episode_steps=1000, | ||||||
|  |             reward_threshold=task_cls.REWARD_THRESHOLD, | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|         # Swimmer |         # Swimmer | ||||||
|         gym.envs.register( |         gym.envs.register( | ||||||
|             id=f"Swimmer{maze_id}-v{i}", |             id=f"Swimmer{maze_id}-v{i}", | ||||||
|  | |||||||
| @ -13,7 +13,7 @@ | |||||||
|     <material name='geom' texture="texgeom" texuniform="true" /> |     <material name='geom' texture="texgeom" texuniform="true" /> | ||||||
|   </asset> |   </asset> | ||||||
|   <worldbody> |   <worldbody> | ||||||
|     <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" /> |     <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" /> | ||||||
|     <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" /> |     <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" /> | ||||||
|     <!--  ================= SWIMMER ================= /--> |     <!--  ================= SWIMMER ================= /--> | ||||||
|     <body name="torso" pos="0 0 0"> |     <body name="torso" pos="0 0 0"> | ||||||
|  | |||||||
| @ -145,11 +145,8 @@ class MazeEnv(gym.Env): | |||||||
|                     spinning = struct.can_spin() |                     spinning = struct.can_spin() | ||||||
|                     shrink = 0.1 if spinning else 0.99 if falling else 1.0 |                     shrink = 0.1 if spinning else 0.99 if falling else 1.0 | ||||||
|                     height_shrink = 0.1 if spinning else 1.0 |                     height_shrink = 0.1 if spinning else 1.0 | ||||||
|                     x = ( |                     x_offset = 0.25 * size_scaling if spinning else 0.0 | ||||||
|                         j * size_scaling - torso_x + 0.25 * size_scaling |                     x = j * size_scaling - torso_x + x_offset | ||||||
|                         if spinning |  | ||||||
|                         else 0.0 |  | ||||||
|                     ) |  | ||||||
|                     y = i * size_scaling - torso_y |                     y = i * size_scaling - torso_y | ||||||
|                     h = height / 2 * size_scaling * height_shrink |                     h = height / 2 * size_scaling * height_shrink | ||||||
|                     size = 0.5 * size_scaling * shrink |                     size = 0.5 * size_scaling * shrink | ||||||
| @ -462,5 +459,5 @@ class MazeEnv(gym.Env): | |||||||
|         info["position"] = self.wrapped_env.get_xy() |         info["position"] = self.wrapped_env.get_xy() | ||||||
|         return next_obs, inner_reward + outer_reward, done, info |         return next_obs, inner_reward + outer_reward, done, info | ||||||
| 
 | 
 | ||||||
|     def close(self): |     def close(self) -> None: | ||||||
|         self.wrapped_env.close() |         self.wrapped_env.close() | ||||||
|  | |||||||
| @ -2,7 +2,7 @@ | |||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from abc import ABC, abstractmethod | from abc import ABC, abstractmethod | ||||||
| from typing import Dict, List, NamedTuple, Tuple, Type | from typing import Dict, List, NamedTuple, Optional, Tuple, Type | ||||||
| 
 | 
 | ||||||
| import numpy as np | import numpy as np | ||||||
| 
 | 
 | ||||||
| @ -51,6 +51,7 @@ class Scaling(NamedTuple): | |||||||
| 
 | 
 | ||||||
| class MazeTask(ABC): | class MazeTask(ABC): | ||||||
|     REWARD_THRESHOLD: float |     REWARD_THRESHOLD: float | ||||||
|  |     PENALTY: Optional[float] = None | ||||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0) |     MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0) | ||||||
|     INNER_REWARD_SCALING: float = 0.01 |     INNER_REWARD_SCALING: float = 0.01 | ||||||
|     TOP_DOWN_VIEW: bool = False |     TOP_DOWN_VIEW: bool = False | ||||||
| @ -89,41 +90,16 @@ class DistRewardMixIn: | |||||||
|         return -self.goals[0].euc_dist(obs) / self.scale |         return -self.goals[0].euc_dist(obs) / self.scale | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class GoalRewardSimpleRoom(MazeTask): |  | ||||||
|     """ Very easy task. For testing. |  | ||||||
|     """ |  | ||||||
|     REWARD_THRESHOLD: float = 0.9 |  | ||||||
| 
 |  | ||||||
|     def __init__(self, scale: float) -> None: |  | ||||||
|         super().__init__(scale) |  | ||||||
|         self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))] |  | ||||||
| 
 |  | ||||||
|     def reward(self, obs: np.ndarray) -> float: |  | ||||||
|         return 1.0 if self.termination(obs) else -0.0001 |  | ||||||
| 
 |  | ||||||
|     @staticmethod |  | ||||||
|     def create_maze() -> List[List[MazeCell]]: |  | ||||||
|         E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT |  | ||||||
|         return [ |  | ||||||
|             [B, B, B, B, B], |  | ||||||
|             [B, R, E, E, B], |  | ||||||
|             [B, B, B, B, B], |  | ||||||
|         ] |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn): |  | ||||||
|     pass |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class GoalRewardUMaze(MazeTask): | class GoalRewardUMaze(MazeTask): | ||||||
|     REWARD_THRESHOLD: float = 0.9 |     REWARD_THRESHOLD: float = 0.9 | ||||||
|  |     PENALTY: float = -0.0001 | ||||||
| 
 | 
 | ||||||
|     def __init__(self, scale: float) -> None: |     def __init__(self, scale: float) -> None: | ||||||
|         super().__init__(scale) |         super().__init__(scale) | ||||||
|         self.goals = [MazeGoal(np.array([0.0, 2.0 * scale]))] |         self.goals = [MazeGoal(np.array([0.0, 2.0 * scale]))] | ||||||
| 
 | 
 | ||||||
|     def reward(self, obs: np.ndarray) -> float: |     def reward(self, obs: np.ndarray) -> float: | ||||||
|         return 1.0 if self.termination(obs) else -0.0001 |         return 1.0 if self.termination(obs) else self.PENALTY | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def create_maze() -> List[List[MazeCell]]: |     def create_maze() -> List[List[MazeCell]]: | ||||||
| @ -141,6 +117,25 @@ class DistRewardUMaze(GoalRewardUMaze, DistRewardMixIn): | |||||||
|     pass |     pass | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class GoalRewardSimpleRoom(GoalRewardUMaze): | ||||||
|  |     def __init__(self, scale: float) -> None: | ||||||
|  |         super().__init__(scale) | ||||||
|  |         self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))] | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def create_maze() -> List[List[MazeCell]]: | ||||||
|  |         E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT | ||||||
|  |         return [ | ||||||
|  |             [B, B, B, B, B], | ||||||
|  |             [B, R, E, E, B], | ||||||
|  |             [B, B, B, B, B], | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn): | ||||||
|  |     pass | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class GoalRewardPush(GoalRewardUMaze): | class GoalRewardPush(GoalRewardUMaze): | ||||||
|     TOP_DOWN_VIEW = True |     TOP_DOWN_VIEW = True | ||||||
| 
 | 
 | ||||||
| @ -188,8 +183,29 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn): | |||||||
|     pass |     pass | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class GoalRewardFall(GoalRewardUMaze): | ||||||
|  |     TOP_DOWN_VIEW = True | ||||||
|  | 
 | ||||||
|  |     def __init__(self, scale: float) -> None: | ||||||
|  |         super().__init__(scale) | ||||||
|  |         self.goals = [MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))] | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def create_maze() -> List[List[MazeCell]]: | ||||||
|  |         E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT | ||||||
|  |         return [ | ||||||
|  |             [B, B, B, B], | ||||||
|  |             [B, R, E, B], | ||||||
|  |             [B, E, MazeCell.YZ, B], | ||||||
|  |             [B, C, C, B], | ||||||
|  |             [B, E, E, B], | ||||||
|  |             [B, B, B, B], | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class GoalReward2Rooms(MazeTask): | class GoalReward2Rooms(MazeTask): | ||||||
|     REWARD_THRESHOLD: float = 0.9 |     REWARD_THRESHOLD: float = 0.9 | ||||||
|  |     PENALTY: float = -0.0001 | ||||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) |     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) | ||||||
| 
 | 
 | ||||||
|     def __init__(self, scale: float) -> None: |     def __init__(self, scale: float) -> None: | ||||||
| @ -200,7 +216,7 @@ class GoalReward2Rooms(MazeTask): | |||||||
|         for goal in self.goals: |         for goal in self.goals: | ||||||
|             if goal.neighbor(obs): |             if goal.neighbor(obs): | ||||||
|                 return goal.reward_scale |                 return goal.reward_scale | ||||||
|         return -0.0001 |         return self.PENALTY | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def create_maze() -> List[List[MazeCell]]: |     def create_maze() -> List[List[MazeCell]]: | ||||||
| @ -228,6 +244,7 @@ class SubGoal2Rooms(GoalReward2Rooms): | |||||||
| 
 | 
 | ||||||
| class GoalReward4Rooms(MazeTask): | class GoalReward4Rooms(MazeTask): | ||||||
|     REWARD_THRESHOLD: float = 0.9 |     REWARD_THRESHOLD: float = 0.9 | ||||||
|  |     PENALTY: float = -0.0001 | ||||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) |     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) | ||||||
| 
 | 
 | ||||||
|     def __init__(self, scale: float) -> None: |     def __init__(self, scale: float) -> None: | ||||||
| @ -238,7 +255,7 @@ class GoalReward4Rooms(MazeTask): | |||||||
|         for goal in self.goals: |         for goal in self.goals: | ||||||
|             if goal.neighbor(obs): |             if goal.neighbor(obs): | ||||||
|                 return goal.reward_scale |                 return goal.reward_scale | ||||||
|         return -0.0001 |         return self.PENALTY | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def create_maze() -> List[List[MazeCell]]: |     def create_maze() -> List[List[MazeCell]]: | ||||||
| @ -271,6 +288,7 @@ class SubGoal4Rooms(GoalReward4Rooms): | |||||||
| 
 | 
 | ||||||
| class GoalRewardTRoom(MazeTask): | class GoalRewardTRoom(MazeTask): | ||||||
|     REWARD_THRESHOLD: float = 0.9 |     REWARD_THRESHOLD: float = 0.9 | ||||||
|  |     PENALTY: float = -0.0001 | ||||||
|     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) |     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) | ||||||
| 
 | 
 | ||||||
|     def __init__( |     def __init__( | ||||||
| @ -285,7 +303,7 @@ class GoalRewardTRoom(MazeTask): | |||||||
|         for goal in self.goals: |         for goal in self.goals: | ||||||
|             if goal.neighbor(obs): |             if goal.neighbor(obs): | ||||||
|                 return goal.reward_scale |                 return goal.reward_scale | ||||||
|         return -0.0001 |         return self.PENALTY | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def create_maze() -> List[List[MazeCell]]: |     def create_maze() -> List[List[MazeCell]]: | ||||||
| @ -304,6 +322,30 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn): | |||||||
|     pass |     pass | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class GoalRewardBlockMaze(GoalRewardUMaze): | ||||||
|  |     OBSERVE_BLOCKS: bool = True | ||||||
|  | 
 | ||||||
|  |     def __init__(self, scale: float) -> None: | ||||||
|  |         super().__init__(scale) | ||||||
|  |         self.goals = [MazeGoal(np.array([0.0, 3.0 * scale]))] | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def create_maze() -> List[List[MazeCell]]: | ||||||
|  |         E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY | ||||||
|  |         return [ | ||||||
|  |             [B, B, B, B, B], | ||||||
|  |             [B, R, E, E, B], | ||||||
|  |             [B, B, B, M, B], | ||||||
|  |             [B, E, E, E, B], | ||||||
|  |             [B, E, E, E, B], | ||||||
|  |             [B, B, B, B, B], | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn): | ||||||
|  |     pass | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class TaskRegistry: | class TaskRegistry: | ||||||
|     REGISTRY: Dict[str, List[Type[MazeTask]]] = { |     REGISTRY: Dict[str, List[Type[MazeTask]]] = { | ||||||
|         "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom], |         "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom], | ||||||
| @ -313,6 +355,7 @@ class TaskRegistry: | |||||||
|         "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms], |         "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms], | ||||||
|         "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms], |         "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms], | ||||||
|         "TRoom": [DistRewardTRoom, GoalRewardTRoom], |         "TRoom": [DistRewardTRoom, GoalRewardTRoom], | ||||||
|  |         "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze], | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|  | |||||||
| @ -10,7 +10,7 @@ def test_ant_maze(maze_id): | |||||||
|         env = gym.make(f"Ant{maze_id}-v{i}") |         env = gym.make(f"Ant{maze_id}-v{i}") | ||||||
|         s0 = env.reset() |         s0 = env.reset() | ||||||
|         s, _, _, _ = env.step(env.action_space.sample()) |         s, _, _, _ = env.step(env.action_space.sample()) | ||||||
|         if not env.unwrapped._top_down_view: |         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||||
|             assert s0.shape == (30,) |             assert s0.shape == (30,) | ||||||
|             assert s.shape == (30,) |             assert s.shape == (30,) | ||||||
| 
 | 
 | ||||||
| @ -20,21 +20,41 @@ def test_point_maze(maze_id): | |||||||
|     for i in range(2): |     for i in range(2): | ||||||
|         env = gym.make(f"Point{maze_id}-v{i}") |         env = gym.make(f"Point{maze_id}-v{i}") | ||||||
|         s0 = env.reset() |         s0 = env.reset() | ||||||
|         s, _, _, _ = env.step(env.action_space.sample()) |         s, r, _, _ = env.step(env.action_space.sample()) | ||||||
|         if not env.unwrapped._top_down_view: |         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||||
|             assert s0.shape == (7,) |             assert s0.shape == (7,) | ||||||
|             assert s.shape == (7,) |             assert s.shape == (7,) | ||||||
|  |         if i == 0: | ||||||
|  |             assert r != 0.0 | ||||||
|  |         else: | ||||||
|  |             assert r == env.unwrapped._task.PENALTY | ||||||
|  |             assert r < 0.0 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) | ||||||
|  | def test_reacher_maze(maze_id): | ||||||
|  |     for inhibited in ["Fall", "Push", "Block"]: | ||||||
|  |         if inhibited in maze_id: | ||||||
|  |             return | ||||||
|  |     for i in range(2): | ||||||
|  |         env = gym.make(f"Reacher{maze_id}-v{i}") | ||||||
|  |         s0 = env.reset() | ||||||
|  |         s, _, _, _ = env.step(env.action_space.sample()) | ||||||
|  |         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||||
|  |             assert s0.shape == (9,) | ||||||
|  |             assert s.shape == (9,) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) | @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) | ||||||
| def test_swimmer_maze(maze_id): | def test_swimmer_maze(maze_id): | ||||||
|     if "Fall" in maze_id or "Push" in maze_id: |     for inhibited in ["Fall", "Push", "Block"]: | ||||||
|         return |         if inhibited in maze_id: | ||||||
|  |             return | ||||||
|     for i in range(2): |     for i in range(2): | ||||||
|         env = gym.make(f"Swimmer{maze_id}-v{i}") |         env = gym.make(f"Swimmer{maze_id}-v{i}") | ||||||
|         s0 = env.reset() |         s0 = env.reset() | ||||||
|         s, _, _, _ = env.step(env.action_space.sample()) |         s, _, _, _ = env.step(env.action_space.sample()) | ||||||
|         if not env.unwrapped._top_down_view: |         if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: | ||||||
|             assert s0.shape == (11,) |             assert s0.shape == (11,) | ||||||
|             assert s.shape == (11,) |             assert s.shape == (11,) | ||||||
| 
 | 
 | ||||||
| @ -45,3 +65,10 @@ def test_maze_args(v): | |||||||
|     assert env.reset().shape == (7,) |     assert env.reset().shape == (7,) | ||||||
|     s, _, _, _ = env.step(env.action_space.sample()) |     s, _, _, _ = env.step(env.action_space.sample()) | ||||||
|     assert s.shape == (7,) |     assert s.shape == (7,) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_getting_movable(v): | ||||||
|  |     env = gym.make("PointBlockMaze-v1") | ||||||
|  |     assert env.reset().shape == (7,) | ||||||
|  |     s, _, _, _ = env.step(env.action_space.sample()) | ||||||
|  |     assert s.shape == (7,) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user