16
									
								
								README.md
									
									
									
									
									
								
							
							
						
						@ -1,4 +1,5 @@
 | 
			
		||||
# mujoco-maze
 | 
			
		||||
[](https://github.com/psf/black)
 | 
			
		||||
 | 
			
		||||
Some maze environments for reinforcement learning(RL) using [mujoco-py] and
 | 
			
		||||
[openai gym][gym].
 | 
			
		||||
@ -22,19 +23,26 @@ Thankfully, this project is based on the code from  [rllab] and [tensorflow/mode
 | 
			
		||||
 | 
			
		||||
- PointPush/AntPush
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  - PointPush-v0/AntPush-v0 (Distance-based Reward)
 | 
			
		||||
  - PointPush-v1/AntPush-v1 (Goal-based Reward)
 | 
			
		||||
 | 
			
		||||
- PointFall/AntFall
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  - PointFall-v0/AntFall-v0 (Distance-based Reward)
 | 
			
		||||
  - PointFall-v1/AntFall-v1 (Goal-based Reward)
 | 
			
		||||
 | 
			
		||||
- PointBilliard
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  - PointBilliard-v0 (Distance-based Reward)
 | 
			
		||||
  - PointBilliard-v1 (Goal-based Reward)
 | 
			
		||||
  - PointBilliard-v2 (Multiple Goals (0.5 pt or 1.0 pt))
 | 
			
		||||
 | 
			
		||||
## Warning
 | 
			
		||||
This project has some other environments (e.g., billiard, reacher, and
 | 
			
		||||
swimmer) but if they are not on README, they are work in progress and
 | 
			
		||||
This project has some other environments (e.g., reacher, and swimmer)
 | 
			
		||||
but if they are not on README, they are work in progress and
 | 
			
		||||
not tested well.
 | 
			
		||||
 | 
			
		||||
## License
 | 
			
		||||
 | 
			
		||||
@ -16,69 +16,66 @@ from mujoco_maze.swimmer import SwimmerEnv
 | 
			
		||||
 | 
			
		||||
for maze_id in TaskRegistry.keys():
 | 
			
		||||
    for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
 | 
			
		||||
        # Point
 | 
			
		||||
        gym.envs.register(
 | 
			
		||||
            id=f"Point{maze_id}-v{i}",
 | 
			
		||||
            entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
            kwargs=dict(
 | 
			
		||||
                model_cls=PointEnv,
 | 
			
		||||
                maze_task=task_cls,
 | 
			
		||||
                maze_size_scaling=task_cls.MAZE_SIZE_SCALING.point,
 | 
			
		||||
                inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
            ),
 | 
			
		||||
            max_episode_steps=1000,
 | 
			
		||||
            reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
        )
 | 
			
		||||
        if "Billiard" in maze_id:
 | 
			
		||||
            continue
 | 
			
		||||
        # Ant
 | 
			
		||||
        gym.envs.register(
 | 
			
		||||
            id=f"Ant{maze_id}-v{i}",
 | 
			
		||||
            entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
            kwargs=dict(
 | 
			
		||||
                model_cls=AntEnv,
 | 
			
		||||
                maze_task=task_cls,
 | 
			
		||||
                maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
 | 
			
		||||
                inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
            ),
 | 
			
		||||
            max_episode_steps=1000,
 | 
			
		||||
            reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
        )
 | 
			
		||||
        skip_swimmer = False
 | 
			
		||||
        for inhibited in ["Fall", "Push", "Block"]:
 | 
			
		||||
            if inhibited in maze_id:
 | 
			
		||||
                skip_swimmer = True
 | 
			
		||||
        point_scale = task_cls.MAZE_SIZE_SCALING.point
 | 
			
		||||
        if point_scale is not None:
 | 
			
		||||
            # Point
 | 
			
		||||
            gym.envs.register(
 | 
			
		||||
                id=f"Point{maze_id}-v{i}",
 | 
			
		||||
                entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
                kwargs=dict(
 | 
			
		||||
                    model_cls=PointEnv,
 | 
			
		||||
                    maze_task=task_cls,
 | 
			
		||||
                    maze_size_scaling=point_scale,
 | 
			
		||||
                    inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
                ),
 | 
			
		||||
                max_episode_steps=1000,
 | 
			
		||||
                reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        if skip_swimmer:
 | 
			
		||||
            continue
 | 
			
		||||
        ant_scale = task_cls.MAZE_SIZE_SCALING.ant
 | 
			
		||||
        if ant_scale is not None:
 | 
			
		||||
            # Ant
 | 
			
		||||
            gym.envs.register(
 | 
			
		||||
                id=f"Ant{maze_id}-v{i}",
 | 
			
		||||
                entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
                kwargs=dict(
 | 
			
		||||
                    model_cls=AntEnv,
 | 
			
		||||
                    maze_task=task_cls,
 | 
			
		||||
                    maze_size_scaling=ant_scale,
 | 
			
		||||
                    inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
                ),
 | 
			
		||||
                max_episode_steps=1000,
 | 
			
		||||
                reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        # Reacher
 | 
			
		||||
        gym.envs.register(
 | 
			
		||||
            id=f"Reacher{maze_id}-v{i}",
 | 
			
		||||
            entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
            kwargs=dict(
 | 
			
		||||
                model_cls=ReacherEnv,
 | 
			
		||||
                maze_task=task_cls,
 | 
			
		||||
                maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
 | 
			
		||||
                inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
            ),
 | 
			
		||||
            max_episode_steps=1000,
 | 
			
		||||
            reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # Swimmer
 | 
			
		||||
        gym.envs.register(
 | 
			
		||||
            id=f"Swimmer{maze_id}-v{i}",
 | 
			
		||||
            entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
            kwargs=dict(
 | 
			
		||||
                model_cls=SwimmerEnv,
 | 
			
		||||
                maze_task=task_cls,
 | 
			
		||||
                maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
 | 
			
		||||
                inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
            ),
 | 
			
		||||
            max_episode_steps=1000,
 | 
			
		||||
            reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
        )
 | 
			
		||||
        swimmer_scale = task_cls.MAZE_SIZE_SCALING.swimmer
 | 
			
		||||
        if swimmer_scale is not None:
 | 
			
		||||
            # Reacher
 | 
			
		||||
            gym.envs.register(
 | 
			
		||||
                id=f"Reacher{maze_id}-v{i}",
 | 
			
		||||
                entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
                kwargs=dict(
 | 
			
		||||
                    model_cls=ReacherEnv,
 | 
			
		||||
                    maze_task=task_cls,
 | 
			
		||||
                    maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
 | 
			
		||||
                    inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
                ),
 | 
			
		||||
                max_episode_steps=1000,
 | 
			
		||||
                reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
            )
 | 
			
		||||
            # Swimmer
 | 
			
		||||
            gym.envs.register(
 | 
			
		||||
                id=f"Swimmer{maze_id}-v{i}",
 | 
			
		||||
                entry_point="mujoco_maze.maze_env:MazeEnv",
 | 
			
		||||
                kwargs=dict(
 | 
			
		||||
                    model_cls=SwimmerEnv,
 | 
			
		||||
                    maze_task=task_cls,
 | 
			
		||||
                    maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
 | 
			
		||||
                    inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
 | 
			
		||||
                ),
 | 
			
		||||
                max_episode_steps=1000,
 | 
			
		||||
                reward_threshold=task_cls.REWARD_THRESHOLD,
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__version__ = "0.1.0"
 | 
			
		||||
 | 
			
		||||
@ -11,13 +11,13 @@
 | 
			
		||||
  <asset>
 | 
			
		||||
    <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
 | 
			
		||||
    <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
 | 
			
		||||
    <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
 | 
			
		||||
    <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
 | 
			
		||||
    <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1"  reflectance="0.5" />
 | 
			
		||||
    <material name='geom' texture="texgeom" texuniform="true" />
 | 
			
		||||
  </asset>
 | 
			
		||||
  <worldbody>
 | 
			
		||||
    <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
 | 
			
		||||
    <geom name='floor' pos='0 0 0' size='40 40 40' type='plane' conaffinity='1' rgba='0.8 0.9 0.8 1' condim='3' />
 | 
			
		||||
    <geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
 | 
			
		||||
    <body name="torso" pos="0 0 0.75">
 | 
			
		||||
      <geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
 | 
			
		||||
      <joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />
 | 
			
		||||
 | 
			
		||||
@ -8,7 +8,7 @@
 | 
			
		||||
    <asset>
 | 
			
		||||
        <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
 | 
			
		||||
        <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
 | 
			
		||||
        <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
 | 
			
		||||
        <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
 | 
			
		||||
        <material name="MatPlane" texture="texplane" shininess="1" texrepeat="30 30" specular="1"  reflectance="0.5" />
 | 
			
		||||
        <material name="geom" texture="texgeom" texuniform="true" />
 | 
			
		||||
    </asset>
 | 
			
		||||
@ -18,8 +18,8 @@
 | 
			
		||||
        <!--  ================= Point ================= /-->
 | 
			
		||||
        <!--  Note that the solimp is modified from rllab to prevent the point from going through the wall /-->
 | 
			
		||||
        <body name="torso" pos="0 0 0">
 | 
			
		||||
            <geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" />
 | 
			
		||||
            <geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" />
 | 
			
		||||
            <geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
 | 
			
		||||
            <geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
 | 
			
		||||
            <joint name="ballx" type="slide" axis="1 0 0" pos="0 0 0" />
 | 
			
		||||
            <joint name="bally" type="slide" axis="0 1 0" pos="0 0 0" />
 | 
			
		||||
            <joint name="rot" type="hinge" axis="0 0 1" pos="0 0 0" limited="false" />
 | 
			
		||||
 | 
			
		||||
@ -8,7 +8,7 @@
 | 
			
		||||
  <asset>
 | 
			
		||||
    <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
 | 
			
		||||
    <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
 | 
			
		||||
    <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
 | 
			
		||||
    <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
 | 
			
		||||
    <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1"  reflectance="0.5" />
 | 
			
		||||
    <material name='geom' texture="texgeom" texuniform="true" />
 | 
			
		||||
  </asset>
 | 
			
		||||
 | 
			
		||||
@ -8,7 +8,7 @@
 | 
			
		||||
  <asset>
 | 
			
		||||
    <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
 | 
			
		||||
    <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
 | 
			
		||||
    <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
 | 
			
		||||
    <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
 | 
			
		||||
    <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1"  reflectance="0.5" />
 | 
			
		||||
    <material name='geom' texture="texgeom" texuniform="true" />
 | 
			
		||||
  </asset>
 | 
			
		||||
 | 
			
		||||
@ -72,8 +72,9 @@ class MazeEnv(gym.Env):
 | 
			
		||||
            self._collision = maze_env_utils.CollisionDetector(
 | 
			
		||||
                structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
 | 
			
		||||
            )
 | 
			
		||||
            # Now all object balls have size=1.0
 | 
			
		||||
            self._objball_collision = maze_env_utils.CollisionDetector(
 | 
			
		||||
                structure, size_scaling, torso_x, torso_y, 0.8,
 | 
			
		||||
                structure, size_scaling, torso_x, torso_y, self._task.OBJECT_BALL_SIZE,
 | 
			
		||||
            )
 | 
			
		||||
        else:
 | 
			
		||||
            self._collision = None
 | 
			
		||||
@ -145,7 +146,7 @@ class MazeEnv(gym.Env):
 | 
			
		||||
                elif struct.is_object_ball():
 | 
			
		||||
                    # Movable Ball
 | 
			
		||||
                    self.object_balls.append(f"objball_{i}_{j}")
 | 
			
		||||
                    _add_object_ball(worldbody, i, j, x, y)
 | 
			
		||||
                    _add_object_ball(worldbody, i, j, x, y, self._task.OBJECT_BALL_SIZE)
 | 
			
		||||
 | 
			
		||||
        torso = tree.find(".//body[@name='torso']")
 | 
			
		||||
        geoms = torso.findall(".//geom")
 | 
			
		||||
@ -166,7 +167,7 @@ class MazeEnv(gym.Env):
 | 
			
		||||
                name=f"goal_site{i}",
 | 
			
		||||
                pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
 | 
			
		||||
                size=f"{maze_size_scaling * 0.1}",
 | 
			
		||||
                rgba=goal.rbga_str(),
 | 
			
		||||
                rgba=goal.rgb.rgba_str(),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
 | 
			
		||||
@ -385,6 +386,7 @@ class MazeEnv(gym.Env):
 | 
			
		||||
                    self.wrapped_env.set_xy(old_pos)
 | 
			
		||||
                else:
 | 
			
		||||
                    self.wrapped_env.set_xy(pos)
 | 
			
		||||
            # Do the same check for object balls
 | 
			
		||||
            for name, old, new in zip(self.object_balls, old_objballs, new_objballs):
 | 
			
		||||
                collision = self._objball_collision.detect(old, new)
 | 
			
		||||
                if collision is not None:
 | 
			
		||||
@ -406,20 +408,23 @@ class MazeEnv(gym.Env):
 | 
			
		||||
        self.wrapped_env.close()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None:
 | 
			
		||||
    body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",)
 | 
			
		||||
def _add_object_ball(
 | 
			
		||||
    worldbody: ET.Element, i: str, j: str, x: float, y: float, size: float
 | 
			
		||||
) -> None:
 | 
			
		||||
    body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0")
 | 
			
		||||
    mass = 0.0001 * (size ** 3)
 | 
			
		||||
    ET.SubElement(
 | 
			
		||||
        body,
 | 
			
		||||
        "geom",
 | 
			
		||||
        type="sphere",
 | 
			
		||||
        name=f"objball_{i}_{j}_geom",
 | 
			
		||||
        size="1.0",  # Radius
 | 
			
		||||
        pos="0.0 0.0 1.0",  # Z = 1.0 so that this ball can move!!
 | 
			
		||||
        rgba="0.1 0.1 0.7 1",
 | 
			
		||||
        size=f"{size}",  # Radius
 | 
			
		||||
        pos=f"0.0 0.0 {size}",  # Z = size so that this ball can move!!
 | 
			
		||||
        rgba=maze_task.BLUE.rgba_str(),
 | 
			
		||||
        contype="1",
 | 
			
		||||
        conaffinity="1",
 | 
			
		||||
        solimp="0.9 0.99 0.001",
 | 
			
		||||
        mass="0.0001",
 | 
			
		||||
        mass=f"{mass}",
 | 
			
		||||
    )
 | 
			
		||||
    ET.SubElement(
 | 
			
		||||
        body,
 | 
			
		||||
 | 
			
		||||
@ -14,6 +14,9 @@ class Rgb(NamedTuple):
 | 
			
		||||
    green: float
 | 
			
		||||
    blue: float
 | 
			
		||||
 | 
			
		||||
    def rgba_str(self) -> str:
 | 
			
		||||
        return f"{self.red} {self.green} {self.blue} 1"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
RED = Rgb(0.7, 0.1, 0.1)
 | 
			
		||||
GREEN = Rgb(0.1, 0.7, 0.1)
 | 
			
		||||
@ -37,10 +40,6 @@ class MazeGoal:
 | 
			
		||||
        self.threshold = threshold
 | 
			
		||||
        self.custom_size = custom_size
 | 
			
		||||
 | 
			
		||||
    def rbga_str(self) -> str:
 | 
			
		||||
        r, g, b = self.rgb
 | 
			
		||||
        return f"{r} {g} {b} 1"
 | 
			
		||||
 | 
			
		||||
    def neighbor(self, obs: np.ndarray) -> float:
 | 
			
		||||
        return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
 | 
			
		||||
 | 
			
		||||
@ -49,9 +48,9 @@ class MazeGoal:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Scaling(NamedTuple):
 | 
			
		||||
    ant: float
 | 
			
		||||
    point: float
 | 
			
		||||
    swimmer: float
 | 
			
		||||
    ant: Optional[float]
 | 
			
		||||
    point: Optional[float]
 | 
			
		||||
    swimmer: Optional[float]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MazeTask(ABC):
 | 
			
		||||
@ -59,10 +58,14 @@ class MazeTask(ABC):
 | 
			
		||||
    PENALTY: Optional[float] = None
 | 
			
		||||
    MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
 | 
			
		||||
    INNER_REWARD_SCALING: float = 0.01
 | 
			
		||||
    TOP_DOWN_VIEW: bool = False
 | 
			
		||||
    # For Fall/Push/BlockMaze
 | 
			
		||||
    OBSERVE_BLOCKS: bool = False
 | 
			
		||||
    # For Billiard
 | 
			
		||||
    OBSERVE_BALLS: bool = False
 | 
			
		||||
    OBJECT_BALL_SIZE: float = 1.0
 | 
			
		||||
    # Unused now
 | 
			
		||||
    PUT_SPIN_NEAR_AGENT: bool = False
 | 
			
		||||
    TOP_DOWN_VIEW: bool = False
 | 
			
		||||
 | 
			
		||||
    def __init__(self, scale: float) -> None:
 | 
			
		||||
        self.goals = []
 | 
			
		||||
@ -143,7 +146,7 @@ class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GoalRewardPush(GoalRewardUMaze):
 | 
			
		||||
    TOP_DOWN_VIEW = True
 | 
			
		||||
    OBSERVE_BLOCKS: bool = True
 | 
			
		||||
 | 
			
		||||
    def __init__(self, scale: float) -> None:
 | 
			
		||||
        super().__init__(scale)
 | 
			
		||||
@ -166,7 +169,7 @@ class DistRewardPush(GoalRewardPush, DistRewardMixIn):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GoalRewardFall(GoalRewardUMaze):
 | 
			
		||||
    TOP_DOWN_VIEW = True
 | 
			
		||||
    OBSERVE_BLOCKS: bool = True
 | 
			
		||||
 | 
			
		||||
    def __init__(self, scale: float) -> None:
 | 
			
		||||
        super().__init__(scale)
 | 
			
		||||
@ -195,9 +198,9 @@ class GoalReward2Rooms(MazeTask):
 | 
			
		||||
    PENALTY: float = -0.0001
 | 
			
		||||
    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
 | 
			
		||||
 | 
			
		||||
    def __init__(self, scale: float) -> None:
 | 
			
		||||
    def __init__(self, scale: float, goal: Tuple[int, int] = (4.0, -2.0)) -> None:
 | 
			
		||||
        super().__init__(scale)
 | 
			
		||||
        self.goals = [MazeGoal(np.array([0.0, 4.0 * scale]))]
 | 
			
		||||
        self.goals = [MazeGoal(np.array(goal) * scale)]
 | 
			
		||||
 | 
			
		||||
    def reward(self, obs: np.ndarray) -> float:
 | 
			
		||||
        for goal in self.goals:
 | 
			
		||||
@ -210,10 +213,10 @@ class GoalReward2Rooms(MazeTask):
 | 
			
		||||
        E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
 | 
			
		||||
        return [
 | 
			
		||||
            [B, B, B, B, B, B, B, B],
 | 
			
		||||
            [B, R, E, E, E, E, E, B],
 | 
			
		||||
            [B, E, E, E, E, E, E, B],
 | 
			
		||||
            [B, B, B, B, B, E, B, B],
 | 
			
		||||
            [B, E, E, E, E, E, E, B],
 | 
			
		||||
            [B, E, E, E, B, E, E, B],
 | 
			
		||||
            [B, E, E, E, B, E, E, B],
 | 
			
		||||
            [B, E, R, E, B, E, E, B],
 | 
			
		||||
            [B, E, E, E, B, E, E, B],
 | 
			
		||||
            [B, E, E, E, E, E, E, B],
 | 
			
		||||
            [B, B, B, B, B, B, B, B],
 | 
			
		||||
        ]
 | 
			
		||||
@ -224,9 +227,17 @@ class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SubGoal2Rooms(GoalReward2Rooms):
 | 
			
		||||
    def __init__(self, scale: float) -> None:
 | 
			
		||||
        super().__init__(scale)
 | 
			
		||||
        self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))
 | 
			
		||||
    def __init__(
 | 
			
		||||
        self,
 | 
			
		||||
        scale: float,
 | 
			
		||||
        primary_goal: Tuple[float, float] = (4.0, -2.0),
 | 
			
		||||
        subgoals: List[Tuple[float, float]] = [(1.0, -2.0), (-1.0, 2.0)],
 | 
			
		||||
    ) -> None:
 | 
			
		||||
        super().__init__(scale, primary_goal)
 | 
			
		||||
        for subgoal in subgoals:
 | 
			
		||||
            self.goals.append(
 | 
			
		||||
                MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GoalReward4Rooms(MazeTask):
 | 
			
		||||
@ -305,7 +316,21 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SubGoalTRoom(GoalRewardTRoom):
 | 
			
		||||
    def __init__(
 | 
			
		||||
        self,
 | 
			
		||||
        scale: float,
 | 
			
		||||
        primary_goal: Tuple[float, float] = (2.0, -3.0),
 | 
			
		||||
        subgoal: Tuple[float, float] = (-2.0, -3.0),
 | 
			
		||||
    ) -> None:
 | 
			
		||||
        super().__init__(scale, primary_goal)
 | 
			
		||||
        self.goals.append(
 | 
			
		||||
            MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GoalRewardBlockMaze(GoalRewardUMaze):
 | 
			
		||||
    MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
 | 
			
		||||
    OBSERVE_BLOCKS: bool = True
 | 
			
		||||
 | 
			
		||||
    def __init__(self, scale: float) -> None:
 | 
			
		||||
@ -333,30 +358,46 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
 | 
			
		||||
class GoalRewardBilliard(MazeTask):
 | 
			
		||||
    REWARD_THRESHOLD: float = 0.9
 | 
			
		||||
    PENALTY: float = -0.0001
 | 
			
		||||
    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
 | 
			
		||||
    MAZE_SIZE_SCALING: Scaling = Scaling(None, 3.0, None)
 | 
			
		||||
    OBSERVE_BALLS: bool = True
 | 
			
		||||
    GOAL_SIZE: float = 0.3
 | 
			
		||||
 | 
			
		||||
    def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None:
 | 
			
		||||
    def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
 | 
			
		||||
        super().__init__(scale)
 | 
			
		||||
        goal = np.array(goal) * scale
 | 
			
		||||
        self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)]
 | 
			
		||||
        self.goals.append(
 | 
			
		||||
            MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def _threshold(self) -> float:
 | 
			
		||||
        return self.OBJECT_BALL_SIZE + self.GOAL_SIZE
 | 
			
		||||
 | 
			
		||||
    def reward(self, obs: np.ndarray) -> float:
 | 
			
		||||
        return 1.0 if self.termination(obs) else self.PENALTY
 | 
			
		||||
        object_pos = obs[3:6]
 | 
			
		||||
        for goal in self.goals:
 | 
			
		||||
            if goal.neighbor(object_pos):
 | 
			
		||||
                return goal.reward_scale
 | 
			
		||||
        return self.PENALTY
 | 
			
		||||
 | 
			
		||||
    def termination(self, obs: np.ndarray) -> bool:
 | 
			
		||||
        return super().termination(obs[3:6])
 | 
			
		||||
        object_pos = obs[3:6]
 | 
			
		||||
        for goal in self.goals:
 | 
			
		||||
            if goal.neighbor(object_pos):
 | 
			
		||||
                return True
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def create_maze() -> List[List[MazeCell]]:
 | 
			
		||||
        E, B = MazeCell.EMPTY, MazeCell.BLOCK
 | 
			
		||||
        R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
 | 
			
		||||
        return [
 | 
			
		||||
            [B, B, B, B, B],
 | 
			
		||||
            [B, E, E, E, B],
 | 
			
		||||
            [B, E, M, E, B],
 | 
			
		||||
            [B, E, R, E, B],
 | 
			
		||||
            [B, B, B, B, B],
 | 
			
		||||
            [B, B, B, B, B, B, B],
 | 
			
		||||
            [B, E, E, E, E, E, B],
 | 
			
		||||
            [B, E, E, E, E, E, B],
 | 
			
		||||
            [B, E, E, M, E, E, B],
 | 
			
		||||
            [B, E, E, R, E, E, B],
 | 
			
		||||
            [B, E, E, E, E, E, B],
 | 
			
		||||
            [B, B, B, B, B, B, B],
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -365,6 +406,50 @@ class DistRewardBilliard(GoalRewardBilliard):
 | 
			
		||||
        return -self.goals[0].euc_dist(obs[3:6]) / self.scale
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SubGoalBilliard(GoalRewardBilliard):
 | 
			
		||||
    def __init__(
 | 
			
		||||
        self,
 | 
			
		||||
        scale: float,
 | 
			
		||||
        primary_goal: Tuple[float, float] = (2.0, -3.0),
 | 
			
		||||
        subgoals: List[Tuple[float, float]] = [(-2.0, -3.0), (-2.0, 1.0), (2.0, 1.0)],
 | 
			
		||||
    ) -> None:
 | 
			
		||||
        super().__init__(scale, primary_goal)
 | 
			
		||||
        for subgoal in subgoals:
 | 
			
		||||
            self.goals.append(
 | 
			
		||||
                MazeGoal(
 | 
			
		||||
                    np.array(subgoal) * scale,
 | 
			
		||||
                    reward_scale=0.5,
 | 
			
		||||
                    rgb=GREEN,
 | 
			
		||||
                    threshold=self._threshold(),
 | 
			
		||||
                    custom_size=self.GOAL_SIZE,
 | 
			
		||||
                )
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BanditBilliard(SubGoalBilliard):
 | 
			
		||||
    def __init__(
 | 
			
		||||
        self,
 | 
			
		||||
        scale: float,
 | 
			
		||||
        primary_goal: Tuple[float, float] = (4.0, -2.0),
 | 
			
		||||
        subgoals: List[Tuple[float, float]] = [(4.0, 2.0)],
 | 
			
		||||
    ) -> None:
 | 
			
		||||
        super().__init__(scale, primary_goal, subgoals)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def create_maze() -> List[List[MazeCell]]:
 | 
			
		||||
        E, B = MazeCell.EMPTY, MazeCell.BLOCK
 | 
			
		||||
        R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
 | 
			
		||||
        return [
 | 
			
		||||
            [B, B, B, B, B, B, B],
 | 
			
		||||
            [B, E, E, B, B, E, B],
 | 
			
		||||
            [B, E, E, E, E, E, B],
 | 
			
		||||
            [B, R, M, E, B, B, B],
 | 
			
		||||
            [B, E, E, E, E, E, B],
 | 
			
		||||
            [B, E, E, E, E, E, B],
 | 
			
		||||
            [B, B, B, B, B, B, B],
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TaskRegistry:
 | 
			
		||||
    REGISTRY: Dict[str, List[Type[MazeTask]]] = {
 | 
			
		||||
        "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
 | 
			
		||||
@ -373,9 +458,14 @@ class TaskRegistry:
 | 
			
		||||
        "Fall": [DistRewardFall, GoalRewardFall],
 | 
			
		||||
        "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
 | 
			
		||||
        "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
 | 
			
		||||
        "TRoom": [DistRewardTRoom, GoalRewardTRoom],
 | 
			
		||||
        "TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
 | 
			
		||||
        "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
 | 
			
		||||
        "Billiard": [DistRewardBilliard, GoalRewardBilliard],
 | 
			
		||||
        "Billiard": [
 | 
			
		||||
            DistRewardBilliard,
 | 
			
		||||
            GoalRewardBilliard,
 | 
			
		||||
            SubGoalBilliard,
 | 
			
		||||
            BanditBilliard,
 | 
			
		||||
        ],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								screenshots/AntFall.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| 
		 After Width: | Height: | Size: 36 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								screenshots/AntPush.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| 
		 After Width: | Height: | Size: 55 KiB  | 
| 
		 Before Width: | Height: | Size: 93 KiB After Width: | Height: | Size: 30 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								screenshots/PointBilliard.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| 
		 After Width: | Height: | Size: 35 KiB  | 
| 
		 Before Width: | Height: | Size: 30 KiB  | 
| 
		 Before Width: | Height: | Size: 41 KiB  | 
| 
		 Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 28 KiB  | 
@ -36,6 +36,20 @@ def test_point_maze(maze_id):
 | 
			
		||||
            assert r < 0.0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("maze_id", ["2Rooms", "4Rooms", "Billiard"])
 | 
			
		||||
def test_subgoal_envs(maze_id):
 | 
			
		||||
    env = gym.make(f"Point{maze_id}-v2")
 | 
			
		||||
    s0 = env.reset()
 | 
			
		||||
    s, r, _, _ = env.step(env.action_space.sample())
 | 
			
		||||
    if not env.unwrapped.has_extended_obs:
 | 
			
		||||
        assert s0.shape == (7,)
 | 
			
		||||
        assert s.shape == (7,)
 | 
			
		||||
    elif env.unwrapped._observe_balls:
 | 
			
		||||
        assert s0.shape == (10,)
 | 
			
		||||
        assert s.shape == (10,)
 | 
			
		||||
    assert len(env.unwrapped._task.goals) > 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
 | 
			
		||||
def test_reacher_maze(maze_id):
 | 
			
		||||
    for inhibited in ["Fall", "Push", "Block", "Billiard"]:
 | 
			
		||||
 | 
			
		||||