16
README.md
@ -1,4 +1,5 @@
|
||||
# mujoco-maze
|
||||
[![Black](https://img.shields.io/badge/code%20style-black-000.svg)](https://github.com/psf/black)
|
||||
|
||||
Some maze environments for reinforcement learning(RL) using [mujoco-py] and
|
||||
[openai gym][gym].
|
||||
@ -22,19 +23,26 @@ Thankfully, this project is based on the code from [rllab] and [tensorflow/mode
|
||||
|
||||
- PointPush/AntPush
|
||||
|
||||
![PointPush](./screenshots/PointPush.png)
|
||||
![PointPush](./screenshots/AntPush.png)
|
||||
- PointPush-v0/AntPush-v0 (Distance-based Reward)
|
||||
- PointPush-v1/AntPush-v1 (Goal-based Reward)
|
||||
|
||||
- PointFall/AntFall
|
||||
|
||||
![PointFall](./screenshots/PointFall.png)
|
||||
![PointFall](./screenshots/AntFall.png)
|
||||
- PointFall-v0/AntFall-v0 (Distance-based Reward)
|
||||
- PointFall-v1/AntFall-v1 (Goal-based Reward)
|
||||
|
||||
- PointBilliard
|
||||
|
||||
![PointBilliard](./screenshots/PointBilliard.png)
|
||||
- PointBilliard-v0 (Distance-based Reward)
|
||||
- PointBilliard-v1 (Goal-based Reward)
|
||||
- PointBilliard-v2 (Multiple Goals (0.5 pt or 1.0 pt))
|
||||
|
||||
## Warning
|
||||
This project has some other environments (e.g., billiard, reacher, and
|
||||
swimmer) but if they are not on README, they are work in progress and
|
||||
This project has some other environments (e.g., reacher, and swimmer)
|
||||
but if they are not on README, they are work in progress and
|
||||
not tested well.
|
||||
|
||||
## License
|
||||
|
@ -16,69 +16,66 @@ from mujoco_maze.swimmer import SwimmerEnv
|
||||
|
||||
for maze_id in TaskRegistry.keys():
|
||||
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
||||
# Point
|
||||
gym.envs.register(
|
||||
id=f"Point{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=PointEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.point,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
if "Billiard" in maze_id:
|
||||
continue
|
||||
# Ant
|
||||
gym.envs.register(
|
||||
id=f"Ant{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=AntEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
skip_swimmer = False
|
||||
for inhibited in ["Fall", "Push", "Block"]:
|
||||
if inhibited in maze_id:
|
||||
skip_swimmer = True
|
||||
point_scale = task_cls.MAZE_SIZE_SCALING.point
|
||||
if point_scale is not None:
|
||||
# Point
|
||||
gym.envs.register(
|
||||
id=f"Point{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=PointEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=point_scale,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
|
||||
if skip_swimmer:
|
||||
continue
|
||||
ant_scale = task_cls.MAZE_SIZE_SCALING.ant
|
||||
if ant_scale is not None:
|
||||
# Ant
|
||||
gym.envs.register(
|
||||
id=f"Ant{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=AntEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=ant_scale,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
|
||||
# Reacher
|
||||
gym.envs.register(
|
||||
id=f"Reacher{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=ReacherEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
|
||||
# Swimmer
|
||||
gym.envs.register(
|
||||
id=f"Swimmer{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=SwimmerEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
swimmer_scale = task_cls.MAZE_SIZE_SCALING.swimmer
|
||||
if swimmer_scale is not None:
|
||||
# Reacher
|
||||
gym.envs.register(
|
||||
id=f"Reacher{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=ReacherEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
# Swimmer
|
||||
gym.envs.register(
|
||||
id=f"Swimmer{maze_id}-v{i}",
|
||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||
kwargs=dict(
|
||||
model_cls=SwimmerEnv,
|
||||
maze_task=task_cls,
|
||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
|
||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||
),
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||
)
|
||||
|
||||
|
||||
__version__ = "0.1.0"
|
||||
|
@ -11,13 +11,13 @@
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||
<material name='geom' texture="texgeom" texuniform="true" />
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
|
||||
<geom name='floor' pos='0 0 0' size='40 40 40' type='plane' conaffinity='1' rgba='0.8 0.9 0.8 1' condim='3' />
|
||||
<geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
|
||||
<body name="torso" pos="0 0 0.75">
|
||||
<geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
|
||||
<joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />
|
||||
|
@ -8,7 +8,7 @@
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||
<material name="MatPlane" texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5" />
|
||||
<material name="geom" texture="texgeom" texuniform="true" />
|
||||
</asset>
|
||||
@ -18,8 +18,8 @@
|
||||
<!-- ================= Point ================= /-->
|
||||
<!-- Note that the solimp is modified from rllab to prevent the point from going through the wall /-->
|
||||
<body name="torso" pos="0 0 0">
|
||||
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" />
|
||||
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" />
|
||||
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
|
||||
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
|
||||
<joint name="ballx" type="slide" axis="1 0 0" pos="0 0 0" />
|
||||
<joint name="bally" type="slide" axis="0 1 0" pos="0 0 0" />
|
||||
<joint name="rot" type="hinge" axis="0 0 1" pos="0 0 0" limited="false" />
|
||||
|
@ -8,7 +8,7 @@
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||
<material name='geom' texture="texgeom" texuniform="true" />
|
||||
</asset>
|
||||
|
@ -8,7 +8,7 @@
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||
<material name='geom' texture="texgeom" texuniform="true" />
|
||||
</asset>
|
||||
|
@ -72,8 +72,9 @@ class MazeEnv(gym.Env):
|
||||
self._collision = maze_env_utils.CollisionDetector(
|
||||
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
|
||||
)
|
||||
# Now all object balls have size=1.0
|
||||
self._objball_collision = maze_env_utils.CollisionDetector(
|
||||
structure, size_scaling, torso_x, torso_y, 0.8,
|
||||
structure, size_scaling, torso_x, torso_y, self._task.OBJECT_BALL_SIZE,
|
||||
)
|
||||
else:
|
||||
self._collision = None
|
||||
@ -145,7 +146,7 @@ class MazeEnv(gym.Env):
|
||||
elif struct.is_object_ball():
|
||||
# Movable Ball
|
||||
self.object_balls.append(f"objball_{i}_{j}")
|
||||
_add_object_ball(worldbody, i, j, x, y)
|
||||
_add_object_ball(worldbody, i, j, x, y, self._task.OBJECT_BALL_SIZE)
|
||||
|
||||
torso = tree.find(".//body[@name='torso']")
|
||||
geoms = torso.findall(".//geom")
|
||||
@ -166,7 +167,7 @@ class MazeEnv(gym.Env):
|
||||
name=f"goal_site{i}",
|
||||
pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
|
||||
size=f"{maze_size_scaling * 0.1}",
|
||||
rgba=goal.rbga_str(),
|
||||
rgba=goal.rgb.rgba_str(),
|
||||
)
|
||||
|
||||
_, file_path = tempfile.mkstemp(text=True, suffix=".xml")
|
||||
@ -385,6 +386,7 @@ class MazeEnv(gym.Env):
|
||||
self.wrapped_env.set_xy(old_pos)
|
||||
else:
|
||||
self.wrapped_env.set_xy(pos)
|
||||
# Do the same check for object balls
|
||||
for name, old, new in zip(self.object_balls, old_objballs, new_objballs):
|
||||
collision = self._objball_collision.detect(old, new)
|
||||
if collision is not None:
|
||||
@ -406,20 +408,23 @@ class MazeEnv(gym.Env):
|
||||
self.wrapped_env.close()
|
||||
|
||||
|
||||
def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None:
|
||||
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",)
|
||||
def _add_object_ball(
|
||||
worldbody: ET.Element, i: str, j: str, x: float, y: float, size: float
|
||||
) -> None:
|
||||
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0")
|
||||
mass = 0.0001 * (size ** 3)
|
||||
ET.SubElement(
|
||||
body,
|
||||
"geom",
|
||||
type="sphere",
|
||||
name=f"objball_{i}_{j}_geom",
|
||||
size="1.0", # Radius
|
||||
pos="0.0 0.0 1.0", # Z = 1.0 so that this ball can move!!
|
||||
rgba="0.1 0.1 0.7 1",
|
||||
size=f"{size}", # Radius
|
||||
pos=f"0.0 0.0 {size}", # Z = size so that this ball can move!!
|
||||
rgba=maze_task.BLUE.rgba_str(),
|
||||
contype="1",
|
||||
conaffinity="1",
|
||||
solimp="0.9 0.99 0.001",
|
||||
mass="0.0001",
|
||||
mass=f"{mass}",
|
||||
)
|
||||
ET.SubElement(
|
||||
body,
|
||||
|
@ -14,6 +14,9 @@ class Rgb(NamedTuple):
|
||||
green: float
|
||||
blue: float
|
||||
|
||||
def rgba_str(self) -> str:
|
||||
return f"{self.red} {self.green} {self.blue} 1"
|
||||
|
||||
|
||||
RED = Rgb(0.7, 0.1, 0.1)
|
||||
GREEN = Rgb(0.1, 0.7, 0.1)
|
||||
@ -37,10 +40,6 @@ class MazeGoal:
|
||||
self.threshold = threshold
|
||||
self.custom_size = custom_size
|
||||
|
||||
def rbga_str(self) -> str:
|
||||
r, g, b = self.rgb
|
||||
return f"{r} {g} {b} 1"
|
||||
|
||||
def neighbor(self, obs: np.ndarray) -> float:
|
||||
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
|
||||
|
||||
@ -49,9 +48,9 @@ class MazeGoal:
|
||||
|
||||
|
||||
class Scaling(NamedTuple):
|
||||
ant: float
|
||||
point: float
|
||||
swimmer: float
|
||||
ant: Optional[float]
|
||||
point: Optional[float]
|
||||
swimmer: Optional[float]
|
||||
|
||||
|
||||
class MazeTask(ABC):
|
||||
@ -59,10 +58,14 @@ class MazeTask(ABC):
|
||||
PENALTY: Optional[float] = None
|
||||
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
|
||||
INNER_REWARD_SCALING: float = 0.01
|
||||
TOP_DOWN_VIEW: bool = False
|
||||
# For Fall/Push/BlockMaze
|
||||
OBSERVE_BLOCKS: bool = False
|
||||
# For Billiard
|
||||
OBSERVE_BALLS: bool = False
|
||||
OBJECT_BALL_SIZE: float = 1.0
|
||||
# Unused now
|
||||
PUT_SPIN_NEAR_AGENT: bool = False
|
||||
TOP_DOWN_VIEW: bool = False
|
||||
|
||||
def __init__(self, scale: float) -> None:
|
||||
self.goals = []
|
||||
@ -143,7 +146,7 @@ class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
|
||||
|
||||
|
||||
class GoalRewardPush(GoalRewardUMaze):
|
||||
TOP_DOWN_VIEW = True
|
||||
OBSERVE_BLOCKS: bool = True
|
||||
|
||||
def __init__(self, scale: float) -> None:
|
||||
super().__init__(scale)
|
||||
@ -166,7 +169,7 @@ class DistRewardPush(GoalRewardPush, DistRewardMixIn):
|
||||
|
||||
|
||||
class GoalRewardFall(GoalRewardUMaze):
|
||||
TOP_DOWN_VIEW = True
|
||||
OBSERVE_BLOCKS: bool = True
|
||||
|
||||
def __init__(self, scale: float) -> None:
|
||||
super().__init__(scale)
|
||||
@ -195,9 +198,9 @@ class GoalReward2Rooms(MazeTask):
|
||||
PENALTY: float = -0.0001
|
||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
||||
|
||||
def __init__(self, scale: float) -> None:
|
||||
def __init__(self, scale: float, goal: Tuple[int, int] = (4.0, -2.0)) -> None:
|
||||
super().__init__(scale)
|
||||
self.goals = [MazeGoal(np.array([0.0, 4.0 * scale]))]
|
||||
self.goals = [MazeGoal(np.array(goal) * scale)]
|
||||
|
||||
def reward(self, obs: np.ndarray) -> float:
|
||||
for goal in self.goals:
|
||||
@ -210,10 +213,10 @@ class GoalReward2Rooms(MazeTask):
|
||||
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
|
||||
return [
|
||||
[B, B, B, B, B, B, B, B],
|
||||
[B, R, E, E, E, E, E, B],
|
||||
[B, E, E, E, E, E, E, B],
|
||||
[B, B, B, B, B, E, B, B],
|
||||
[B, E, E, E, E, E, E, B],
|
||||
[B, E, E, E, B, E, E, B],
|
||||
[B, E, E, E, B, E, E, B],
|
||||
[B, E, R, E, B, E, E, B],
|
||||
[B, E, E, E, B, E, E, B],
|
||||
[B, E, E, E, E, E, E, B],
|
||||
[B, B, B, B, B, B, B, B],
|
||||
]
|
||||
@ -224,9 +227,17 @@ class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):
|
||||
|
||||
|
||||
class SubGoal2Rooms(GoalReward2Rooms):
|
||||
def __init__(self, scale: float) -> None:
|
||||
super().__init__(scale)
|
||||
self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))
|
||||
def __init__(
|
||||
self,
|
||||
scale: float,
|
||||
primary_goal: Tuple[float, float] = (4.0, -2.0),
|
||||
subgoals: List[Tuple[float, float]] = [(1.0, -2.0), (-1.0, 2.0)],
|
||||
) -> None:
|
||||
super().__init__(scale, primary_goal)
|
||||
for subgoal in subgoals:
|
||||
self.goals.append(
|
||||
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
|
||||
)
|
||||
|
||||
|
||||
class GoalReward4Rooms(MazeTask):
|
||||
@ -305,7 +316,21 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
|
||||
pass
|
||||
|
||||
|
||||
class SubGoalTRoom(GoalRewardTRoom):
|
||||
def __init__(
|
||||
self,
|
||||
scale: float,
|
||||
primary_goal: Tuple[float, float] = (2.0, -3.0),
|
||||
subgoal: Tuple[float, float] = (-2.0, -3.0),
|
||||
) -> None:
|
||||
super().__init__(scale, primary_goal)
|
||||
self.goals.append(
|
||||
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
|
||||
)
|
||||
|
||||
|
||||
class GoalRewardBlockMaze(GoalRewardUMaze):
|
||||
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
|
||||
OBSERVE_BLOCKS: bool = True
|
||||
|
||||
def __init__(self, scale: float) -> None:
|
||||
@ -333,30 +358,46 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
|
||||
class GoalRewardBilliard(MazeTask):
|
||||
REWARD_THRESHOLD: float = 0.9
|
||||
PENALTY: float = -0.0001
|
||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
|
||||
MAZE_SIZE_SCALING: Scaling = Scaling(None, 3.0, None)
|
||||
OBSERVE_BALLS: bool = True
|
||||
GOAL_SIZE: float = 0.3
|
||||
|
||||
def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None:
|
||||
def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
|
||||
super().__init__(scale)
|
||||
goal = np.array(goal) * scale
|
||||
self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)]
|
||||
self.goals.append(
|
||||
MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)
|
||||
)
|
||||
|
||||
def _threshold(self) -> float:
|
||||
return self.OBJECT_BALL_SIZE + self.GOAL_SIZE
|
||||
|
||||
def reward(self, obs: np.ndarray) -> float:
|
||||
return 1.0 if self.termination(obs) else self.PENALTY
|
||||
object_pos = obs[3:6]
|
||||
for goal in self.goals:
|
||||
if goal.neighbor(object_pos):
|
||||
return goal.reward_scale
|
||||
return self.PENALTY
|
||||
|
||||
def termination(self, obs: np.ndarray) -> bool:
|
||||
return super().termination(obs[3:6])
|
||||
object_pos = obs[3:6]
|
||||
for goal in self.goals:
|
||||
if goal.neighbor(object_pos):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def create_maze() -> List[List[MazeCell]]:
|
||||
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
||||
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
|
||||
return [
|
||||
[B, B, B, B, B],
|
||||
[B, E, E, E, B],
|
||||
[B, E, M, E, B],
|
||||
[B, E, R, E, B],
|
||||
[B, B, B, B, B],
|
||||
[B, B, B, B, B, B, B],
|
||||
[B, E, E, E, E, E, B],
|
||||
[B, E, E, E, E, E, B],
|
||||
[B, E, E, M, E, E, B],
|
||||
[B, E, E, R, E, E, B],
|
||||
[B, E, E, E, E, E, B],
|
||||
[B, B, B, B, B, B, B],
|
||||
]
|
||||
|
||||
|
||||
@ -365,6 +406,50 @@ class DistRewardBilliard(GoalRewardBilliard):
|
||||
return -self.goals[0].euc_dist(obs[3:6]) / self.scale
|
||||
|
||||
|
||||
class SubGoalBilliard(GoalRewardBilliard):
|
||||
def __init__(
|
||||
self,
|
||||
scale: float,
|
||||
primary_goal: Tuple[float, float] = (2.0, -3.0),
|
||||
subgoals: List[Tuple[float, float]] = [(-2.0, -3.0), (-2.0, 1.0), (2.0, 1.0)],
|
||||
) -> None:
|
||||
super().__init__(scale, primary_goal)
|
||||
for subgoal in subgoals:
|
||||
self.goals.append(
|
||||
MazeGoal(
|
||||
np.array(subgoal) * scale,
|
||||
reward_scale=0.5,
|
||||
rgb=GREEN,
|
||||
threshold=self._threshold(),
|
||||
custom_size=self.GOAL_SIZE,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class BanditBilliard(SubGoalBilliard):
|
||||
def __init__(
|
||||
self,
|
||||
scale: float,
|
||||
primary_goal: Tuple[float, float] = (4.0, -2.0),
|
||||
subgoals: List[Tuple[float, float]] = [(4.0, 2.0)],
|
||||
) -> None:
|
||||
super().__init__(scale, primary_goal, subgoals)
|
||||
|
||||
@staticmethod
|
||||
def create_maze() -> List[List[MazeCell]]:
|
||||
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
||||
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
|
||||
return [
|
||||
[B, B, B, B, B, B, B],
|
||||
[B, E, E, B, B, E, B],
|
||||
[B, E, E, E, E, E, B],
|
||||
[B, R, M, E, B, B, B],
|
||||
[B, E, E, E, E, E, B],
|
||||
[B, E, E, E, E, E, B],
|
||||
[B, B, B, B, B, B, B],
|
||||
]
|
||||
|
||||
|
||||
class TaskRegistry:
|
||||
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
||||
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
|
||||
@ -373,9 +458,14 @@ class TaskRegistry:
|
||||
"Fall": [DistRewardFall, GoalRewardFall],
|
||||
"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
|
||||
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
||||
"TRoom": [DistRewardTRoom, GoalRewardTRoom],
|
||||
"TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
|
||||
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
|
||||
"Billiard": [DistRewardBilliard, GoalRewardBilliard],
|
||||
"Billiard": [
|
||||
DistRewardBilliard,
|
||||
GoalRewardBilliard,
|
||||
SubGoalBilliard,
|
||||
BanditBilliard,
|
||||
],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
BIN
screenshots/AntFall.png
Normal file
After Width: | Height: | Size: 36 KiB |
BIN
screenshots/AntPush.png
Normal file
After Width: | Height: | Size: 55 KiB |
Before Width: | Height: | Size: 93 KiB After Width: | Height: | Size: 30 KiB |
BIN
screenshots/PointBilliard.png
Normal file
After Width: | Height: | Size: 35 KiB |
Before Width: | Height: | Size: 30 KiB |
Before Width: | Height: | Size: 41 KiB |
Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 28 KiB |
@ -36,6 +36,20 @@ def test_point_maze(maze_id):
|
||||
assert r < 0.0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("maze_id", ["2Rooms", "4Rooms", "Billiard"])
|
||||
def test_subgoal_envs(maze_id):
|
||||
env = gym.make(f"Point{maze_id}-v2")
|
||||
s0 = env.reset()
|
||||
s, r, _, _ = env.step(env.action_space.sample())
|
||||
if not env.unwrapped.has_extended_obs:
|
||||
assert s0.shape == (7,)
|
||||
assert s.shape == (7,)
|
||||
elif env.unwrapped._observe_balls:
|
||||
assert s0.shape == (10,)
|
||||
assert s.shape == (10,)
|
||||
assert len(env.unwrapped._task.goals) > 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
||||
def test_reacher_maze(maze_id):
|
||||
for inhibited in ["Fall", "Push", "Block", "Billiard"]:
|
||||
|