16
README.md
@ -1,4 +1,5 @@
|
|||||||
# mujoco-maze
|
# mujoco-maze
|
||||||
|
[![Black](https://img.shields.io/badge/code%20style-black-000.svg)](https://github.com/psf/black)
|
||||||
|
|
||||||
Some maze environments for reinforcement learning(RL) using [mujoco-py] and
|
Some maze environments for reinforcement learning(RL) using [mujoco-py] and
|
||||||
[openai gym][gym].
|
[openai gym][gym].
|
||||||
@ -22,19 +23,26 @@ Thankfully, this project is based on the code from [rllab] and [tensorflow/mode
|
|||||||
|
|
||||||
- PointPush/AntPush
|
- PointPush/AntPush
|
||||||
|
|
||||||
![PointPush](./screenshots/PointPush.png)
|
![PointPush](./screenshots/AntPush.png)
|
||||||
- PointPush-v0/AntPush-v0 (Distance-based Reward)
|
- PointPush-v0/AntPush-v0 (Distance-based Reward)
|
||||||
- PointPush-v1/AntPush-v1 (Goal-based Reward)
|
- PointPush-v1/AntPush-v1 (Goal-based Reward)
|
||||||
|
|
||||||
- PointFall/AntFall
|
- PointFall/AntFall
|
||||||
|
|
||||||
![PointFall](./screenshots/PointFall.png)
|
![PointFall](./screenshots/AntFall.png)
|
||||||
- PointFall-v0/AntFall-v0 (Distance-based Reward)
|
- PointFall-v0/AntFall-v0 (Distance-based Reward)
|
||||||
- PointFall-v1/AntFall-v1 (Goal-based Reward)
|
- PointFall-v1/AntFall-v1 (Goal-based Reward)
|
||||||
|
|
||||||
|
- PointBilliard
|
||||||
|
|
||||||
|
![PointBilliard](./screenshots/PointBilliard.png)
|
||||||
|
- PointBilliard-v0 (Distance-based Reward)
|
||||||
|
- PointBilliard-v1 (Goal-based Reward)
|
||||||
|
- PointBilliard-v2 (Multiple Goals (0.5 pt or 1.0 pt))
|
||||||
|
|
||||||
## Warning
|
## Warning
|
||||||
This project has some other environments (e.g., billiard, reacher, and
|
This project has some other environments (e.g., reacher, and swimmer)
|
||||||
swimmer) but if they are not on README, they are work in progress and
|
but if they are not on README, they are work in progress and
|
||||||
not tested well.
|
not tested well.
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
@ -16,6 +16,8 @@ from mujoco_maze.swimmer import SwimmerEnv
|
|||||||
|
|
||||||
for maze_id in TaskRegistry.keys():
|
for maze_id in TaskRegistry.keys():
|
||||||
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
||||||
|
point_scale = task_cls.MAZE_SIZE_SCALING.point
|
||||||
|
if point_scale is not None:
|
||||||
# Point
|
# Point
|
||||||
gym.envs.register(
|
gym.envs.register(
|
||||||
id=f"Point{maze_id}-v{i}",
|
id=f"Point{maze_id}-v{i}",
|
||||||
@ -23,14 +25,15 @@ for maze_id in TaskRegistry.keys():
|
|||||||
kwargs=dict(
|
kwargs=dict(
|
||||||
model_cls=PointEnv,
|
model_cls=PointEnv,
|
||||||
maze_task=task_cls,
|
maze_task=task_cls,
|
||||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.point,
|
maze_size_scaling=point_scale,
|
||||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||||
),
|
),
|
||||||
max_episode_steps=1000,
|
max_episode_steps=1000,
|
||||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
)
|
)
|
||||||
if "Billiard" in maze_id:
|
|
||||||
continue
|
ant_scale = task_cls.MAZE_SIZE_SCALING.ant
|
||||||
|
if ant_scale is not None:
|
||||||
# Ant
|
# Ant
|
||||||
gym.envs.register(
|
gym.envs.register(
|
||||||
id=f"Ant{maze_id}-v{i}",
|
id=f"Ant{maze_id}-v{i}",
|
||||||
@ -38,20 +41,15 @@ for maze_id in TaskRegistry.keys():
|
|||||||
kwargs=dict(
|
kwargs=dict(
|
||||||
model_cls=AntEnv,
|
model_cls=AntEnv,
|
||||||
maze_task=task_cls,
|
maze_task=task_cls,
|
||||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
|
maze_size_scaling=ant_scale,
|
||||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||||
),
|
),
|
||||||
max_episode_steps=1000,
|
max_episode_steps=1000,
|
||||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
)
|
)
|
||||||
skip_swimmer = False
|
|
||||||
for inhibited in ["Fall", "Push", "Block"]:
|
|
||||||
if inhibited in maze_id:
|
|
||||||
skip_swimmer = True
|
|
||||||
|
|
||||||
if skip_swimmer:
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
swimmer_scale = task_cls.MAZE_SIZE_SCALING.swimmer
|
||||||
|
if swimmer_scale is not None:
|
||||||
# Reacher
|
# Reacher
|
||||||
gym.envs.register(
|
gym.envs.register(
|
||||||
id=f"Reacher{maze_id}-v{i}",
|
id=f"Reacher{maze_id}-v{i}",
|
||||||
@ -65,7 +63,6 @@ for maze_id in TaskRegistry.keys():
|
|||||||
max_episode_steps=1000,
|
max_episode_steps=1000,
|
||||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Swimmer
|
# Swimmer
|
||||||
gym.envs.register(
|
gym.envs.register(
|
||||||
id=f"Swimmer{maze_id}-v{i}",
|
id=f"Swimmer{maze_id}-v{i}",
|
||||||
|
@ -11,13 +11,13 @@
|
|||||||
<asset>
|
<asset>
|
||||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||||
<material name='geom' texture="texgeom" texuniform="true" />
|
<material name='geom' texture="texgeom" texuniform="true" />
|
||||||
</asset>
|
</asset>
|
||||||
<worldbody>
|
<worldbody>
|
||||||
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
|
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
|
||||||
<geom name='floor' pos='0 0 0' size='40 40 40' type='plane' conaffinity='1' rgba='0.8 0.9 0.8 1' condim='3' />
|
<geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
|
||||||
<body name="torso" pos="0 0 0.75">
|
<body name="torso" pos="0 0 0.75">
|
||||||
<geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
|
<geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
|
||||||
<joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />
|
<joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
<asset>
|
<asset>
|
||||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||||
<material name="MatPlane" texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5" />
|
<material name="MatPlane" texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5" />
|
||||||
<material name="geom" texture="texgeom" texuniform="true" />
|
<material name="geom" texture="texgeom" texuniform="true" />
|
||||||
</asset>
|
</asset>
|
||||||
@ -18,8 +18,8 @@
|
|||||||
<!-- ================= Point ================= /-->
|
<!-- ================= Point ================= /-->
|
||||||
<!-- Note that the solimp is modified from rllab to prevent the point from going through the wall /-->
|
<!-- Note that the solimp is modified from rllab to prevent the point from going through the wall /-->
|
||||||
<body name="torso" pos="0 0 0">
|
<body name="torso" pos="0 0 0">
|
||||||
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" />
|
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
|
||||||
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" />
|
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
|
||||||
<joint name="ballx" type="slide" axis="1 0 0" pos="0 0 0" />
|
<joint name="ballx" type="slide" axis="1 0 0" pos="0 0 0" />
|
||||||
<joint name="bally" type="slide" axis="0 1 0" pos="0 0 0" />
|
<joint name="bally" type="slide" axis="0 1 0" pos="0 0 0" />
|
||||||
<joint name="rot" type="hinge" axis="0 0 1" pos="0 0 0" limited="false" />
|
<joint name="rot" type="hinge" axis="0 0 1" pos="0 0 0" limited="false" />
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
<asset>
|
<asset>
|
||||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||||
<material name='geom' texture="texgeom" texuniform="true" />
|
<material name='geom' texture="texgeom" texuniform="true" />
|
||||||
</asset>
|
</asset>
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
<asset>
|
<asset>
|
||||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
|
||||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||||
<material name='geom' texture="texgeom" texuniform="true" />
|
<material name='geom' texture="texgeom" texuniform="true" />
|
||||||
</asset>
|
</asset>
|
||||||
|
@ -72,8 +72,9 @@ class MazeEnv(gym.Env):
|
|||||||
self._collision = maze_env_utils.CollisionDetector(
|
self._collision = maze_env_utils.CollisionDetector(
|
||||||
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
|
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
|
||||||
)
|
)
|
||||||
|
# Now all object balls have size=1.0
|
||||||
self._objball_collision = maze_env_utils.CollisionDetector(
|
self._objball_collision = maze_env_utils.CollisionDetector(
|
||||||
structure, size_scaling, torso_x, torso_y, 0.8,
|
structure, size_scaling, torso_x, torso_y, self._task.OBJECT_BALL_SIZE,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self._collision = None
|
self._collision = None
|
||||||
@ -145,7 +146,7 @@ class MazeEnv(gym.Env):
|
|||||||
elif struct.is_object_ball():
|
elif struct.is_object_ball():
|
||||||
# Movable Ball
|
# Movable Ball
|
||||||
self.object_balls.append(f"objball_{i}_{j}")
|
self.object_balls.append(f"objball_{i}_{j}")
|
||||||
_add_object_ball(worldbody, i, j, x, y)
|
_add_object_ball(worldbody, i, j, x, y, self._task.OBJECT_BALL_SIZE)
|
||||||
|
|
||||||
torso = tree.find(".//body[@name='torso']")
|
torso = tree.find(".//body[@name='torso']")
|
||||||
geoms = torso.findall(".//geom")
|
geoms = torso.findall(".//geom")
|
||||||
@ -166,7 +167,7 @@ class MazeEnv(gym.Env):
|
|||||||
name=f"goal_site{i}",
|
name=f"goal_site{i}",
|
||||||
pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
|
pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
|
||||||
size=f"{maze_size_scaling * 0.1}",
|
size=f"{maze_size_scaling * 0.1}",
|
||||||
rgba=goal.rbga_str(),
|
rgba=goal.rgb.rgba_str(),
|
||||||
)
|
)
|
||||||
|
|
||||||
_, file_path = tempfile.mkstemp(text=True, suffix=".xml")
|
_, file_path = tempfile.mkstemp(text=True, suffix=".xml")
|
||||||
@ -385,6 +386,7 @@ class MazeEnv(gym.Env):
|
|||||||
self.wrapped_env.set_xy(old_pos)
|
self.wrapped_env.set_xy(old_pos)
|
||||||
else:
|
else:
|
||||||
self.wrapped_env.set_xy(pos)
|
self.wrapped_env.set_xy(pos)
|
||||||
|
# Do the same check for object balls
|
||||||
for name, old, new in zip(self.object_balls, old_objballs, new_objballs):
|
for name, old, new in zip(self.object_balls, old_objballs, new_objballs):
|
||||||
collision = self._objball_collision.detect(old, new)
|
collision = self._objball_collision.detect(old, new)
|
||||||
if collision is not None:
|
if collision is not None:
|
||||||
@ -406,20 +408,23 @@ class MazeEnv(gym.Env):
|
|||||||
self.wrapped_env.close()
|
self.wrapped_env.close()
|
||||||
|
|
||||||
|
|
||||||
def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None:
|
def _add_object_ball(
|
||||||
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",)
|
worldbody: ET.Element, i: str, j: str, x: float, y: float, size: float
|
||||||
|
) -> None:
|
||||||
|
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0")
|
||||||
|
mass = 0.0001 * (size ** 3)
|
||||||
ET.SubElement(
|
ET.SubElement(
|
||||||
body,
|
body,
|
||||||
"geom",
|
"geom",
|
||||||
type="sphere",
|
type="sphere",
|
||||||
name=f"objball_{i}_{j}_geom",
|
name=f"objball_{i}_{j}_geom",
|
||||||
size="1.0", # Radius
|
size=f"{size}", # Radius
|
||||||
pos="0.0 0.0 1.0", # Z = 1.0 so that this ball can move!!
|
pos=f"0.0 0.0 {size}", # Z = size so that this ball can move!!
|
||||||
rgba="0.1 0.1 0.7 1",
|
rgba=maze_task.BLUE.rgba_str(),
|
||||||
contype="1",
|
contype="1",
|
||||||
conaffinity="1",
|
conaffinity="1",
|
||||||
solimp="0.9 0.99 0.001",
|
solimp="0.9 0.99 0.001",
|
||||||
mass="0.0001",
|
mass=f"{mass}",
|
||||||
)
|
)
|
||||||
ET.SubElement(
|
ET.SubElement(
|
||||||
body,
|
body,
|
||||||
|
@ -14,6 +14,9 @@ class Rgb(NamedTuple):
|
|||||||
green: float
|
green: float
|
||||||
blue: float
|
blue: float
|
||||||
|
|
||||||
|
def rgba_str(self) -> str:
|
||||||
|
return f"{self.red} {self.green} {self.blue} 1"
|
||||||
|
|
||||||
|
|
||||||
RED = Rgb(0.7, 0.1, 0.1)
|
RED = Rgb(0.7, 0.1, 0.1)
|
||||||
GREEN = Rgb(0.1, 0.7, 0.1)
|
GREEN = Rgb(0.1, 0.7, 0.1)
|
||||||
@ -37,10 +40,6 @@ class MazeGoal:
|
|||||||
self.threshold = threshold
|
self.threshold = threshold
|
||||||
self.custom_size = custom_size
|
self.custom_size = custom_size
|
||||||
|
|
||||||
def rbga_str(self) -> str:
|
|
||||||
r, g, b = self.rgb
|
|
||||||
return f"{r} {g} {b} 1"
|
|
||||||
|
|
||||||
def neighbor(self, obs: np.ndarray) -> float:
|
def neighbor(self, obs: np.ndarray) -> float:
|
||||||
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
|
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
|
||||||
|
|
||||||
@ -49,9 +48,9 @@ class MazeGoal:
|
|||||||
|
|
||||||
|
|
||||||
class Scaling(NamedTuple):
|
class Scaling(NamedTuple):
|
||||||
ant: float
|
ant: Optional[float]
|
||||||
point: float
|
point: Optional[float]
|
||||||
swimmer: float
|
swimmer: Optional[float]
|
||||||
|
|
||||||
|
|
||||||
class MazeTask(ABC):
|
class MazeTask(ABC):
|
||||||
@ -59,10 +58,14 @@ class MazeTask(ABC):
|
|||||||
PENALTY: Optional[float] = None
|
PENALTY: Optional[float] = None
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
|
||||||
INNER_REWARD_SCALING: float = 0.01
|
INNER_REWARD_SCALING: float = 0.01
|
||||||
TOP_DOWN_VIEW: bool = False
|
# For Fall/Push/BlockMaze
|
||||||
OBSERVE_BLOCKS: bool = False
|
OBSERVE_BLOCKS: bool = False
|
||||||
|
# For Billiard
|
||||||
OBSERVE_BALLS: bool = False
|
OBSERVE_BALLS: bool = False
|
||||||
|
OBJECT_BALL_SIZE: float = 1.0
|
||||||
|
# Unused now
|
||||||
PUT_SPIN_NEAR_AGENT: bool = False
|
PUT_SPIN_NEAR_AGENT: bool = False
|
||||||
|
TOP_DOWN_VIEW: bool = False
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
self.goals = []
|
self.goals = []
|
||||||
@ -143,7 +146,7 @@ class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
|
|||||||
|
|
||||||
|
|
||||||
class GoalRewardPush(GoalRewardUMaze):
|
class GoalRewardPush(GoalRewardUMaze):
|
||||||
TOP_DOWN_VIEW = True
|
OBSERVE_BLOCKS: bool = True
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
@ -166,7 +169,7 @@ class DistRewardPush(GoalRewardPush, DistRewardMixIn):
|
|||||||
|
|
||||||
|
|
||||||
class GoalRewardFall(GoalRewardUMaze):
|
class GoalRewardFall(GoalRewardUMaze):
|
||||||
TOP_DOWN_VIEW = True
|
OBSERVE_BLOCKS: bool = True
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
@ -195,9 +198,9 @@ class GoalReward2Rooms(MazeTask):
|
|||||||
PENALTY: float = -0.0001
|
PENALTY: float = -0.0001
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float, goal: Tuple[int, int] = (4.0, -2.0)) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
self.goals = [MazeGoal(np.array([0.0, 4.0 * scale]))]
|
self.goals = [MazeGoal(np.array(goal) * scale)]
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
def reward(self, obs: np.ndarray) -> float:
|
||||||
for goal in self.goals:
|
for goal in self.goals:
|
||||||
@ -210,10 +213,10 @@ class GoalReward2Rooms(MazeTask):
|
|||||||
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
|
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
|
||||||
return [
|
return [
|
||||||
[B, B, B, B, B, B, B, B],
|
[B, B, B, B, B, B, B, B],
|
||||||
[B, R, E, E, E, E, E, B],
|
[B, E, E, E, B, E, E, B],
|
||||||
[B, E, E, E, E, E, E, B],
|
[B, E, E, E, B, E, E, B],
|
||||||
[B, B, B, B, B, E, B, B],
|
[B, E, R, E, B, E, E, B],
|
||||||
[B, E, E, E, E, E, E, B],
|
[B, E, E, E, B, E, E, B],
|
||||||
[B, E, E, E, E, E, E, B],
|
[B, E, E, E, E, E, E, B],
|
||||||
[B, B, B, B, B, B, B, B],
|
[B, B, B, B, B, B, B, B],
|
||||||
]
|
]
|
||||||
@ -224,9 +227,17 @@ class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):
|
|||||||
|
|
||||||
|
|
||||||
class SubGoal2Rooms(GoalReward2Rooms):
|
class SubGoal2Rooms(GoalReward2Rooms):
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(
|
||||||
super().__init__(scale)
|
self,
|
||||||
self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))
|
scale: float,
|
||||||
|
primary_goal: Tuple[float, float] = (4.0, -2.0),
|
||||||
|
subgoals: List[Tuple[float, float]] = [(1.0, -2.0), (-1.0, 2.0)],
|
||||||
|
) -> None:
|
||||||
|
super().__init__(scale, primary_goal)
|
||||||
|
for subgoal in subgoals:
|
||||||
|
self.goals.append(
|
||||||
|
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class GoalReward4Rooms(MazeTask):
|
class GoalReward4Rooms(MazeTask):
|
||||||
@ -305,7 +316,21 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class SubGoalTRoom(GoalRewardTRoom):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
scale: float,
|
||||||
|
primary_goal: Tuple[float, float] = (2.0, -3.0),
|
||||||
|
subgoal: Tuple[float, float] = (-2.0, -3.0),
|
||||||
|
) -> None:
|
||||||
|
super().__init__(scale, primary_goal)
|
||||||
|
self.goals.append(
|
||||||
|
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class GoalRewardBlockMaze(GoalRewardUMaze):
|
class GoalRewardBlockMaze(GoalRewardUMaze):
|
||||||
|
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
|
||||||
OBSERVE_BLOCKS: bool = True
|
OBSERVE_BLOCKS: bool = True
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
@ -333,30 +358,46 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
|
|||||||
class GoalRewardBilliard(MazeTask):
|
class GoalRewardBilliard(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
PENALTY: float = -0.0001
|
PENALTY: float = -0.0001
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(None, 3.0, None)
|
||||||
OBSERVE_BALLS: bool = True
|
OBSERVE_BALLS: bool = True
|
||||||
|
GOAL_SIZE: float = 0.3
|
||||||
|
|
||||||
def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None:
|
def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
goal = np.array(goal) * scale
|
goal = np.array(goal) * scale
|
||||||
self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)]
|
self.goals.append(
|
||||||
|
MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)
|
||||||
|
)
|
||||||
|
|
||||||
|
def _threshold(self) -> float:
|
||||||
|
return self.OBJECT_BALL_SIZE + self.GOAL_SIZE
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
def reward(self, obs: np.ndarray) -> float:
|
||||||
return 1.0 if self.termination(obs) else self.PENALTY
|
object_pos = obs[3:6]
|
||||||
|
for goal in self.goals:
|
||||||
|
if goal.neighbor(object_pos):
|
||||||
|
return goal.reward_scale
|
||||||
|
return self.PENALTY
|
||||||
|
|
||||||
def termination(self, obs: np.ndarray) -> bool:
|
def termination(self, obs: np.ndarray) -> bool:
|
||||||
return super().termination(obs[3:6])
|
object_pos = obs[3:6]
|
||||||
|
for goal in self.goals:
|
||||||
|
if goal.neighbor(object_pos):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_maze() -> List[List[MazeCell]]:
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
||||||
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
|
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
|
||||||
return [
|
return [
|
||||||
[B, B, B, B, B],
|
[B, B, B, B, B, B, B],
|
||||||
[B, E, E, E, B],
|
[B, E, E, E, E, E, B],
|
||||||
[B, E, M, E, B],
|
[B, E, E, E, E, E, B],
|
||||||
[B, E, R, E, B],
|
[B, E, E, M, E, E, B],
|
||||||
[B, B, B, B, B],
|
[B, E, E, R, E, E, B],
|
||||||
|
[B, E, E, E, E, E, B],
|
||||||
|
[B, B, B, B, B, B, B],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -365,6 +406,50 @@ class DistRewardBilliard(GoalRewardBilliard):
|
|||||||
return -self.goals[0].euc_dist(obs[3:6]) / self.scale
|
return -self.goals[0].euc_dist(obs[3:6]) / self.scale
|
||||||
|
|
||||||
|
|
||||||
|
class SubGoalBilliard(GoalRewardBilliard):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
scale: float,
|
||||||
|
primary_goal: Tuple[float, float] = (2.0, -3.0),
|
||||||
|
subgoals: List[Tuple[float, float]] = [(-2.0, -3.0), (-2.0, 1.0), (2.0, 1.0)],
|
||||||
|
) -> None:
|
||||||
|
super().__init__(scale, primary_goal)
|
||||||
|
for subgoal in subgoals:
|
||||||
|
self.goals.append(
|
||||||
|
MazeGoal(
|
||||||
|
np.array(subgoal) * scale,
|
||||||
|
reward_scale=0.5,
|
||||||
|
rgb=GREEN,
|
||||||
|
threshold=self._threshold(),
|
||||||
|
custom_size=self.GOAL_SIZE,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BanditBilliard(SubGoalBilliard):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
scale: float,
|
||||||
|
primary_goal: Tuple[float, float] = (4.0, -2.0),
|
||||||
|
subgoals: List[Tuple[float, float]] = [(4.0, 2.0)],
|
||||||
|
) -> None:
|
||||||
|
super().__init__(scale, primary_goal, subgoals)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
|
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
||||||
|
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
|
||||||
|
return [
|
||||||
|
[B, B, B, B, B, B, B],
|
||||||
|
[B, E, E, B, B, E, B],
|
||||||
|
[B, E, E, E, E, E, B],
|
||||||
|
[B, R, M, E, B, B, B],
|
||||||
|
[B, E, E, E, E, E, B],
|
||||||
|
[B, E, E, E, E, E, B],
|
||||||
|
[B, B, B, B, B, B, B],
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class TaskRegistry:
|
class TaskRegistry:
|
||||||
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
||||||
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
|
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
|
||||||
@ -373,9 +458,14 @@ class TaskRegistry:
|
|||||||
"Fall": [DistRewardFall, GoalRewardFall],
|
"Fall": [DistRewardFall, GoalRewardFall],
|
||||||
"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
|
"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
|
||||||
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
||||||
"TRoom": [DistRewardTRoom, GoalRewardTRoom],
|
"TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
|
||||||
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
|
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
|
||||||
"Billiard": [DistRewardBilliard, GoalRewardBilliard],
|
"Billiard": [
|
||||||
|
DistRewardBilliard,
|
||||||
|
GoalRewardBilliard,
|
||||||
|
SubGoalBilliard,
|
||||||
|
BanditBilliard,
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
BIN
screenshots/AntFall.png
Normal file
After Width: | Height: | Size: 36 KiB |
BIN
screenshots/AntPush.png
Normal file
After Width: | Height: | Size: 55 KiB |
Before Width: | Height: | Size: 93 KiB After Width: | Height: | Size: 30 KiB |
BIN
screenshots/PointBilliard.png
Normal file
After Width: | Height: | Size: 35 KiB |
Before Width: | Height: | Size: 30 KiB |
Before Width: | Height: | Size: 41 KiB |
Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 28 KiB |
@ -36,6 +36,20 @@ def test_point_maze(maze_id):
|
|||||||
assert r < 0.0
|
assert r < 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("maze_id", ["2Rooms", "4Rooms", "Billiard"])
|
||||||
|
def test_subgoal_envs(maze_id):
|
||||||
|
env = gym.make(f"Point{maze_id}-v2")
|
||||||
|
s0 = env.reset()
|
||||||
|
s, r, _, _ = env.step(env.action_space.sample())
|
||||||
|
if not env.unwrapped.has_extended_obs:
|
||||||
|
assert s0.shape == (7,)
|
||||||
|
assert s.shape == (7,)
|
||||||
|
elif env.unwrapped._observe_balls:
|
||||||
|
assert s0.shape == (10,)
|
||||||
|
assert s.shape == (10,)
|
||||||
|
assert len(env.unwrapped._task.goals) > 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
||||||
def test_reacher_maze(maze_id):
|
def test_reacher_maze(maze_id):
|
||||||
for inhibited in ["Fall", "Push", "Block", "Billiard"]:
|
for inhibited in ["Fall", "Push", "Block", "Billiard"]:
|
||||||
|