Merge pull request #3 from kngwyu/ball

Billiard
This commit is contained in:
Yuji Kanagawa 2020-10-05 14:10:36 +09:00 committed by GitHub
commit 2d1cfe81de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 226 additions and 112 deletions

View File

@ -1,4 +1,5 @@
# mujoco-maze # mujoco-maze
[![Black](https://img.shields.io/badge/code%20style-black-000.svg)](https://github.com/psf/black)
Some maze environments for reinforcement learning(RL) using [mujoco-py] and Some maze environments for reinforcement learning(RL) using [mujoco-py] and
[openai gym][gym]. [openai gym][gym].
@ -22,19 +23,26 @@ Thankfully, this project is based on the code from [rllab] and [tensorflow/mode
- PointPush/AntPush - PointPush/AntPush
![PointPush](./screenshots/PointPush.png) ![PointPush](./screenshots/AntPush.png)
- PointPush-v0/AntPush-v0 (Distance-based Reward) - PointPush-v0/AntPush-v0 (Distance-based Reward)
- PointPush-v1/AntPush-v1 (Goal-based Reward) - PointPush-v1/AntPush-v1 (Goal-based Reward)
- PointFall/AntFall - PointFall/AntFall
![PointFall](./screenshots/PointFall.png) ![PointFall](./screenshots/AntFall.png)
- PointFall-v0/AntFall-v0 (Distance-based Reward) - PointFall-v0/AntFall-v0 (Distance-based Reward)
- PointFall-v1/AntFall-v1 (Goal-based Reward) - PointFall-v1/AntFall-v1 (Goal-based Reward)
- PointBilliard
![PointBilliard](./screenshots/PointBilliard.png)
- PointBilliard-v0 (Distance-based Reward)
- PointBilliard-v1 (Goal-based Reward)
- PointBilliard-v2 (Multiple Goals (0.5 pt or 1.0 pt))
## Warning ## Warning
This project has some other environments (e.g., billiard, reacher, and This project has some other environments (e.g., reacher, and swimmer)
swimmer) but if they are not on README, they are work in progress and but if they are not on README, they are work in progress and
not tested well. not tested well.
## License ## License

View File

@ -16,69 +16,66 @@ from mujoco_maze.swimmer import SwimmerEnv
for maze_id in TaskRegistry.keys(): for maze_id in TaskRegistry.keys():
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)): for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
# Point point_scale = task_cls.MAZE_SIZE_SCALING.point
gym.envs.register( if point_scale is not None:
id=f"Point{maze_id}-v{i}", # Point
entry_point="mujoco_maze.maze_env:MazeEnv", gym.envs.register(
kwargs=dict( id=f"Point{maze_id}-v{i}",
model_cls=PointEnv, entry_point="mujoco_maze.maze_env:MazeEnv",
maze_task=task_cls, kwargs=dict(
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.point, model_cls=PointEnv,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING, maze_task=task_cls,
), maze_size_scaling=point_scale,
max_episode_steps=1000, inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
reward_threshold=task_cls.REWARD_THRESHOLD, ),
) max_episode_steps=1000,
if "Billiard" in maze_id: reward_threshold=task_cls.REWARD_THRESHOLD,
continue )
# Ant
gym.envs.register(
id=f"Ant{maze_id}-v{i}",
entry_point="mujoco_maze.maze_env:MazeEnv",
kwargs=dict(
model_cls=AntEnv,
maze_task=task_cls,
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
),
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
skip_swimmer = False
for inhibited in ["Fall", "Push", "Block"]:
if inhibited in maze_id:
skip_swimmer = True
if skip_swimmer: ant_scale = task_cls.MAZE_SIZE_SCALING.ant
continue if ant_scale is not None:
# Ant
gym.envs.register(
id=f"Ant{maze_id}-v{i}",
entry_point="mujoco_maze.maze_env:MazeEnv",
kwargs=dict(
model_cls=AntEnv,
maze_task=task_cls,
maze_size_scaling=ant_scale,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
),
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
# Reacher swimmer_scale = task_cls.MAZE_SIZE_SCALING.swimmer
gym.envs.register( if swimmer_scale is not None:
id=f"Reacher{maze_id}-v{i}", # Reacher
entry_point="mujoco_maze.maze_env:MazeEnv", gym.envs.register(
kwargs=dict( id=f"Reacher{maze_id}-v{i}",
model_cls=ReacherEnv, entry_point="mujoco_maze.maze_env:MazeEnv",
maze_task=task_cls, kwargs=dict(
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer, model_cls=ReacherEnv,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING, maze_task=task_cls,
), maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
max_episode_steps=1000, inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
reward_threshold=task_cls.REWARD_THRESHOLD, ),
) max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
# Swimmer )
gym.envs.register( # Swimmer
id=f"Swimmer{maze_id}-v{i}", gym.envs.register(
entry_point="mujoco_maze.maze_env:MazeEnv", id=f"Swimmer{maze_id}-v{i}",
kwargs=dict( entry_point="mujoco_maze.maze_env:MazeEnv",
model_cls=SwimmerEnv, kwargs=dict(
maze_task=task_cls, model_cls=SwimmerEnv,
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer, maze_task=task_cls,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING, maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
), inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
max_episode_steps=1000, ),
reward_threshold=task_cls.REWARD_THRESHOLD, max_episode_steps=1000,
) reward_threshold=task_cls.REWARD_THRESHOLD,
)
__version__ = "0.1.0" __version__ = "0.1.0"

View File

@ -11,13 +11,13 @@
<asset> <asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" /> <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" /> <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" /> <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" /> <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
<material name='geom' texture="texgeom" texuniform="true" /> <material name='geom' texture="texgeom" texuniform="true" />
</asset> </asset>
<worldbody> <worldbody>
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" /> <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
<geom name='floor' pos='0 0 0' size='40 40 40' type='plane' conaffinity='1' rgba='0.8 0.9 0.8 1' condim='3' /> <geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
<body name="torso" pos="0 0 0.75"> <body name="torso" pos="0 0 0.75">
<geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" /> <geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
<joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" /> <joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />

View File

@ -8,7 +8,7 @@
<asset> <asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" /> <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" /> <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" /> <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name="MatPlane" texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5" /> <material name="MatPlane" texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5" />
<material name="geom" texture="texgeom" texuniform="true" /> <material name="geom" texture="texgeom" texuniform="true" />
</asset> </asset>
@ -18,8 +18,8 @@
<!-- ================= Point ================= /--> <!-- ================= Point ================= /-->
<!-- Note that the solimp is modified from rllab to prevent the point from going through the wall /--> <!-- Note that the solimp is modified from rllab to prevent the point from going through the wall /-->
<body name="torso" pos="0 0 0"> <body name="torso" pos="0 0 0">
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" /> <geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" /> <geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
<joint name="ballx" type="slide" axis="1 0 0" pos="0 0 0" /> <joint name="ballx" type="slide" axis="1 0 0" pos="0 0 0" />
<joint name="bally" type="slide" axis="0 1 0" pos="0 0 0" /> <joint name="bally" type="slide" axis="0 1 0" pos="0 0 0" />
<joint name="rot" type="hinge" axis="0 0 1" pos="0 0 0" limited="false" /> <joint name="rot" type="hinge" axis="0 0 1" pos="0 0 0" limited="false" />

View File

@ -8,7 +8,7 @@
<asset> <asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" /> <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" /> <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" /> <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" /> <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
<material name='geom' texture="texgeom" texuniform="true" /> <material name='geom' texture="texgeom" texuniform="true" />
</asset> </asset>

View File

@ -8,7 +8,7 @@
<asset> <asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" /> <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" /> <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" /> <texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" /> <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
<material name='geom' texture="texgeom" texuniform="true" /> <material name='geom' texture="texgeom" texuniform="true" />
</asset> </asset>

View File

@ -72,8 +72,9 @@ class MazeEnv(gym.Env):
self._collision = maze_env_utils.CollisionDetector( self._collision = maze_env_utils.CollisionDetector(
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS, structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
) )
# Now all object balls have size=1.0
self._objball_collision = maze_env_utils.CollisionDetector( self._objball_collision = maze_env_utils.CollisionDetector(
structure, size_scaling, torso_x, torso_y, 0.8, structure, size_scaling, torso_x, torso_y, self._task.OBJECT_BALL_SIZE,
) )
else: else:
self._collision = None self._collision = None
@ -145,7 +146,7 @@ class MazeEnv(gym.Env):
elif struct.is_object_ball(): elif struct.is_object_ball():
# Movable Ball # Movable Ball
self.object_balls.append(f"objball_{i}_{j}") self.object_balls.append(f"objball_{i}_{j}")
_add_object_ball(worldbody, i, j, x, y) _add_object_ball(worldbody, i, j, x, y, self._task.OBJECT_BALL_SIZE)
torso = tree.find(".//body[@name='torso']") torso = tree.find(".//body[@name='torso']")
geoms = torso.findall(".//geom") geoms = torso.findall(".//geom")
@ -166,7 +167,7 @@ class MazeEnv(gym.Env):
name=f"goal_site{i}", name=f"goal_site{i}",
pos=f"{goal.pos[0]} {goal.pos[1]} {z}", pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
size=f"{maze_size_scaling * 0.1}", size=f"{maze_size_scaling * 0.1}",
rgba=goal.rbga_str(), rgba=goal.rgb.rgba_str(),
) )
_, file_path = tempfile.mkstemp(text=True, suffix=".xml") _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
@ -385,6 +386,7 @@ class MazeEnv(gym.Env):
self.wrapped_env.set_xy(old_pos) self.wrapped_env.set_xy(old_pos)
else: else:
self.wrapped_env.set_xy(pos) self.wrapped_env.set_xy(pos)
# Do the same check for object balls
for name, old, new in zip(self.object_balls, old_objballs, new_objballs): for name, old, new in zip(self.object_balls, old_objballs, new_objballs):
collision = self._objball_collision.detect(old, new) collision = self._objball_collision.detect(old, new)
if collision is not None: if collision is not None:
@ -406,20 +408,23 @@ class MazeEnv(gym.Env):
self.wrapped_env.close() self.wrapped_env.close()
def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None: def _add_object_ball(
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",) worldbody: ET.Element, i: str, j: str, x: float, y: float, size: float
) -> None:
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0")
mass = 0.0001 * (size ** 3)
ET.SubElement( ET.SubElement(
body, body,
"geom", "geom",
type="sphere", type="sphere",
name=f"objball_{i}_{j}_geom", name=f"objball_{i}_{j}_geom",
size="1.0", # Radius size=f"{size}", # Radius
pos="0.0 0.0 1.0", # Z = 1.0 so that this ball can move!! pos=f"0.0 0.0 {size}", # Z = size so that this ball can move!!
rgba="0.1 0.1 0.7 1", rgba=maze_task.BLUE.rgba_str(),
contype="1", contype="1",
conaffinity="1", conaffinity="1",
solimp="0.9 0.99 0.001", solimp="0.9 0.99 0.001",
mass="0.0001", mass=f"{mass}",
) )
ET.SubElement( ET.SubElement(
body, body,

View File

@ -14,6 +14,9 @@ class Rgb(NamedTuple):
green: float green: float
blue: float blue: float
def rgba_str(self) -> str:
return f"{self.red} {self.green} {self.blue} 1"
RED = Rgb(0.7, 0.1, 0.1) RED = Rgb(0.7, 0.1, 0.1)
GREEN = Rgb(0.1, 0.7, 0.1) GREEN = Rgb(0.1, 0.7, 0.1)
@ -37,10 +40,6 @@ class MazeGoal:
self.threshold = threshold self.threshold = threshold
self.custom_size = custom_size self.custom_size = custom_size
def rbga_str(self) -> str:
r, g, b = self.rgb
return f"{r} {g} {b} 1"
def neighbor(self, obs: np.ndarray) -> float: def neighbor(self, obs: np.ndarray) -> float:
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
@ -49,9 +48,9 @@ class MazeGoal:
class Scaling(NamedTuple): class Scaling(NamedTuple):
ant: float ant: Optional[float]
point: float point: Optional[float]
swimmer: float swimmer: Optional[float]
class MazeTask(ABC): class MazeTask(ABC):
@ -59,10 +58,14 @@ class MazeTask(ABC):
PENALTY: Optional[float] = None PENALTY: Optional[float] = None
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0) MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
INNER_REWARD_SCALING: float = 0.01 INNER_REWARD_SCALING: float = 0.01
TOP_DOWN_VIEW: bool = False # For Fall/Push/BlockMaze
OBSERVE_BLOCKS: bool = False OBSERVE_BLOCKS: bool = False
# For Billiard
OBSERVE_BALLS: bool = False OBSERVE_BALLS: bool = False
OBJECT_BALL_SIZE: float = 1.0
# Unused now
PUT_SPIN_NEAR_AGENT: bool = False PUT_SPIN_NEAR_AGENT: bool = False
TOP_DOWN_VIEW: bool = False
def __init__(self, scale: float) -> None: def __init__(self, scale: float) -> None:
self.goals = [] self.goals = []
@ -143,7 +146,7 @@ class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
class GoalRewardPush(GoalRewardUMaze): class GoalRewardPush(GoalRewardUMaze):
TOP_DOWN_VIEW = True OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None: def __init__(self, scale: float) -> None:
super().__init__(scale) super().__init__(scale)
@ -166,7 +169,7 @@ class DistRewardPush(GoalRewardPush, DistRewardMixIn):
class GoalRewardFall(GoalRewardUMaze): class GoalRewardFall(GoalRewardUMaze):
TOP_DOWN_VIEW = True OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None: def __init__(self, scale: float) -> None:
super().__init__(scale) super().__init__(scale)
@ -195,9 +198,9 @@ class GoalReward2Rooms(MazeTask):
PENALTY: float = -0.0001 PENALTY: float = -0.0001
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
def __init__(self, scale: float) -> None: def __init__(self, scale: float, goal: Tuple[int, int] = (4.0, -2.0)) -> None:
super().__init__(scale) super().__init__(scale)
self.goals = [MazeGoal(np.array([0.0, 4.0 * scale]))] self.goals = [MazeGoal(np.array(goal) * scale)]
def reward(self, obs: np.ndarray) -> float: def reward(self, obs: np.ndarray) -> float:
for goal in self.goals: for goal in self.goals:
@ -210,10 +213,10 @@ class GoalReward2Rooms(MazeTask):
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
return [ return [
[B, B, B, B, B, B, B, B], [B, B, B, B, B, B, B, B],
[B, R, E, E, E, E, E, B], [B, E, E, E, B, E, E, B],
[B, E, E, E, E, E, E, B], [B, E, E, E, B, E, E, B],
[B, B, B, B, B, E, B, B], [B, E, R, E, B, E, E, B],
[B, E, E, E, E, E, E, B], [B, E, E, E, B, E, E, B],
[B, E, E, E, E, E, E, B], [B, E, E, E, E, E, E, B],
[B, B, B, B, B, B, B, B], [B, B, B, B, B, B, B, B],
] ]
@ -224,9 +227,17 @@ class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):
class SubGoal2Rooms(GoalReward2Rooms): class SubGoal2Rooms(GoalReward2Rooms):
def __init__(self, scale: float) -> None: def __init__(
super().__init__(scale) self,
self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN)) scale: float,
primary_goal: Tuple[float, float] = (4.0, -2.0),
subgoals: List[Tuple[float, float]] = [(1.0, -2.0), (-1.0, 2.0)],
) -> None:
super().__init__(scale, primary_goal)
for subgoal in subgoals:
self.goals.append(
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
)
class GoalReward4Rooms(MazeTask): class GoalReward4Rooms(MazeTask):
@ -305,7 +316,21 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
pass pass
class SubGoalTRoom(GoalRewardTRoom):
def __init__(
self,
scale: float,
primary_goal: Tuple[float, float] = (2.0, -3.0),
subgoal: Tuple[float, float] = (-2.0, -3.0),
) -> None:
super().__init__(scale, primary_goal)
self.goals.append(
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
)
class GoalRewardBlockMaze(GoalRewardUMaze): class GoalRewardBlockMaze(GoalRewardUMaze):
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
OBSERVE_BLOCKS: bool = True OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None: def __init__(self, scale: float) -> None:
@ -333,30 +358,46 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
class GoalRewardBilliard(MazeTask): class GoalRewardBilliard(MazeTask):
REWARD_THRESHOLD: float = 0.9 REWARD_THRESHOLD: float = 0.9
PENALTY: float = -0.0001 PENALTY: float = -0.0001
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0) MAZE_SIZE_SCALING: Scaling = Scaling(None, 3.0, None)
OBSERVE_BALLS: bool = True OBSERVE_BALLS: bool = True
GOAL_SIZE: float = 0.3
def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None: def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
super().__init__(scale) super().__init__(scale)
goal = np.array(goal) * scale goal = np.array(goal) * scale
self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)] self.goals.append(
MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)
)
def _threshold(self) -> float:
return self.OBJECT_BALL_SIZE + self.GOAL_SIZE
def reward(self, obs: np.ndarray) -> float: def reward(self, obs: np.ndarray) -> float:
return 1.0 if self.termination(obs) else self.PENALTY object_pos = obs[3:6]
for goal in self.goals:
if goal.neighbor(object_pos):
return goal.reward_scale
return self.PENALTY
def termination(self, obs: np.ndarray) -> bool: def termination(self, obs: np.ndarray) -> bool:
return super().termination(obs[3:6]) object_pos = obs[3:6]
for goal in self.goals:
if goal.neighbor(object_pos):
return True
return False
@staticmethod @staticmethod
def create_maze() -> List[List[MazeCell]]: def create_maze() -> List[List[MazeCell]]:
E, B = MazeCell.EMPTY, MazeCell.BLOCK E, B = MazeCell.EMPTY, MazeCell.BLOCK
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
return [ return [
[B, B, B, B, B], [B, B, B, B, B, B, B],
[B, E, E, E, B], [B, E, E, E, E, E, B],
[B, E, M, E, B], [B, E, E, E, E, E, B],
[B, E, R, E, B], [B, E, E, M, E, E, B],
[B, B, B, B, B], [B, E, E, R, E, E, B],
[B, E, E, E, E, E, B],
[B, B, B, B, B, B, B],
] ]
@ -365,6 +406,50 @@ class DistRewardBilliard(GoalRewardBilliard):
return -self.goals[0].euc_dist(obs[3:6]) / self.scale return -self.goals[0].euc_dist(obs[3:6]) / self.scale
class SubGoalBilliard(GoalRewardBilliard):
def __init__(
self,
scale: float,
primary_goal: Tuple[float, float] = (2.0, -3.0),
subgoals: List[Tuple[float, float]] = [(-2.0, -3.0), (-2.0, 1.0), (2.0, 1.0)],
) -> None:
super().__init__(scale, primary_goal)
for subgoal in subgoals:
self.goals.append(
MazeGoal(
np.array(subgoal) * scale,
reward_scale=0.5,
rgb=GREEN,
threshold=self._threshold(),
custom_size=self.GOAL_SIZE,
)
)
class BanditBilliard(SubGoalBilliard):
def __init__(
self,
scale: float,
primary_goal: Tuple[float, float] = (4.0, -2.0),
subgoals: List[Tuple[float, float]] = [(4.0, 2.0)],
) -> None:
super().__init__(scale, primary_goal, subgoals)
@staticmethod
def create_maze() -> List[List[MazeCell]]:
E, B = MazeCell.EMPTY, MazeCell.BLOCK
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
return [
[B, B, B, B, B, B, B],
[B, E, E, B, B, E, B],
[B, E, E, E, E, E, B],
[B, R, M, E, B, B, B],
[B, E, E, E, E, E, B],
[B, E, E, E, E, E, B],
[B, B, B, B, B, B, B],
]
class TaskRegistry: class TaskRegistry:
REGISTRY: Dict[str, List[Type[MazeTask]]] = { REGISTRY: Dict[str, List[Type[MazeTask]]] = {
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom], "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
@ -373,9 +458,14 @@ class TaskRegistry:
"Fall": [DistRewardFall, GoalRewardFall], "Fall": [DistRewardFall, GoalRewardFall],
"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms], "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms], "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
"TRoom": [DistRewardTRoom, GoalRewardTRoom], "TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze], "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
"Billiard": [DistRewardBilliard, GoalRewardBilliard], "Billiard": [
DistRewardBilliard,
GoalRewardBilliard,
SubGoalBilliard,
BanditBilliard,
],
} }
@staticmethod @staticmethod

BIN
screenshots/AntFall.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

BIN
screenshots/AntPush.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -36,6 +36,20 @@ def test_point_maze(maze_id):
assert r < 0.0 assert r < 0.0
@pytest.mark.parametrize("maze_id", ["2Rooms", "4Rooms", "Billiard"])
def test_subgoal_envs(maze_id):
env = gym.make(f"Point{maze_id}-v2")
s0 = env.reset()
s, r, _, _ = env.step(env.action_space.sample())
if not env.unwrapped.has_extended_obs:
assert s0.shape == (7,)
assert s.shape == (7,)
elif env.unwrapped._observe_balls:
assert s0.shape == (10,)
assert s.shape == (10,)
assert len(env.unwrapped._task.goals) > 1
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
def test_reacher_maze(maze_id): def test_reacher_maze(maze_id):
for inhibited in ["Fall", "Push", "Block", "Billiard"]: for inhibited in ["Fall", "Push", "Block", "Billiard"]: