diff --git a/README.md b/README.md
index 5d921e5..21d2de9 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
# mujoco-maze
+[![Black](https://img.shields.io/badge/code%20style-black-000.svg)](https://github.com/psf/black)
Some maze environments for reinforcement learning(RL) using [mujoco-py] and
[openai gym][gym].
@@ -22,19 +23,26 @@ Thankfully, this project is based on the code from [rllab] and [tensorflow/mode
- PointPush/AntPush
- ![PointPush](./screenshots/PointPush.png)
+ ![PointPush](./screenshots/AntPush.png)
- PointPush-v0/AntPush-v0 (Distance-based Reward)
- PointPush-v1/AntPush-v1 (Goal-based Reward)
- PointFall/AntFall
- ![PointFall](./screenshots/PointFall.png)
+ ![PointFall](./screenshots/AntFall.png)
- PointFall-v0/AntFall-v0 (Distance-based Reward)
- PointFall-v1/AntFall-v1 (Goal-based Reward)
+- PointBilliard
+
+ ![PointBilliard](./screenshots/PointBilliard.png)
+ - PointBilliard-v0 (Distance-based Reward)
+ - PointBilliard-v1 (Goal-based Reward)
+ - PointBilliard-v2 (Multiple Goals (0.5 pt or 1.0 pt))
+
## Warning
-This project has some other environments (e.g., billiard, reacher, and
-swimmer) but if they are not on README, they are work in progress and
+This project has some other environments (e.g., reacher, and swimmer)
+but if they are not on README, they are work in progress and
not tested well.
## License
diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py
index b468a1c..65f18c5 100644
--- a/mujoco_maze/__init__.py
+++ b/mujoco_maze/__init__.py
@@ -16,69 +16,66 @@ from mujoco_maze.swimmer import SwimmerEnv
for maze_id in TaskRegistry.keys():
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
- # Point
- gym.envs.register(
- id=f"Point{maze_id}-v{i}",
- entry_point="mujoco_maze.maze_env:MazeEnv",
- kwargs=dict(
- model_cls=PointEnv,
- maze_task=task_cls,
- maze_size_scaling=task_cls.MAZE_SIZE_SCALING.point,
- inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
- ),
- max_episode_steps=1000,
- reward_threshold=task_cls.REWARD_THRESHOLD,
- )
- if "Billiard" in maze_id:
- continue
- # Ant
- gym.envs.register(
- id=f"Ant{maze_id}-v{i}",
- entry_point="mujoco_maze.maze_env:MazeEnv",
- kwargs=dict(
- model_cls=AntEnv,
- maze_task=task_cls,
- maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
- inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
- ),
- max_episode_steps=1000,
- reward_threshold=task_cls.REWARD_THRESHOLD,
- )
- skip_swimmer = False
- for inhibited in ["Fall", "Push", "Block"]:
- if inhibited in maze_id:
- skip_swimmer = True
+ point_scale = task_cls.MAZE_SIZE_SCALING.point
+ if point_scale is not None:
+ # Point
+ gym.envs.register(
+ id=f"Point{maze_id}-v{i}",
+ entry_point="mujoco_maze.maze_env:MazeEnv",
+ kwargs=dict(
+ model_cls=PointEnv,
+ maze_task=task_cls,
+ maze_size_scaling=point_scale,
+ inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
+ ),
+ max_episode_steps=1000,
+ reward_threshold=task_cls.REWARD_THRESHOLD,
+ )
- if skip_swimmer:
- continue
+ ant_scale = task_cls.MAZE_SIZE_SCALING.ant
+ if ant_scale is not None:
+ # Ant
+ gym.envs.register(
+ id=f"Ant{maze_id}-v{i}",
+ entry_point="mujoco_maze.maze_env:MazeEnv",
+ kwargs=dict(
+ model_cls=AntEnv,
+ maze_task=task_cls,
+ maze_size_scaling=ant_scale,
+ inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
+ ),
+ max_episode_steps=1000,
+ reward_threshold=task_cls.REWARD_THRESHOLD,
+ )
- # Reacher
- gym.envs.register(
- id=f"Reacher{maze_id}-v{i}",
- entry_point="mujoco_maze.maze_env:MazeEnv",
- kwargs=dict(
- model_cls=ReacherEnv,
- maze_task=task_cls,
- maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
- inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
- ),
- max_episode_steps=1000,
- reward_threshold=task_cls.REWARD_THRESHOLD,
- )
-
- # Swimmer
- gym.envs.register(
- id=f"Swimmer{maze_id}-v{i}",
- entry_point="mujoco_maze.maze_env:MazeEnv",
- kwargs=dict(
- model_cls=SwimmerEnv,
- maze_task=task_cls,
- maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
- inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
- ),
- max_episode_steps=1000,
- reward_threshold=task_cls.REWARD_THRESHOLD,
- )
+ swimmer_scale = task_cls.MAZE_SIZE_SCALING.swimmer
+ if swimmer_scale is not None:
+ # Reacher
+ gym.envs.register(
+ id=f"Reacher{maze_id}-v{i}",
+ entry_point="mujoco_maze.maze_env:MazeEnv",
+ kwargs=dict(
+ model_cls=ReacherEnv,
+ maze_task=task_cls,
+ maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
+ inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
+ ),
+ max_episode_steps=1000,
+ reward_threshold=task_cls.REWARD_THRESHOLD,
+ )
+ # Swimmer
+ gym.envs.register(
+ id=f"Swimmer{maze_id}-v{i}",
+ entry_point="mujoco_maze.maze_env:MazeEnv",
+ kwargs=dict(
+ model_cls=SwimmerEnv,
+ maze_task=task_cls,
+ maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
+ inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
+ ),
+ max_episode_steps=1000,
+ reward_threshold=task_cls.REWARD_THRESHOLD,
+ )
__version__ = "0.1.0"
diff --git a/mujoco_maze/assets/ant.xml b/mujoco_maze/assets/ant.xml
index ac555ef..ffe156b 100755
--- a/mujoco_maze/assets/ant.xml
+++ b/mujoco_maze/assets/ant.xml
@@ -11,13 +11,13 @@
-
+
-
+
diff --git a/mujoco_maze/assets/point.xml b/mujoco_maze/assets/point.xml
index 578fce2..4c06cb1 100755
--- a/mujoco_maze/assets/point.xml
+++ b/mujoco_maze/assets/point.xml
@@ -8,7 +8,7 @@
-
+
@@ -18,8 +18,8 @@
-
-
+
+
diff --git a/mujoco_maze/assets/reacher.xml b/mujoco_maze/assets/reacher.xml
index f9acaeb..0d238c8 100644
--- a/mujoco_maze/assets/reacher.xml
+++ b/mujoco_maze/assets/reacher.xml
@@ -8,7 +8,7 @@
-
+
diff --git a/mujoco_maze/assets/swimmer.xml b/mujoco_maze/assets/swimmer.xml
index b743d85..3c6c21a 100644
--- a/mujoco_maze/assets/swimmer.xml
+++ b/mujoco_maze/assets/swimmer.xml
@@ -8,7 +8,7 @@
-
+
diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py
index 01586a3..7619c35 100644
--- a/mujoco_maze/maze_env.py
+++ b/mujoco_maze/maze_env.py
@@ -72,8 +72,9 @@ class MazeEnv(gym.Env):
self._collision = maze_env_utils.CollisionDetector(
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
)
+ # Now all object balls have size=1.0
self._objball_collision = maze_env_utils.CollisionDetector(
- structure, size_scaling, torso_x, torso_y, 0.8,
+ structure, size_scaling, torso_x, torso_y, self._task.OBJECT_BALL_SIZE,
)
else:
self._collision = None
@@ -145,7 +146,7 @@ class MazeEnv(gym.Env):
elif struct.is_object_ball():
# Movable Ball
self.object_balls.append(f"objball_{i}_{j}")
- _add_object_ball(worldbody, i, j, x, y)
+ _add_object_ball(worldbody, i, j, x, y, self._task.OBJECT_BALL_SIZE)
torso = tree.find(".//body[@name='torso']")
geoms = torso.findall(".//geom")
@@ -166,7 +167,7 @@ class MazeEnv(gym.Env):
name=f"goal_site{i}",
pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
size=f"{maze_size_scaling * 0.1}",
- rgba=goal.rbga_str(),
+ rgba=goal.rgb.rgba_str(),
)
_, file_path = tempfile.mkstemp(text=True, suffix=".xml")
@@ -385,6 +386,7 @@ class MazeEnv(gym.Env):
self.wrapped_env.set_xy(old_pos)
else:
self.wrapped_env.set_xy(pos)
+ # Do the same check for object balls
for name, old, new in zip(self.object_balls, old_objballs, new_objballs):
collision = self._objball_collision.detect(old, new)
if collision is not None:
@@ -406,20 +408,23 @@ class MazeEnv(gym.Env):
self.wrapped_env.close()
-def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None:
- body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",)
+def _add_object_ball(
+ worldbody: ET.Element, i: str, j: str, x: float, y: float, size: float
+) -> None:
+ body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0")
+ mass = 0.0001 * (size ** 3)
ET.SubElement(
body,
"geom",
type="sphere",
name=f"objball_{i}_{j}_geom",
- size="1.0", # Radius
- pos="0.0 0.0 1.0", # Z = 1.0 so that this ball can move!!
- rgba="0.1 0.1 0.7 1",
+ size=f"{size}", # Radius
+ pos=f"0.0 0.0 {size}", # Z = size so that this ball can move!!
+ rgba=maze_task.BLUE.rgba_str(),
contype="1",
conaffinity="1",
solimp="0.9 0.99 0.001",
- mass="0.0001",
+ mass=f"{mass}",
)
ET.SubElement(
body,
diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py
index eecd3b2..e953cb1 100644
--- a/mujoco_maze/maze_task.py
+++ b/mujoco_maze/maze_task.py
@@ -14,6 +14,9 @@ class Rgb(NamedTuple):
green: float
blue: float
+ def rgba_str(self) -> str:
+ return f"{self.red} {self.green} {self.blue} 1"
+
RED = Rgb(0.7, 0.1, 0.1)
GREEN = Rgb(0.1, 0.7, 0.1)
@@ -37,10 +40,6 @@ class MazeGoal:
self.threshold = threshold
self.custom_size = custom_size
- def rbga_str(self) -> str:
- r, g, b = self.rgb
- return f"{r} {g} {b} 1"
-
def neighbor(self, obs: np.ndarray) -> float:
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
@@ -49,9 +48,9 @@ class MazeGoal:
class Scaling(NamedTuple):
- ant: float
- point: float
- swimmer: float
+ ant: Optional[float]
+ point: Optional[float]
+ swimmer: Optional[float]
class MazeTask(ABC):
@@ -59,10 +58,14 @@ class MazeTask(ABC):
PENALTY: Optional[float] = None
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
INNER_REWARD_SCALING: float = 0.01
- TOP_DOWN_VIEW: bool = False
+ # For Fall/Push/BlockMaze
OBSERVE_BLOCKS: bool = False
+ # For Billiard
OBSERVE_BALLS: bool = False
+ OBJECT_BALL_SIZE: float = 1.0
+ # Unused now
PUT_SPIN_NEAR_AGENT: bool = False
+ TOP_DOWN_VIEW: bool = False
def __init__(self, scale: float) -> None:
self.goals = []
@@ -143,7 +146,7 @@ class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
class GoalRewardPush(GoalRewardUMaze):
- TOP_DOWN_VIEW = True
+ OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None:
super().__init__(scale)
@@ -166,7 +169,7 @@ class DistRewardPush(GoalRewardPush, DistRewardMixIn):
class GoalRewardFall(GoalRewardUMaze):
- TOP_DOWN_VIEW = True
+ OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None:
super().__init__(scale)
@@ -195,9 +198,9 @@ class GoalReward2Rooms(MazeTask):
PENALTY: float = -0.0001
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
- def __init__(self, scale: float) -> None:
+ def __init__(self, scale: float, goal: Tuple[int, int] = (4.0, -2.0)) -> None:
super().__init__(scale)
- self.goals = [MazeGoal(np.array([0.0, 4.0 * scale]))]
+ self.goals = [MazeGoal(np.array(goal) * scale)]
def reward(self, obs: np.ndarray) -> float:
for goal in self.goals:
@@ -210,10 +213,10 @@ class GoalReward2Rooms(MazeTask):
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
return [
[B, B, B, B, B, B, B, B],
- [B, R, E, E, E, E, E, B],
- [B, E, E, E, E, E, E, B],
- [B, B, B, B, B, E, B, B],
- [B, E, E, E, E, E, E, B],
+ [B, E, E, E, B, E, E, B],
+ [B, E, E, E, B, E, E, B],
+ [B, E, R, E, B, E, E, B],
+ [B, E, E, E, B, E, E, B],
[B, E, E, E, E, E, E, B],
[B, B, B, B, B, B, B, B],
]
@@ -224,9 +227,17 @@ class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):
class SubGoal2Rooms(GoalReward2Rooms):
- def __init__(self, scale: float) -> None:
- super().__init__(scale)
- self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))
+ def __init__(
+ self,
+ scale: float,
+ primary_goal: Tuple[float, float] = (4.0, -2.0),
+ subgoals: List[Tuple[float, float]] = [(1.0, -2.0), (-1.0, 2.0)],
+ ) -> None:
+ super().__init__(scale, primary_goal)
+ for subgoal in subgoals:
+ self.goals.append(
+ MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
+ )
class GoalReward4Rooms(MazeTask):
@@ -305,7 +316,21 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
pass
+class SubGoalTRoom(GoalRewardTRoom):
+ def __init__(
+ self,
+ scale: float,
+ primary_goal: Tuple[float, float] = (2.0, -3.0),
+ subgoal: Tuple[float, float] = (-2.0, -3.0),
+ ) -> None:
+ super().__init__(scale, primary_goal)
+ self.goals.append(
+ MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
+ )
+
+
class GoalRewardBlockMaze(GoalRewardUMaze):
+ MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None:
@@ -333,30 +358,46 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
class GoalRewardBilliard(MazeTask):
REWARD_THRESHOLD: float = 0.9
PENALTY: float = -0.0001
- MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
+ MAZE_SIZE_SCALING: Scaling = Scaling(None, 3.0, None)
OBSERVE_BALLS: bool = True
+ GOAL_SIZE: float = 0.3
- def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None:
+ def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
super().__init__(scale)
goal = np.array(goal) * scale
- self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)]
+ self.goals.append(
+ MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)
+ )
+
+ def _threshold(self) -> float:
+ return self.OBJECT_BALL_SIZE + self.GOAL_SIZE
def reward(self, obs: np.ndarray) -> float:
- return 1.0 if self.termination(obs) else self.PENALTY
+ object_pos = obs[3:6]
+ for goal in self.goals:
+ if goal.neighbor(object_pos):
+ return goal.reward_scale
+ return self.PENALTY
def termination(self, obs: np.ndarray) -> bool:
- return super().termination(obs[3:6])
+ object_pos = obs[3:6]
+ for goal in self.goals:
+ if goal.neighbor(object_pos):
+ return True
+ return False
@staticmethod
def create_maze() -> List[List[MazeCell]]:
E, B = MazeCell.EMPTY, MazeCell.BLOCK
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
return [
- [B, B, B, B, B],
- [B, E, E, E, B],
- [B, E, M, E, B],
- [B, E, R, E, B],
- [B, B, B, B, B],
+ [B, B, B, B, B, B, B],
+ [B, E, E, E, E, E, B],
+ [B, E, E, E, E, E, B],
+ [B, E, E, M, E, E, B],
+ [B, E, E, R, E, E, B],
+ [B, E, E, E, E, E, B],
+ [B, B, B, B, B, B, B],
]
@@ -365,6 +406,50 @@ class DistRewardBilliard(GoalRewardBilliard):
return -self.goals[0].euc_dist(obs[3:6]) / self.scale
+class SubGoalBilliard(GoalRewardBilliard):
+ def __init__(
+ self,
+ scale: float,
+ primary_goal: Tuple[float, float] = (2.0, -3.0),
+ subgoals: List[Tuple[float, float]] = [(-2.0, -3.0), (-2.0, 1.0), (2.0, 1.0)],
+ ) -> None:
+ super().__init__(scale, primary_goal)
+ for subgoal in subgoals:
+ self.goals.append(
+ MazeGoal(
+ np.array(subgoal) * scale,
+ reward_scale=0.5,
+ rgb=GREEN,
+ threshold=self._threshold(),
+ custom_size=self.GOAL_SIZE,
+ )
+ )
+
+
+class BanditBilliard(SubGoalBilliard):
+ def __init__(
+ self,
+ scale: float,
+ primary_goal: Tuple[float, float] = (4.0, -2.0),
+ subgoals: List[Tuple[float, float]] = [(4.0, 2.0)],
+ ) -> None:
+ super().__init__(scale, primary_goal, subgoals)
+
+ @staticmethod
+ def create_maze() -> List[List[MazeCell]]:
+ E, B = MazeCell.EMPTY, MazeCell.BLOCK
+ R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
+ return [
+ [B, B, B, B, B, B, B],
+ [B, E, E, B, B, E, B],
+ [B, E, E, E, E, E, B],
+ [B, R, M, E, B, B, B],
+ [B, E, E, E, E, E, B],
+ [B, E, E, E, E, E, B],
+ [B, B, B, B, B, B, B],
+ ]
+
+
class TaskRegistry:
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
@@ -373,9 +458,14 @@ class TaskRegistry:
"Fall": [DistRewardFall, GoalRewardFall],
"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
- "TRoom": [DistRewardTRoom, GoalRewardTRoom],
+ "TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
- "Billiard": [DistRewardBilliard, GoalRewardBilliard],
+ "Billiard": [
+ DistRewardBilliard,
+ GoalRewardBilliard,
+ SubGoalBilliard,
+ BanditBilliard,
+ ],
}
@staticmethod
diff --git a/screenshots/AntFall.png b/screenshots/AntFall.png
new file mode 100644
index 0000000..344ddcb
Binary files /dev/null and b/screenshots/AntFall.png differ
diff --git a/screenshots/AntPush.png b/screenshots/AntPush.png
new file mode 100644
index 0000000..3687bb4
Binary files /dev/null and b/screenshots/AntPush.png differ
diff --git a/screenshots/Point4Rooms.png b/screenshots/Point4Rooms.png
index 41fa577..9f0c163 100644
Binary files a/screenshots/Point4Rooms.png and b/screenshots/Point4Rooms.png differ
diff --git a/screenshots/PointBilliard.png b/screenshots/PointBilliard.png
new file mode 100644
index 0000000..ec23282
Binary files /dev/null and b/screenshots/PointBilliard.png differ
diff --git a/screenshots/PointFall.png b/screenshots/PointFall.png
deleted file mode 100644
index f9ce24a..0000000
Binary files a/screenshots/PointFall.png and /dev/null differ
diff --git a/screenshots/PointPush.png b/screenshots/PointPush.png
deleted file mode 100644
index 003dc7a..0000000
Binary files a/screenshots/PointPush.png and /dev/null differ
diff --git a/screenshots/PointUMaze.png b/screenshots/PointUMaze.png
index e4cf4e0..40aa637 100644
Binary files a/screenshots/PointUMaze.png and b/screenshots/PointUMaze.png differ
diff --git a/tests/test_envs.py b/tests/test_envs.py
index d769fd8..92dc9e8 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -36,6 +36,20 @@ def test_point_maze(maze_id):
assert r < 0.0
+@pytest.mark.parametrize("maze_id", ["2Rooms", "4Rooms", "Billiard"])
+def test_subgoal_envs(maze_id):
+ env = gym.make(f"Point{maze_id}-v2")
+ s0 = env.reset()
+ s, r, _, _ = env.step(env.action_space.sample())
+ if not env.unwrapped.has_extended_obs:
+ assert s0.shape == (7,)
+ assert s.shape == (7,)
+ elif env.unwrapped._observe_balls:
+ assert s0.shape == (10,)
+ assert s.shape == (10,)
+ assert len(env.unwrapped._task.goals) > 1
+
+
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
def test_reacher_maze(maze_id):
for inhibited in ["Fall", "Push", "Block", "Billiard"]: