Merge pull request #3 from kngwyu/ball

Billiard
This commit is contained in:
Yuji Kanagawa 2020-10-05 14:10:36 +09:00 committed by GitHub
commit 2d1cfe81de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 226 additions and 112 deletions

View File

@ -1,4 +1,5 @@
# mujoco-maze
[![Black](https://img.shields.io/badge/code%20style-black-000.svg)](https://github.com/psf/black)
Some maze environments for reinforcement learning(RL) using [mujoco-py] and
[openai gym][gym].
@ -22,19 +23,26 @@ Thankfully, this project is based on the code from [rllab] and [tensorflow/mode
- PointPush/AntPush
![PointPush](./screenshots/PointPush.png)
![PointPush](./screenshots/AntPush.png)
- PointPush-v0/AntPush-v0 (Distance-based Reward)
- PointPush-v1/AntPush-v1 (Goal-based Reward)
- PointFall/AntFall
![PointFall](./screenshots/PointFall.png)
![PointFall](./screenshots/AntFall.png)
- PointFall-v0/AntFall-v0 (Distance-based Reward)
- PointFall-v1/AntFall-v1 (Goal-based Reward)
- PointBilliard
![PointBilliard](./screenshots/PointBilliard.png)
- PointBilliard-v0 (Distance-based Reward)
- PointBilliard-v1 (Goal-based Reward)
- PointBilliard-v2 (Multiple Goals (0.5 pt or 1.0 pt))
## Warning
This project has some other environments (e.g., billiard, reacher, and
swimmer) but if they are not on README, they are work in progress and
This project has some other environments (e.g., reacher, and swimmer)
but if they are not on README, they are work in progress and
not tested well.
## License

View File

@ -16,6 +16,8 @@ from mujoco_maze.swimmer import SwimmerEnv
for maze_id in TaskRegistry.keys():
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
point_scale = task_cls.MAZE_SIZE_SCALING.point
if point_scale is not None:
# Point
gym.envs.register(
id=f"Point{maze_id}-v{i}",
@ -23,14 +25,15 @@ for maze_id in TaskRegistry.keys():
kwargs=dict(
model_cls=PointEnv,
maze_task=task_cls,
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.point,
maze_size_scaling=point_scale,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
),
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
if "Billiard" in maze_id:
continue
ant_scale = task_cls.MAZE_SIZE_SCALING.ant
if ant_scale is not None:
# Ant
gym.envs.register(
id=f"Ant{maze_id}-v{i}",
@ -38,20 +41,15 @@ for maze_id in TaskRegistry.keys():
kwargs=dict(
model_cls=AntEnv,
maze_task=task_cls,
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
maze_size_scaling=ant_scale,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
),
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
skip_swimmer = False
for inhibited in ["Fall", "Push", "Block"]:
if inhibited in maze_id:
skip_swimmer = True
if skip_swimmer:
continue
swimmer_scale = task_cls.MAZE_SIZE_SCALING.swimmer
if swimmer_scale is not None:
# Reacher
gym.envs.register(
id=f"Reacher{maze_id}-v{i}",
@ -65,7 +63,6 @@ for maze_id in TaskRegistry.keys():
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
# Swimmer
gym.envs.register(
id=f"Swimmer{maze_id}-v{i}",

View File

@ -11,13 +11,13 @@
<asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
<material name='geom' texture="texgeom" texuniform="true" />
</asset>
<worldbody>
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
<geom name='floor' pos='0 0 0' size='40 40 40' type='plane' conaffinity='1' rgba='0.8 0.9 0.8 1' condim='3' />
<geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
<body name="torso" pos="0 0 0.75">
<geom name="torso_geom" type="sphere" size="0.25" pos="0 0 0" />
<joint name="root" type="free" limited="false" pos="0 0 0" axis="0 0 1" margin="0.01" armature="0" damping="0" />

View File

@ -8,7 +8,7 @@
<asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name="MatPlane" texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5" />
<material name="geom" texture="texgeom" texuniform="true" />
</asset>
@ -18,8 +18,8 @@
<!-- ================= Point ================= /-->
<!-- Note that the solimp is modified from rllab to prevent the point from going through the wall /-->
<body name="torso" pos="0 0 0">
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" />
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" />
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" rgba="0.8 0.4 0.1 1" solimp="0.9 0.99 0.001" />
<joint name="ballx" type="slide" axis="1 0 0" pos="0 0 0" />
<joint name="bally" type="slide" axis="0 1 0" pos="0 0 0" />
<joint name="rot" type="hinge" axis="0 0 1" pos="0 0 0" limited="false" />

View File

@ -8,7 +8,7 @@
<asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
<material name='geom' texture="texgeom" texuniform="true" />
</asset>

View File

@ -8,7 +8,7 @@
<asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.2 0.3" width="100" height="100" />
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
<material name='geom' texture="texgeom" texuniform="true" />
</asset>

View File

@ -72,8 +72,9 @@ class MazeEnv(gym.Env):
self._collision = maze_env_utils.CollisionDetector(
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
)
# Now all object balls have size=1.0
self._objball_collision = maze_env_utils.CollisionDetector(
structure, size_scaling, torso_x, torso_y, 0.8,
structure, size_scaling, torso_x, torso_y, self._task.OBJECT_BALL_SIZE,
)
else:
self._collision = None
@ -145,7 +146,7 @@ class MazeEnv(gym.Env):
elif struct.is_object_ball():
# Movable Ball
self.object_balls.append(f"objball_{i}_{j}")
_add_object_ball(worldbody, i, j, x, y)
_add_object_ball(worldbody, i, j, x, y, self._task.OBJECT_BALL_SIZE)
torso = tree.find(".//body[@name='torso']")
geoms = torso.findall(".//geom")
@ -166,7 +167,7 @@ class MazeEnv(gym.Env):
name=f"goal_site{i}",
pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
size=f"{maze_size_scaling * 0.1}",
rgba=goal.rbga_str(),
rgba=goal.rgb.rgba_str(),
)
_, file_path = tempfile.mkstemp(text=True, suffix=".xml")
@ -385,6 +386,7 @@ class MazeEnv(gym.Env):
self.wrapped_env.set_xy(old_pos)
else:
self.wrapped_env.set_xy(pos)
# Do the same check for object balls
for name, old, new in zip(self.object_balls, old_objballs, new_objballs):
collision = self._objball_collision.detect(old, new)
if collision is not None:
@ -406,20 +408,23 @@ class MazeEnv(gym.Env):
self.wrapped_env.close()
def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None:
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",)
def _add_object_ball(
worldbody: ET.Element, i: str, j: str, x: float, y: float, size: float
) -> None:
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0")
mass = 0.0001 * (size ** 3)
ET.SubElement(
body,
"geom",
type="sphere",
name=f"objball_{i}_{j}_geom",
size="1.0", # Radius
pos="0.0 0.0 1.0", # Z = 1.0 so that this ball can move!!
rgba="0.1 0.1 0.7 1",
size=f"{size}", # Radius
pos=f"0.0 0.0 {size}", # Z = size so that this ball can move!!
rgba=maze_task.BLUE.rgba_str(),
contype="1",
conaffinity="1",
solimp="0.9 0.99 0.001",
mass="0.0001",
mass=f"{mass}",
)
ET.SubElement(
body,

View File

@ -14,6 +14,9 @@ class Rgb(NamedTuple):
green: float
blue: float
def rgba_str(self) -> str:
return f"{self.red} {self.green} {self.blue} 1"
RED = Rgb(0.7, 0.1, 0.1)
GREEN = Rgb(0.1, 0.7, 0.1)
@ -37,10 +40,6 @@ class MazeGoal:
self.threshold = threshold
self.custom_size = custom_size
def rbga_str(self) -> str:
r, g, b = self.rgb
return f"{r} {g} {b} 1"
def neighbor(self, obs: np.ndarray) -> float:
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
@ -49,9 +48,9 @@ class MazeGoal:
class Scaling(NamedTuple):
ant: float
point: float
swimmer: float
ant: Optional[float]
point: Optional[float]
swimmer: Optional[float]
class MazeTask(ABC):
@ -59,10 +58,14 @@ class MazeTask(ABC):
PENALTY: Optional[float] = None
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
INNER_REWARD_SCALING: float = 0.01
TOP_DOWN_VIEW: bool = False
# For Fall/Push/BlockMaze
OBSERVE_BLOCKS: bool = False
# For Billiard
OBSERVE_BALLS: bool = False
OBJECT_BALL_SIZE: float = 1.0
# Unused now
PUT_SPIN_NEAR_AGENT: bool = False
TOP_DOWN_VIEW: bool = False
def __init__(self, scale: float) -> None:
self.goals = []
@ -143,7 +146,7 @@ class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
class GoalRewardPush(GoalRewardUMaze):
TOP_DOWN_VIEW = True
OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None:
super().__init__(scale)
@ -166,7 +169,7 @@ class DistRewardPush(GoalRewardPush, DistRewardMixIn):
class GoalRewardFall(GoalRewardUMaze):
TOP_DOWN_VIEW = True
OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None:
super().__init__(scale)
@ -195,9 +198,9 @@ class GoalReward2Rooms(MazeTask):
PENALTY: float = -0.0001
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
def __init__(self, scale: float) -> None:
def __init__(self, scale: float, goal: Tuple[int, int] = (4.0, -2.0)) -> None:
super().__init__(scale)
self.goals = [MazeGoal(np.array([0.0, 4.0 * scale]))]
self.goals = [MazeGoal(np.array(goal) * scale)]
def reward(self, obs: np.ndarray) -> float:
for goal in self.goals:
@ -210,10 +213,10 @@ class GoalReward2Rooms(MazeTask):
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
return [
[B, B, B, B, B, B, B, B],
[B, R, E, E, E, E, E, B],
[B, E, E, E, E, E, E, B],
[B, B, B, B, B, E, B, B],
[B, E, E, E, E, E, E, B],
[B, E, E, E, B, E, E, B],
[B, E, E, E, B, E, E, B],
[B, E, R, E, B, E, E, B],
[B, E, E, E, B, E, E, B],
[B, E, E, E, E, E, E, B],
[B, B, B, B, B, B, B, B],
]
@ -224,9 +227,17 @@ class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):
class SubGoal2Rooms(GoalReward2Rooms):
def __init__(self, scale: float) -> None:
super().__init__(scale)
self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))
def __init__(
self,
scale: float,
primary_goal: Tuple[float, float] = (4.0, -2.0),
subgoals: List[Tuple[float, float]] = [(1.0, -2.0), (-1.0, 2.0)],
) -> None:
super().__init__(scale, primary_goal)
for subgoal in subgoals:
self.goals.append(
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
)
class GoalReward4Rooms(MazeTask):
@ -305,7 +316,21 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
pass
class SubGoalTRoom(GoalRewardTRoom):
def __init__(
self,
scale: float,
primary_goal: Tuple[float, float] = (2.0, -3.0),
subgoal: Tuple[float, float] = (-2.0, -3.0),
) -> None:
super().__init__(scale, primary_goal)
self.goals.append(
MazeGoal(np.array(subgoal) * scale, reward_scale=0.5, rgb=GREEN)
)
class GoalRewardBlockMaze(GoalRewardUMaze):
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None:
@ -333,30 +358,46 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
class GoalRewardBilliard(MazeTask):
REWARD_THRESHOLD: float = 0.9
PENALTY: float = -0.0001
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
MAZE_SIZE_SCALING: Scaling = Scaling(None, 3.0, None)
OBSERVE_BALLS: bool = True
GOAL_SIZE: float = 0.3
def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None:
def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
super().__init__(scale)
goal = np.array(goal) * scale
self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)]
self.goals.append(
MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)
)
def _threshold(self) -> float:
return self.OBJECT_BALL_SIZE + self.GOAL_SIZE
def reward(self, obs: np.ndarray) -> float:
return 1.0 if self.termination(obs) else self.PENALTY
object_pos = obs[3:6]
for goal in self.goals:
if goal.neighbor(object_pos):
return goal.reward_scale
return self.PENALTY
def termination(self, obs: np.ndarray) -> bool:
return super().termination(obs[3:6])
object_pos = obs[3:6]
for goal in self.goals:
if goal.neighbor(object_pos):
return True
return False
@staticmethod
def create_maze() -> List[List[MazeCell]]:
E, B = MazeCell.EMPTY, MazeCell.BLOCK
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
return [
[B, B, B, B, B],
[B, E, E, E, B],
[B, E, M, E, B],
[B, E, R, E, B],
[B, B, B, B, B],
[B, B, B, B, B, B, B],
[B, E, E, E, E, E, B],
[B, E, E, E, E, E, B],
[B, E, E, M, E, E, B],
[B, E, E, R, E, E, B],
[B, E, E, E, E, E, B],
[B, B, B, B, B, B, B],
]
@ -365,6 +406,50 @@ class DistRewardBilliard(GoalRewardBilliard):
return -self.goals[0].euc_dist(obs[3:6]) / self.scale
class SubGoalBilliard(GoalRewardBilliard):
def __init__(
self,
scale: float,
primary_goal: Tuple[float, float] = (2.0, -3.0),
subgoals: List[Tuple[float, float]] = [(-2.0, -3.0), (-2.0, 1.0), (2.0, 1.0)],
) -> None:
super().__init__(scale, primary_goal)
for subgoal in subgoals:
self.goals.append(
MazeGoal(
np.array(subgoal) * scale,
reward_scale=0.5,
rgb=GREEN,
threshold=self._threshold(),
custom_size=self.GOAL_SIZE,
)
)
class BanditBilliard(SubGoalBilliard):
def __init__(
self,
scale: float,
primary_goal: Tuple[float, float] = (4.0, -2.0),
subgoals: List[Tuple[float, float]] = [(4.0, 2.0)],
) -> None:
super().__init__(scale, primary_goal, subgoals)
@staticmethod
def create_maze() -> List[List[MazeCell]]:
E, B = MazeCell.EMPTY, MazeCell.BLOCK
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
return [
[B, B, B, B, B, B, B],
[B, E, E, B, B, E, B],
[B, E, E, E, E, E, B],
[B, R, M, E, B, B, B],
[B, E, E, E, E, E, B],
[B, E, E, E, E, E, B],
[B, B, B, B, B, B, B],
]
class TaskRegistry:
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
@ -373,9 +458,14 @@ class TaskRegistry:
"Fall": [DistRewardFall, GoalRewardFall],
"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
"TRoom": [DistRewardTRoom, GoalRewardTRoom],
"TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
"Billiard": [DistRewardBilliard, GoalRewardBilliard],
"Billiard": [
DistRewardBilliard,
GoalRewardBilliard,
SubGoalBilliard,
BanditBilliard,
],
}
@staticmethod

BIN
screenshots/AntFall.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

BIN
screenshots/AntPush.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -36,6 +36,20 @@ def test_point_maze(maze_id):
assert r < 0.0
@pytest.mark.parametrize("maze_id", ["2Rooms", "4Rooms", "Billiard"])
def test_subgoal_envs(maze_id):
env = gym.make(f"Point{maze_id}-v2")
s0 = env.reset()
s, r, _, _ = env.step(env.action_space.sample())
if not env.unwrapped.has_extended_obs:
assert s0.shape == (7,)
assert s.shape == (7,)
elif env.unwrapped._observe_balls:
assert s0.shape == (10,)
assert s.shape == (10,)
assert len(env.unwrapped._task.goals) > 1
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
def test_reacher_maze(maze_id):
for inhibited in ["Fall", "Push", "Block", "Billiard"]: