This commit is contained in:
kngwyu 2020-09-24 23:40:33 +09:00
parent 3ed5177906
commit d2c661d55c
10 changed files with 205 additions and 37 deletions

View File

@ -11,9 +11,11 @@ import gym
from mujoco_maze.ant import AntEnv from mujoco_maze.ant import AntEnv
from mujoco_maze.maze_task import TaskRegistry from mujoco_maze.maze_task import TaskRegistry
from mujoco_maze.point import PointEnv from mujoco_maze.point import PointEnv
from mujoco_maze.swimmer import SwimmerEnv
for maze_id in TaskRegistry.keys(): for maze_id in TaskRegistry.keys():
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)): for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
# Ant
gym.envs.register( gym.envs.register(
id=f"Ant{maze_id}-v{i}", id=f"Ant{maze_id}-v{i}",
entry_point="mujoco_maze.maze_env:MazeEnv", entry_point="mujoco_maze.maze_env:MazeEnv",
@ -26,9 +28,7 @@ for maze_id in TaskRegistry.keys():
max_episode_steps=1000, max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD, reward_threshold=task_cls.REWARD_THRESHOLD,
) )
# Point
for maze_id in TaskRegistry.keys():
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
gym.envs.register( gym.envs.register(
id=f"Point{maze_id}-v{i}", id=f"Point{maze_id}-v{i}",
entry_point="mujoco_maze.maze_env:MazeEnv", entry_point="mujoco_maze.maze_env:MazeEnv",
@ -42,5 +42,22 @@ for maze_id in TaskRegistry.keys():
reward_threshold=task_cls.REWARD_THRESHOLD, reward_threshold=task_cls.REWARD_THRESHOLD,
) )
if "Push" in maze_id or "Fall" in maze_id:
continue
# Swimmer
gym.envs.register(
id=f"Swimmer{maze_id}-v{i}",
entry_point="mujoco_maze.maze_env:MazeEnv",
kwargs=dict(
model_cls=SwimmerEnv,
maze_task=task_cls,
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
),
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
__version__ = "0.1.0" __version__ = "0.1.0"

View File

@ -1,6 +1,7 @@
"""Common APIs for defining mujoco robot. """Common APIs for defining mujoco robot.
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Optional
import numpy as np import numpy as np
from gym.envs.mujoco.mujoco_env import MujocoEnv from gym.envs.mujoco.mujoco_env import MujocoEnv
@ -9,9 +10,9 @@ from gym.utils import EzPickle
class AgentModel(ABC, MujocoEnv, EzPickle): class AgentModel(ABC, MujocoEnv, EzPickle):
FILE: str FILE: str
ORI_IND: int
MANUAL_COLLISION: bool MANUAL_COLLISION: bool
RADIUS: float ORI_IND: int
RADIUS: Optional[float] = None
def __init__(self, file_path: str, frame_skip: int) -> None: def __init__(self, file_path: str, frame_skip: int) -> None:
MujocoEnv.__init__(self, file_path, frame_skip) MujocoEnv.__init__(self, file_path, frame_skip)
@ -30,18 +31,12 @@ class AgentModel(ABC, MujocoEnv, EzPickle):
""" """
pass pass
@abstractmethod
def get_xy(self) -> np.ndarray: def get_xy(self) -> np.ndarray:
"""Returns the coordinate of the agent. """Returns the coordinate of the agent.
""" """
pass pass
@abstractmethod
def set_xy(self, xy: np.ndarray) -> None: def set_xy(self, xy: np.ndarray) -> None:
"""Set the coordinate of the agent. """Set the coordinate of the agent.
""" """
pass pass
@abstractmethod
def get_ori(self) -> float:
pass

View File

@ -6,7 +6,7 @@ Based on `models`_ and `gym`_ (both ant and ant-v3).
.. _gym: https://github.com/openai/gym .. _gym: https://github.com/openai/gym
""" """
from typing import Callable, Optional, Tuple from typing import Callable, Tuple
import numpy as np import numpy as np
@ -39,14 +39,15 @@ class AntEnv(AgentModel):
FILE: str = "ant.xml" FILE: str = "ant.xml"
ORI_IND: int = 3 ORI_IND: int = 3
MANUAL_COLLISION: bool = False MANUAL_COLLISION: bool = False
RADIUS: float = 0.2
def __init__( def __init__(
self, self,
file_path: Optional[str] = None, file_path: str,
ctrl_cost_weight: float = 0.0001, forward_reward_weight: float = 1.0,
ctrl_cost_weight: float = 1e-4,
forward_reward_fn: ForwardRewardFn = forward_reward_vnorm, forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
) -> None: ) -> None:
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight self._ctrl_cost_weight = ctrl_cost_weight
self._forward_reward_fn = forward_reward_fn self._forward_reward_fn = forward_reward_fn
super().__init__(file_path, 5) super().__init__(file_path, 5)
@ -63,12 +64,11 @@ class AntEnv(AgentModel):
forward_reward = self._forward_reward(xy_pos_before) forward_reward = self._forward_reward(xy_pos_before)
ctrl_cost = self._ctrl_cost_weight * np.square(action).sum() ctrl_cost = self._ctrl_cost_weight * np.square(action).sum()
ob = self._get_obs()
return ( return (
ob, self._get_obs(),
forward_reward - ctrl_cost, self._forward_reward_weight * forward_reward - ctrl_cost,
False, False,
dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost,), dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
) )
def _get_obs(self): def _get_obs(self):
@ -82,7 +82,7 @@ class AntEnv(AgentModel):
def reset_model(self): def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform( qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.1, high=0.1 size=self.model.nq, low=-0.1, high=0.1,
) )
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1 qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
@ -92,18 +92,17 @@ class AntEnv(AgentModel):
self.set_state(qpos, qvel) self.set_state(qpos, qvel)
return self._get_obs() return self._get_obs()
def get_ori(self): def get_ori(self) -> np.ndarray:
ori = [0, 1, 0, 0] ori = [0, 1, 0, 0]
ori_ind = self.ORI_IND rot = self.sim.data.qpos[self.ORI_IND : self.ORI_IND + 4] # take the quaternion
rot = self.sim.data.qpos[ori_ind : ori_ind + 4] # take the quaternion
ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3] # project onto x-y plane ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3] # project onto x-y plane
ori = np.arctan2(ori[1], ori[0]) ori = np.arctan2(ori[1], ori[0])
return ori return ori
def set_xy(self, xy): def set_xy(self, xy: np.ndarray) -> None:
qpos = self.sim.data.qpos.copy() qpos = self.sim.data.qpos.copy()
qpos[:2] = xy qpos[:2] = xy
self.set_state(qpos, self.sim.data.qvel) self.set_state(qpos, self.sim.data.qvel)
def get_xy(self): def get_xy(self) -> np.ndarray:
return np.copy(self.sim.data.qpos[:2]) return np.copy(self.sim.data.qpos[:2])

View File

@ -15,6 +15,7 @@
<worldbody> <worldbody>
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" /> <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
<geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" /> <geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
<!-- ================= Point ================= /-->
<body name="torso" pos="0 0 0"> <body name="torso" pos="0 0 0">
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" /> <geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" />
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" /> <geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" />

View File

@ -0,0 +1,39 @@
<mujoco model="swimmer">
<compiler angle="degree" coordinate="local" inertiafromgeom="true" />
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1" />
<default>
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1" />
<joint armature="0.1" />
</default>
<asset>
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
<material name='geom' texture="texgeom" texuniform="true" />
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" />
<!-- ================= SWIMMER ================= /-->
<body name="torso" pos="0 0 0">
<camera name="track" mode="trackcom" pos="0 -3 3" xyaxes="1 0 0 0 1 1" />
<geom name="frontbody" density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule" />
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide" />
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide" />
<joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge" />
<body name="mid" pos="0.5 0 0">
<geom name="midbody" density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule" />
<joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge" />
<body name="back" pos="-1 0 0">
<geom name="backbody" density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule" />
<joint axis="0 0 1" limited="true" name="rot3" pos="0 0 0" range="-100 100" type="hinge" />
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2" />
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot3" />
</actuator>
</mujoco>

View File

@ -64,9 +64,14 @@ class MazeEnv(gym.Env):
(x - torso_x, y - torso_y) for x, y in self._find_all_robots() (x - torso_x, y - torso_y) for x, y in self._find_all_robots()
] ]
if model_cls.MANUAL_COLLISION:
if model_cls.RADIUS is None:
raise ValueError("Manual collision needs radius of the model")
self._collision = maze_env_utils.CollisionDetector( self._collision = maze_env_utils.CollisionDetector(
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS, structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
) )
else:
self._collision = None
self._xy_to_rowcol = lambda x, y: ( self._xy_to_rowcol = lambda x, y: (
2 + (y + size_scaling / 2) / size_scaling, 2 + (y + size_scaling / 2) / size_scaling,
@ -226,7 +231,7 @@ class MazeEnv(gym.Env):
geoms = torso.findall(".//geom") geoms = torso.findall(".//geom")
for geom in geoms: for geom in geoms:
if "name" not in geom.attrib: if "name" not in geom.attrib:
raise Exception("Every geom of the torso must have a name " "defined") raise Exception("Every geom of the torso must have a name")
# Set goals # Set goals
asset = tree.find(".//asset") asset = tree.find(".//asset")
@ -344,7 +349,6 @@ class MazeEnv(gym.Env):
robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2] robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2]
self._robot_x = robot_x self._robot_x = robot_x
self._robot_y = robot_y self._robot_y = robot_y
self._robot_ori = self.get_ori()
structure = self._maze_structure structure = self._maze_structure
size_scaling = self._maze_size_scaling size_scaling = self._maze_size_scaling

View File

@ -138,6 +138,7 @@ class Collision:
class CollisionDetector: class CollisionDetector:
"""For manual collision detection. """For manual collision detection.
""" """
EPS: float = 0.05 EPS: float = 0.05
NEIGHBORS: List[Tuple[int, int]] = [[0, -1], [-1, 0], [0, 1], [1, 0]] NEIGHBORS: List[Tuple[int, int]] = [[0, -1], [-1, 0], [0, 1], [1, 0]]

View File

@ -46,11 +46,12 @@ class MazeGoal:
class Scaling(NamedTuple): class Scaling(NamedTuple):
ant: float ant: float
point: float point: float
swimmer: float
class MazeTask(ABC): class MazeTask(ABC):
REWARD_THRESHOLD: float REWARD_THRESHOLD: float
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0) MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
INNER_REWARD_SCALING: float = 0.01 INNER_REWARD_SCALING: float = 0.01
TOP_DOWN_VIEW: bool = False TOP_DOWN_VIEW: bool = False
OBSERVE_BLOCKS: bool = False OBSERVE_BLOCKS: bool = False
@ -88,6 +89,32 @@ class DistRewardMixIn:
return -self.goals[0].euc_dist(obs) / self.scale return -self.goals[0].euc_dist(obs) / self.scale
class GoalRewardSimpleRoom(MazeTask):
""" Very easy task. For testing.
"""
REWARD_THRESHOLD: float = 0.9
def __init__(self, scale: float) -> None:
super().__init__(scale)
self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))]
def reward(self, obs: np.ndarray) -> float:
return 1.0 if self.termination(obs) else -0.0001
@staticmethod
def create_maze() -> List[List[MazeCell]]:
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
return [
[B, B, B, B, B],
[B, R, E, E, B],
[B, B, B, B, B],
]
class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
pass
class GoalRewardUMaze(MazeTask): class GoalRewardUMaze(MazeTask):
REWARD_THRESHOLD: float = 0.9 REWARD_THRESHOLD: float = 0.9
@ -163,7 +190,7 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn):
class GoalReward2Rooms(MazeTask): class GoalReward2Rooms(MazeTask):
REWARD_THRESHOLD: float = 0.9 REWARD_THRESHOLD: float = 0.9
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0) MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
def __init__(self, scale: float) -> None: def __init__(self, scale: float) -> None:
super().__init__(scale) super().__init__(scale)
@ -201,7 +228,7 @@ class SubGoal2Rooms(GoalReward2Rooms):
class GoalReward4Rooms(MazeTask): class GoalReward4Rooms(MazeTask):
REWARD_THRESHOLD: float = 0.9 REWARD_THRESHOLD: float = 0.9
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0) MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
def __init__(self, scale: float) -> None: def __init__(self, scale: float) -> None:
super().__init__(scale) super().__init__(scale)
@ -244,12 +271,10 @@ class SubGoal4Rooms(GoalReward4Rooms):
class GoalRewardTRoom(MazeTask): class GoalRewardTRoom(MazeTask):
REWARD_THRESHOLD: float = 0.9 REWARD_THRESHOLD: float = 0.9
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0) MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
def __init__( def __init__(
self, self, scale: float, goals: List[Tuple[float, float]] = [(2.0, -3.0)],
scale: float,
goals: List[Tuple[float, float]] = [(2.0, -3.0)],
) -> None: ) -> None:
super().__init__(scale) super().__init__(scale)
self.goals = [] self.goals = []
@ -281,6 +306,7 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
class TaskRegistry: class TaskRegistry:
REGISTRY: Dict[str, List[Type[MazeTask]]] = { REGISTRY: Dict[str, List[Type[MazeTask]]] = {
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
"UMaze": [DistRewardUMaze, GoalRewardUMaze], "UMaze": [DistRewardUMaze, GoalRewardUMaze],
"Push": [DistRewardPush, GoalRewardPush], "Push": [DistRewardPush, GoalRewardPush],
"Fall": [DistRewardFall, GoalRewardFall], "Fall": [DistRewardFall, GoalRewardFall],

73
mujoco_maze/swimmer.py Normal file
View File

@ -0,0 +1,73 @@
"""
Swimmer robot as an explorer in the maze.
Based on `gym`_ (swimmer-v3).
.. _gym: https://github.com/openai/gym
"""
from typing import Tuple
import numpy as np
from mujoco_maze.agent_model import AgentModel
from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm
class SwimmerEnv(AgentModel):
FILE: str = "swimmer.xml"
MANUAL_COLLISION: bool = False
def __init__(
self,
file_path: str = None,
forward_reward_weight: float = 1.0,
ctrl_cost_weight: float = 1e-4,
forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
) -> None:
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._forward_reward_fn = forward_reward_fn
super().__init__(file_path, 4)
def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]:
xy_pos_after = self.sim.data.qpos[:2].copy()
xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
return self._forward_reward_fn(xy_velocity)
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
xy_pos_before = self.sim.data.qpos[:2].copy()
self.do_simulation(action, self.frame_skip)
forward_reward = self._forward_reward(xy_pos_before)
ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return (
self._get_obs(),
self._forward_reward_weight * forward_reward - ctrl_cost,
False,
dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
)
def _get_obs(self) -> np.ndarray:
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
observation = np.concatenate([position, velocity]).ravel()
return observation
def reset_model(self) -> np.ndarray:
qpos = self.init_qpos + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nq,
)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nv,
)
self.set_state(qpos, qvel)
return self._get_obs()
def set_xy(self, xy: np.ndarray) -> None:
qpos = self.sim.data.qpos.copy()
qpos[:2] = xy
self.set_state(qpos, self.sim.data.qvel)
def get_xy(self) -> np.ndarray:
return np.copy(self.sim.data.qpos[:2])

View File

@ -26,6 +26,19 @@ def test_point_maze(maze_id):
assert s.shape == (7,) assert s.shape == (7,)
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
def test_swimmer_maze(maze_id):
if "Fall" in maze_id or "Push" in maze_id:
return
for i in range(2):
env = gym.make(f"Swimmer{maze_id}-v{i}")
s0 = env.reset()
s, _, _, _ = env.step(env.action_space.sample())
if not env.unwrapped._top_down_view:
assert s0.shape == (11,)
assert s.shape == (11,)
@pytest.mark.parametrize("v", [0, 1]) @pytest.mark.parametrize("v", [0, 1])
def test_maze_args(v): def test_maze_args(v):
env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goals": [(-2.0, -3.0)]}) env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goals": [(-2.0, -3.0)]})