Swimmer
This commit is contained in:
parent
3ed5177906
commit
d2c661d55c
@ -11,9 +11,11 @@ import gym
|
|||||||
from mujoco_maze.ant import AntEnv
|
from mujoco_maze.ant import AntEnv
|
||||||
from mujoco_maze.maze_task import TaskRegistry
|
from mujoco_maze.maze_task import TaskRegistry
|
||||||
from mujoco_maze.point import PointEnv
|
from mujoco_maze.point import PointEnv
|
||||||
|
from mujoco_maze.swimmer import SwimmerEnv
|
||||||
|
|
||||||
for maze_id in TaskRegistry.keys():
|
for maze_id in TaskRegistry.keys():
|
||||||
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
||||||
|
# Ant
|
||||||
gym.envs.register(
|
gym.envs.register(
|
||||||
id=f"Ant{maze_id}-v{i}",
|
id=f"Ant{maze_id}-v{i}",
|
||||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||||
@ -26,9 +28,7 @@ for maze_id in TaskRegistry.keys():
|
|||||||
max_episode_steps=1000,
|
max_episode_steps=1000,
|
||||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
)
|
)
|
||||||
|
# Point
|
||||||
for maze_id in TaskRegistry.keys():
|
|
||||||
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
|
||||||
gym.envs.register(
|
gym.envs.register(
|
||||||
id=f"Point{maze_id}-v{i}",
|
id=f"Point{maze_id}-v{i}",
|
||||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||||
@ -42,5 +42,22 @@ for maze_id in TaskRegistry.keys():
|
|||||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if "Push" in maze_id or "Fall" in maze_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Swimmer
|
||||||
|
gym.envs.register(
|
||||||
|
id=f"Swimmer{maze_id}-v{i}",
|
||||||
|
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||||
|
kwargs=dict(
|
||||||
|
model_cls=SwimmerEnv,
|
||||||
|
maze_task=task_cls,
|
||||||
|
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.swimmer,
|
||||||
|
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||||
|
),
|
||||||
|
max_episode_steps=1000,
|
||||||
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
__version__ = "0.1.0"
|
__version__ = "0.1.0"
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
"""Common APIs for defining mujoco robot.
|
"""Common APIs for defining mujoco robot.
|
||||||
"""
|
"""
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym.envs.mujoco.mujoco_env import MujocoEnv
|
from gym.envs.mujoco.mujoco_env import MujocoEnv
|
||||||
@ -9,9 +10,9 @@ from gym.utils import EzPickle
|
|||||||
|
|
||||||
class AgentModel(ABC, MujocoEnv, EzPickle):
|
class AgentModel(ABC, MujocoEnv, EzPickle):
|
||||||
FILE: str
|
FILE: str
|
||||||
ORI_IND: int
|
|
||||||
MANUAL_COLLISION: bool
|
MANUAL_COLLISION: bool
|
||||||
RADIUS: float
|
ORI_IND: int
|
||||||
|
RADIUS: Optional[float] = None
|
||||||
|
|
||||||
def __init__(self, file_path: str, frame_skip: int) -> None:
|
def __init__(self, file_path: str, frame_skip: int) -> None:
|
||||||
MujocoEnv.__init__(self, file_path, frame_skip)
|
MujocoEnv.__init__(self, file_path, frame_skip)
|
||||||
@ -30,18 +31,12 @@ class AgentModel(ABC, MujocoEnv, EzPickle):
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def get_xy(self) -> np.ndarray:
|
def get_xy(self) -> np.ndarray:
|
||||||
"""Returns the coordinate of the agent.
|
"""Returns the coordinate of the agent.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def set_xy(self, xy: np.ndarray) -> None:
|
def set_xy(self, xy: np.ndarray) -> None:
|
||||||
"""Set the coordinate of the agent.
|
"""Set the coordinate of the agent.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def get_ori(self) -> float:
|
|
||||||
pass
|
|
||||||
|
@ -6,7 +6,7 @@ Based on `models`_ and `gym`_ (both ant and ant-v3).
|
|||||||
.. _gym: https://github.com/openai/gym
|
.. _gym: https://github.com/openai/gym
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Callable, Optional, Tuple
|
from typing import Callable, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -39,14 +39,15 @@ class AntEnv(AgentModel):
|
|||||||
FILE: str = "ant.xml"
|
FILE: str = "ant.xml"
|
||||||
ORI_IND: int = 3
|
ORI_IND: int = 3
|
||||||
MANUAL_COLLISION: bool = False
|
MANUAL_COLLISION: bool = False
|
||||||
RADIUS: float = 0.2
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
file_path: Optional[str] = None,
|
file_path: str,
|
||||||
ctrl_cost_weight: float = 0.0001,
|
forward_reward_weight: float = 1.0,
|
||||||
|
ctrl_cost_weight: float = 1e-4,
|
||||||
forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
|
forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
self._forward_reward_weight = forward_reward_weight
|
||||||
self._ctrl_cost_weight = ctrl_cost_weight
|
self._ctrl_cost_weight = ctrl_cost_weight
|
||||||
self._forward_reward_fn = forward_reward_fn
|
self._forward_reward_fn = forward_reward_fn
|
||||||
super().__init__(file_path, 5)
|
super().__init__(file_path, 5)
|
||||||
@ -63,12 +64,11 @@ class AntEnv(AgentModel):
|
|||||||
forward_reward = self._forward_reward(xy_pos_before)
|
forward_reward = self._forward_reward(xy_pos_before)
|
||||||
ctrl_cost = self._ctrl_cost_weight * np.square(action).sum()
|
ctrl_cost = self._ctrl_cost_weight * np.square(action).sum()
|
||||||
|
|
||||||
ob = self._get_obs()
|
|
||||||
return (
|
return (
|
||||||
ob,
|
self._get_obs(),
|
||||||
forward_reward - ctrl_cost,
|
self._forward_reward_weight * forward_reward - ctrl_cost,
|
||||||
False,
|
False,
|
||||||
dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost,),
|
dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
@ -82,7 +82,7 @@ class AntEnv(AgentModel):
|
|||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
qpos = self.init_qpos + self.np_random.uniform(
|
qpos = self.init_qpos + self.np_random.uniform(
|
||||||
size=self.model.nq, low=-0.1, high=0.1
|
size=self.model.nq, low=-0.1, high=0.1,
|
||||||
)
|
)
|
||||||
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
|
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
|
||||||
|
|
||||||
@ -92,18 +92,17 @@ class AntEnv(AgentModel):
|
|||||||
self.set_state(qpos, qvel)
|
self.set_state(qpos, qvel)
|
||||||
return self._get_obs()
|
return self._get_obs()
|
||||||
|
|
||||||
def get_ori(self):
|
def get_ori(self) -> np.ndarray:
|
||||||
ori = [0, 1, 0, 0]
|
ori = [0, 1, 0, 0]
|
||||||
ori_ind = self.ORI_IND
|
rot = self.sim.data.qpos[self.ORI_IND : self.ORI_IND + 4] # take the quaternion
|
||||||
rot = self.sim.data.qpos[ori_ind : ori_ind + 4] # take the quaternion
|
|
||||||
ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3] # project onto x-y plane
|
ori = q_mult(q_mult(rot, ori), q_inv(rot))[1:3] # project onto x-y plane
|
||||||
ori = np.arctan2(ori[1], ori[0])
|
ori = np.arctan2(ori[1], ori[0])
|
||||||
return ori
|
return ori
|
||||||
|
|
||||||
def set_xy(self, xy):
|
def set_xy(self, xy: np.ndarray) -> None:
|
||||||
qpos = self.sim.data.qpos.copy()
|
qpos = self.sim.data.qpos.copy()
|
||||||
qpos[:2] = xy
|
qpos[:2] = xy
|
||||||
self.set_state(qpos, self.sim.data.qvel)
|
self.set_state(qpos, self.sim.data.qvel)
|
||||||
|
|
||||||
def get_xy(self):
|
def get_xy(self) -> np.ndarray:
|
||||||
return np.copy(self.sim.data.qpos[:2])
|
return np.copy(self.sim.data.qpos[:2])
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
<worldbody>
|
<worldbody>
|
||||||
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
|
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
|
||||||
<geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
|
<geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
|
||||||
|
<!-- ================= Point ================= /-->
|
||||||
<body name="torso" pos="0 0 0">
|
<body name="torso" pos="0 0 0">
|
||||||
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" />
|
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5" solimp="0.9 0.99 0.001" />
|
||||||
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" />
|
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5" solimp="0.9 0.99 0.001" />
|
||||||
|
39
mujoco_maze/assets/swimmer.xml
Normal file
39
mujoco_maze/assets/swimmer.xml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
<mujoco model="swimmer">
|
||||||
|
<compiler angle="degree" coordinate="local" inertiafromgeom="true" />
|
||||||
|
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1" />
|
||||||
|
<default>
|
||||||
|
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1" />
|
||||||
|
<joint armature="0.1" />
|
||||||
|
</default>
|
||||||
|
<asset>
|
||||||
|
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||||
|
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||||
|
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
||||||
|
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||||
|
<material name='geom' texture="texgeom" texuniform="true" />
|
||||||
|
</asset>
|
||||||
|
<worldbody>
|
||||||
|
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
|
||||||
|
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" />
|
||||||
|
<!-- ================= SWIMMER ================= /-->
|
||||||
|
<body name="torso" pos="0 0 0">
|
||||||
|
<camera name="track" mode="trackcom" pos="0 -3 3" xyaxes="1 0 0 0 1 1" />
|
||||||
|
<geom name="frontbody" density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule" />
|
||||||
|
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide" />
|
||||||
|
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide" />
|
||||||
|
<joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge" />
|
||||||
|
<body name="mid" pos="0.5 0 0">
|
||||||
|
<geom name="midbody" density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule" />
|
||||||
|
<joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge" />
|
||||||
|
<body name="back" pos="-1 0 0">
|
||||||
|
<geom name="backbody" density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule" />
|
||||||
|
<joint axis="0 0 1" limited="true" name="rot3" pos="0 0 0" range="-100 100" type="hinge" />
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
<actuator>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2" />
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot3" />
|
||||||
|
</actuator>
|
||||||
|
</mujoco>
|
@ -64,9 +64,14 @@ class MazeEnv(gym.Env):
|
|||||||
(x - torso_x, y - torso_y) for x, y in self._find_all_robots()
|
(x - torso_x, y - torso_y) for x, y in self._find_all_robots()
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if model_cls.MANUAL_COLLISION:
|
||||||
|
if model_cls.RADIUS is None:
|
||||||
|
raise ValueError("Manual collision needs radius of the model")
|
||||||
self._collision = maze_env_utils.CollisionDetector(
|
self._collision = maze_env_utils.CollisionDetector(
|
||||||
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
|
structure, size_scaling, torso_x, torso_y, model_cls.RADIUS,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
self._collision = None
|
||||||
|
|
||||||
self._xy_to_rowcol = lambda x, y: (
|
self._xy_to_rowcol = lambda x, y: (
|
||||||
2 + (y + size_scaling / 2) / size_scaling,
|
2 + (y + size_scaling / 2) / size_scaling,
|
||||||
@ -226,7 +231,7 @@ class MazeEnv(gym.Env):
|
|||||||
geoms = torso.findall(".//geom")
|
geoms = torso.findall(".//geom")
|
||||||
for geom in geoms:
|
for geom in geoms:
|
||||||
if "name" not in geom.attrib:
|
if "name" not in geom.attrib:
|
||||||
raise Exception("Every geom of the torso must have a name " "defined")
|
raise Exception("Every geom of the torso must have a name")
|
||||||
|
|
||||||
# Set goals
|
# Set goals
|
||||||
asset = tree.find(".//asset")
|
asset = tree.find(".//asset")
|
||||||
@ -344,7 +349,6 @@ class MazeEnv(gym.Env):
|
|||||||
robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2]
|
robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2]
|
||||||
self._robot_x = robot_x
|
self._robot_x = robot_x
|
||||||
self._robot_y = robot_y
|
self._robot_y = robot_y
|
||||||
self._robot_ori = self.get_ori()
|
|
||||||
|
|
||||||
structure = self._maze_structure
|
structure = self._maze_structure
|
||||||
size_scaling = self._maze_size_scaling
|
size_scaling = self._maze_size_scaling
|
||||||
|
@ -138,6 +138,7 @@ class Collision:
|
|||||||
class CollisionDetector:
|
class CollisionDetector:
|
||||||
"""For manual collision detection.
|
"""For manual collision detection.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
EPS: float = 0.05
|
EPS: float = 0.05
|
||||||
NEIGHBORS: List[Tuple[int, int]] = [[0, -1], [-1, 0], [0, 1], [1, 0]]
|
NEIGHBORS: List[Tuple[int, int]] = [[0, -1], [-1, 0], [0, 1], [1, 0]]
|
||||||
|
|
||||||
|
@ -46,11 +46,12 @@ class MazeGoal:
|
|||||||
class Scaling(NamedTuple):
|
class Scaling(NamedTuple):
|
||||||
ant: float
|
ant: float
|
||||||
point: float
|
point: float
|
||||||
|
swimmer: float
|
||||||
|
|
||||||
|
|
||||||
class MazeTask(ABC):
|
class MazeTask(ABC):
|
||||||
REWARD_THRESHOLD: float
|
REWARD_THRESHOLD: float
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
|
||||||
INNER_REWARD_SCALING: float = 0.01
|
INNER_REWARD_SCALING: float = 0.01
|
||||||
TOP_DOWN_VIEW: bool = False
|
TOP_DOWN_VIEW: bool = False
|
||||||
OBSERVE_BLOCKS: bool = False
|
OBSERVE_BLOCKS: bool = False
|
||||||
@ -88,6 +89,32 @@ class DistRewardMixIn:
|
|||||||
return -self.goals[0].euc_dist(obs) / self.scale
|
return -self.goals[0].euc_dist(obs) / self.scale
|
||||||
|
|
||||||
|
|
||||||
|
class GoalRewardSimpleRoom(MazeTask):
|
||||||
|
""" Very easy task. For testing.
|
||||||
|
"""
|
||||||
|
REWARD_THRESHOLD: float = 0.9
|
||||||
|
|
||||||
|
def __init__(self, scale: float) -> None:
|
||||||
|
super().__init__(scale)
|
||||||
|
self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))]
|
||||||
|
|
||||||
|
def reward(self, obs: np.ndarray) -> float:
|
||||||
|
return 1.0 if self.termination(obs) else -0.0001
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
|
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
|
||||||
|
return [
|
||||||
|
[B, B, B, B, B],
|
||||||
|
[B, R, E, E, B],
|
||||||
|
[B, B, B, B, B],
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class GoalRewardUMaze(MazeTask):
|
class GoalRewardUMaze(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
|
|
||||||
@ -163,7 +190,7 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn):
|
|||||||
|
|
||||||
class GoalReward2Rooms(MazeTask):
|
class GoalReward2Rooms(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
@ -201,7 +228,7 @@ class SubGoal2Rooms(GoalReward2Rooms):
|
|||||||
|
|
||||||
class GoalReward4Rooms(MazeTask):
|
class GoalReward4Rooms(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
@ -244,12 +271,10 @@ class SubGoal4Rooms(GoalReward4Rooms):
|
|||||||
|
|
||||||
class GoalRewardTRoom(MazeTask):
|
class GoalRewardTRoom(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, scale: float, goals: List[Tuple[float, float]] = [(2.0, -3.0)],
|
||||||
scale: float,
|
|
||||||
goals: List[Tuple[float, float]] = [(2.0, -3.0)],
|
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
self.goals = []
|
self.goals = []
|
||||||
@ -281,6 +306,7 @@ class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):
|
|||||||
|
|
||||||
class TaskRegistry:
|
class TaskRegistry:
|
||||||
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
||||||
|
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
|
||||||
"UMaze": [DistRewardUMaze, GoalRewardUMaze],
|
"UMaze": [DistRewardUMaze, GoalRewardUMaze],
|
||||||
"Push": [DistRewardPush, GoalRewardPush],
|
"Push": [DistRewardPush, GoalRewardPush],
|
||||||
"Fall": [DistRewardFall, GoalRewardFall],
|
"Fall": [DistRewardFall, GoalRewardFall],
|
||||||
|
73
mujoco_maze/swimmer.py
Normal file
73
mujoco_maze/swimmer.py
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
"""
|
||||||
|
Swimmer robot as an explorer in the maze.
|
||||||
|
Based on `gym`_ (swimmer-v3).
|
||||||
|
|
||||||
|
.. _gym: https://github.com/openai/gym
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from mujoco_maze.agent_model import AgentModel
|
||||||
|
from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm
|
||||||
|
|
||||||
|
|
||||||
|
class SwimmerEnv(AgentModel):
|
||||||
|
FILE: str = "swimmer.xml"
|
||||||
|
MANUAL_COLLISION: bool = False
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
file_path: str = None,
|
||||||
|
forward_reward_weight: float = 1.0,
|
||||||
|
ctrl_cost_weight: float = 1e-4,
|
||||||
|
forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
|
||||||
|
) -> None:
|
||||||
|
self._forward_reward_weight = forward_reward_weight
|
||||||
|
self._ctrl_cost_weight = ctrl_cost_weight
|
||||||
|
self._forward_reward_fn = forward_reward_fn
|
||||||
|
super().__init__(file_path, 4)
|
||||||
|
|
||||||
|
def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]:
|
||||||
|
xy_pos_after = self.sim.data.qpos[:2].copy()
|
||||||
|
xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
|
||||||
|
return self._forward_reward_fn(xy_velocity)
|
||||||
|
|
||||||
|
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
|
||||||
|
xy_pos_before = self.sim.data.qpos[:2].copy()
|
||||||
|
self.do_simulation(action, self.frame_skip)
|
||||||
|
|
||||||
|
forward_reward = self._forward_reward(xy_pos_before)
|
||||||
|
ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
|
||||||
|
return (
|
||||||
|
self._get_obs(),
|
||||||
|
self._forward_reward_weight * forward_reward - ctrl_cost,
|
||||||
|
False,
|
||||||
|
dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _get_obs(self) -> np.ndarray:
|
||||||
|
position = self.sim.data.qpos.flat.copy()
|
||||||
|
velocity = self.sim.data.qvel.flat.copy()
|
||||||
|
observation = np.concatenate([position, velocity]).ravel()
|
||||||
|
return observation
|
||||||
|
|
||||||
|
def reset_model(self) -> np.ndarray:
|
||||||
|
qpos = self.init_qpos + self.np_random.uniform(
|
||||||
|
low=-0.1, high=0.1, size=self.model.nq,
|
||||||
|
)
|
||||||
|
qvel = self.init_qvel + self.np_random.uniform(
|
||||||
|
low=-0.1, high=0.1, size=self.model.nv,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.set_state(qpos, qvel)
|
||||||
|
return self._get_obs()
|
||||||
|
|
||||||
|
def set_xy(self, xy: np.ndarray) -> None:
|
||||||
|
qpos = self.sim.data.qpos.copy()
|
||||||
|
qpos[:2] = xy
|
||||||
|
self.set_state(qpos, self.sim.data.qvel)
|
||||||
|
|
||||||
|
def get_xy(self) -> np.ndarray:
|
||||||
|
return np.copy(self.sim.data.qpos[:2])
|
@ -26,6 +26,19 @@ def test_point_maze(maze_id):
|
|||||||
assert s.shape == (7,)
|
assert s.shape == (7,)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
||||||
|
def test_swimmer_maze(maze_id):
|
||||||
|
if "Fall" in maze_id or "Push" in maze_id:
|
||||||
|
return
|
||||||
|
for i in range(2):
|
||||||
|
env = gym.make(f"Swimmer{maze_id}-v{i}")
|
||||||
|
s0 = env.reset()
|
||||||
|
s, _, _, _ = env.step(env.action_space.sample())
|
||||||
|
if not env.unwrapped._top_down_view:
|
||||||
|
assert s0.shape == (11,)
|
||||||
|
assert s.shape == (11,)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("v", [0, 1])
|
@pytest.mark.parametrize("v", [0, 1])
|
||||||
def test_maze_args(v):
|
def test_maze_args(v):
|
||||||
env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goals": [(-2.0, -3.0)]})
|
env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goals": [(-2.0, -3.0)]})
|
||||||
|
Loading…
Reference in New Issue
Block a user