diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py index 7eb34c5..2871f1e 100644 --- a/mujoco_maze/__init__.py +++ b/mujoco_maze/__init__.py @@ -16,19 +16,6 @@ from mujoco_maze.swimmer import SwimmerEnv for maze_id in TaskRegistry.keys(): for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)): - # Ant - gym.envs.register( - id=f"Ant{maze_id}-v{i}", - entry_point="mujoco_maze.maze_env:MazeEnv", - kwargs=dict( - model_cls=AntEnv, - maze_task=task_cls, - maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant, - inner_reward_scaling=task_cls.INNER_REWARD_SCALING, - ), - max_episode_steps=1000, - reward_threshold=task_cls.REWARD_THRESHOLD, - ) # Point gym.envs.register( id=f"Point{maze_id}-v{i}", @@ -42,6 +29,21 @@ for maze_id in TaskRegistry.keys(): max_episode_steps=1000, reward_threshold=task_cls.REWARD_THRESHOLD, ) + if "Rolling" in maze_id: + continue + # Ant + gym.envs.register( + id=f"Ant{maze_id}-v{i}", + entry_point="mujoco_maze.maze_env:MazeEnv", + kwargs=dict( + model_cls=AntEnv, + maze_task=task_cls, + maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant, + inner_reward_scaling=task_cls.INNER_REWARD_SCALING, + ), + max_episode_steps=1000, + reward_threshold=task_cls.REWARD_THRESHOLD, + ) skip_swimmer = False for inhibited in ["Fall", "Push", "Block"]: if inhibited in maze_id: diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py index 248a457..7c023ef 100644 --- a/mujoco_maze/maze_env.py +++ b/mujoco_maze/maze_env.py @@ -48,6 +48,8 @@ class MazeEnv(gym.Env): self.t = 0 # time steps self._observe_blocks = self._task.OBSERVE_BLOCKS self._put_spin_near_agent = self._task.PUT_SPIN_NEAR_AGENT + # Observe other objectives + self._observe_balls = self._task.OBSERVE_BALLS self._top_down_view = self._task.TOP_DOWN_VIEW self._restitution_coef = restitution_coef @@ -93,17 +95,17 @@ class MazeEnv(gym.Env): default.find(".//geom").set("solimp", ".995 .995 .01") self.movable_blocks = [] + self.object_balls = [] for i in range(len(structure)): for j in range(len(structure[0])): struct = structure[i][j] if struct.is_robot() and self._put_spin_near_agent: - struct = maze_env_utils.MazeCell.SpinXY + struct = maze_env_utils.MazeCell.SPIN + x, y = j * size_scaling - torso_x, i * size_scaling - torso_y + h = height / 2 * size_scaling + size = size_scaling * 0.5 if self.elevated and not struct.is_chasm(): # Create elevated platform. - x = j * size_scaling - torso_x - y = i * size_scaling - torso_y - h = height / 2 * size_scaling - size = 0.5 * size_scaling ET.SubElement( worldbody, "geom", @@ -119,10 +121,6 @@ class MazeEnv(gym.Env): if struct.is_block(): # Unmovable block. # Offset all coordinates so that robot starts at the origin. - x = j * size_scaling - torso_x - y = i * size_scaling - torso_y - h = height / 2 * size_scaling - size = 0.5 * size_scaling ET.SubElement( worldbody, "geom", @@ -137,92 +135,14 @@ class MazeEnv(gym.Env): ) elif struct.can_move(): # Movable block. - # The "falling" blocks are shrunk slightly and increased in mass to - # ensure it can fall easily through a gap in the platform blocks. - name = "movable_%d_%d" % (i, j) - self.movable_blocks.append((name, struct)) - falling = struct.can_move_z() - spinning = struct.can_spin() - shrink = 0.1 if spinning else 0.99 if falling else 1.0 - height_shrink = 0.1 if spinning else 1.0 - x_offset = 0.25 * size_scaling if spinning else 0.0 - x = j * size_scaling - torso_x + x_offset - y = i * size_scaling - torso_y - h = height / 2 * size_scaling * height_shrink - size = 0.5 * size_scaling * shrink - movable_body = ET.SubElement( - worldbody, - "body", - name=name, - pos=f"{x} {y} {height_offset + h}", + self.movable_blocks.append(f"movable_{i}_{j}") + _add_movable_block( + worldbody, struct, i, j, size_scaling, x, y, h, height_offset, ) - ET.SubElement( - movable_body, - "geom", - name=f"block_{i}_{j}", - pos="0 0 0", - size=f"{size} {size} {h}", - type="box", - material="", - mass="0.001" if falling else "0.0002", - contype="1", - conaffinity="1", - rgba="0.9 0.1 0.1 1", - ) - if struct.can_move_x(): - ET.SubElement( - movable_body, - "joint", - armature="0", - axis="1 0 0", - damping="0.0", - limited="true" if falling else "false", - range=f"{-size_scaling} {size_scaling}", - margin="0.01", - name=f"movable_x_{i}_{j}", - pos="0 0 0", - type="slide", - ) - if struct.can_move_y(): - ET.SubElement( - movable_body, - "joint", - armature="0", - axis="0 1 0", - damping="0.0", - limited="true" if falling else "false", - range=f"{-size_scaling} {size_scaling}", - margin="0.01", - name=f"movable_y_{i}_{j}", - pos="0 0 0", - type="slide", - ) - if struct.can_move_z(): - ET.SubElement( - movable_body, - "joint", - armature="0", - axis="0 0 1", - damping="0.0", - limited="true", - range=f"{-height_offset} 0", - margin="0.01", - name=f"movable_z_{i}_{j}", - pos="0 0 0", - type="slide", - ) - if struct.can_spin(): - ET.SubElement( - movable_body, - "joint", - armature="0", - axis="0 0 1", - damping="0.0", - limited="false", - name=f"spinable_{i}_{j}", - pos="0 0 0", - type="ball", - ) + elif struct.is_object_ball(): + # Movable Ball + self.object_balls.append(f"objball_{i}_{j}") + _add_object_ball(worldbody, i, j, x, y) torso = tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") @@ -231,17 +151,19 @@ class MazeEnv(gym.Env): raise Exception("Every geom of the torso must have a name") # Set goals - asset = tree.find(".//asset") for i, goal in enumerate(self._task.goals): - ET.SubElement(asset, "material", name=f"goal{i}", rgba=goal.rbga_str()) z = goal.pos[2] if goal.dim >= 3 else 0.0 + if goal.custom_size is None: + size = f"{maze_size_scaling * 0.1}" + else: + size = f"{goal.custom_size}" ET.SubElement( worldbody, "site", name=f"goal_site{i}", pos=f"{goal.pos[0]} {goal.pos[1]} {z}", size=f"{maze_size_scaling * 0.1}", - material=f"goal{i}", + rgba=goal.rbga_str(), ) _, file_path = tempfile.mkstemp(text=True, suffix=".xml") @@ -252,7 +174,7 @@ class MazeEnv(gym.Env): @property def has_extended_obs(self) -> bool: - return self._top_down_view or self._observe_blocks + return self._top_down_view or self._observe_blocks or self._observe_balls def get_ori(self) -> float: return self.wrapped_env.get_ori() @@ -367,7 +289,7 @@ class MazeEnv(gym.Env): ) # Draw movable blocks. - for block_name, block_type in self.movable_blocks: + for block_name in self.movable_blocks: block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2] update_view(block_x, block_y, 2) @@ -380,15 +302,18 @@ class MazeEnv(gym.Env): else: view = [] - if self._observe_blocks: - additional_obs = [] - for block_name, block_type in self.movable_blocks: - additional_obs.append(self.wrapped_env.get_body_com(block_name)) - wrapped_obs = np.concatenate( - [wrapped_obs[:3]] + additional_obs + [wrapped_obs[3:]] - ) + additional_obs = [] - return np.concatenate([wrapped_obs, *view, np.array([self.t * 0.001])]) + if self._observe_balls: + for name in self.object_balls: + additional_obs.append(self.wrapped_env.get_body_com(name)) + + if self._observe_blocks: + for name in self.movable_blocks: + additional_obs.append(self.wrapped_env.get_body_com(name)) + + obs = np.concatenate([wrapped_obs[:3]] + additional_obs + [wrapped_obs[3:]]) + return np.concatenate([obs, *view, np.array([self.t * 0.001])]) def reset(self) -> np.ndarray: self.t = 0 @@ -461,3 +386,142 @@ class MazeEnv(gym.Env): def close(self) -> None: self.wrapped_env.close() + + +def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None: + body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",) + ET.SubElement( + body, + "geom", + type="sphere", + name=f"objball_geom_{i}_{j}", + size="1", + pos="0.0 0.0 0.5", + rgba="0.1 0.1 0.7 1", + contype="1", + conaffinity="1", + mass="0.00004", + solimp="0.9 0.99 0.001" + ) + ET.SubElement( + body, + "joint", + name=f"objball_x_{i}_{j}", + axis="1 0 0", + pos="0 0 0.0", + type="slide", + ) + ET.SubElement( + body, + "joint", + name=f"objball_y_{i}_{j}", + axis="0 1 0", + pos="0 0 0", + type="slide", + ) + ET.SubElement( + body, + "joint", + name=f"objball_rot_{i}_{j}", + axis="0 0 1", + pos="0 0 0", + type="hinge", + limited="false", + ) + + +def _add_movable_block( + worldbody: ET.Element, + struct: maze_env_utils.MazeCell, + i: str, + j: str, + size_scaling: float, + x: float, + y: float, + h: float, + height_offset: float, +) -> None: + falling = struct.can_move_z() + if struct.can_spin(): + h *= 0.1 + x += size_scaling * 0.25 + shrink = 0.1 + elif falling: + # The "falling" blocks are shrunk slightly and increased in mass to + # ensure it can fall easily through a gap in the platform blocks. + shrink = 0.99 + elif struct.is_half_block(): + shrink = 0.5 + else: + shrink = 1.0 + size = size_scaling * 0.5 * shrink + movable_body = ET.SubElement( + worldbody, "body", name=f"movable_{i}_{j}", pos=f"{x} {y} {h}", + ) + ET.SubElement( + movable_body, + "geom", + name=f"block_{i}_{j}", + pos="0 0 0", + size=f"{size} {size} {h}", + type="box", + material="", + mass="0.001" if falling else "0.0002", + contype="1", + conaffinity="1", + rgba="0.9 0.1 0.1 1", + ) + if struct.can_move_x(): + ET.SubElement( + movable_body, + "joint", + axis="1 0 0", + name=f"movable_x_{i}_{j}", + armature="0", + damping="0.0", + limited="true" if falling else "false", + range=f"{-size_scaling} {size_scaling}", + margin="0.01", + pos="0 0 0", + type="slide", + ) + if struct.can_move_y(): + ET.SubElement( + movable_body, + "joint", + armature="0", + axis="0 1 0", + damping="0.0", + limited="true" if falling else "false", + range=f"{-size_scaling} {size_scaling}", + margin="0.01", + name=f"movable_y_{i}_{j}", + pos="0 0 0", + type="slide", + ) + if struct.can_move_z(): + ET.SubElement( + movable_body, + "joint", + armature="0", + axis="0 0 1", + damping="0.0", + limited="true", + range=f"{-height_offset} 0", + margin="0.01", + name=f"movable_z_{i}_{j}", + pos="0 0 0", + type="slide", + ) + if struct.can_spin(): + ET.SubElement( + movable_body, + "joint", + armature="0", + axis="0 0 1", + damping="0.0", + limited="false", + name=f"spinable_{i}_{j}", + pos="0 0 0", + type="ball", + ) diff --git a/mujoco_maze/maze_env_utils.py b/mujoco_maze/maze_env_utils.py index 98280ef..348b88c 100644 --- a/mujoco_maze/maze_env_utils.py +++ b/mujoco_maze/maze_env_utils.py @@ -23,15 +23,14 @@ class MazeCell(Enum): EMPTY = 0 BLOCK = 1 CHASM = 2 + OBJECT_BALL = 3 # Moves - X = 11 - Y = 12 - Z = 13 - XY = 14 - XZ = 15 - YZ = 16 - XYZ = 17 - SpinXY = 18 + XY_BLOCK = 14 + XZ_BLOCK = 15 + YZ_BLOCK = 16 + XYZ_BLOCK = 17 + XY_HALF_BLOCK = 18 + SPIN = 19 def is_block(self) -> bool: return self == self.BLOCK @@ -39,6 +38,9 @@ class MazeCell(Enum): def is_chasm(self) -> bool: return self == self.CHASM + def is_object_ball(self) -> bool: + return self == self.OBJECT_BALL + def is_empty(self) -> bool: return self == self.ROBOT or self == self.EMPTY @@ -50,31 +52,34 @@ class MazeCell(Enum): def can_move_x(self) -> bool: return self in [ - self.X, - self.XY, - self.XZ, - self.XYZ, - self.SpinXY, + self.XY_BLOCK, + self.XY_HALF_BLOCK, + self.XZ_BLOCK, + self.XYZ_BLOCK, + self.SPIN, ] - def can_move_y(self): + def can_move_y(self) -> bool: return self in [ - self.Y, - self.XY, - self.YZ, - self.XYZ, - self.SpinXY, + self.XY_BLOCK, + self.XY_HALF_BLOCK, + self.YZ_BLOCK, + self.XYZ_BLOCK, + self.SPIN, ] - def can_move_z(self): - return self in [self.Z, self.XZ, self.YZ, self.XYZ] + def can_move_z(self) -> bool: + return self in [self.XZ_BLOCK, self.YZ_BLOCK, self.XYZ_BLOCK] - def can_spin(self): - return self == self.SpinXY + def can_spin(self) -> bool: + return self == self.SPIN - def can_move(self): + def can_move(self) -> bool: return self.can_move_x() or self.can_move_y() or self.can_move_z() + def is_half_block(self) -> bool: + return self in [self.XY_HALF_BLOCK] + class Line: def __init__( diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py index d1a35ad..c18a219 100644 --- a/mujoco_maze/maze_task.py +++ b/mujoco_maze/maze_task.py @@ -21,23 +21,28 @@ BLUE = Rgb(0.1, 0.1, 0.7) class MazeGoal: - THRESHOLD: float = 0.6 - def __init__( - self, pos: np.ndarray, reward_scale: float = 1.0, rgb: Rgb = RED + self, + pos: np.ndarray, + reward_scale: float = 1.0, + rgb: Rgb = RED, + threshold: float = 0.6, + custom_size: Optional[float] = None ) -> None: assert 0.0 <= reward_scale <= 1.0 self.pos = pos self.dim = pos.shape[0] self.reward_scale = reward_scale self.rgb = rgb + self.threshold = threshold + self.custom_size = custom_size def rbga_str(self) -> str: r, g, b = self.rgb return f"{r} {g} {b} 1" def neighbor(self, obs: np.ndarray) -> float: - return np.linalg.norm(obs[: self.dim] - self.pos) <= self.THRESHOLD + return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold def euc_dist(self, obs: np.ndarray) -> float: return np.sum(np.square(obs[: self.dim] - self.pos)) ** 0.5 @@ -56,6 +61,7 @@ class MazeTask(ABC): INNER_REWARD_SCALING: float = 0.01 TOP_DOWN_VIEW: bool = False OBSERVE_BLOCKS: bool = False + OBSERVE_BALLS: bool = False PUT_SPIN_NEAR_AGENT: bool = False def __init__(self, scale: float) -> None: @@ -145,11 +151,11 @@ class GoalRewardPush(GoalRewardUMaze): @staticmethod def create_maze() -> List[List[MazeCell]]: - E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT + E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY_BLOCK return [ [B, B, B, B, B], [B, E, R, B, B], - [B, E, MazeCell.XY, E, B], + [B, E, M, E, B], [B, B, E, B, B], [B, B, B, B, B], ] @@ -169,10 +175,11 @@ class GoalRewardFall(GoalRewardUMaze): @staticmethod def create_maze() -> List[List[MazeCell]]: E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT + M = MazeCell.YZ_BLOCK return [ [B, B, B, B], [B, R, E, B], - [B, E, MazeCell.YZ, B], + [B, E, M, B], [B, C, C, B], [B, E, E, B], [B, B, B, B], @@ -183,26 +190,6 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn): pass -class GoalRewardFall(GoalRewardUMaze): - TOP_DOWN_VIEW = True - - def __init__(self, scale: float) -> None: - super().__init__(scale) - self.goals = [MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))] - - @staticmethod - def create_maze() -> List[List[MazeCell]]: - E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT - return [ - [B, B, B, B], - [B, R, E, B], - [B, E, MazeCell.YZ, B], - [B, C, C, B], - [B, E, E, B], - [B, B, B, B], - ] - - class GoalReward2Rooms(MazeTask): REWARD_THRESHOLD: float = 0.9 PENALTY: float = -0.0001 @@ -291,13 +278,9 @@ class GoalRewardTRoom(MazeTask): PENALTY: float = -0.0001 MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0) - def __init__( - self, scale: float, goals: List[Tuple[float, float]] = [(2.0, -3.0)], - ) -> None: + def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None: super().__init__(scale) - self.goals = [] - for x, y in goals: - self.goals.append(MazeGoal(np.array([x * scale, y * scale]))) + self.goals = [MazeGoal(np.array(goal) * scale)] def reward(self, obs: np.ndarray) -> float: for goal in self.goals: @@ -331,7 +314,8 @@ class GoalRewardBlockMaze(GoalRewardUMaze): @staticmethod def create_maze() -> List[List[MazeCell]]: - E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY + E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT + M = MazeCell.XY_BLOCK return [ [B, B, B, B, B], [B, R, E, E, B], @@ -346,6 +330,41 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn): pass +class GoalRewardRolling(MazeTask): + REWARD_THRESHOLD: float = 0.9 + PENALTY: float = -0.0001 + MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0) + OBSERVE_BALLS: bool = True + + def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None: + super().__init__(scale) + goal = np.array(goal) * scale + self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)] + + def reward(self, obs: np.ndarray) -> float: + return 1.0 if self.termination(obs) else self.PENALTY + + def termination(self, obs: np.ndarray) -> bool: + return super().termination(obs[3:6]) + + @staticmethod + def create_maze() -> List[List[MazeCell]]: + E, B = MazeCell.EMPTY, MazeCell.BLOCK + R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL + return [ + [B, B, B, B, B], + [B, E, E, E, B], + [B, E, M, E, B], + [B, E, R, E, B], + [B, B, B, B, B], + ] + + +class DistRewardRolling(GoalRewardRolling): + def reward(self, obs: np.ndarray) -> float: + return -self.goals[0].euc_dist(obs[3:6]) / self.scale + + class TaskRegistry: REGISTRY: Dict[str, List[Type[MazeTask]]] = { "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom], @@ -356,6 +375,7 @@ class TaskRegistry: "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms], "TRoom": [DistRewardTRoom, GoalRewardTRoom], "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze], + "Rolling": [DistRewardRolling, GoalRewardRolling], } @staticmethod diff --git a/tests/test_envs.py b/tests/test_envs.py index 79866a0..f3b7eab 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -6,11 +6,13 @@ import mujoco_maze @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) def test_ant_maze(maze_id): + if "Rolling" in maze_id: + return for i in range(2): env = gym.make(f"Ant{maze_id}-v{i}") s0 = env.reset() s, _, _, _ = env.step(env.action_space.sample()) - if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: + if not env.unwrapped.has_extended_obs: assert s0.shape == (30,) assert s.shape == (30,) @@ -21,9 +23,12 @@ def test_point_maze(maze_id): env = gym.make(f"Point{maze_id}-v{i}") s0 = env.reset() s, r, _, _ = env.step(env.action_space.sample()) - if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: + if not env.unwrapped.has_extended_obs: assert s0.shape == (7,) assert s.shape == (7,) + if env.unwrapped._observe_balls: + assert s0.shape == (10,) + assert s.shape == (10,) if i == 0: assert r != 0.0 else: @@ -33,42 +38,35 @@ def test_point_maze(maze_id): @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) def test_reacher_maze(maze_id): - for inhibited in ["Fall", "Push", "Block"]: + for inhibited in ["Fall", "Push", "Block", "Rolling"]: if inhibited in maze_id: return for i in range(2): env = gym.make(f"Reacher{maze_id}-v{i}") s0 = env.reset() s, _, _, _ = env.step(env.action_space.sample()) - if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: + if not env.unwrapped.has_extended_obs: assert s0.shape == (9,) assert s.shape == (9,) @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) def test_swimmer_maze(maze_id): - for inhibited in ["Fall", "Push", "Block"]: + for inhibited in ["Fall", "Push", "Block", "Rolling"]: if inhibited in maze_id: return for i in range(2): env = gym.make(f"Swimmer{maze_id}-v{i}") s0 = env.reset() s, _, _, _ = env.step(env.action_space.sample()) - if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks: + if not env.unwrapped.has_extended_obs: assert s0.shape == (11,) assert s.shape == (11,) @pytest.mark.parametrize("v", [0, 1]) def test_maze_args(v): - env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goals": [(-2.0, -3.0)]}) - assert env.reset().shape == (7,) - s, _, _, _ = env.step(env.action_space.sample()) - assert s.shape == (7,) - - -def test_getting_movable(v): - env = gym.make("PointBlockMaze-v1") + env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goal": (-2.0, -3.0)}) assert env.reset().shape == (7,) s, _, _, _ = env.step(env.action_space.sample()) assert s.shape == (7,)