diff --git a/README.md b/README.md index 244a93f..87c1292 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,8 @@ Some maze environments for reinforcement learning(RL) using [mujoco-py] and [openai gym][gym]. -Thankfully, this project is based on the code from [tensorflow/models][models], [rllab] -and [deep-skill-chaining][dsc]. +Thankfully, this project is based on the code from [rllab], +[tensorflow/models][models], and [deep-skill-chaining][dsc]. ## License This project is licensed under Apache License, Version 2.0 diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py index a50c1f5..9779c0b 100644 --- a/mujoco_maze/__init__.py +++ b/mujoco_maze/__init__.py @@ -5,6 +5,7 @@ MAZE_IDS = ["Maze", "Push", "Fall"] # TODO: Block, BlockMaze def _get_kwargs(maze_id: str) -> tuple: return { + "maze_id": maze_id, "observe_blocks": maze_id in ["Block", "BlockMaze"], "put_spin_near_agent": maze_id in ["Block", "BlockMaze"], } @@ -14,7 +15,7 @@ for maze_id in MAZE_IDS: gym.envs.register( id="Ant{}-v0".format(maze_id), entry_point="mujoco_maze.ant_maze_env:AntMazeEnv", - kwargs=dict(maze_id=maze_id, maze_size_scaling=8, **_get_kwargs(maze_id)), + kwargs=dict(maze_size_scaling=8.0, **_get_kwargs(maze_id)), max_episode_steps=1000, reward_threshold=-1000, ) @@ -23,12 +24,7 @@ for maze_id in MAZE_IDS: gym.envs.register( id="Point{}-v0".format(maze_id), entry_point="mujoco_maze.point_maze_env:PointMazeEnv", - kwargs=dict( - maze_id=maze_id, - maze_size_scaling=4, - manual_collision=True, - **_get_kwargs(maze_id), - ), + kwargs=_get_kwargs(maze_id), max_episode_steps=1000, reward_threshold=-1000, ) diff --git a/mujoco_maze/assets/point.xml b/mujoco_maze/assets/point.xml index 5ad5f87..c382e16 100755 --- a/mujoco_maze/assets/point.xml +++ b/mujoco_maze/assets/point.xml @@ -16,8 +16,8 @@ - - + + diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py index fed6c67..6a83afb 100644 --- a/mujoco_maze/maze_env.py +++ b/mujoco_maze/maze_env.py @@ -15,12 +15,13 @@ """Adapted from rllab maze_env.py.""" -import os -import tempfile -import xml.etree.ElementTree as ET +import itertools as it import math import numpy as np import gym +import os +import tempfile +import xml.etree.ElementTree as ET from typing import Callable, Type, Union @@ -34,22 +35,20 @@ MODEL_DIR = os.path.dirname(os.path.abspath(__file__)) + "/assets" class MazeEnv(gym.Env): MODEL_CLASS: Type[AgentModel] = AgentModel - MAZE_HEIGHT = None - MAZE_SIZE_SCALING = None + MANUAL_COLLISION: bool = False def __init__( self, maze_id=None, - maze_height=0.5, - maze_size_scaling=8, n_bins=0, sensor_range=3.0, sensor_span=2 * math.pi, observe_blocks=False, put_spin_near_agent=False, top_down_view=False, - manual_collision=False, dense_reward=True, + maze_height: float = 0.5, + maze_size_scaling: float = 4.0, goal_sampler: Union[str, np.ndarray, Callable[[], np.ndarray]] = "default", *args, **kwargs, @@ -60,8 +59,8 @@ class MazeEnv(gym.Env): tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") - self.MAZE_HEIGHT = height = maze_height - self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling + self._maze_height = height = maze_height + self._maze_size_scaling = size_scaling = maze_size_scaling self.t = 0 # time steps self._n_bins = n_bins self._sensor_range = sensor_range * size_scaling @@ -69,17 +68,16 @@ class MazeEnv(gym.Env): self._observe_blocks = observe_blocks self._put_spin_near_agent = put_spin_near_agent self._top_down_view = top_down_view - self._manual_collision = manual_collision - self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze( + self._maze_structure = structure = maze_env_utils.construct_maze( maze_id=self._maze_id ) - self.elevated = any( - -1 in row for row in structure - ) # Elevate the maze to allow for falling. + # Elevate the maze to allow for falling. + self.elevated = any(-1 in row for row in structure) + # Are there any movable blocks? self.blocks = any( any(maze_env_utils.can_move(r) for r in row) for row in structure - ) # Are there any movable blocks? + ) torso_x, torso_y = self._find_robot() self._init_torso_x = torso_x @@ -88,13 +86,16 @@ class MazeEnv(gym.Env): (x - torso_x, y - torso_y) for x, y in self._find_all_robots() ] + self._collision = maze_env_utils.Collision( + structure, size_scaling, torso_x, torso_y, + ) + self._xy_to_rowcol = lambda x, y: ( 2 + (y + size_scaling / 2) / size_scaling, 2 + (x + size_scaling / 2) / size_scaling, ) - self._view = np.zeros( - [5, 5, 3] - ) # walls (immovable), chasms (fall), movable blocks + # walls (immovable), chasms (fall), movable blocks + self._view = np.zeros([5, 5, 3]) height_offset = 0.0 if self.elevated: @@ -275,7 +276,7 @@ class MazeEnv(gym.Env): if goal_sampler == "random": self._goal_sampler = lambda: np.random.uniform((-4, -4), (20, 20)) elif goal_sampler == "default": - default_goal = _default_goal(maze_id) + default_goal = _default_goal(maze_id, size_scaling) self._goal_sampler = lambda: default_goal else: raise NotImplementedError(f"Unknown goal_sampler: {goal_sampler}") @@ -357,8 +358,8 @@ class MazeEnv(gym.Env): self._robot_y = robot_y self._robot_ori = self.get_ori() - structure = self.MAZE_STRUCTURE - size_scaling = self.MAZE_SIZE_SCALING + structure = self._maze_structure + size_scaling = self._maze_size_scaling # Draw immovable blocks and chasms. for i in range(len(structure)): @@ -388,9 +389,9 @@ class MazeEnv(gym.Env): robot_x, robot_y, robot_z = self.wrapped_env.get_body_com("torso")[:3] ori = self.get_ori() - structure = self.MAZE_STRUCTURE - size_scaling = self.MAZE_SIZE_SCALING - height = self.MAZE_HEIGHT + structure = self._maze_structure + size_scaling = self._maze_size_scaling + height = self._maze_height segments = [] # Get line segments (corresponding to outer boundary) of each immovable @@ -523,49 +524,28 @@ class MazeEnv(gym.Env): return self.wrapped_env.action_space def _find_robot(self): - structure = self.MAZE_STRUCTURE - size_scaling = self.MAZE_SIZE_SCALING - for i in range(len(structure)): - for j in range(len(structure[0])): - if structure[i][j] == "r": - return j * size_scaling, i * size_scaling - assert False, "No robot in maze specification." + structure = self._maze_structure + size_scaling = self._maze_size_scaling + for i, j in it.product(range(len(structure)), range(len(structure[0]))): + if structure[i][j] == "r": + return j * size_scaling, i * size_scaling + raise ValueError("No robot in maze specification.") def _find_all_robots(self): - structure = self.MAZE_STRUCTURE - size_scaling = self.MAZE_SIZE_SCALING + structure = self._maze_structure + size_scaling = self._maze_size_scaling coords = [] - for i in range(len(structure)): - for j in range(len(structure[0])): - if structure[i][j] == "r": - coords.append((j * size_scaling, i * size_scaling)) + for i, j in it.product(range(len(structure)), range(len(structure[0]))): + if structure[i][j] == "r": + coords.append((j * size_scaling, i * size_scaling)) return coords - def _is_in_collision(self, pos): - x, y = pos - structure = self.MAZE_STRUCTURE - size_scaling = self.MAZE_SIZE_SCALING - for i in range(len(structure)): - for j in range(len(structure[0])): - if structure[i][j] == 1: - minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x - maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x - miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y - maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y - if minx <= x <= maxx and miny <= y <= maxy: - return True - return False - - def _is_in_goal(self, pos): - (np.linalg.norm(obs[:3] - goal) <= 0.6) - def step(self, action): self.t += 1 - if self._manual_collision: + if self.MANUAL_COLLISION: old_pos = self.wrapped_env.get_xy() inner_next_obs, inner_reward, _, info = self.wrapped_env.step(action) - new_pos = self.wrapped_env.get_xy() - if self._is_in_collision(new_pos): + if self._collision.is_in(self.wrapped_env.get_xy()): self.wrapped_env.set_xy(old_pos) else: inner_next_obs, inner_reward, _, info = self.wrapped_env.step(action) @@ -601,12 +581,12 @@ def _reward_fn(maze_id: str, dense: str) -> callable: raise NotImplementedError(f"Unknown maze id: {maze_id}") -def _default_goal(maze_id: str) -> np.ndarray: +def _default_goal(maze_id: str, scale: float) -> np.ndarray: if maze_id == "Maze" or maze_id == "BlockMaze": - return np.array([0.0, 16.0]) + return np.array([0.0, 2.0 * scale]) elif maze_id == "Push": - return np.array([0.0, 19.0]) + return np.array([0.0, 2.375 * scale]) elif maze_id == "Fall": - return np.array([0.0, 27.0, 4.5]) + return np.array([0.0, 3.375 * scale, 4.5]) else: raise NotImplementedError(f"Unknown maze id: {maze_id}") diff --git a/mujoco_maze/maze_env_utils.py b/mujoco_maze/maze_env_utils.py index 5a7667f..91873e4 100644 --- a/mujoco_maze/maze_env_utils.py +++ b/mujoco_maze/maze_env_utils.py @@ -14,10 +14,12 @@ # ============================================================================== """Adapted from rllab maze_env_utils.py.""" +import itertools as it import math +import numpy as np -class Move(object): +class Move: X = 11 Y = 12 Z = 13 @@ -49,10 +51,11 @@ def can_move(movable): def construct_maze(maze_id="Maze"): + R = "r" if maze_id == "Maze": structure = [ [1, 1, 1, 1, 1], - [1, "r", 0, 0, 1], + [1, R, 0, 0, 1], [1, 1, 1, 0, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1], @@ -60,7 +63,7 @@ def construct_maze(maze_id="Maze"): elif maze_id == "Push": structure = [ [1, 1, 1, 1, 1], - [1, 0, "r", 1, 1], + [1, 0, R, 1, 1], [1, 0, Move.XY, 0, 1], [1, 1, 0, 1, 1], [1, 1, 1, 1, 1], @@ -68,26 +71,24 @@ def construct_maze(maze_id="Maze"): elif maze_id == "Fall": structure = [ [1, 1, 1, 1], - [1, "r", 0, 1], + [1, R, 0, 1], [1, 0, Move.YZ, 1], [1, -1, -1, 1], [1, 0, 0, 1], [1, 1, 1, 1], ] elif maze_id == "Block": - O = "r" structure = [ [1, 1, 1, 1, 1], - [1, O, 0, 0, 1], + [1, R, 0, 0, 1], [1, 0, 0, 0, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1], ] elif maze_id == "BlockMaze": - O = "r" structure = [ [1, 1, 1, 1], - [1, O, 0, 1], + [1, R, 0, 1], [1, 1, 0, 1], [1, 0, 0, 1], [1, 1, 1, 1], @@ -98,12 +99,54 @@ def construct_maze(maze_id="Maze"): return structure +class Collision: + """For manual collision detection. + """ + + ARROUND = np.array([[-1, 0], [1, 0], [0, -1], [0, 1]]) + OFFSET = {False: 0.45, True: 0.5} + + def __init__( + self, structure: list, size_scaling: float, torso_x: float, torso_y: float, + ) -> None: + h, w = len(structure), len(structure[0]) + self.objects = [] + + def is_block(pos) -> bool: + i, j = pos + if 0 <= i < h and 0 <= j < w: + return structure[i][j] == 1 + else: + return False + + def offset(pos, index) -> float: + return self.OFFSET[is_block(pos + self.ARROUND[index])] + + for i, j in it.product(range(len(structure)), range(len(structure[0]))): + if structure[i][j] != 1: + continue + pos = np.array([i, j]) + y_base = i * size_scaling - torso_y + min_y = y_base - size_scaling * offset(pos, 0) + max_y = y_base + size_scaling * offset(pos, 1) + x_base = j * size_scaling - torso_x + min_x = x_base - size_scaling * offset(pos, 2) + max_x = x_base + size_scaling * offset(pos, 3) + self.objects.append((min_y, max_y, min_x, max_x)) + + def is_in(self, pos) -> bool: + x, y = pos + for min_y, max_y, min_x, max_x in self.objects: + if min_x <= x <= max_x and min_y <= y <= max_y: + return True + return False + + def line_intersect(pt1, pt2, ptA, ptB): """ - Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html - - this returns the intersection of Line(pt1,pt2) and Line(ptA,ptB) - """ + Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html + Returns the intersection of Line(pt1,pt2) and Line(ptA,ptB). + """ DET_TOLERANCE = 0.00000001 @@ -142,14 +185,13 @@ def line_intersect(pt1, pt2, ptA, ptB): def ray_segment_intersect(ray, segment): """ - Check if the ray originated from (x, y) with direction theta intersects the line segment (x1, y1) -- (x2, y2), - and return the intersection point if there is one - """ + Check if the ray originated from (x, y) with direction theta intersect the line + segment (x1, y1) -- (x2, y2), and return the intersection point if there is one. + """ (x, y), theta = ray # (x1, y1), (x2, y2) = segment pt1 = (x, y) - len = 1 - pt2 = (x + len * math.cos(theta), y + len * math.sin(theta)) + pt2 = (x + math.cos(theta), y + math.sin(theta)) xo, yo, valid, r, s = line_intersect(pt1, pt2, *segment) if valid and r >= 0 and 0 <= s <= 1: return (xo, yo) diff --git a/mujoco_maze/point.py b/mujoco_maze/point.py index 7deb23a..14a8cb7 100644 --- a/mujoco_maze/point.py +++ b/mujoco_maze/point.py @@ -79,7 +79,7 @@ class PointEnv(AgentModel): def get_xy(self): qpos = self.sim.data.qpos - return qpos[0], qpos[0] + return qpos[0], qpos[1] def set_xy(self, xy): qpos = np.copy(self.sim.data.qpos) diff --git a/mujoco_maze/point_maze_env.py b/mujoco_maze/point_maze_env.py index a7706b8..6d92cf8 100644 --- a/mujoco_maze/point_maze_env.py +++ b/mujoco_maze/point_maze_env.py @@ -19,3 +19,4 @@ from mujoco_maze.point import PointEnv class PointMazeEnv(MazeEnv): MODEL_CLASS = PointEnv + MANUAL_COLLISION = True diff --git a/tests/test_envs.py b/tests/test_envs.py index f630385..209ab24 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -5,11 +5,11 @@ import pytest @pytest.mark.parametrize("maze_id", mujoco_maze.MAZE_IDS) def test_ant_maze(maze_id): - env = gym.make("AntMaze{}-v0".format(maze_id)) + env = gym.make("Ant{}-v0".format(maze_id)) assert env.reset().shape == (30,) @pytest.mark.parametrize("maze_id", mujoco_maze.MAZE_IDS) def test_point_maze(maze_id): - env = gym.make("PointMaze{}-v0".format(maze_id)) + env = gym.make("Point{}-v0".format(maze_id)) assert env.reset().shape == (7,)