Rolling
This commit is contained in:
parent
720f535682
commit
ee440ca636
@ -16,19 +16,6 @@ from mujoco_maze.swimmer import SwimmerEnv
|
|||||||
|
|
||||||
for maze_id in TaskRegistry.keys():
|
for maze_id in TaskRegistry.keys():
|
||||||
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
|
||||||
# Ant
|
|
||||||
gym.envs.register(
|
|
||||||
id=f"Ant{maze_id}-v{i}",
|
|
||||||
entry_point="mujoco_maze.maze_env:MazeEnv",
|
|
||||||
kwargs=dict(
|
|
||||||
model_cls=AntEnv,
|
|
||||||
maze_task=task_cls,
|
|
||||||
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
|
|
||||||
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
|
||||||
),
|
|
||||||
max_episode_steps=1000,
|
|
||||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
|
||||||
)
|
|
||||||
# Point
|
# Point
|
||||||
gym.envs.register(
|
gym.envs.register(
|
||||||
id=f"Point{maze_id}-v{i}",
|
id=f"Point{maze_id}-v{i}",
|
||||||
@ -42,6 +29,21 @@ for maze_id in TaskRegistry.keys():
|
|||||||
max_episode_steps=1000,
|
max_episode_steps=1000,
|
||||||
reward_threshold=task_cls.REWARD_THRESHOLD,
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
)
|
)
|
||||||
|
if "Rolling" in maze_id:
|
||||||
|
continue
|
||||||
|
# Ant
|
||||||
|
gym.envs.register(
|
||||||
|
id=f"Ant{maze_id}-v{i}",
|
||||||
|
entry_point="mujoco_maze.maze_env:MazeEnv",
|
||||||
|
kwargs=dict(
|
||||||
|
model_cls=AntEnv,
|
||||||
|
maze_task=task_cls,
|
||||||
|
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
|
||||||
|
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
|
||||||
|
),
|
||||||
|
max_episode_steps=1000,
|
||||||
|
reward_threshold=task_cls.REWARD_THRESHOLD,
|
||||||
|
)
|
||||||
skip_swimmer = False
|
skip_swimmer = False
|
||||||
for inhibited in ["Fall", "Push", "Block"]:
|
for inhibited in ["Fall", "Push", "Block"]:
|
||||||
if inhibited in maze_id:
|
if inhibited in maze_id:
|
||||||
|
@ -48,6 +48,8 @@ class MazeEnv(gym.Env):
|
|||||||
self.t = 0 # time steps
|
self.t = 0 # time steps
|
||||||
self._observe_blocks = self._task.OBSERVE_BLOCKS
|
self._observe_blocks = self._task.OBSERVE_BLOCKS
|
||||||
self._put_spin_near_agent = self._task.PUT_SPIN_NEAR_AGENT
|
self._put_spin_near_agent = self._task.PUT_SPIN_NEAR_AGENT
|
||||||
|
# Observe other objectives
|
||||||
|
self._observe_balls = self._task.OBSERVE_BALLS
|
||||||
self._top_down_view = self._task.TOP_DOWN_VIEW
|
self._top_down_view = self._task.TOP_DOWN_VIEW
|
||||||
self._restitution_coef = restitution_coef
|
self._restitution_coef = restitution_coef
|
||||||
|
|
||||||
@ -93,17 +95,17 @@ class MazeEnv(gym.Env):
|
|||||||
default.find(".//geom").set("solimp", ".995 .995 .01")
|
default.find(".//geom").set("solimp", ".995 .995 .01")
|
||||||
|
|
||||||
self.movable_blocks = []
|
self.movable_blocks = []
|
||||||
|
self.object_balls = []
|
||||||
for i in range(len(structure)):
|
for i in range(len(structure)):
|
||||||
for j in range(len(structure[0])):
|
for j in range(len(structure[0])):
|
||||||
struct = structure[i][j]
|
struct = structure[i][j]
|
||||||
if struct.is_robot() and self._put_spin_near_agent:
|
if struct.is_robot() and self._put_spin_near_agent:
|
||||||
struct = maze_env_utils.MazeCell.SpinXY
|
struct = maze_env_utils.MazeCell.SPIN
|
||||||
|
x, y = j * size_scaling - torso_x, i * size_scaling - torso_y
|
||||||
|
h = height / 2 * size_scaling
|
||||||
|
size = size_scaling * 0.5
|
||||||
if self.elevated and not struct.is_chasm():
|
if self.elevated and not struct.is_chasm():
|
||||||
# Create elevated platform.
|
# Create elevated platform.
|
||||||
x = j * size_scaling - torso_x
|
|
||||||
y = i * size_scaling - torso_y
|
|
||||||
h = height / 2 * size_scaling
|
|
||||||
size = 0.5 * size_scaling
|
|
||||||
ET.SubElement(
|
ET.SubElement(
|
||||||
worldbody,
|
worldbody,
|
||||||
"geom",
|
"geom",
|
||||||
@ -119,10 +121,6 @@ class MazeEnv(gym.Env):
|
|||||||
if struct.is_block():
|
if struct.is_block():
|
||||||
# Unmovable block.
|
# Unmovable block.
|
||||||
# Offset all coordinates so that robot starts at the origin.
|
# Offset all coordinates so that robot starts at the origin.
|
||||||
x = j * size_scaling - torso_x
|
|
||||||
y = i * size_scaling - torso_y
|
|
||||||
h = height / 2 * size_scaling
|
|
||||||
size = 0.5 * size_scaling
|
|
||||||
ET.SubElement(
|
ET.SubElement(
|
||||||
worldbody,
|
worldbody,
|
||||||
"geom",
|
"geom",
|
||||||
@ -137,92 +135,14 @@ class MazeEnv(gym.Env):
|
|||||||
)
|
)
|
||||||
elif struct.can_move():
|
elif struct.can_move():
|
||||||
# Movable block.
|
# Movable block.
|
||||||
# The "falling" blocks are shrunk slightly and increased in mass to
|
self.movable_blocks.append(f"movable_{i}_{j}")
|
||||||
# ensure it can fall easily through a gap in the platform blocks.
|
_add_movable_block(
|
||||||
name = "movable_%d_%d" % (i, j)
|
worldbody, struct, i, j, size_scaling, x, y, h, height_offset,
|
||||||
self.movable_blocks.append((name, struct))
|
|
||||||
falling = struct.can_move_z()
|
|
||||||
spinning = struct.can_spin()
|
|
||||||
shrink = 0.1 if spinning else 0.99 if falling else 1.0
|
|
||||||
height_shrink = 0.1 if spinning else 1.0
|
|
||||||
x_offset = 0.25 * size_scaling if spinning else 0.0
|
|
||||||
x = j * size_scaling - torso_x + x_offset
|
|
||||||
y = i * size_scaling - torso_y
|
|
||||||
h = height / 2 * size_scaling * height_shrink
|
|
||||||
size = 0.5 * size_scaling * shrink
|
|
||||||
movable_body = ET.SubElement(
|
|
||||||
worldbody,
|
|
||||||
"body",
|
|
||||||
name=name,
|
|
||||||
pos=f"{x} {y} {height_offset + h}",
|
|
||||||
)
|
)
|
||||||
ET.SubElement(
|
elif struct.is_object_ball():
|
||||||
movable_body,
|
# Movable Ball
|
||||||
"geom",
|
self.object_balls.append(f"objball_{i}_{j}")
|
||||||
name=f"block_{i}_{j}",
|
_add_object_ball(worldbody, i, j, x, y)
|
||||||
pos="0 0 0",
|
|
||||||
size=f"{size} {size} {h}",
|
|
||||||
type="box",
|
|
||||||
material="",
|
|
||||||
mass="0.001" if falling else "0.0002",
|
|
||||||
contype="1",
|
|
||||||
conaffinity="1",
|
|
||||||
rgba="0.9 0.1 0.1 1",
|
|
||||||
)
|
|
||||||
if struct.can_move_x():
|
|
||||||
ET.SubElement(
|
|
||||||
movable_body,
|
|
||||||
"joint",
|
|
||||||
armature="0",
|
|
||||||
axis="1 0 0",
|
|
||||||
damping="0.0",
|
|
||||||
limited="true" if falling else "false",
|
|
||||||
range=f"{-size_scaling} {size_scaling}",
|
|
||||||
margin="0.01",
|
|
||||||
name=f"movable_x_{i}_{j}",
|
|
||||||
pos="0 0 0",
|
|
||||||
type="slide",
|
|
||||||
)
|
|
||||||
if struct.can_move_y():
|
|
||||||
ET.SubElement(
|
|
||||||
movable_body,
|
|
||||||
"joint",
|
|
||||||
armature="0",
|
|
||||||
axis="0 1 0",
|
|
||||||
damping="0.0",
|
|
||||||
limited="true" if falling else "false",
|
|
||||||
range=f"{-size_scaling} {size_scaling}",
|
|
||||||
margin="0.01",
|
|
||||||
name=f"movable_y_{i}_{j}",
|
|
||||||
pos="0 0 0",
|
|
||||||
type="slide",
|
|
||||||
)
|
|
||||||
if struct.can_move_z():
|
|
||||||
ET.SubElement(
|
|
||||||
movable_body,
|
|
||||||
"joint",
|
|
||||||
armature="0",
|
|
||||||
axis="0 0 1",
|
|
||||||
damping="0.0",
|
|
||||||
limited="true",
|
|
||||||
range=f"{-height_offset} 0",
|
|
||||||
margin="0.01",
|
|
||||||
name=f"movable_z_{i}_{j}",
|
|
||||||
pos="0 0 0",
|
|
||||||
type="slide",
|
|
||||||
)
|
|
||||||
if struct.can_spin():
|
|
||||||
ET.SubElement(
|
|
||||||
movable_body,
|
|
||||||
"joint",
|
|
||||||
armature="0",
|
|
||||||
axis="0 0 1",
|
|
||||||
damping="0.0",
|
|
||||||
limited="false",
|
|
||||||
name=f"spinable_{i}_{j}",
|
|
||||||
pos="0 0 0",
|
|
||||||
type="ball",
|
|
||||||
)
|
|
||||||
|
|
||||||
torso = tree.find(".//body[@name='torso']")
|
torso = tree.find(".//body[@name='torso']")
|
||||||
geoms = torso.findall(".//geom")
|
geoms = torso.findall(".//geom")
|
||||||
@ -231,17 +151,19 @@ class MazeEnv(gym.Env):
|
|||||||
raise Exception("Every geom of the torso must have a name")
|
raise Exception("Every geom of the torso must have a name")
|
||||||
|
|
||||||
# Set goals
|
# Set goals
|
||||||
asset = tree.find(".//asset")
|
|
||||||
for i, goal in enumerate(self._task.goals):
|
for i, goal in enumerate(self._task.goals):
|
||||||
ET.SubElement(asset, "material", name=f"goal{i}", rgba=goal.rbga_str())
|
|
||||||
z = goal.pos[2] if goal.dim >= 3 else 0.0
|
z = goal.pos[2] if goal.dim >= 3 else 0.0
|
||||||
|
if goal.custom_size is None:
|
||||||
|
size = f"{maze_size_scaling * 0.1}"
|
||||||
|
else:
|
||||||
|
size = f"{goal.custom_size}"
|
||||||
ET.SubElement(
|
ET.SubElement(
|
||||||
worldbody,
|
worldbody,
|
||||||
"site",
|
"site",
|
||||||
name=f"goal_site{i}",
|
name=f"goal_site{i}",
|
||||||
pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
|
pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
|
||||||
size=f"{maze_size_scaling * 0.1}",
|
size=f"{maze_size_scaling * 0.1}",
|
||||||
material=f"goal{i}",
|
rgba=goal.rbga_str(),
|
||||||
)
|
)
|
||||||
|
|
||||||
_, file_path = tempfile.mkstemp(text=True, suffix=".xml")
|
_, file_path = tempfile.mkstemp(text=True, suffix=".xml")
|
||||||
@ -252,7 +174,7 @@ class MazeEnv(gym.Env):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def has_extended_obs(self) -> bool:
|
def has_extended_obs(self) -> bool:
|
||||||
return self._top_down_view or self._observe_blocks
|
return self._top_down_view or self._observe_blocks or self._observe_balls
|
||||||
|
|
||||||
def get_ori(self) -> float:
|
def get_ori(self) -> float:
|
||||||
return self.wrapped_env.get_ori()
|
return self.wrapped_env.get_ori()
|
||||||
@ -367,7 +289,7 @@ class MazeEnv(gym.Env):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Draw movable blocks.
|
# Draw movable blocks.
|
||||||
for block_name, block_type in self.movable_blocks:
|
for block_name in self.movable_blocks:
|
||||||
block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2]
|
block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2]
|
||||||
update_view(block_x, block_y, 2)
|
update_view(block_x, block_y, 2)
|
||||||
|
|
||||||
@ -380,15 +302,18 @@ class MazeEnv(gym.Env):
|
|||||||
else:
|
else:
|
||||||
view = []
|
view = []
|
||||||
|
|
||||||
if self._observe_blocks:
|
additional_obs = []
|
||||||
additional_obs = []
|
|
||||||
for block_name, block_type in self.movable_blocks:
|
|
||||||
additional_obs.append(self.wrapped_env.get_body_com(block_name))
|
|
||||||
wrapped_obs = np.concatenate(
|
|
||||||
[wrapped_obs[:3]] + additional_obs + [wrapped_obs[3:]]
|
|
||||||
)
|
|
||||||
|
|
||||||
return np.concatenate([wrapped_obs, *view, np.array([self.t * 0.001])])
|
if self._observe_balls:
|
||||||
|
for name in self.object_balls:
|
||||||
|
additional_obs.append(self.wrapped_env.get_body_com(name))
|
||||||
|
|
||||||
|
if self._observe_blocks:
|
||||||
|
for name in self.movable_blocks:
|
||||||
|
additional_obs.append(self.wrapped_env.get_body_com(name))
|
||||||
|
|
||||||
|
obs = np.concatenate([wrapped_obs[:3]] + additional_obs + [wrapped_obs[3:]])
|
||||||
|
return np.concatenate([obs, *view, np.array([self.t * 0.001])])
|
||||||
|
|
||||||
def reset(self) -> np.ndarray:
|
def reset(self) -> np.ndarray:
|
||||||
self.t = 0
|
self.t = 0
|
||||||
@ -461,3 +386,142 @@ class MazeEnv(gym.Env):
|
|||||||
|
|
||||||
def close(self) -> None:
|
def close(self) -> None:
|
||||||
self.wrapped_env.close()
|
self.wrapped_env.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None:
|
||||||
|
body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",)
|
||||||
|
ET.SubElement(
|
||||||
|
body,
|
||||||
|
"geom",
|
||||||
|
type="sphere",
|
||||||
|
name=f"objball_geom_{i}_{j}",
|
||||||
|
size="1",
|
||||||
|
pos="0.0 0.0 0.5",
|
||||||
|
rgba="0.1 0.1 0.7 1",
|
||||||
|
contype="1",
|
||||||
|
conaffinity="1",
|
||||||
|
mass="0.00004",
|
||||||
|
solimp="0.9 0.99 0.001"
|
||||||
|
)
|
||||||
|
ET.SubElement(
|
||||||
|
body,
|
||||||
|
"joint",
|
||||||
|
name=f"objball_x_{i}_{j}",
|
||||||
|
axis="1 0 0",
|
||||||
|
pos="0 0 0.0",
|
||||||
|
type="slide",
|
||||||
|
)
|
||||||
|
ET.SubElement(
|
||||||
|
body,
|
||||||
|
"joint",
|
||||||
|
name=f"objball_y_{i}_{j}",
|
||||||
|
axis="0 1 0",
|
||||||
|
pos="0 0 0",
|
||||||
|
type="slide",
|
||||||
|
)
|
||||||
|
ET.SubElement(
|
||||||
|
body,
|
||||||
|
"joint",
|
||||||
|
name=f"objball_rot_{i}_{j}",
|
||||||
|
axis="0 0 1",
|
||||||
|
pos="0 0 0",
|
||||||
|
type="hinge",
|
||||||
|
limited="false",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_movable_block(
|
||||||
|
worldbody: ET.Element,
|
||||||
|
struct: maze_env_utils.MazeCell,
|
||||||
|
i: str,
|
||||||
|
j: str,
|
||||||
|
size_scaling: float,
|
||||||
|
x: float,
|
||||||
|
y: float,
|
||||||
|
h: float,
|
||||||
|
height_offset: float,
|
||||||
|
) -> None:
|
||||||
|
falling = struct.can_move_z()
|
||||||
|
if struct.can_spin():
|
||||||
|
h *= 0.1
|
||||||
|
x += size_scaling * 0.25
|
||||||
|
shrink = 0.1
|
||||||
|
elif falling:
|
||||||
|
# The "falling" blocks are shrunk slightly and increased in mass to
|
||||||
|
# ensure it can fall easily through a gap in the platform blocks.
|
||||||
|
shrink = 0.99
|
||||||
|
elif struct.is_half_block():
|
||||||
|
shrink = 0.5
|
||||||
|
else:
|
||||||
|
shrink = 1.0
|
||||||
|
size = size_scaling * 0.5 * shrink
|
||||||
|
movable_body = ET.SubElement(
|
||||||
|
worldbody, "body", name=f"movable_{i}_{j}", pos=f"{x} {y} {h}",
|
||||||
|
)
|
||||||
|
ET.SubElement(
|
||||||
|
movable_body,
|
||||||
|
"geom",
|
||||||
|
name=f"block_{i}_{j}",
|
||||||
|
pos="0 0 0",
|
||||||
|
size=f"{size} {size} {h}",
|
||||||
|
type="box",
|
||||||
|
material="",
|
||||||
|
mass="0.001" if falling else "0.0002",
|
||||||
|
contype="1",
|
||||||
|
conaffinity="1",
|
||||||
|
rgba="0.9 0.1 0.1 1",
|
||||||
|
)
|
||||||
|
if struct.can_move_x():
|
||||||
|
ET.SubElement(
|
||||||
|
movable_body,
|
||||||
|
"joint",
|
||||||
|
axis="1 0 0",
|
||||||
|
name=f"movable_x_{i}_{j}",
|
||||||
|
armature="0",
|
||||||
|
damping="0.0",
|
||||||
|
limited="true" if falling else "false",
|
||||||
|
range=f"{-size_scaling} {size_scaling}",
|
||||||
|
margin="0.01",
|
||||||
|
pos="0 0 0",
|
||||||
|
type="slide",
|
||||||
|
)
|
||||||
|
if struct.can_move_y():
|
||||||
|
ET.SubElement(
|
||||||
|
movable_body,
|
||||||
|
"joint",
|
||||||
|
armature="0",
|
||||||
|
axis="0 1 0",
|
||||||
|
damping="0.0",
|
||||||
|
limited="true" if falling else "false",
|
||||||
|
range=f"{-size_scaling} {size_scaling}",
|
||||||
|
margin="0.01",
|
||||||
|
name=f"movable_y_{i}_{j}",
|
||||||
|
pos="0 0 0",
|
||||||
|
type="slide",
|
||||||
|
)
|
||||||
|
if struct.can_move_z():
|
||||||
|
ET.SubElement(
|
||||||
|
movable_body,
|
||||||
|
"joint",
|
||||||
|
armature="0",
|
||||||
|
axis="0 0 1",
|
||||||
|
damping="0.0",
|
||||||
|
limited="true",
|
||||||
|
range=f"{-height_offset} 0",
|
||||||
|
margin="0.01",
|
||||||
|
name=f"movable_z_{i}_{j}",
|
||||||
|
pos="0 0 0",
|
||||||
|
type="slide",
|
||||||
|
)
|
||||||
|
if struct.can_spin():
|
||||||
|
ET.SubElement(
|
||||||
|
movable_body,
|
||||||
|
"joint",
|
||||||
|
armature="0",
|
||||||
|
axis="0 0 1",
|
||||||
|
damping="0.0",
|
||||||
|
limited="false",
|
||||||
|
name=f"spinable_{i}_{j}",
|
||||||
|
pos="0 0 0",
|
||||||
|
type="ball",
|
||||||
|
)
|
||||||
|
@ -23,15 +23,14 @@ class MazeCell(Enum):
|
|||||||
EMPTY = 0
|
EMPTY = 0
|
||||||
BLOCK = 1
|
BLOCK = 1
|
||||||
CHASM = 2
|
CHASM = 2
|
||||||
|
OBJECT_BALL = 3
|
||||||
# Moves
|
# Moves
|
||||||
X = 11
|
XY_BLOCK = 14
|
||||||
Y = 12
|
XZ_BLOCK = 15
|
||||||
Z = 13
|
YZ_BLOCK = 16
|
||||||
XY = 14
|
XYZ_BLOCK = 17
|
||||||
XZ = 15
|
XY_HALF_BLOCK = 18
|
||||||
YZ = 16
|
SPIN = 19
|
||||||
XYZ = 17
|
|
||||||
SpinXY = 18
|
|
||||||
|
|
||||||
def is_block(self) -> bool:
|
def is_block(self) -> bool:
|
||||||
return self == self.BLOCK
|
return self == self.BLOCK
|
||||||
@ -39,6 +38,9 @@ class MazeCell(Enum):
|
|||||||
def is_chasm(self) -> bool:
|
def is_chasm(self) -> bool:
|
||||||
return self == self.CHASM
|
return self == self.CHASM
|
||||||
|
|
||||||
|
def is_object_ball(self) -> bool:
|
||||||
|
return self == self.OBJECT_BALL
|
||||||
|
|
||||||
def is_empty(self) -> bool:
|
def is_empty(self) -> bool:
|
||||||
return self == self.ROBOT or self == self.EMPTY
|
return self == self.ROBOT or self == self.EMPTY
|
||||||
|
|
||||||
@ -50,31 +52,34 @@ class MazeCell(Enum):
|
|||||||
|
|
||||||
def can_move_x(self) -> bool:
|
def can_move_x(self) -> bool:
|
||||||
return self in [
|
return self in [
|
||||||
self.X,
|
self.XY_BLOCK,
|
||||||
self.XY,
|
self.XY_HALF_BLOCK,
|
||||||
self.XZ,
|
self.XZ_BLOCK,
|
||||||
self.XYZ,
|
self.XYZ_BLOCK,
|
||||||
self.SpinXY,
|
self.SPIN,
|
||||||
]
|
]
|
||||||
|
|
||||||
def can_move_y(self):
|
def can_move_y(self) -> bool:
|
||||||
return self in [
|
return self in [
|
||||||
self.Y,
|
self.XY_BLOCK,
|
||||||
self.XY,
|
self.XY_HALF_BLOCK,
|
||||||
self.YZ,
|
self.YZ_BLOCK,
|
||||||
self.XYZ,
|
self.XYZ_BLOCK,
|
||||||
self.SpinXY,
|
self.SPIN,
|
||||||
]
|
]
|
||||||
|
|
||||||
def can_move_z(self):
|
def can_move_z(self) -> bool:
|
||||||
return self in [self.Z, self.XZ, self.YZ, self.XYZ]
|
return self in [self.XZ_BLOCK, self.YZ_BLOCK, self.XYZ_BLOCK]
|
||||||
|
|
||||||
def can_spin(self):
|
def can_spin(self) -> bool:
|
||||||
return self == self.SpinXY
|
return self == self.SPIN
|
||||||
|
|
||||||
def can_move(self):
|
def can_move(self) -> bool:
|
||||||
return self.can_move_x() or self.can_move_y() or self.can_move_z()
|
return self.can_move_x() or self.can_move_y() or self.can_move_z()
|
||||||
|
|
||||||
|
def is_half_block(self) -> bool:
|
||||||
|
return self in [self.XY_HALF_BLOCK]
|
||||||
|
|
||||||
|
|
||||||
class Line:
|
class Line:
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -21,23 +21,28 @@ BLUE = Rgb(0.1, 0.1, 0.7)
|
|||||||
|
|
||||||
|
|
||||||
class MazeGoal:
|
class MazeGoal:
|
||||||
THRESHOLD: float = 0.6
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, pos: np.ndarray, reward_scale: float = 1.0, rgb: Rgb = RED
|
self,
|
||||||
|
pos: np.ndarray,
|
||||||
|
reward_scale: float = 1.0,
|
||||||
|
rgb: Rgb = RED,
|
||||||
|
threshold: float = 0.6,
|
||||||
|
custom_size: Optional[float] = None
|
||||||
) -> None:
|
) -> None:
|
||||||
assert 0.0 <= reward_scale <= 1.0
|
assert 0.0 <= reward_scale <= 1.0
|
||||||
self.pos = pos
|
self.pos = pos
|
||||||
self.dim = pos.shape[0]
|
self.dim = pos.shape[0]
|
||||||
self.reward_scale = reward_scale
|
self.reward_scale = reward_scale
|
||||||
self.rgb = rgb
|
self.rgb = rgb
|
||||||
|
self.threshold = threshold
|
||||||
|
self.custom_size = custom_size
|
||||||
|
|
||||||
def rbga_str(self) -> str:
|
def rbga_str(self) -> str:
|
||||||
r, g, b = self.rgb
|
r, g, b = self.rgb
|
||||||
return f"{r} {g} {b} 1"
|
return f"{r} {g} {b} 1"
|
||||||
|
|
||||||
def neighbor(self, obs: np.ndarray) -> float:
|
def neighbor(self, obs: np.ndarray) -> float:
|
||||||
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.THRESHOLD
|
return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
|
||||||
|
|
||||||
def euc_dist(self, obs: np.ndarray) -> float:
|
def euc_dist(self, obs: np.ndarray) -> float:
|
||||||
return np.sum(np.square(obs[: self.dim] - self.pos)) ** 0.5
|
return np.sum(np.square(obs[: self.dim] - self.pos)) ** 0.5
|
||||||
@ -56,6 +61,7 @@ class MazeTask(ABC):
|
|||||||
INNER_REWARD_SCALING: float = 0.01
|
INNER_REWARD_SCALING: float = 0.01
|
||||||
TOP_DOWN_VIEW: bool = False
|
TOP_DOWN_VIEW: bool = False
|
||||||
OBSERVE_BLOCKS: bool = False
|
OBSERVE_BLOCKS: bool = False
|
||||||
|
OBSERVE_BALLS: bool = False
|
||||||
PUT_SPIN_NEAR_AGENT: bool = False
|
PUT_SPIN_NEAR_AGENT: bool = False
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
@ -145,11 +151,11 @@ class GoalRewardPush(GoalRewardUMaze):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_maze() -> List[List[MazeCell]]:
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
|
E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY_BLOCK
|
||||||
return [
|
return [
|
||||||
[B, B, B, B, B],
|
[B, B, B, B, B],
|
||||||
[B, E, R, B, B],
|
[B, E, R, B, B],
|
||||||
[B, E, MazeCell.XY, E, B],
|
[B, E, M, E, B],
|
||||||
[B, B, E, B, B],
|
[B, B, E, B, B],
|
||||||
[B, B, B, B, B],
|
[B, B, B, B, B],
|
||||||
]
|
]
|
||||||
@ -169,10 +175,11 @@ class GoalRewardFall(GoalRewardUMaze):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def create_maze() -> List[List[MazeCell]]:
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT
|
E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT
|
||||||
|
M = MazeCell.YZ_BLOCK
|
||||||
return [
|
return [
|
||||||
[B, B, B, B],
|
[B, B, B, B],
|
||||||
[B, R, E, B],
|
[B, R, E, B],
|
||||||
[B, E, MazeCell.YZ, B],
|
[B, E, M, B],
|
||||||
[B, C, C, B],
|
[B, C, C, B],
|
||||||
[B, E, E, B],
|
[B, E, E, B],
|
||||||
[B, B, B, B],
|
[B, B, B, B],
|
||||||
@ -183,26 +190,6 @@ class DistRewardFall(GoalRewardFall, DistRewardMixIn):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class GoalRewardFall(GoalRewardUMaze):
|
|
||||||
TOP_DOWN_VIEW = True
|
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
|
||||||
super().__init__(scale)
|
|
||||||
self.goals = [MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))]
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def create_maze() -> List[List[MazeCell]]:
|
|
||||||
E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT
|
|
||||||
return [
|
|
||||||
[B, B, B, B],
|
|
||||||
[B, R, E, B],
|
|
||||||
[B, E, MazeCell.YZ, B],
|
|
||||||
[B, C, C, B],
|
|
||||||
[B, E, E, B],
|
|
||||||
[B, B, B, B],
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class GoalReward2Rooms(MazeTask):
|
class GoalReward2Rooms(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
PENALTY: float = -0.0001
|
PENALTY: float = -0.0001
|
||||||
@ -291,13 +278,9 @@ class GoalRewardTRoom(MazeTask):
|
|||||||
PENALTY: float = -0.0001
|
PENALTY: float = -0.0001
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
|
||||||
self, scale: float, goals: List[Tuple[float, float]] = [(2.0, -3.0)],
|
|
||||||
) -> None:
|
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
self.goals = []
|
self.goals = [MazeGoal(np.array(goal) * scale)]
|
||||||
for x, y in goals:
|
|
||||||
self.goals.append(MazeGoal(np.array([x * scale, y * scale])))
|
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
def reward(self, obs: np.ndarray) -> float:
|
||||||
for goal in self.goals:
|
for goal in self.goals:
|
||||||
@ -331,7 +314,8 @@ class GoalRewardBlockMaze(GoalRewardUMaze):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_maze() -> List[List[MazeCell]]:
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY
|
E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
|
||||||
|
M = MazeCell.XY_BLOCK
|
||||||
return [
|
return [
|
||||||
[B, B, B, B, B],
|
[B, B, B, B, B],
|
||||||
[B, R, E, E, B],
|
[B, R, E, E, B],
|
||||||
@ -346,6 +330,41 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class GoalRewardRolling(MazeTask):
|
||||||
|
REWARD_THRESHOLD: float = 0.9
|
||||||
|
PENALTY: float = -0.0001
|
||||||
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
|
||||||
|
OBSERVE_BALLS: bool = True
|
||||||
|
|
||||||
|
def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None:
|
||||||
|
super().__init__(scale)
|
||||||
|
goal = np.array(goal) * scale
|
||||||
|
self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)]
|
||||||
|
|
||||||
|
def reward(self, obs: np.ndarray) -> float:
|
||||||
|
return 1.0 if self.termination(obs) else self.PENALTY
|
||||||
|
|
||||||
|
def termination(self, obs: np.ndarray) -> bool:
|
||||||
|
return super().termination(obs[3:6])
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
|
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
||||||
|
R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
|
||||||
|
return [
|
||||||
|
[B, B, B, B, B],
|
||||||
|
[B, E, E, E, B],
|
||||||
|
[B, E, M, E, B],
|
||||||
|
[B, E, R, E, B],
|
||||||
|
[B, B, B, B, B],
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class DistRewardRolling(GoalRewardRolling):
|
||||||
|
def reward(self, obs: np.ndarray) -> float:
|
||||||
|
return -self.goals[0].euc_dist(obs[3:6]) / self.scale
|
||||||
|
|
||||||
|
|
||||||
class TaskRegistry:
|
class TaskRegistry:
|
||||||
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
||||||
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
|
"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
|
||||||
@ -356,6 +375,7 @@ class TaskRegistry:
|
|||||||
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
||||||
"TRoom": [DistRewardTRoom, GoalRewardTRoom],
|
"TRoom": [DistRewardTRoom, GoalRewardTRoom],
|
||||||
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
|
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
|
||||||
|
"Rolling": [DistRewardRolling, GoalRewardRolling],
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -6,11 +6,13 @@ import mujoco_maze
|
|||||||
|
|
||||||
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
||||||
def test_ant_maze(maze_id):
|
def test_ant_maze(maze_id):
|
||||||
|
if "Rolling" in maze_id:
|
||||||
|
return
|
||||||
for i in range(2):
|
for i in range(2):
|
||||||
env = gym.make(f"Ant{maze_id}-v{i}")
|
env = gym.make(f"Ant{maze_id}-v{i}")
|
||||||
s0 = env.reset()
|
s0 = env.reset()
|
||||||
s, _, _, _ = env.step(env.action_space.sample())
|
s, _, _, _ = env.step(env.action_space.sample())
|
||||||
if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
|
if not env.unwrapped.has_extended_obs:
|
||||||
assert s0.shape == (30,)
|
assert s0.shape == (30,)
|
||||||
assert s.shape == (30,)
|
assert s.shape == (30,)
|
||||||
|
|
||||||
@ -21,9 +23,12 @@ def test_point_maze(maze_id):
|
|||||||
env = gym.make(f"Point{maze_id}-v{i}")
|
env = gym.make(f"Point{maze_id}-v{i}")
|
||||||
s0 = env.reset()
|
s0 = env.reset()
|
||||||
s, r, _, _ = env.step(env.action_space.sample())
|
s, r, _, _ = env.step(env.action_space.sample())
|
||||||
if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
|
if not env.unwrapped.has_extended_obs:
|
||||||
assert s0.shape == (7,)
|
assert s0.shape == (7,)
|
||||||
assert s.shape == (7,)
|
assert s.shape == (7,)
|
||||||
|
if env.unwrapped._observe_balls:
|
||||||
|
assert s0.shape == (10,)
|
||||||
|
assert s.shape == (10,)
|
||||||
if i == 0:
|
if i == 0:
|
||||||
assert r != 0.0
|
assert r != 0.0
|
||||||
else:
|
else:
|
||||||
@ -33,42 +38,35 @@ def test_point_maze(maze_id):
|
|||||||
|
|
||||||
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
||||||
def test_reacher_maze(maze_id):
|
def test_reacher_maze(maze_id):
|
||||||
for inhibited in ["Fall", "Push", "Block"]:
|
for inhibited in ["Fall", "Push", "Block", "Rolling"]:
|
||||||
if inhibited in maze_id:
|
if inhibited in maze_id:
|
||||||
return
|
return
|
||||||
for i in range(2):
|
for i in range(2):
|
||||||
env = gym.make(f"Reacher{maze_id}-v{i}")
|
env = gym.make(f"Reacher{maze_id}-v{i}")
|
||||||
s0 = env.reset()
|
s0 = env.reset()
|
||||||
s, _, _, _ = env.step(env.action_space.sample())
|
s, _, _, _ = env.step(env.action_space.sample())
|
||||||
if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
|
if not env.unwrapped.has_extended_obs:
|
||||||
assert s0.shape == (9,)
|
assert s0.shape == (9,)
|
||||||
assert s.shape == (9,)
|
assert s.shape == (9,)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
|
||||||
def test_swimmer_maze(maze_id):
|
def test_swimmer_maze(maze_id):
|
||||||
for inhibited in ["Fall", "Push", "Block"]:
|
for inhibited in ["Fall", "Push", "Block", "Rolling"]:
|
||||||
if inhibited in maze_id:
|
if inhibited in maze_id:
|
||||||
return
|
return
|
||||||
for i in range(2):
|
for i in range(2):
|
||||||
env = gym.make(f"Swimmer{maze_id}-v{i}")
|
env = gym.make(f"Swimmer{maze_id}-v{i}")
|
||||||
s0 = env.reset()
|
s0 = env.reset()
|
||||||
s, _, _, _ = env.step(env.action_space.sample())
|
s, _, _, _ = env.step(env.action_space.sample())
|
||||||
if not env.unwrapped._top_down_view and not env.unwrapped._observe_blocks:
|
if not env.unwrapped.has_extended_obs:
|
||||||
assert s0.shape == (11,)
|
assert s0.shape == (11,)
|
||||||
assert s.shape == (11,)
|
assert s.shape == (11,)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("v", [0, 1])
|
@pytest.mark.parametrize("v", [0, 1])
|
||||||
def test_maze_args(v):
|
def test_maze_args(v):
|
||||||
env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goals": [(-2.0, -3.0)]})
|
env = gym.make(f"PointTRoom-v{v}", task_kwargs={"goal": (-2.0, -3.0)})
|
||||||
assert env.reset().shape == (7,)
|
|
||||||
s, _, _, _ = env.step(env.action_space.sample())
|
|
||||||
assert s.shape == (7,)
|
|
||||||
|
|
||||||
|
|
||||||
def test_getting_movable(v):
|
|
||||||
env = gym.make("PointBlockMaze-v1")
|
|
||||||
assert env.reset().shape == (7,)
|
assert env.reset().shape == (7,)
|
||||||
s, _, _, _ = env.step(env.action_space.sample())
|
s, _, _, _ = env.step(env.action_space.sample())
|
||||||
assert s.shape == (7,)
|
assert s.shape == (7,)
|
||||||
|
Loading…
Reference in New Issue
Block a user