Set a task's per-agent Scaling value to None if the environment is not supported

This commit is contained in:
kngwyu 2020-10-05 13:52:21 +09:00
parent 3384934aff
commit bf4e5b1e97
2 changed files with 63 additions and 65 deletions

View File

@@ -16,6 +16,8 @@ from mujoco_maze.swimmer import SwimmerEnv
for maze_id in TaskRegistry.keys():
for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
point_scale = task_cls.MAZE_SIZE_SCALING.point
if point_scale is not None:
# Point
gym.envs.register(
id=f"Point{maze_id}-v{i}",
@@ -23,14 +25,15 @@ for maze_id in TaskRegistry.keys():
kwargs=dict(
model_cls=PointEnv,
maze_task=task_cls,
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.point,
maze_size_scaling=point_scale,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
),
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
if "Billiard" in maze_id:
continue
ant_scale = task_cls.MAZE_SIZE_SCALING.ant
if ant_scale is not None:
# Ant
gym.envs.register(
id=f"Ant{maze_id}-v{i}",
@@ -38,20 +41,15 @@ for maze_id in TaskRegistry.keys():
kwargs=dict(
model_cls=AntEnv,
maze_task=task_cls,
maze_size_scaling=task_cls.MAZE_SIZE_SCALING.ant,
maze_size_scaling=ant_scale,
inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
),
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
skip_swimmer = False
for inhibited in ["Fall", "Push", "Block"]:
if inhibited in maze_id:
skip_swimmer = True
if skip_swimmer:
continue
swimmer_scale = task_cls.MAZE_SIZE_SCALING.swimmer
if swimmer_scale is not None:
# Reacher
gym.envs.register(
id=f"Reacher{maze_id}-v{i}",
@@ -65,7 +63,6 @@ for maze_id in TaskRegistry.keys():
max_episode_steps=1000,
reward_threshold=task_cls.REWARD_THRESHOLD,
)
# Swimmer
gym.envs.register(
id=f"Swimmer{maze_id}-v{i}",

View File

@@ -48,9 +48,9 @@ class MazeGoal:
class Scaling(NamedTuple):
ant: float
point: float
swimmer: float
ant: Optional[float]
point: Optional[float]
swimmer: Optional[float]
class MazeTask(ABC):
@@ -330,6 +330,7 @@ class SubGoalTRoom(GoalRewardTRoom):
class GoalRewardBlockMaze(GoalRewardUMaze):
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
OBSERVE_BLOCKS: bool = True
def __init__(self, scale: float) -> None:
@@ -357,7 +358,7 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):
class GoalRewardBilliard(MazeTask):
REWARD_THRESHOLD: float = 0.9
PENALTY: float = -0.0001
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
MAZE_SIZE_SCALING: Scaling = Scaling(None, 3.0, None)
OBSERVE_BALLS: bool = True
GOAL_SIZE: float = 0.3