From 72fa14d786462e95c5f9b5ba653a16e714f65826 Mon Sep 17 00:00:00 2001
From: kngwyu
Date: Sun, 11 Apr 2021 18:37:10 +0900
Subject: [PATCH] [Experimental] NoRewardRoom-v0

---
Notes:
    Adds NoRewardRoom, a MazeTask subclass whose reward() returns 0.0 for
    every observation. create_maze() returns a fixed 9x9 grid of MazeCell
    values: BLOCK cells on the whole border, extra BLOCK partitions inside,
    and the single ROBOT cell at row 4, column 4 (the center). The second
    hunk lists the class under the "NoRewardRoom" key next to the existing
    task lists.

    NOTE(review): this file was recovered from a copy in which all line
    breaks and indentation had been collapsed. The leading whitespace of
    the unchanged context lines is reconstructed, not copied; verify it
    against mujoco_maze/maze_task.py at e953cb1 before applying.

 mujoco_maze/maze_task.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py
index e953cb1..828e140 100644
--- a/mujoco_maze/maze_task.py
+++ b/mujoco_maze/maze_task.py
@@ -329,6 +329,29 @@ class SubGoalTRoom(GoalRewardTRoom):
         )
 
 
+class NoRewardRoom(MazeTask):
+    REWARD_THRESHOLD: float = 0.0
+    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
+
+    def reward(self, obs: np.ndarray) -> float:
+        return 0.0
+
+    @staticmethod
+    def create_maze() -> List[List[MazeCell]]:
+        E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT
+        return [
+            [B, B, B, B, B, B, B, B, B],
+            [B, E, E, B, E, E, E, E, B],
+            [B, E, E, B, E, E, E, E, B],
+            [B, E, E, E, E, E, B, B, B],
+            [B, E, E, E, R, E, E, E, B],
+            [B, B, B, E, E, E, E, E, B],
+            [B, E, E, E, E, B, E, E, B],
+            [B, E, E, E, E, B, E, E, B],
+            [B, B, B, B, B, B, B, B, B],
+        ]
+
+
 class GoalRewardBlockMaze(GoalRewardUMaze):
     MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
     OBSERVE_BLOCKS: bool = True
@@ -460,6 +483,7 @@ class TaskRegistry:
         "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
         "TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
         "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
+        "NoRewardRoom": [NoRewardRoom],
         "Billiard": [
             DistRewardBilliard,
             GoalRewardBilliard,