# Patch for mujoco_maze/maze_task.py -- turns the "NoRewardRoom" task into a
# "Corridor" task family.
#
# Hunk 1 (@@ -329): renames class NoRewardRoom to NoRewardCorridor. Its
#   REWARD_THRESHOLD and MAZE_SIZE_SCALING lines are unchanged context.
# Hunk 2 (@@ -352): adds two classes after NoRewardCorridor:
#   * GoalRewardCorridor(NoRewardCorridor)
#       - REWARD_THRESHOLD = 0.9, PENALTY = -0.0001.
#       - __init__(scale, goal=(3.0, -3.0)) calls the parent constructor and
#         then appends MazeGoal(np.array(goal) * scale) to self.goals.
#       - reward(obs) iterates self.goals in order and returns reward_scale of
#         the first goal whose neighbor(obs) is truthy; otherwise it returns
#         PENALTY.
#   * DistRewardCorridor(GoalRewardCorridor, DistRewardMixIn) with an empty
#     body.
# Hunk 3 (@@ -483): in TaskRegistry, replaces the entry
#   "NoRewardRoom": [NoRewardRoom] with
#   "Corridor": [DistRewardCorridor, GoalRewardCorridor, NoRewardCorridor].
#
# NOTE(review): DistRewardMixIn is listed AFTER GoalRewardCorridor in the
#   bases, and GoalRewardCorridor defines reward, REWARD_THRESHOLD and PENALTY
#   itself. Under Python's method resolution order any attribute of the same
#   name on the mixin would be shadowed. The mixin is not part of this patch,
#   so confirm that DistRewardCorridor really behaves differently from
#   GoalRewardCorridor.
# NOTE(review): the registry key "NoRewardRoom" and the class name
#   NoRewardRoom are removed without an alias; presumably any existing lookup
#   or import of the old names must be updated -- verify against callers.
# NOTE(review): the indentation of context lines below is reconstructed;
#   verify it against the target file before applying.
#
# These leading '#' lines sit before the first diff header, where patch tools
# skip non-diff text.
diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py
index 09666c3..310c13b 100644
--- a/mujoco_maze/maze_task.py
+++ b/mujoco_maze/maze_task.py
@@ -329,7 +329,7 @@ class SubGoalTRoom(GoalRewardTRoom):
         )
 
 
-class NoRewardRoom(MazeTask):
+class NoRewardCorridor(MazeTask):
     REWARD_THRESHOLD: float = 0.0
     MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 1.0)
 
@@ -352,6 +352,25 @@ class NoRewardRoom(MazeTask):
         ]
 
 
+class GoalRewardCorridor(NoRewardCorridor):
+    REWARD_THRESHOLD: float = 0.9
+    PENALTY: float = -0.0001
+
+    def __init__(self, scale: float, goal: Tuple[float, float] = (3.0, -3.0)) -> None:
+        super().__init__(scale)
+        self.goals.append(MazeGoal(np.array(goal) * scale))
+
+    def reward(self, obs: np.ndarray) -> float:
+        for goal in self.goals:
+            if goal.neighbor(obs):
+                return goal.reward_scale
+        return self.PENALTY
+
+
+class DistRewardCorridor(GoalRewardCorridor, DistRewardMixIn):
+    pass
+
+
 class GoalRewardBlockMaze(GoalRewardUMaze):
     MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, None)
     OBSERVE_BLOCKS: bool = True
@@ -483,7 +502,7 @@ class TaskRegistry:
         "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
         "TRoom": [DistRewardTRoom, GoalRewardTRoom, SubGoalTRoom],
         "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
-        "NoRewardRoom": [NoRewardRoom],
+        "Corridor": [DistRewardCorridor, GoalRewardCorridor, NoRewardCorridor],
         "Billiard": [
             DistRewardBilliard,
             GoalRewardBilliard,