diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py
index 8fcaba2..52aed2d 100644
--- a/mujoco_maze/maze_task.py
+++ b/mujoco_maze/maze_task.py
@@ -230,6 +230,52 @@ class NoRewardMultiPush(GoalRewardMultiPush):
         return 0.0
 
 
+class GoalRewardPushMaze(GoalRewardUMaze):
+    """Goal-reward task on a 6x7 maze that contains three XY_BLOCK cells.
+
+    Block observation is switched on via OBSERVE_BLOCKS, and the task has a
+    single goal located at ``goal * scale``.
+    """
+
+    OBSERVE_BLOCKS: bool = True
+    MAZE_SIZE_SCALING: Scaling = Scaling(ant=2.0, point=6.0, swimmer=None)
+
+    def __init__(self, scale: float, goal: Tuple[float, float] = (3.0, 0.0)) -> None:
+        # NOTE(review): if goal offsets are counted in cells from the R cell
+        # (TODO confirm), the default (3.0, 0.0) is three columns to the right
+        # of R in row 1 of create_maze(), which is a B cell -- please verify.
+        super().__init__(scale)
+        # Overwrite self.goals with the one goal of this task, scaled by `scale`.
+        self.goals = [MazeGoal(np.array(goal) * scale)]
+
+    @staticmethod
+    def create_maze() -> List[List[MazeCell]]:
+        """Return the maze as rows of cells (R: robot cell, M: XY_BLOCK cell)."""
+        E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY_BLOCK
+        return [
+            [B, B, B, B, B, B, B],
+            [B, E, E, R, M, E, B],
+            [B, B, B, B, E, B, B],
+            [B, E, M, E, M, B, B],
+            [B, B, E, B, E, B, B],
+            [B, B, B, B, B, B, B],
+        ]
+
+
+class DistRewardPushMaze(GoalRewardPushMaze, DistRewardMixIn):
+    # NOTE(review): GoalRewardPushMaze comes before DistRewardMixIn in the MRO,
+    # so a reward() inherited through GoalRewardPushMaze would take precedence
+    # over one defined on the mixin -- confirm this ordering is intended.
+    pass
+
+
+class NoRewardPushMaze(GoalRewardPushMaze):
+    """PushMaze task variant whose reward() always returns 0.0."""
+
+    def reward(self, _obs: np.ndarray) -> float:
+        return 0.0
+
+
 class GoalRewardFall(GoalRewardUMaze):
     OBSERVE_BLOCKS: bool = True
 
@@ -712,6 +758,7 @@ class TaskRegistry:
         "UMaze": [DistRewardUMaze, GoalRewardUMaze],
         "Push": [DistRewardPush, GoalRewardPush],
         "MultiPush": [DistRewardMultiPush, GoalRewardMultiPush, NoRewardMultiPush],
+        "PushMaze": [DistRewardPushMaze, GoalRewardPushMaze, NoRewardPushMaze],
         "Fall": [DistRewardFall, GoalRewardFall],
         "MultiFall": [DistRewardMultiFall, GoalRewardMultiFall, NoRewardMultiFall],
         "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],