mujoco_maze/mujoco_maze/maze_task.py

"""Maze tasks that are defined by their map, termination condition, and goals.
"""

from abc import ABC, abstractmethod
from typing import Dict, List, NamedTuple, Optional, Tuple, Type

import numpy as np

from mujoco_maze.maze_env_utils import MazeCell


class Rgb(NamedTuple):
    """An RGB color stored as three float channels."""

    red: float
    green: float
    blue: float

    def rgba_str(self) -> str:
        """Return ``"<red> <green> <blue> 1"`` for this color.

        The channels are written in red, green, blue order, separated by
        single spaces, and a literal ``1`` is appended as the fourth field.
        """
        red, green, blue = self
        return f"{red} {green} {blue} 1"


# Predefined colors: the dominant channel is 0.7 and the other two are 0.1.
RED = Rgb(0.7, 0.1, 0.1)
GREEN = Rgb(0.1, 0.7, 0.1)
BLUE = Rgb(0.1, 0.1, 0.7)


class MazeGoal:
    """A goal located at ``pos`` that can be tested against an observation.

    Args:
        pos: Goal coordinates. Its length (``dim``) decides how many leading
            entries of an observation are compared against the goal.
        reward_scale: Value in ``[0, 1]`` stored on the goal for reward
            functions to read.
        rgb: Color stored with the goal.
        threshold: Largest distance at which ``neighbor`` reports a hit.
        custom_size: Optional size stored with the goal; this class only
            stores it.
    """

    def __init__(
        self,
        pos: np.ndarray,
        reward_scale: float = 1.0,
        rgb: Rgb = RED,
        threshold: float = 0.6,
        custom_size: Optional[float] = None,
    ) -> None:
        assert 0.0 <= reward_scale <= 1.0, "reward_scale must be within [0, 1]"
        # Accept plain sequences too; an ndarray is passed through unchanged.
        pos = np.asarray(pos)
        self.pos = pos
        self.dim = pos.shape[0]
        self.reward_scale = reward_scale
        self.rgb = rgb
        self.threshold = threshold
        self.custom_size = custom_size

    def neighbor(self, obs: np.ndarray) -> bool:
        """Return True when the first ``dim`` entries of ``obs`` lie within
        ``threshold`` (inclusive) of the goal position."""
        distance = np.linalg.norm(obs[: self.dim] - self.pos)
        # Convert the numpy boolean to a plain ``bool`` to match the annotation.
        return bool(distance <= self.threshold)

    def euc_dist(self, obs: np.ndarray) -> float:
        """Return the Euclidean distance between the first ``dim`` entries of
        ``obs`` and the goal position."""
        return np.sum(np.square(obs[: self.dim] - self.pos)) ** 0.5


class Scaling(NamedTuple):
    """Three float values, one per agent name (ant, point, swimmer).

    Used in this file as the type of ``MAZE_SIZE_SCALING`` on task classes.
    """

    ant: float
    point: float
    swimmer: float


class MazeTask(ABC):
    """Abstract base class of all maze tasks.

    A task holds a list of goals and a ``scale`` value. Concrete tasks must
    provide ``reward`` and ``create_maze``.
    """

    REWARD_THRESHOLD: float
    PENALTY: Optional[float] = None
    MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)
    INNER_REWARD_SCALING: float = 0.01
    # For Fall/Push/BlockMaze
    OBSERVE_BLOCKS: bool = False
    # For Billiard
    OBSERVE_BALLS: bool = False
    OBJECT_BALL_SIZE: float = 1.0
    # Unused now
    PUT_SPIN_NEAR_AGENT: bool = False
    TOP_DOWN_VIEW: bool = False

    def __init__(self, scale: float) -> None:
        """Store ``scale`` and start with an empty goal list."""
        self.scale = scale
        self.goals: List[MazeGoal] = []

    def sample_goals(self) -> bool:
        """Always return False in the base class."""
        return False

    def termination(self, obs: np.ndarray) -> bool:
        """Return True as soon as any goal reports ``obs`` as a neighbor."""
        return any(goal.neighbor(obs) for goal in self.goals)

    @abstractmethod
    def reward(self, obs: np.ndarray) -> float:
        """Return the reward for observation ``obs``."""

    @staticmethod
    @abstractmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the maze layout as rows of ``MazeCell`` values."""


class DistRewardMixIn:
    """Mix-in whose reward is the negative distance to the first goal.

    The distance is divided by ``scale``. Because Python looks attributes up
    along the MRO from left to right, this mix-in only overrides a task's
    ``reward`` / ``REWARD_THRESHOLD`` when it precedes that task in the bases.
    """

    REWARD_THRESHOLD: float = -1000.0
    goals: List[MazeGoal]
    scale: float

    def reward(self, obs: np.ndarray) -> float:
        """Return ``-euc_dist(obs) / scale`` measured against ``goals[0]``."""
        first_goal = self.goals[0]
        distance = first_goal.euc_dist(obs)
        return -distance / self.scale


class GoalRewardUMaze(MazeTask):
    """U-shaped maze with a single goal at ``(0, 2 * scale)``.

    The reward is 1.0 when the termination test passes and ``PENALTY``
    otherwise.
    """

    REWARD_THRESHOLD: float = 0.9
    PENALTY: float = -0.0001

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        goal_pos = np.array([0.0, 2.0 * scale])
        self.goals = [MazeGoal(goal_pos)]

    def reward(self, obs: np.ndarray) -> float:
        """Return 1.0 if ``termination(obs)`` holds, else ``PENALTY``."""
        if self.termination(obs):
            return 1.0
        return self.PENALTY

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 5x5 U-maze layout."""
        B = MazeCell.BLOCK
        E = MazeCell.EMPTY
        R = MazeCell.ROBOT
        return [
            [B, B, B, B, B],
            [B, R, E, E, B],
            [B, B, B, E, B],
            [B, E, E, E, B],
            [B, B, B, B, B],
        ]


class DistRewardUMaze(DistRewardMixIn, GoalRewardUMaze):
    """U-maze task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede those of ``GoalRewardUMaze`` in the method
    resolution order; with the opposite order the mix-in is never consulted.
    """


class GoalRewardSimpleRoom(GoalRewardUMaze):
    """Single-corridor room with one goal at ``(2 * scale, 0)``."""

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        # Replace the goal installed by the parent constructor.
        goal_pos = np.array([2.0 * scale, 0.0])
        self.goals = [MazeGoal(goal_pos)]

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 3x5 layout: one open row enclosed by blocks."""
        B = MazeCell.BLOCK
        E = MazeCell.EMPTY
        R = MazeCell.ROBOT
        return [
            [B, B, B, B, B],
            [B, R, E, E, B],
            [B, B, B, B, B],
        ]


class DistRewardSimpleRoom(DistRewardMixIn, GoalRewardSimpleRoom):
    """Simple-room task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede the inherited goal-reward versions in the
    method resolution order; with the opposite order the mix-in is never
    consulted.
    """


class GoalRewardPush(GoalRewardUMaze):
    """Maze containing an ``XY_BLOCK`` cell, with a goal at ``(0, 2.375 * scale)``."""

    OBSERVE_BLOCKS: bool = True

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        # Replace the goal installed by the parent constructor.
        goal_pos = np.array([0.0, 2.375 * scale])
        self.goals = [MazeGoal(goal_pos)]

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 5x5 layout with one ``XY_BLOCK`` cell below the robot."""
        B, E = MazeCell.BLOCK, MazeCell.EMPTY
        R, M = MazeCell.ROBOT, MazeCell.XY_BLOCK
        return [
            [B, B, B, B, B],
            [B, E, R, B, B],
            [B, E, M, E, B],
            [B, B, E, B, B],
            [B, B, B, B, B],
        ]


class DistRewardPush(DistRewardMixIn, GoalRewardPush):
    """Push task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede the inherited goal-reward versions in the
    method resolution order; with the opposite order the mix-in is never
    consulted.
    """


class GoalRewardFall(GoalRewardUMaze):
    """Maze with chasm cells and a ``YZ_BLOCK`` cell.

    The goal is three-dimensional: ``(0, 3.375 * scale, 4.5)``.
    """

    OBSERVE_BLOCKS: bool = True

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        # Replace the goal installed by the parent constructor; the third
        # coordinate is not multiplied by ``scale``.
        goal_pos = np.array([0.0, 3.375 * scale, 4.5])
        self.goals = [MazeGoal(goal_pos)]

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 6x4 layout with a row of chasm cells."""
        B, E = MazeCell.BLOCK, MazeCell.EMPTY
        C, R = MazeCell.CHASM, MazeCell.ROBOT
        M = MazeCell.YZ_BLOCK
        return [
            [B, B, B, B],
            [B, R, E, B],
            [B, E, M, B],
            [B, C, C, B],
            [B, E, E, B],
            [B, B, B, B],
        ]


class DistRewardFall(DistRewardMixIn, GoalRewardFall):
    """Fall task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede the inherited goal-reward versions in the
    method resolution order; with the opposite order the mix-in is never
    consulted.
    """


class GoalReward2Rooms(MazeTask):
    """Two rooms joined by a single opening, with a goal at ``(0, 4 * scale)``."""

    REWARD_THRESHOLD: float = 0.9
    PENALTY: float = -0.0001
    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        goal_pos = np.array([0.0, 4.0 * scale])
        self.goals = [MazeGoal(goal_pos)]

    def reward(self, obs: np.ndarray) -> float:
        """Return the ``reward_scale`` of the first goal that ``obs`` is near.

        Goals are checked in list order; ``PENALTY`` is returned when none
        matches.
        """
        reached = (goal.reward_scale for goal in self.goals if goal.neighbor(obs))
        return next(reached, self.PENALTY)

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 7x8 two-room layout."""
        B = MazeCell.BLOCK
        E = MazeCell.EMPTY
        R = MazeCell.ROBOT
        return [
            [B, B, B, B, B, B, B, B],
            [B, R, E, E, E, E, E, B],
            [B, E, E, E, E, E, E, B],
            [B, B, B, B, B, E, B, B],
            [B, E, E, E, E, E, E, B],
            [B, E, E, E, E, E, E, B],
            [B, B, B, B, B, B, B, B],
        ]


class DistReward2Rooms(DistRewardMixIn, GoalReward2Rooms):
    """2Rooms task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede those of ``GoalReward2Rooms`` in the method
    resolution order; with the opposite order the mix-in is never consulted.
    """


class SubGoal2Rooms(GoalReward2Rooms):
    """2Rooms task with one extra green goal worth half the reward."""

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        subgoal_pos = np.array([5.0 * scale, 0.0 * scale])
        subgoal = MazeGoal(subgoal_pos, reward_scale=0.5, rgb=GREEN)
        self.goals.append(subgoal)


class GoalReward4Rooms(MazeTask):
    """Four connected rooms with a goal at ``(6 * scale, -6 * scale)``."""

    REWARD_THRESHOLD: float = 0.9
    PENALTY: float = -0.0001
    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        goal_pos = np.array([6.0 * scale, -6.0 * scale])
        self.goals = [MazeGoal(goal_pos)]

    def reward(self, obs: np.ndarray) -> float:
        """Return the ``reward_scale`` of the first goal that ``obs`` is near.

        Goals are checked in list order; ``PENALTY`` is returned when none
        matches.
        """
        reached = (goal.reward_scale for goal in self.goals if goal.neighbor(obs))
        return next(reached, self.PENALTY)

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 9x9 four-room layout."""
        B = MazeCell.BLOCK
        E = MazeCell.EMPTY
        R = MazeCell.ROBOT
        return [
            [B, B, B, B, B, B, B, B, B],
            [B, E, E, E, B, E, E, E, B],
            [B, E, E, E, E, E, E, E, B],
            [B, E, E, E, B, E, E, E, B],
            [B, B, E, B, B, B, E, B, B],
            [B, E, E, E, B, E, E, E, B],
            [B, E, E, E, E, E, E, E, B],
            [B, R, E, E, B, E, E, E, B],
            [B, B, B, B, B, B, B, B, B],
        ]


class DistReward4Rooms(DistRewardMixIn, GoalReward4Rooms):
    """4Rooms task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede those of ``GoalReward4Rooms`` in the method
    resolution order; with the opposite order the mix-in is never consulted.
    """


class SubGoal4Rooms(GoalReward4Rooms):
    """4Rooms task with two extra green goals worth half the reward each."""

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        first = MazeGoal(
            np.array([0.0 * scale, -6.0 * scale]), reward_scale=0.5, rgb=GREEN
        )
        second = MazeGoal(
            np.array([6.0 * scale, 0.0 * scale]), reward_scale=0.5, rgb=GREEN
        )
        self.goals.extend([first, second])


class GoalRewardTRoom(MazeTask):
    """T-room maze whose single goal is ``goal`` multiplied by ``scale``."""

    REWARD_THRESHOLD: float = 0.9
    PENALTY: float = -0.0001
    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)

    def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
        super().__init__(scale)
        goal_pos = np.array(goal) * scale
        self.goals = [MazeGoal(goal_pos)]

    def reward(self, obs: np.ndarray) -> float:
        """Return the ``reward_scale`` of the first goal that ``obs`` is near.

        Goals are checked in list order; ``PENALTY`` is returned when none
        matches.
        """
        reached = (g.reward_scale for g in self.goals if g.neighbor(obs))
        return next(reached, self.PENALTY)

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 6x7 T-room layout."""
        B = MazeCell.BLOCK
        E = MazeCell.EMPTY
        R = MazeCell.ROBOT
        return [
            [B, B, B, B, B, B, B],
            [B, E, E, B, E, E, B],
            [B, E, E, B, E, E, B],
            [B, E, B, B, B, E, B],
            [B, E, E, R, E, E, B],
            [B, B, B, B, B, B, B],
        ]


class DistRewardTRoom(DistRewardMixIn, GoalRewardTRoom):
    """TRoom task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede those of ``GoalRewardTRoom`` in the method
    resolution order; with the opposite order the mix-in is never consulted.
    """


class GoalRewardBlockMaze(GoalRewardUMaze):
    """Maze with an ``XY_BLOCK`` cell in the passage; goal at ``(0, 3 * scale)``."""

    OBSERVE_BLOCKS: bool = True

    def __init__(self, scale: float) -> None:
        super().__init__(scale)
        # Replace the goal installed by the parent constructor.
        goal_pos = np.array([0.0, 3.0 * scale])
        self.goals = [MazeGoal(goal_pos)]

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 6x5 layout with one ``XY_BLOCK`` cell."""
        B, E = MazeCell.BLOCK, MazeCell.EMPTY
        R, M = MazeCell.ROBOT, MazeCell.XY_BLOCK
        return [
            [B, B, B, B, B],
            [B, R, E, E, B],
            [B, B, B, M, B],
            [B, E, E, E, B],
            [B, E, E, E, B],
            [B, B, B, B, B],
        ]


class DistRewardBlockMaze(DistRewardMixIn, GoalRewardBlockMaze):
    """BlockMaze task rewarded by the negative scaled distance to the goal.

    ``DistRewardMixIn`` is listed first so that its ``reward`` and
    ``REWARD_THRESHOLD`` precede the inherited goal-reward versions in the
    method resolution order; with the opposite order the mix-in is never
    consulted.
    """


class GoalRewardBilliard(MazeTask):
    """Billiard task: goals are tested against ``obs[3:6]`` instead of ``obs``.

    The code names that slice ``object_pos``; presumably it holds the object
    ball's position (verify against the environment's observation layout).
    """

    REWARD_THRESHOLD: float = 0.9
    PENALTY: float = -0.0001
    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
    OBSERVE_BALLS: bool = True
    GOAL_SIZE: float = 0.3

    def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
        super().__init__(scale)
        goal_pos = np.array(goal) * scale
        primary = MazeGoal(
            goal_pos,
            threshold=self._threshold(),
            custom_size=self.GOAL_SIZE,
        )
        self.goals.append(primary)

    def _threshold(self) -> float:
        """Return the goal threshold: ball size plus goal size."""
        return self.OBJECT_BALL_SIZE + self.GOAL_SIZE

    def reward(self, obs: np.ndarray) -> float:
        """Return the ``reward_scale`` of the first goal near ``obs[3:6]``.

        Goals are checked in list order; ``PENALTY`` is returned when none
        matches.
        """
        object_pos = obs[3:6]
        reached = (g.reward_scale for g in self.goals if g.neighbor(object_pos))
        return next(reached, self.PENALTY)

    def termination(self, obs: np.ndarray) -> bool:
        """Return True when any goal reports ``obs[3:6]`` as a neighbor."""
        object_pos = obs[3:6]
        return any(g.neighbor(object_pos) for g in self.goals)

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 6x7 open layout with the object ball above the robot."""
        B = MazeCell.BLOCK
        E = MazeCell.EMPTY
        R = MazeCell.ROBOT
        M = MazeCell.OBJECT_BALL
        return [
            [B, B, B, B, B, B, B],
            [B, E, E, E, E, E, B],
            [B, E, E, E, E, E, B],
            [B, E, E, M, E, E, B],
            [B, E, E, R, E, E, B],
            [B, B, B, B, B, B, B],
        ]


class DistRewardBilliard(GoalRewardBilliard):
    """Billiard task rewarded by the negative scaled distance to the first goal.

    NOTE(review): ``REWARD_THRESHOLD`` is still the inherited 0.9 although
    this reward is never positive; confirm whether that is intended.
    """

    def reward(self, obs: np.ndarray) -> float:
        """Return ``-euc_dist(obs[3:6]) / scale`` measured against ``goals[0]``."""
        object_pos = obs[3:6]
        distance = self.goals[0].euc_dist(object_pos)
        return -distance / self.scale


class SubGoalBilliard(GoalRewardBilliard):
    """Billiard task with an extra green subgoal worth half the reward."""

    def __init__(
        self,
        scale: float,
        primary_goal: Tuple[float, float] = (2.0, -3.0),
        subgoal: Tuple[float, float] = (-2.0, -3.0),
    ) -> None:
        super().__init__(scale, primary_goal)
        secondary = MazeGoal(
            np.array(subgoal) * scale,
            reward_scale=0.5,
            rgb=GREEN,
            threshold=self._threshold(),
            custom_size=self.GOAL_SIZE,
        )
        self.goals.append(secondary)

    @staticmethod
    def create_maze() -> List[List[MazeCell]]:
        """Return the 6x7 layout; two cells of the third row are blocked."""
        B = MazeCell.BLOCK
        E = MazeCell.EMPTY
        R = MazeCell.ROBOT
        M = MazeCell.OBJECT_BALL
        return [
            [B, B, B, B, B, B, B],
            [B, E, E, E, E, E, B],
            [B, E, E, E, B, B, B],
            [B, E, E, M, E, E, B],
            [B, E, E, R, E, E, B],
            [B, B, B, B, B, B, B],
        ]


class TaskRegistry:
    """Lookup table from a maze name to the task classes defined for it."""

    REGISTRY: Dict[str, List[Type[MazeTask]]] = {
        "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
        "UMaze": [DistRewardUMaze, GoalRewardUMaze],
        "Push": [DistRewardPush, GoalRewardPush],
        "Fall": [DistRewardFall, GoalRewardFall],
        "2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
        "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
        "TRoom": [DistRewardTRoom, GoalRewardTRoom],
        "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
        "Billiard": [DistRewardBilliard, GoalRewardBilliard, SubGoalBilliard],
    }

    @staticmethod
    def keys() -> List[str]:
        """Return the registered maze names as a new list."""
        return list(TaskRegistry.REGISTRY)

    @staticmethod
    def tasks(key: str) -> List[Type[MazeTask]]:
        """Return the task classes registered under ``key``.

        Raises:
            KeyError: If ``key`` is not a registered maze name.
        """
        registered = TaskRegistry.REGISTRY
        return registered[key]
Remove tensorflow headers and Add doc comments 2020-07-01 07:12:06 +02:00			`"""Maze tasks that are defined by their map, termination condition, and goals.`
			`"""`

Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`from abc import ABC, abstractmethod`
Block Maze 2020-09-26 11:37:20 +02:00			`from typing import Dict, List, NamedTuple, Optional, Tuple, Type`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
			`import numpy as np`

			`from mujoco_maze.maze_env_utils import MazeCell`

Implement 4Rooms 2020-06-22 18:13:05 +02:00
Fix gym warning 2020-06-30 09:33:07 +02:00			`class Rgb(NamedTuple):`
			`red: float`
			`green: float`
			`blue: float`

Add Billiard-v2 2020-09-29 10:52:46 +02:00			`def rgba_str(self) -> str:`
			`return f"{self.red} {self.green} {self.blue} 1"`

Fix gym warning 2020-06-30 09:33:07 +02:00
			`RED = Rgb(0.7, 0.1, 0.1)`
			`GREEN = Rgb(0.1, 0.7, 0.1)`
			`BLUE = Rgb(0.1, 0.1, 0.7)`
Implement 4Rooms 2020-06-22 18:13:05 +02:00
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
			`class MazeGoal:`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def __init__(`
Rolling 2020-09-27 06:33:14 +02:00			`self,`
			`pos: np.ndarray,`
			`reward_scale: float = 1.0,`
			`rgb: Rgb = RED,`
			`threshold: float = 0.6,`
Add manuall collision detection for billiard 2020-09-28 13:05:08 +02:00			`custom_size: Optional[float] = None,`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`) -> None:`
			`assert 0.0 <= reward_scale <= 1.0`
			`self.pos = pos`
			`self.dim = pos.shape[0]`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`self.reward_scale = reward_scale`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`self.rgb = rgb`
Rolling 2020-09-27 06:33:14 +02:00			`self.threshold = threshold`
			`self.custom_size = custom_size`
Implement 4Rooms 2020-06-22 18:13:05 +02:00
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`def neighbor(self, obs: np.ndarray) -> float:`
Rolling 2020-09-27 06:33:14 +02:00			`return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
			`def euc_dist(self, obs: np.ndarray) -> float:`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`return np.sum(np.square(obs[: self.dim] - self.pos)) ** 0.5`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00

Make some configurations class attributes 2020-06-30 15:42:22 +02:00			`class Scaling(NamedTuple):`
			`ant: float`
			`point: float`
Swimmer 2020-09-24 16:40:33 +02:00			`swimmer: float`
Make some configurations class attributes 2020-06-30 15:42:22 +02:00

Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`class MazeTask(ABC):`
			`REWARD_THRESHOLD: float`
Block Maze 2020-09-26 11:37:20 +02:00			`PENALTY: Optional[float] = None`
Swimmer 2020-09-24 16:40:33 +02:00			`MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0)`
Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`INNER_REWARD_SCALING: float = 0.01`
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`# For Fall/Push/BlockMaze`
Make some configurations class attributes 2020-06-30 15:42:22 +02:00			`OBSERVE_BLOCKS: bool = False`
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`# For Billiard`
Rolling 2020-09-27 06:33:14 +02:00			`OBSERVE_BALLS: bool = False`
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`OBJECT_BALL_SIZE: float = 1.0`
			`# Unused now`
Make some configurations class attributes 2020-06-30 15:42:22 +02:00			`PUT_SPIN_NEAR_AGENT: bool = False`
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`TOP_DOWN_VIEW: bool = False`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def __init__(self, scale: float) -> None:`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`self.goals = []`
Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`self.scale = scale`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def sample_goals(self) -> bool:`
			`return False`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def termination(self, obs: np.ndarray) -> bool:`
			`for goal in self.goals:`
			`if goal.neighbor(obs):`
			`return True`
			`return False`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
			`@abstractmethod`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def reward(self, obs: np.ndarray) -> float:`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`pass`

			`@staticmethod`
			`@abstractmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`pass`


Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class DistRewardMixIn:`
			`REWARD_THRESHOLD: float = -1000.0`
			`goals: List[MazeGoal]`
			`scale: float`

			`def reward(self, obs: np.ndarray) -> float:`
			`return -self.goals[0].euc_dist(obs) / self.scale`


Block Maze 2020-09-26 11:37:20 +02:00			`class GoalRewardUMaze(MazeTask):`
Swimmer 2020-09-24 16:40:33 +02:00			`REWARD_THRESHOLD: float = 0.9`
Block Maze 2020-09-26 11:37:20 +02:00			`PENALTY: float = -0.0001`
Swimmer 2020-09-24 16:40:33 +02:00
			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
Block Maze 2020-09-26 11:37:20 +02:00			`self.goals = [MazeGoal(np.array([0.0, 2.0 * scale]))]`
Swimmer 2020-09-24 16:40:33 +02:00
			`def reward(self, obs: np.ndarray) -> float:`
Block Maze 2020-09-26 11:37:20 +02:00			`return 1.0 if self.termination(obs) else self.PENALTY`
Swimmer 2020-09-24 16:40:33 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT`
			`return [`
			`[B, B, B, B, B],`
			`[B, R, E, E, B],`
Block Maze 2020-09-26 11:37:20 +02:00			`[B, B, B, E, B],`
			`[B, E, E, E, B],`
Swimmer 2020-09-24 16:40:33 +02:00			`[B, B, B, B, B],`
			`]`


Block Maze 2020-09-26 11:37:20 +02:00			`class DistRewardUMaze(GoalRewardUMaze, DistRewardMixIn):`
Swimmer 2020-09-24 16:40:33 +02:00			`pass`


Block Maze 2020-09-26 11:37:20 +02:00			`class GoalRewardSimpleRoom(GoalRewardUMaze):`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
Block Maze 2020-09-26 11:37:20 +02:00			`self.goals = [MazeGoal(np.array([2.0 * scale, 0.0]))]`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT`
			`return [`
			`[B, B, B, B, B],`
			`[B, R, E, E, B],`
			`[B, B, B, B, B],`
			`]`


Block Maze 2020-09-26 11:37:20 +02:00			`class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn):`
Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`pass`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00

Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class GoalRewardPush(GoalRewardUMaze):`
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`OBSERVE_BLOCKS: bool = True`
Use top_down_view in Push and Fall 2020-09-21 06:27:41 +02:00
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
			`self.goals = [MazeGoal(np.array([0.0, 2.375 * scale]))]`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
Rolling 2020-09-27 06:33:14 +02:00			`E, B, R, M = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT, MazeCell.XY_BLOCK`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`return [`
			`[B, B, B, B, B],`
			`[B, E, R, B, B],`
Rolling 2020-09-27 06:33:14 +02:00			`[B, E, M, E, B],`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`[B, B, E, B, B],`
			`[B, B, B, B, B],`
			`]`


Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class DistRewardPush(GoalRewardPush, DistRewardMixIn):`
			`pass`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00

Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class GoalRewardFall(GoalRewardUMaze):`
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`OBSERVE_BLOCKS: bool = True`
Use top_down_view in Push and Fall 2020-09-21 06:27:41 +02:00
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
			`self.goals = [MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))]`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B, C, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.CHASM, MazeCell.ROBOT`
Rolling 2020-09-27 06:33:14 +02:00			`M = MazeCell.YZ_BLOCK`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`return [`
			`[B, B, B, B],`
			`[B, R, E, B],`
Rolling 2020-09-27 06:33:14 +02:00			`[B, E, M, B],`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`[B, C, C, B],`
			`[B, E, E, B],`
			`[B, B, B, B],`
			`]`


Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class DistRewardFall(GoalRewardFall, DistRewardMixIn):`
			`pass`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00

Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class GoalReward2Rooms(MazeTask):`
Add 2Rooms 2020-06-29 18:38:02 +02:00			`REWARD_THRESHOLD: float = 0.9`
Block Maze 2020-09-26 11:37:20 +02:00			`PENALTY: float = -0.0001`
Swimmer 2020-09-24 16:40:33 +02:00			`MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)`
Add 2Rooms 2020-06-29 18:38:02 +02:00
			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
			`self.goals = [MazeGoal(np.array([0.0, 4.0 * scale]))]`

			`def reward(self, obs: np.ndarray) -> float:`
Fix 2Rooms Reward 2020-06-30 06:17:11 +02:00			`for goal in self.goals:`
			`if goal.neighbor(obs):`
			`return goal.reward_scale`
Block Maze 2020-09-26 11:37:20 +02:00			`return self.PENALTY`
Add 2Rooms 2020-06-29 18:38:02 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT`
			`return [`
			`[B, B, B, B, B, B, B, B],`
			`[B, R, E, E, E, E, E, B],`
			`[B, E, E, E, E, E, E, B],`
			`[B, B, B, B, B, E, B, B],`
			`[B, E, E, E, E, E, E, B],`
			`[B, E, E, E, E, E, E, B],`
			`[B, B, B, B, B, B, B, B],`
			`]`


Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):`
			`pass`
Add 2Rooms 2020-06-29 18:38:02 +02:00

Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class SubGoal2Rooms(GoalReward2Rooms):`
Add 2Rooms 2020-06-29 18:38:02 +02:00			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
			`self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))`


Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class GoalReward4Rooms(MazeTask):`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`REWARD_THRESHOLD: float = 0.9`
Block Maze 2020-09-26 11:37:20 +02:00			`PENALTY: float = -0.0001`
Swimmer 2020-09-24 16:40:33 +02:00			`MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)`
Implement 4Rooms 2020-06-22 18:13:05 +02:00
			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
[Four Rooms] Change the start position 2020-09-07 10:44:57 +02:00			`self.goals = [MazeGoal(np.array([6.0 * scale, -6.0 * scale]))]`
Implement 4Rooms 2020-06-22 18:13:05 +02:00
			`def reward(self, obs: np.ndarray) -> float:`
			`for goal in self.goals:`
			`if goal.neighbor(obs):`
			`return goal.reward_scale`
Block Maze 2020-09-26 11:37:20 +02:00			`return self.PENALTY`
Implement 4Rooms 2020-06-22 18:13:05 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT`
			`return [`
			`[B, B, B, B, B, B, B, B, B],`
[Four Rooms] Change the start position 2020-09-07 10:44:57 +02:00			`[B, E, E, E, B, E, E, E, B],`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`[B, E, E, E, E, E, E, E, B],`
			`[B, E, E, E, B, E, E, E, B],`
			`[B, B, E, B, B, B, E, B, B],`
			`[B, E, E, E, B, E, E, E, B],`
			`[B, E, E, E, E, E, E, E, B],`
[Four Rooms] Change the start position 2020-09-07 10:44:57 +02:00			`[B, R, E, E, B, E, E, E, B],`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`[B, B, B, B, B, B, B, B, B],`
			`]`


Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class DistReward4Rooms(GoalReward4Rooms, DistRewardMixIn):`
			`pass`
Add 2Rooms 2020-06-29 18:38:02 +02:00

Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`class SubGoal4Rooms(GoalReward4Rooms):`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
Add 2Rooms 2020-06-29 18:38:02 +02:00			`self.goals += [`
[Four Rooms] Change the start position 2020-09-07 10:44:57 +02:00			`MazeGoal(np.array([0.0 * scale, -6.0 * scale]), 0.5, GREEN),`
Implement 4Rooms 2020-06-22 18:13:05 +02:00			`MazeGoal(np.array([6.0 * scale, 0.0 * scale]), 0.5, GREEN),`
			`]`


Add TRoom 2020-09-16 18:27:38 +02:00			`class GoalRewardTRoom(MazeTask):`
			`REWARD_THRESHOLD: float = 0.9`
Block Maze 2020-09-26 11:37:20 +02:00			`PENALTY: float = -0.0001`
Swimmer 2020-09-24 16:40:33 +02:00			`MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)`
Add TRoom 2020-09-16 18:27:38 +02:00
Rolling 2020-09-27 06:33:14 +02:00			`def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:`
Add TRoom 2020-09-16 18:27:38 +02:00			`super().__init__(scale)`
Rolling 2020-09-27 06:33:14 +02:00			`self.goals = [MazeGoal(np.array(goal) * scale)]`
Add TRoom 2020-09-16 18:27:38 +02:00
			`def reward(self, obs: np.ndarray) -> float:`
			`for goal in self.goals:`
			`if goal.neighbor(obs):`
			`return goal.reward_scale`
Block Maze 2020-09-26 11:37:20 +02:00			`return self.PENALTY`
Add TRoom 2020-09-16 18:27:38 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT`
			`return [`
			`[B, B, B, B, B, B, B],`
			`[B, E, E, B, E, E, B],`
			`[B, E, E, B, E, E, B],`
			`[B, E, B, B, B, E, B],`
			`[B, E, E, R, E, E, B],`
			`[B, B, B, B, B, B, B],`
			`]`


			`class DistRewardTRoom(GoalRewardTRoom, DistRewardMixIn):`
			`pass`


Block Maze 2020-09-26 11:37:20 +02:00			`class GoalRewardBlockMaze(GoalRewardUMaze):`
			`OBSERVE_BLOCKS: bool = True`

			`def __init__(self, scale: float) -> None:`
			`super().__init__(scale)`
			`self.goals = [MazeGoal(np.array([0.0, 3.0 * scale]))]`

			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
Rolling 2020-09-27 06:33:14 +02:00			`E, B, R = MazeCell.EMPTY, MazeCell.BLOCK, MazeCell.ROBOT`
			`M = MazeCell.XY_BLOCK`
Block Maze 2020-09-26 11:37:20 +02:00			`return [`
			`[B, B, B, B, B],`
			`[B, R, E, E, B],`
			`[B, B, B, M, B],`
			`[B, E, E, E, B],`
			`[B, E, E, E, B],`
			`[B, B, B, B, B],`
			`]`


			`class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn):`
			`pass`


Add manuall collision detection for billiard 2020-09-28 13:05:08 +02:00			`class GoalRewardBilliard(MazeTask):`
Rolling 2020-09-27 06:33:14 +02:00			`REWARD_THRESHOLD: float = 0.9`
			`PENALTY: float = -0.0001`
			`MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)`
			`OBSERVE_BALLS: bool = True`
Add Billiard-v2 2020-09-29 10:52:46 +02:00			`GOAL_SIZE: float = 0.3`
Rolling 2020-09-27 06:33:14 +02:00
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:`
Rolling 2020-09-27 06:33:14 +02:00			`super().__init__(scale)`
			`goal = np.array(goal) * scale`
Add Billiard-v2 2020-09-29 10:52:46 +02:00			`self.goals.append(`
			`MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)`
			`)`

			`def _threshold(self) -> float:`
			`return self.OBJECT_BALL_SIZE + self.GOAL_SIZE`
Rolling 2020-09-27 06:33:14 +02:00
			`def reward(self, obs: np.ndarray) -> float:`
Add Billiard-v2 2020-09-29 10:52:46 +02:00			`object_pos = obs[3:6]`
			`for goal in self.goals:`
			`if goal.neighbor(object_pos):`
			`return goal.reward_scale`
			`return self.PENALTY`
Rolling 2020-09-27 06:33:14 +02:00
			`def termination(self, obs: np.ndarray) -> bool:`
Add Billiard-v2 2020-09-29 10:52:46 +02:00			`object_pos = obs[3:6]`
			`for goal in self.goals:`
			`if goal.neighbor(object_pos):`
			`return True`
			`return False`
Rolling 2020-09-27 06:33:14 +02:00
			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B = MazeCell.EMPTY, MazeCell.BLOCK`
			`R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL`
			`return [`
Make the ball size configurable at task level 2020-09-28 17:47:19 +02:00			`[B, B, B, B, B, B, B],`
			`[B, E, E, E, E, E, B],`
			`[B, E, E, E, E, E, B],`
			`[B, E, E, M, E, E, B],`
			`[B, E, E, R, E, E, B],`
			`[B, B, B, B, B, B, B],`
Rolling 2020-09-27 06:33:14 +02:00			`]`


Add manuall collision detection for billiard 2020-09-28 13:05:08 +02:00			`class DistRewardBilliard(GoalRewardBilliard):`
Rolling 2020-09-27 06:33:14 +02:00			`def reward(self, obs: np.ndarray) -> float:`
			`return -self.goals[0].euc_dist(obs[3:6]) / self.scale`


Add Billiard-v2 2020-09-29 10:52:46 +02:00			`class SubGoalBilliard(GoalRewardBilliard):`
			`def __init__(`
			`self,`
			`scale: float,`
			`primary_goal: Tuple[float, float] = (2.0, -3.0),`
			`subgoal: Tuple[float, float] = (-2.0, -3.0),`
			`) -> None:`
			`super().__init__(scale, primary_goal)`
			`self.goals.append(`
			`MazeGoal(`
			`np.array(subgoal) * scale,`
			`reward_scale=0.5,`
			`rgb=GREEN,`
			`threshold=self._threshold(),`
			`custom_size=self.GOAL_SIZE,`
			`)`
			`)`

			`@staticmethod`
			`def create_maze() -> List[List[MazeCell]]:`
			`E, B = MazeCell.EMPTY, MazeCell.BLOCK`
			`R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL`
			`return [`
			`[B, B, B, B, B, B, B],`
			`[B, E, E, E, E, E, B],`
			`[B, E, E, E, B, B, B],`
			`[B, E, E, M, E, E, B],`
			`[B, E, E, R, E, E, B],`
			`[B, B, B, B, B, B, B],`
			`]`


Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`class TaskRegistry:`
			`REGISTRY: Dict[str, List[Type[MazeTask]]] = {`
Swimmer 2020-09-24 16:40:33 +02:00			`"SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],`
Use DistReward/GoalReward instead of Dense/Sparse 2020-07-05 17:52:28 +02:00			`"UMaze": [DistRewardUMaze, GoalRewardUMaze],`
			`"Push": [DistRewardPush, GoalRewardPush],`
			`"Fall": [DistRewardFall, GoalRewardFall],`
			`"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],`
			`"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],`
Add TRoom 2020-09-16 18:27:38 +02:00			`"TRoom": [DistRewardTRoom, GoalRewardTRoom],`
Block Maze 2020-09-26 11:37:20 +02:00			`"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],`
Add Billiard-v2 2020-09-29 10:52:46 +02:00			`"Billiard": [DistRewardBilliard, GoalRewardBilliard, SubGoalBilliard],`
Introduce MazeTask for customizability 2020-06-16 06:47:40 +02:00			`}`
Add 2Rooms 2020-06-29 18:38:02 +02:00
			`@staticmethod`
			`def keys() -> List[str]:`
			`return list(TaskRegistry.REGISTRY.keys())`

			`@staticmethod`
			`def tasks(key: str) -> List[Type[MazeTask]]:`
			`return TaskRegistry.REGISTRY[key]`