Introduce BanditBilliard (which keeps the previous single-subgoal defaults) and change SubGoalBilliard to a more conventional task that takes a list of subgoals
This commit is contained in:
parent
0ec69ab4e2
commit
28711cee19
@ -396,10 +396,11 @@ class SubGoalBilliard(GoalRewardBilliard):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
scale: float,
|
scale: float,
|
||||||
primary_goal: Tuple[float, float] = (4.0, -2.0),
|
primary_goal: Tuple[float, float] = (2.0, -3.0),
|
||||||
subgoal: Tuple[float, float] = (4.0, 2.0),
|
subgoals: List[Tuple[float, float]] = [(-2.0, -3.0), (-2.0, 1.0), (2.0, 1.0)],
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(scale, primary_goal)
|
super().__init__(scale, primary_goal)
|
||||||
|
for subgoal in subgoals:
|
||||||
self.goals.append(
|
self.goals.append(
|
||||||
MazeGoal(
|
MazeGoal(
|
||||||
np.array(subgoal) * scale,
|
np.array(subgoal) * scale,
|
||||||
@ -410,6 +411,16 @@ class SubGoalBilliard(GoalRewardBilliard):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BanditBilliard(SubGoalBilliard):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
scale: float,
|
||||||
|
primary_goal: Tuple[float, float] = (4.0, -2.0),
|
||||||
|
subgoals: List[Tuple[float, float]] = [(4.0, 2.0)],
|
||||||
|
) -> None:
|
||||||
|
super().__init__(scale, primary_goal, subgoals)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_maze() -> List[List[MazeCell]]:
|
def create_maze() -> List[List[MazeCell]]:
|
||||||
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
E, B = MazeCell.EMPTY, MazeCell.BLOCK
|
||||||
@ -435,7 +446,12 @@ class TaskRegistry:
|
|||||||
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
||||||
"TRoom": [DistRewardTRoom, GoalRewardTRoom],
|
"TRoom": [DistRewardTRoom, GoalRewardTRoom],
|
||||||
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
|
"BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
|
||||||
"Billiard": [DistRewardBilliard, GoalRewardBilliard, SubGoalBilliard],
|
"Billiard": [
|
||||||
|
DistRewardBilliard,
|
||||||
|
GoalRewardBilliard,
|
||||||
|
SubGoalBilliard,
|
||||||
|
BanditBilliard,
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
Loading…
Reference in New Issue
Block a user