Use DistReward/GoalReward instead of Dense/Sparse
This commit is contained in:
parent
cb9dcc554e
commit
d3855607a0
@ -51,13 +51,13 @@ class Scaling(NamedTuple):
|
|||||||
class MazeTask(ABC):
|
class MazeTask(ABC):
|
||||||
REWARD_THRESHOLD: float
|
REWARD_THRESHOLD: float
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0)
|
||||||
INNER_REWARD_SCALING: float = 0.0
|
INNER_REWARD_SCALING: float = 0.01
|
||||||
OBSERVE_BLOCKS: bool = False
|
OBSERVE_BLOCKS: bool = False
|
||||||
PUT_SPIN_NEAR_AGENT: bool = False
|
PUT_SPIN_NEAR_AGENT: bool = False
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
self.scale = scale
|
|
||||||
self.goals = []
|
self.goals = []
|
||||||
|
self.scale = scale
|
||||||
|
|
||||||
def sample_goals(self) -> bool:
|
def sample_goals(self) -> bool:
|
||||||
return False
|
return False
|
||||||
@ -78,7 +78,16 @@ class MazeTask(ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalSparseUMaze(MazeTask):
|
class DistRewardMixIn:
|
||||||
|
REWARD_THRESHOLD: float = -1000.0
|
||||||
|
goals: List[MazeGoal]
|
||||||
|
scale: float
|
||||||
|
|
||||||
|
def reward(self, obs: np.ndarray) -> float:
|
||||||
|
return -self.goals[0].euc_dist(obs) / self.scale
|
||||||
|
|
||||||
|
|
||||||
|
class GoalRewardUMaze(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
|
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
@ -100,14 +109,11 @@ class SingleGoalSparseUMaze(MazeTask):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalDenseUMaze(SingleGoalSparseUMaze):
|
class DistRewardUMaze(GoalRewardUMaze, DistRewardMixIn):
|
||||||
REWARD_THRESHOLD: float = 1000.0
|
pass
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
|
||||||
return -self.goals[0].euc_dist(obs)
|
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalSparsePush(SingleGoalSparseUMaze):
|
class GoalRewardPush(GoalRewardUMaze):
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
self.goals = [MazeGoal(np.array([0.0, 2.375 * scale]))]
|
self.goals = [MazeGoal(np.array([0.0, 2.375 * scale]))]
|
||||||
@ -124,14 +130,11 @@ class SingleGoalSparsePush(SingleGoalSparseUMaze):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalDensePush(SingleGoalSparsePush):
|
class DistRewardPush(GoalRewardPush, DistRewardMixIn):
|
||||||
REWARD_THRESHOLD: float = 1000.0
|
pass
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
|
||||||
return -self.goals[0].euc_dist(obs)
|
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalSparseFall(SingleGoalSparseUMaze):
|
class GoalRewardFall(GoalRewardUMaze):
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
self.goals = [MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))]
|
self.goals = [MazeGoal(np.array([0.0, 3.375 * scale, 4.5]))]
|
||||||
@ -149,14 +152,11 @@ class SingleGoalSparseFall(SingleGoalSparseUMaze):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalDenseFall(SingleGoalSparseFall):
|
class DistRewardFall(GoalRewardFall, DistRewardMixIn):
|
||||||
REWARD_THRESHOLD: float = 1000.0
|
pass
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
|
||||||
return -self.goals[0].euc_dist(obs)
|
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalSparse2Rooms(MazeTask):
|
class GoalReward2Rooms(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0)
|
||||||
|
|
||||||
@ -184,20 +184,17 @@ class SingleGoalSparse2Rooms(MazeTask):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalDense2Rooms(SingleGoalSparse2Rooms):
|
class DistReward2Rooms(GoalReward2Rooms, DistRewardMixIn):
|
||||||
REWARD_THRESHOLD: float = 1000.0
|
pass
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
|
||||||
return -self.goals[0].euc_dist(obs)
|
|
||||||
|
|
||||||
|
|
||||||
class SubGoalSparse2Rooms(SingleGoalSparse2Rooms):
|
class SubGoal2Rooms(GoalReward2Rooms):
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))
|
self.goals.append(MazeGoal(np.array([5.0 * scale, 0.0 * scale]), 0.5, GREEN))
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalSparse4Rooms(MazeTask):
|
class GoalReward4Rooms(MazeTask):
|
||||||
REWARD_THRESHOLD: float = 0.9
|
REWARD_THRESHOLD: float = 0.9
|
||||||
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0)
|
MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0)
|
||||||
|
|
||||||
@ -227,14 +224,11 @@ class SingleGoalSparse4Rooms(MazeTask):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class SingleGoalDense4Rooms(SingleGoalSparse4Rooms):
|
class DistReward4Rooms(GoalReward4Rooms, DistRewardMixIn):
|
||||||
REWARD_THRESHOLD: float = 1000.0
|
pass
|
||||||
|
|
||||||
def reward(self, obs: np.ndarray) -> float:
|
|
||||||
return -self.goals[0].euc_dist(obs)
|
|
||||||
|
|
||||||
|
|
||||||
class SubGoalSparse4Rooms(SingleGoalSparse4Rooms):
|
class SubGoal4Rooms(GoalReward4Rooms):
|
||||||
def __init__(self, scale: float) -> None:
|
def __init__(self, scale: float) -> None:
|
||||||
super().__init__(scale)
|
super().__init__(scale)
|
||||||
self.goals += [
|
self.goals += [
|
||||||
@ -245,11 +239,11 @@ class SubGoalSparse4Rooms(SingleGoalSparse4Rooms):
|
|||||||
|
|
||||||
class TaskRegistry:
|
class TaskRegistry:
|
||||||
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
REGISTRY: Dict[str, List[Type[MazeTask]]] = {
|
||||||
"UMaze": [SingleGoalDenseUMaze, SingleGoalSparseUMaze],
|
"UMaze": [DistRewardUMaze, GoalRewardUMaze],
|
||||||
"Push": [SingleGoalDensePush, SingleGoalSparsePush],
|
"Push": [DistRewardPush, GoalRewardPush],
|
||||||
"Fall": [SingleGoalDenseFall, SingleGoalSparseFall],
|
"Fall": [DistRewardFall, GoalRewardFall],
|
||||||
"2Rooms": [SingleGoalDense2Rooms, SingleGoalSparse2Rooms, SubGoalSparse2Rooms],
|
"2Rooms": [DistReward2Rooms, GoalReward2Rooms, SubGoal2Rooms],
|
||||||
"4Rooms": [SingleGoalSparse4Rooms, SingleGoalDense4Rooms, SubGoalSparse4Rooms],
|
"4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
Loading…
Reference in New Issue
Block a user