22 lines
690 B
Python
22 lines
690 B
Python
class AlrReward:
    """
    Base class for non-Markovian reward functions.

    Such reward functions may need trajectory information to calculate an
    episodic reward. Call the methods of this class in reset() and step() of
    the environment.

    Both methods below are placeholders that raise NotImplementedError;
    subclasses are expected to override them.
    """

    # methods to override:
    # ----------------------------
    def reset(self, *args, **kwargs):
        """
        Reset the reward function before an episode.

        Typical work for an override: empty state buffers, set contexts that
        influence the reward, etc.

        Args:
            *args: Unused here; accepted so overrides may define their own
                positional parameters.
            **kwargs: Unused here; accepted so overrides may define their own
                keyword parameters.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            "{} must override reset()".format(type(self).__name__)
        )

    def compute_reward(self, *args, **kwargs):
        """
        Compute the reward.

        Args:
            *args: Unused here; accepted so overrides may define their own
                positional parameters.
            **kwargs: Unused here; accepted so overrides may define their own
                keyword parameters.

        Returns: Useful things to return are reward values, success flags or
            crash flags.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            "{} must override compute_reward()".format(type(self).__name__)
        )