Allowed weighted objectives
This commit is contained in:
parent
dc8bafbbfe
commit
d580f77fce
@ -112,7 +112,7 @@ from nucon.rl import NuconEnv, Parameterized_Objectives
|
|||||||
|
|
||||||
env = NuconEnv(objectives=['max_power'], seconds_per_step=5)
|
env = NuconEnv(objectives=['max_power'], seconds_per_step=5)
|
||||||
# env2 = gym.make('Nucon-max_power-v0')
|
# env2 = gym.make('Nucon-max_power-v0')
|
||||||
# env3 = NuconEnv(objectives=[Parameterized_Objectives['target_temperature'](goal_temp=600)], seconds_per_step=5)
|
# env3 = NuconEnv(objectives=[Parameterized_Objectives['target_temperature'](goal_temp=600)], objective_weights=[1.0], seconds_per_step=5)
|
||||||
|
|
||||||
obs, info = env.reset()
|
obs, info = env.reset()
|
||||||
for _ in range(1000):
|
for _ in range(1000):
|
||||||
@ -124,7 +124,7 @@ for _ in range(1000):
|
|||||||
env.close()
|
env.close()
|
||||||
```
|
```
|
||||||
|
|
||||||
`objectives` takes either the names of predefined objectives (as strings) or lambda functions that take an observation and return a scalar reward. The final reward is the sum across all objectives. `info['objectives']` contains all objectives and their values.
|
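`objectives` takes either the names of predefined objectives (as strings) or lambda functions that take an observation and return a scalar reward. The final reward is the weighted sum across all objectives; weights are given by `objective_weights` and default to 1.0 for every objective. `info['objectives']` contains all objectives and their (unweighted) values, and `info['objectives_weighted']` contains the weighted values.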
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
|
||||||
|
17
nucon/rl.py
17
nucon/rl.py
@ -7,8 +7,6 @@ from .core import Nucon, BreakerStatus, PumpStatus, PumpDryStatus, PumpOverloadS
|
|||||||
|
|
||||||
Objectives = {
|
Objectives = {
|
||||||
"null": lambda obs: 0,
|
"null": lambda obs: 0,
|
||||||
"coeff": lambda obj, coeff: lambda obs: obj(obs) * coeff,
|
|
||||||
|
|
||||||
"max_power": lambda obs: obs["GENERATOR_0_KW"] + obs["GENERATOR_1_KW"] + obs["GENERATOR_2_KW"],
|
"max_power": lambda obs: obs["GENERATOR_0_KW"] + obs["GENERATOR_1_KW"] + obs["GENERATOR_2_KW"],
|
||||||
"episode_time": lambda obs: obs["EPISODE_TIME"],
|
"episode_time": lambda obs: obs["EPISODE_TIME"],
|
||||||
}
|
}
|
||||||
@ -20,11 +18,14 @@ Parameterized_Objectives = {
|
|||||||
class NuconEnv(gym.Env):
|
class NuconEnv(gym.Env):
|
||||||
metadata = {'render_modes': ['human']}
|
metadata = {'render_modes': ['human']}
|
||||||
|
|
||||||
def __init__(self, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], terminate_above=0):
|
def __init__(self, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], objective_weights=None, terminate_above=0):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.render_mode = render_mode
|
self.render_mode = render_mode
|
||||||
self.seconds_per_step = seconds_per_step
|
self.seconds_per_step = seconds_per_step
|
||||||
|
if objective_weights is None:
|
||||||
|
objective_weights = [1.0 for objective in objectives]
|
||||||
|
self.objective_weights = objective_weights
|
||||||
self.terminate_at = terminate_at
|
self.terminate_at = terminate_at
|
||||||
|
|
||||||
# Define observation space
|
# Define observation space
|
||||||
@ -92,9 +93,11 @@ class NuconEnv(gym.Env):
|
|||||||
return obs
|
return obs
|
||||||
|
|
||||||
def _get_info(self):
|
def _get_info(self):
|
||||||
info = {'objectives': {}}
|
info = {'objectives': {}, 'objectives_weighted': {}}
|
||||||
for objective in self.objectives:
|
for objective, weight in zip(self.objectives, self.objective_weights):
|
||||||
info['objectives'][objective.__name__] = objective(self._get_obs())
|
obj = objective(self._get_obs())
|
||||||
|
info['objectives'][objective.__name__] = obj
|
||||||
|
info['objectives_weighted'][objective.__name__] = obj * weight
|
||||||
return info
|
return info
|
||||||
|
|
||||||
def reset(self, seed=None, options=None):
|
def reset(self, seed=None, options=None):
|
||||||
@ -120,7 +123,7 @@ class NuconEnv(gym.Env):
|
|||||||
terminated = np.sum([terminator(observation) for terminator in self.terminators]) > self.terminate_above
|
terminated = np.sum([terminator(observation) for terminator in self.terminators]) > self.terminate_above
|
||||||
truncated = False
|
truncated = False
|
||||||
info = self._get_info()
|
info = self._get_info()
|
||||||
reward = sum(obj for obj in info['objectives'].values())
|
reward = sum(obj for obj in info['objectives_weighted'].values())
|
||||||
|
|
||||||
self._total_steps += 1
|
self._total_steps += 1
|
||||||
time.sleep(self.seconds_per_step)
|
time.sleep(self.seconds_per_step)
|
||||||
|
Loading…
Reference in New Issue
Block a user