diff --git a/README.md b/README.md index 2db6a00..661bb17 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ from nucon.rl import NuconEnv, Parameterized_Objectives env = NuconEnv(objectives=['max_power'], seconds_per_step=5) # env2 = gym.make('Nucon-max_power-v0') -# env3 = NuconEnv(objectives=[Parameterized_Objectives['target_temperature'](goal_temp=600)], seconds_per_step=5) +# env3 = NuconEnv(objectives=[Parameterized_Objectives['target_temperature'](goal_temp=600)], objective_weights=[1.0], seconds_per_step=5) obs, info = env.reset() for _ in range(1000): @@ -124,7 +124,7 @@ for _ in range(1000): env.close() ``` -Objectives takes either strings of the name of predefined objectives, or lambda functions which take an observation and return a scalar reward. Final rewards are summed across all objectives. `info['objectives']` contains all objectives and their values. +Objectives takes either strings of the name of predefined objectives, or lambda functions which take an observation and return a scalar reward. The final reward is the weighted sum across all objectives (weights come from `objective_weights` and default to 1.0). `info['objectives']` contains all objectives and their values. 
## Testing diff --git a/nucon/rl.py b/nucon/rl.py index fa9158c..4e321b7 100644 --- a/nucon/rl.py +++ b/nucon/rl.py @@ -7,8 +7,6 @@ from .core import Nucon, BreakerStatus, PumpStatus, PumpDryStatus, PumpOverloadS Objectives = { "null": lambda obs: 0, - "coeff": lambda obj, coeff: lambda obs: obj(obs) * coeff, - "max_power": lambda obs: obs["GENERATOR_0_KW"] + obs["GENERATOR_1_KW"] + obs["GENERATOR_2_KW"], "episode_time": lambda obs: obs["EPISODE_TIME"], } @@ -20,11 +18,14 @@ Parameterized_Objectives = { class NuconEnv(gym.Env): metadata = {'render_modes': ['human']} - def __init__(self, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], terminate_above=0): + def __init__(self, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], objective_weights=None, terminate_above=0): super().__init__() self.render_mode = render_mode self.seconds_per_step = seconds_per_step + if objective_weights is None: + objective_weights = [1.0 for objective in objectives] + self.objective_weights = objective_weights self.terminate_at = terminate_at # Define observation space @@ -92,9 +93,11 @@ class NuconEnv(gym.Env): return obs def _get_info(self): - info = {'objectives': {}} - for objective in self.objectives: - info['objectives'][objective.__name__] = objective(self._get_obs()) + info = {'objectives': {}, 'objectives_weighted': {}} + for objective, weight in zip(self.objectives, self.objective_weights): + obj = objective(self._get_obs()) + info['objectives'][objective.__name__] = obj + info['objectives_weighted'][objective.__name__] = obj * weight return info def reset(self, seed=None, options=None): @@ -120,7 +123,7 @@ class NuconEnv(gym.Env): terminated = np.sum([terminator(observation) for terminator in self.terminators]) > self.terminate_above truncated = False info = self._get_info() - reward = sum(obj for obj in info['objectives'].values()) + reward = sum(obj for obj in info['objectives_weighted'].values()) 
self._total_steps += 1 time.sleep(self.seconds_per_step)