From d580f77fcea4bb2e771c79d28a38fb8ef1aa0b73 Mon Sep 17 00:00:00 2001
From: Dominik Roth <dominik.roth.dev@gmail.com>
Date: Wed, 2 Oct 2024 19:31:19 +0200
Subject: [PATCH] Allowed weighted objectives

---
 README.md   |  4 ++--
 nucon/rl.py | 17 ++++++++++-------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 2db6a00..661bb17 100644
--- a/README.md
+++ b/README.md
@@ -112,7 +112,7 @@ from nucon.rl import NuconEnv, Parameterized_Objectives
 
 env = NuconEnv(objectives=['max_power'], seconds_per_step=5)
 # env2 = gym.make('Nucon-max_power-v0')
-# env3 = NuconEnv(objectives=[Parameterized_Objectives['target_temperature'](goal_temp=600)], seconds_per_step=5)
+# env3 = NuconEnv(objectives=[Parameterized_Objectives['target_temperature'](goal_temp=600)], objective_weights=[1.0], seconds_per_step=5)
 
 obs, info = env.reset()
 for _ in range(1000):
@@ -124,7 +124,7 @@ for _ in range(1000):
 env.close()
 ```
 
-Objectives takes either strings of the name of predefined objectives, or lambda functions which take an observation and return a scalar reward. Final rewards are summed across all objectives. `info['objectives']` contains all objectives and their values.
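+`objectives` takes either the names of predefined objectives (as strings) or lambda functions which take an observation and return a scalar reward. The final reward is the weighted sum across all objectives; `objective_weights` gives one weight per objective and defaults to 1.0 for each. `info['objectives']` contains all objectives and their unweighted values, and `info['objectives_weighted']` contains the same values multiplied by their weights.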
 
 ## Testing
 
diff --git a/nucon/rl.py b/nucon/rl.py
index fa9158c..4e321b7 100644
--- a/nucon/rl.py
+++ b/nucon/rl.py
@@ -7,8 +7,6 @@ from .core import Nucon, BreakerStatus, PumpStatus, PumpDryStatus, PumpOverloadS
 
 Objectives = {
     "null": lambda obs: 0,
-    "coeff": lambda obj, coeff: lambda obs: obj(obs) * coeff,
-
     "max_power": lambda obs: obs["GENERATOR_0_KW"] + obs["GENERATOR_1_KW"] + obs["GENERATOR_2_KW"],
     "episode_time": lambda obs: obs["EPISODE_TIME"],
 }
@@ -20,11 +18,14 @@ Parameterized_Objectives = {
 class NuconEnv(gym.Env):
     metadata = {'render_modes': ['human']}
 
-    def __init__(self, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], terminate_above=0):
+    def __init__(self, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], objective_weights=None, terminate_above=0):
         super().__init__()
 
         self.render_mode = render_mode
         self.seconds_per_step = seconds_per_step
+        if objective_weights is None:
+            objective_weights = [1.0 for objective in objectives]
+        self.objective_weights = objective_weights
         self.terminate_at = terminate_at
         
         # Define observation space
@@ -92,9 +93,11 @@ class NuconEnv(gym.Env):
         return obs
 
     def _get_info(self):
-        info = {'objectives': {}}
-        for objective in self.objectives:
-            info['objectives'][objective.__name__] = objective(self._get_obs())
+        info = {'objectives': {}, 'objectives_weighted': {}}
+        for objective, weight in zip(self.objectives, self.objective_weights):
+            obj = objective(self._get_obs())
+            info['objectives'][objective.__name__] = obj
+            info['objectives_weighted'][objective.__name__] = obj * weight 
         return info
     
     def reset(self, seed=None, options=None):
@@ -120,7 +123,7 @@ class NuconEnv(gym.Env):
         terminated = np.sum([terminator(observation) for terminator in self.terminators]) > self.terminate_above
         truncated = False
         info = self._get_info()
-        reward = sum(obj for obj in info['objectives'].values())
+        reward = sum(obj for obj in info['objectives_weighted'].values())
 
         self._total_steps += 1
         time.sleep(self.seconds_per_step)