From 65190dffeaa38aec7026182b55ab611605d23469 Mon Sep 17 00:00:00 2001
From: Dominik Roth <mail@dominik-roth.eu>
Date: Thu, 12 Mar 2026 18:37:09 +0100
Subject: [PATCH] feat: uncertainty-aware training with penalty and abort

sim.py:
- simulator.update(return_uncertainty=True) calls forward_with_uncertainty
  on kNN models and returns the GP std; returns None for NN or when not
  requested (no extra cost if unused)
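- No state stored on simulator; caller decides what to do with the value
- Removes HighUncertaintyError and the uncertainty_threshold constructor
  argument, which previously triggered the abort from inside the simulator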

rl.py (NuconEnv and NuconGoalEnv):
- uncertainty_penalty_start: above this GP std, subtract a linear penalty
  from the reward (scaled by uncertainty_penalty_scale, default 1.0)
- uncertainty_abort: at or above this GP std, set truncated=True
- Only calls update(return_uncertainty=True) when either threshold is set
- Uncertainty handling only applies when a simulator with a kNN model is
  used; it is ignored for NN models and when running against the game

Example:
    simulator = NuconSimulator()
    simulator.load_model('reactor_knn.pkl')
    env = NuconGoalEnv(..., simulator=simulator,
                       uncertainty_penalty_start=0.3,
                       uncertainty_abort=0.7,
                       uncertainty_penalty_scale=2.0)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 nucon/rl.py  | 42 +++++++++++++++++++++++-------------------
 nucon/sim.py | 42 ++++++++++++++++++------------------------
 2 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/nucon/rl.py b/nucon/rl.py
index 3485e89..f3115c2 100644
--- a/nucon/rl.py
+++ b/nucon/rl.py
@@ -5,10 +5,6 @@ import time
 from typing import Dict, Any
 from enum import Enum
 from nucon import Nucon, BreakerStatus, PumpStatus, PumpDryStatus, PumpOverloadStatus
-try:
-    from nucon.sim import HighUncertaintyError
-except ImportError:
-    HighUncertaintyError = None
 
 Objectives = {
     "null": lambda obs: 0,
@@ -27,7 +23,8 @@ Parameterized_Objectives = {
 class NuconEnv(gym.Env):
     metadata = {'render_modes': ['human']}
 
-    def __init__(self, nucon=None, simulator=None, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], objective_weights=None, terminate_above=0):
+    def __init__(self, nucon=None, simulator=None, render_mode=None, seconds_per_step=5, objectives=['null'], terminators=['null'], objective_weights=None, terminate_above=0,
+                 uncertainty_penalty_start: float = None, uncertainty_abort: float = None, uncertainty_penalty_scale: float = 1.0):
         super().__init__()
 
         self.render_mode = render_mode
@@ -37,6 +34,9 @@ class NuconEnv(gym.Env):
         self.objective_weights = objective_weights
         self.terminate_above = terminate_above
         self.simulator = simulator
+        self.uncertainty_penalty_start = uncertainty_penalty_start
+        self.uncertainty_abort = uncertainty_abort
+        self.uncertainty_penalty_scale = uncertainty_penalty_scale
 
         if nucon is None:
             if simulator:
@@ -131,16 +131,14 @@ class NuconEnv(gym.Env):
 
         self._total_steps += 1
         if self.simulator:
-            try:
-                self.simulator.update(self.seconds_per_step)
-            except Exception as e:
-                if HighUncertaintyError and isinstance(e, HighUncertaintyError):
+            needs_uncertainty = self.uncertainty_penalty_start is not None or self.uncertainty_abort is not None
+            uncertainty = self.simulator.update(self.seconds_per_step, return_uncertainty=needs_uncertainty)
+            if uncertainty is not None:
+                if self.uncertainty_abort is not None and uncertainty >= self.uncertainty_abort:
                     truncated = True
-                else:
-                    raise
+                if self.uncertainty_penalty_start is not None and uncertainty > self.uncertainty_penalty_start:
+                    reward -= self.uncertainty_penalty_scale * (uncertainty - self.uncertainty_penalty_start)
         else:
-            # Sleep to let the game advance seconds_per_step game-seconds,
-            # accounting for the game's simulation speed multiplier.
             sim_speed = self.nucon.GAME_SIM_SPEED.value or 1.0
             time.sleep(self.seconds_per_step / sim_speed)
         return observation, reward, terminated, truncated, info
@@ -222,6 +220,9 @@ class NuconGoalEnv(gym.Env):
         seconds_per_step=5,
         terminators=None,
         terminate_above=0,
+        uncertainty_penalty_start: float = None,
+        uncertainty_abort: float = None,
+        uncertainty_penalty_scale: float = 1.0,
     ):
         super().__init__()
 
@@ -286,6 +287,9 @@ class NuconGoalEnv(gym.Env):
 
         # Terminators
         self._terminators = terminators or []
+        self.uncertainty_penalty_start = uncertainty_penalty_start
+        self.uncertainty_abort = uncertainty_abort
+        self.uncertainty_penalty_scale = uncertainty_penalty_scale
 
         self._desired_goal = np.zeros(n_goals, dtype=np.float32)
         self._total_steps = 0
@@ -360,13 +364,13 @@ class NuconGoalEnv(gym.Env):
 
         self._total_steps += 1
         if self.simulator:
-            try:
-                self.simulator.update(self.seconds_per_step)
-            except Exception as e:
-                if HighUncertaintyError and isinstance(e, HighUncertaintyError):
+            needs_uncertainty = self.uncertainty_penalty_start is not None or self.uncertainty_abort is not None
+            uncertainty = self.simulator.update(self.seconds_per_step, return_uncertainty=needs_uncertainty)
+            if uncertainty is not None:
+                if self.uncertainty_abort is not None and uncertainty >= self.uncertainty_abort:
                     truncated = True
-                else:
-                    raise
+                if self.uncertainty_penalty_start is not None and uncertainty > self.uncertainty_penalty_start:
+                    reward -= self.uncertainty_penalty_scale * (uncertainty - self.uncertainty_penalty_start)
         else:
             sim_speed = self.nucon.GAME_SIM_SPEED.value or 1.0
             time.sleep(self.seconds_per_step / sim_speed)
diff --git a/nucon/sim.py b/nucon/sim.py
index 1fcc8cf..cd64221 100644
--- a/nucon/sim.py
+++ b/nucon/sim.py
@@ -8,18 +8,6 @@ import torch
 from nucon.model import ReactorDynamicsModel, ReactorKNNModel
 import pickle
 
-class HighUncertaintyError(Exception):
-    """Raised by NuconSimulator when the dynamics model uncertainty exceeds the threshold.
-
-    Caught by NuconEnv/NuconGoalEnv and returned as truncated=True so the RL
-    algorithm bootstraps the value rather than treating it as a terminal state.
-    """
-    def __init__(self, uncertainty: float, threshold: float):
-        self.uncertainty = uncertainty
-        self.threshold = threshold
-        super().__init__(f"Model uncertainty {uncertainty:.3f} exceeded threshold {threshold:.3f}")
-
-
 class OperatingState(Enum):
     # Tuple indicates a range of values, while list indicates a set of possible values
     OFFLINE = {
@@ -175,14 +163,13 @@ class NuconSimulator:
             for param_name in nucon.get_all_readable():
                 setattr(self, param_name, None)
 
-    def __init__(self, host: str = 'localhost', port: int = 8786, uncertainty_threshold: float = None):
+    def __init__(self, host: str = 'localhost', port: int = 8786):
         self._nucon = Nucon()
         self.parameters = self.Parameters(self._nucon)
         self.host = host
         self.port = port
         self.time = 0.0
         self.allow_all_writes = False
-        self.uncertainty_threshold = uncertainty_threshold
         self.set_state(OperatingState.OFFLINE)
         self.model = None
         self.readable_params = list(self._nucon.get_all_readable().keys())
@@ -228,9 +215,16 @@ class NuconSimulator:
     def set_allow_all_writes(self, allow: bool) -> None:
         self.allow_all_writes = allow
 
-    def update(self, time_step: float) -> None:
-        self._update_reactor_state(time_step)
+    def update(self, time_step: float, return_uncertainty: bool = False):
+        """Advance the simulator by time_step game-seconds.
+
+        If return_uncertainty=True and a kNN model is loaded, returns the GP
+        posterior std for this step (0 = on known data, ~1 = OOD).
+        Always returns None when using an NN model.
+        """
+        uncertainty = self._update_reactor_state(time_step, return_uncertainty=return_uncertainty)
         self.time += time_step
+        return uncertainty
 
     def set_model(self, model) -> None:
         """Set a pre-loaded ReactorDynamicsModel or ReactorKNNModel directly."""
@@ -262,7 +256,7 @@ class NuconSimulator:
             print(f"Error loading model: {str(e)}")
             self.model = None
 
-    def _update_reactor_state(self, time_step: float) -> None:
+    def _update_reactor_state(self, time_step: float, return_uncertainty: bool = False):
         if not self.model:
             raise ValueError("Model not set. Please load a model using load_model() or set_model().")
 
@@ -276,17 +270,15 @@ class NuconSimulator:
                 value = 0.0  # fallback for params not initialised in sim state
             state[param_id] = value
 
-        # Forward pass — same interface for both NN and kNN
+        # Forward pass
+        uncertainty = None
         if isinstance(self.model, ReactorDynamicsModel):
             with torch.no_grad():
                 next_state = self.model.forward(state, time_step)
+        elif return_uncertainty:
+            next_state, uncertainty = self.model.forward_with_uncertainty(state, time_step)
         else:
-            if self.uncertainty_threshold is not None:
-                next_state, uncertainty = self.model.forward_with_uncertainty(state, time_step)
-                if uncertainty > self.uncertainty_threshold:
-                    raise HighUncertaintyError(uncertainty, self.uncertainty_threshold)
-            else:
-                next_state = self.model.forward(state, time_step)
+            next_state = self.model.forward(state, time_step)
 
         # Update only the output params the model predicts
         for param_id, value in next_state.items():
@@ -295,6 +287,8 @@ class NuconSimulator:
             except (ValueError, KeyError):
                 pass  # ignore params that can't be set (type mismatch, unknown)
 
+        return uncertainty
+
     def set_state(self, state: OperatingState) -> None:
         self._sample_parameters_from_state(state)