refactor: move UncertaintyPenalty/Abort into Parameterized_Objectives/Terminators dicts

- uncertainty_penalty -> Parameterized_Objectives['uncertainty_penalty'] - uncertainty_abort -> Parameterized_Terminators['uncertainty_abort'] - Add Parameterized_Terminators dict (same pattern as Parameterized_Objectives) - Keep UncertaintyPenalty / UncertaintyAbort as convenience aliases Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 18:57:44 +01:00 · 2026-03-12 18:57:44 +01:00 · 2c1bbc1a31
commit 2c1bbc1a31
parent 041e0ec1bd
1 changed files with 19 additions and 46 deletions
--- a/nucon/rl.py
+++ b/nucon/rl.py
@ -18,35 +18,7 @@ Objectives = {
    "episode_time": lambda obs: obs["EPISODE_TIME"],
 }
-Parameterized_Objectives = {
+def _uncertainty_penalty(start=0.3, scale=1.0, mode='l2'):
    "target_temperature": lambda goal_temp: lambda obs: -((obs["CORE_TEMP"] - goal_temp) ** 2),
    "target_gap":         lambda goal_gap:  lambda obs: -((obs["CORE_TEMP"] - obs["CORE_TEMP_MIN"] - goal_gap) ** 2),
    "temp_below":         lambda max_temp:  lambda obs: -(np.clip(obs["CORE_TEMP"] - max_temp,  0, np.inf) ** 2),
    "temp_above":         lambda min_temp:  lambda obs: -(np.clip(min_temp - obs["CORE_TEMP"],  0, np.inf) ** 2),
    "constant":           lambda constant:  lambda obs: constant,
 }
 def UncertaintyPenalty(start: float = 0.3, scale: float = 1.0, mode: str = 'l2') -> Callable:
    """Objective that penalises high simulator uncertainty.
    Returns a callable ``(obs) -> float`` suitable for use as an objective or
    terminator in NuconEnv / NuconGoalEnv.  Works because ``SIM_UNCERTAINTY``
    is injected into the obs dict whenever a simulator is active.
    Args:
        start: uncertainty level at which the penalty starts (default 0.3).
        scale: penalty coefficient.
        mode:  ``'l2'`` (quadratic, default) or ``'linear'``.
    Example::
        env = NuconEnv(
            objectives=['max_power', UncertaintyPenalty(start=0.3, scale=2.0)],
            objective_weights=[1.0, 1.0],
            simulator=simulator,
        )
    """
    excess = lambda obs: max(0.0, obs.get('SIM_UNCERTAINTY', 0.0) - start)
    if mode == 'l2':
        return lambda obs: -scale * excess(obs) ** 2
@ -55,25 +27,26 @@ def UncertaintyPenalty(start: float = 0.3, scale: float = 1.0, mode: str = 'l2')
    else:
        raise ValueError(f"Unknown mode '{mode}'. Use 'l2' or 'linear'.")
-
+def _uncertainty_abort(threshold=0.7):
 def UncertaintyAbort(threshold: float = 0.7) -> Callable:
    """Terminator that aborts the episode when simulator uncertainty is too high.
    Returns a callable ``(obs) -> float`` for use as a *terminator*.  When
    the GP posterior std exceeds ``threshold`` the episode is truncated
    (``terminated=True``).
    Example::
        env = NuconEnv(
            objectives=['max_power'],
            terminators=[UncertaintyAbort(threshold=0.7)],
            terminate_above=0,
            simulator=simulator,
        )
    """
    return lambda obs: 1.0 if obs.get('SIM_UNCERTAINTY', 0.0) >= threshold else 0.0
 Parameterized_Objectives = {
    "target_temperature":  lambda goal_temp: lambda obs: -((obs["CORE_TEMP"] - goal_temp) ** 2),
    "target_gap":          lambda goal_gap:  lambda obs: -((obs["CORE_TEMP"] - obs["CORE_TEMP_MIN"] - goal_gap) ** 2),
    "temp_below":          lambda max_temp:  lambda obs: -(np.clip(obs["CORE_TEMP"] - max_temp,  0, np.inf) ** 2),
    "temp_above":          lambda min_temp:  lambda obs: -(np.clip(min_temp - obs["CORE_TEMP"],  0, np.inf) ** 2),
    "constant":            lambda constant:  lambda obs: constant,
    "uncertainty_penalty": _uncertainty_penalty,  # (start, scale, mode) -> (obs) -> float
 }
 Parameterized_Terminators = {
    "uncertainty_abort": _uncertainty_abort,  # (threshold,) -> (obs) -> float
 }
 # Convenience aliases
 UncertaintyPenalty = _uncertainty_penalty
 UncertaintyAbort   = _uncertainty_abort
 # ---------------------------------------------------------------------------
 # Internal helpers