Compare commits

..

No commits in common. "2bb4207a9820b178542bd330035565215b407be4" and "88f48960866c8a90a84ea8e1ea3306dfb04c32ef" have entirely different histories.

5 changed files with 96 additions and 295 deletions

View File

@ -18,15 +18,13 @@ Actors = {
# --- NN-based dynamics model --- # --- NN-based dynamics model ---
class ReactorDynamicsNet(nn.Module): class ReactorDynamicsNet(nn.Module):
def __init__(self, input_dim, output_dim, dropout=0.3): def __init__(self, input_dim, output_dim):
super(ReactorDynamicsNet, self).__init__() super(ReactorDynamicsNet, self).__init__()
self.network = nn.Sequential( self.network = nn.Sequential(
nn.Linear(input_dim + 1, 128), # +1 for time_delta nn.Linear(input_dim + 1, 128), # +1 for time_delta
nn.ReLU(), nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(128, 128), nn.Linear(128, 128),
nn.ReLU(), nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(128, output_dim) nn.Linear(128, output_dim)
) )
@ -35,81 +33,23 @@ class ReactorDynamicsNet(nn.Module):
return self.network(x) return self.network(x)
class ReactorDynamicsModel(nn.Module): class ReactorDynamicsModel(nn.Module):
"""
NN dynamics model predicting per-second rates of change (like ReactorKNNModel).
Inputs are z-score normalised; outputs are normalised rates.
forward() returns absolute next-state dict: cur + predicted_rate * time_delta.
forward_with_uncertainty() returns (next_state, 0.0) no uncertainty estimate.
"""
def __init__(self, input_params: List[str], output_params: List[str]): def __init__(self, input_params: List[str], output_params: List[str]):
super(ReactorDynamicsModel, self).__init__() super(ReactorDynamicsModel, self).__init__()
self.input_params = input_params self.input_params = input_params
self.output_params = output_params self.output_params = output_params
self.net = ReactorDynamicsNet(len(input_params), len(output_params)) self.net = ReactorDynamicsNet(len(input_params), len(output_params))
# Normalisation stats set by fit()
self.register_buffer('_in_mean', torch.zeros(len(input_params)))
self.register_buffer('_in_std', torch.ones(len(input_params)))
self.register_buffer('_rate_mean', torch.zeros(len(output_params)))
self.register_buffer('_rate_std', torch.ones(len(output_params)))
def fit_normalisation(self, dataset): def _state_dict_to_tensor(self, state_dict):
"""Compute and store normalisation stats from a dataset.""" return torch.tensor([state_dict[p] for p in self.input_params], dtype=torch.float32)
in_vecs, rate_vecs = [], []
for state, _action, next_state, dt in dataset:
if dt <= 0:
continue
in_vecs.append([state.get(p, 0.0) for p in self.input_params])
rate_vecs.append([(next_state.get(p, 0.0) - state.get(p, 0.0)) / dt
for p in self.output_params])
ins = np.array(in_vecs, dtype=np.float32)
rates = np.array(rate_vecs, dtype=np.float32)
in_std = ins.std(0)
r_std = rates.std(0)
self._in_mean.copy_(torch.from_numpy(ins.mean(0)))
self._in_std.copy_(torch.from_numpy(np.where(in_std < 1e-6, 1.0, in_std)))
self._rate_mean.copy_(torch.from_numpy(rates.mean(0)))
self._rate_std.copy_(torch.from_numpy(np.where(r_std < 1e-6, 1.0, r_std)))
def _normalise_input(self, t: torch.Tensor) -> torch.Tensor: def _tensor_to_state_dict(self, tensor):
return (t - self._in_mean) / self._in_std return {p: tensor[i].item() for i, p in enumerate(self.output_params)}
def _denormalise_rate(self, t: torch.Tensor) -> torch.Tensor:
return t * self._rate_std + self._rate_mean
def forward(self, state_dict, time_delta): def forward(self, state_dict, time_delta):
return self.forward_with_uncertainty(state_dict, time_delta)[0] state_tensor = self._state_dict_to_tensor(state_dict).unsqueeze(0)
time_delta_tensor = torch.tensor([time_delta], dtype=torch.float32).unsqueeze(0)
def forward_with_uncertainty(self, state_dict, time_delta, mc_samples=3): predicted_tensor = self.net(state_tensor, time_delta_tensor)
"""MC-Dropout uncertainty: run mc_samples stochastic forward passes. return self._tensor_to_state_dict(predicted_tensor.squeeze(0))
Uncertainty is the mean normalised std across output dims, clipped to [0, 1].
0 = very confident (low variance), ~1 = high variance / OOD.
"""
s = torch.tensor([state_dict.get(p, 0.0) for p in self.input_params],
dtype=torch.float32).unsqueeze(0)
s_norm = self._normalise_input(s)
dt_t = torch.tensor([[time_delta]], dtype=torch.float32)
# Keep dropout active for uncertainty sampling
self.net.train()
with torch.no_grad():
samples = torch.stack([self.net(s_norm, dt_t).squeeze(0)
for _ in range(mc_samples)]) # (mc_samples, out_dim)
self.net.eval()
rate_norm_mean = samples.mean(0)
rate_norm_std = samples.std(0)
rate = self._denormalise_rate(rate_norm_mean)
cur = torch.tensor([state_dict.get(p, 0.0) for p in self.output_params],
dtype=torch.float32)
predicted = cur + rate * time_delta
pred_dict = {p: float(predicted[i]) for i, p in enumerate(self.output_params)}
# Uncertainty: mean coefficient of variation in normalised space, clipped to [0,1]
uncertainty = float(rate_norm_std.mean().clamp(0.0, 1.0))
return pred_dict, uncertainty
# --- kNN-based dynamics model --- # --- kNN-based dynamics model ---
@ -210,37 +150,6 @@ class ReactorKNNModel:
pred_dict = {p: float(predicted[i]) for i, p in enumerate(self.output_params)} pred_dict = {p: float(predicted[i]) for i, p in enumerate(self.output_params)}
return pred_dict, std return pred_dict, std
# --- Mixture model ---
class MixtureModel:
"""Combines two dynamics models, selecting based on kNN uncertainty.
Uses knn_model when its uncertainty is below threshold (it's confident /
near training data). Falls back to nn_model when kNN is OOD.
Both models must implement forward_with_uncertainty(state_dict, time_delta).
input_params / output_params are taken from knn_model.
"""
def __init__(self, knn_model, nn_model):
self.knn_model = knn_model
self.nn_model = nn_model
self.input_params = knn_model.input_params
self.output_params = knn_model.output_params
def forward(self, state_dict, time_delta):
return self.forward_with_uncertainty(state_dict, time_delta)[0]
def forward_with_uncertainty(self, state_dict, time_delta):
knn_pred, knn_u = self.knn_model.forward_with_uncertainty(state_dict, time_delta)
nn_pred, nn_u = self.nn_model.forward_with_uncertainty(state_dict, time_delta)
w_knn = 1.0 - knn_u # high when kNN is confident
w_nn = knn_u # high when kNN is OOD
blended = {p: w_knn * knn_pred[p] + w_nn * nn_pred[p]
for p in self.output_params}
uncertainty = w_knn * knn_u + w_nn * nn_u # weighted uncertainty
return blended, uncertainty
# --- Learner --- # --- Learner ---
class NuconModelLearner: class NuconModelLearner:
@ -357,14 +266,13 @@ class NuconModelLearner:
self.save_dataset() self.save_dataset()
print(f"Collection complete. {collected} steps, {len(self.dataset)} total samples.") print(f"Collection complete. {collected} steps, {len(self.dataset)} total samples.")
def train_model(self, batch_size=32, num_epochs=10, test_split=0.2, lr=1e-3): def train_model(self, batch_size=32, num_epochs=10, test_split=0.2):
"""Train a neural-network dynamics model on the current dataset.""" """Train a neural-network dynamics model on the current dataset."""
if self.model is None: if self.model is None:
self.model = ReactorDynamicsModel(self.readable_params, self.non_writable_params) self.model = ReactorDynamicsModel(self.readable_params, self.non_writable_params)
self.optimizer = optim.Adam(self.model.parameters())
elif not isinstance(self.model, ReactorDynamicsModel): elif not isinstance(self.model, ReactorDynamicsModel):
raise ValueError("A kNN model is already loaded. Create a new learner to train an NN.") raise ValueError("A kNN model is already loaded. Create a new learner to train an NN.")
self.model.fit_normalisation(self.dataset)
self.optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-4)
random.shuffle(self.dataset) random.shuffle(self.dataset)
split_idx = int(len(self.dataset) * (1 - test_split)) split_idx = int(len(self.dataset) * (1 - test_split))
train_data = self.dataset[:split_idx] train_data = self.dataset[:split_idx]
@ -457,45 +365,37 @@ class NuconModelLearner:
print(f"drop_redundant: kept {len(self.dataset)}, dropped {dropped} samples.") print(f"drop_redundant: kept {len(self.dataset)}, dropped {dropped} samples.")
def _train_epoch(self, data, batch_size): def _train_epoch(self, data, batch_size):
self.model.train() out_indices = [self.readable_params.index(p) if p in self.readable_params else None
for p in self.non_writable_params]
total_loss = 0 total_loss = 0
n_batches = 0
for i in range(0, len(data), batch_size): for i in range(0, len(data), batch_size):
batch = [s for s in data[i:i+batch_size] if s[3] > 0] batch = data[i:i+batch_size]
if not batch:
continue
states = torch.tensor([[s[0].get(p, 0.0) for p in self.readable_params] for s in batch], dtype=torch.float32)
targets = torch.tensor([[(s[2].get(p, 0.0) - s[0].get(p, 0.0)) / s[3] for p in self.non_writable_params] for s in batch], dtype=torch.float32)
dts = torch.tensor([[s[3]] for s in batch], dtype=torch.float32)
s_norm = self.model._normalise_input(states)
rate_norm_pred = self.model.net(s_norm, dts)
rate_norm_target = (targets - self.model._rate_mean) / self.model._rate_std
self.optimizer.zero_grad() self.optimizer.zero_grad()
loss = torch.nn.functional.mse_loss(rate_norm_pred, rate_norm_target) loss = torch.tensor(0.0)
for state, _, next_state, time_delta in batch:
state_t = self.model._state_dict_to_tensor(state).unsqueeze(0)
td_t = torch.tensor([[time_delta]], dtype=torch.float32)
pred = self.model.net(state_t, td_t).squeeze(0)
target = torch.tensor([next_state[p] for p in self.non_writable_params],
dtype=torch.float32)
loss = loss + torch.nn.functional.mse_loss(pred, target)
loss = loss / len(batch)
loss.backward() loss.backward()
self.optimizer.step() self.optimizer.step()
total_loss += loss.item() total_loss += loss.item()
n_batches += 1 return total_loss / max(1, len(data) // batch_size)
self.model.eval()
return total_loss / max(1, n_batches)
def _test_epoch(self, data): def _test_epoch(self, data):
total_loss = 0.0 total_loss = 0.0
n = 0
with torch.no_grad(): with torch.no_grad():
for state, _, next_state, dt in data: for state, _, next_state, time_delta in data:
if dt <= 0: state_t = self.model._state_dict_to_tensor(state).unsqueeze(0)
continue td_t = torch.tensor([[time_delta]], dtype=torch.float32)
s_t = torch.tensor([[state.get(p, 0.0) for p in self.readable_params]], dtype=torch.float32) pred = self.model.net(state_t, td_t).squeeze(0)
s_norm = self.model._normalise_input(s_t) target = torch.tensor([next_state[p] for p in self.non_writable_params],
dt_t = torch.tensor([[dt]], dtype=torch.float32) dtype=torch.float32)
rate_norm_pred = self.model.net(s_norm, dt_t).squeeze(0) total_loss += torch.nn.functional.mse_loss(pred, target).item()
target = torch.tensor([(next_state.get(p, 0.0) - state.get(p, 0.0)) / dt return total_loss / len(data)
for p in self.non_writable_params], dtype=torch.float32)
rate_norm_target = (target - self.model._rate_mean) / self.model._rate_std
total_loss += torch.nn.functional.mse_loss(rate_norm_pred, rate_norm_target).item()
n += 1
return total_loss / max(1, n)
def save_model(self, path): def save_model(self, path):
if self.model is None: if self.model is None:
@ -539,10 +439,6 @@ class NuconModelLearner:
def merge_datasets(self, other_dataset_path): def merge_datasets(self, other_dataset_path):
other_dataset = self.load_dataset(other_dataset_path) other_dataset = self.load_dataset(other_dataset_path)
if not isinstance(other_dataset, list): if other_dataset:
raise ValueError( self.dataset.extend(other_dataset)
f"'{other_dataset_path}' does not contain a dataset (got {type(other_dataset).__name__}). " self.save_dataset()
f"Pass a dataset .pkl file, not a model file."
)
self.dataset.extend(other_dataset)
self.save_dataset()

View File

@ -12,18 +12,10 @@ from nucon import Nucon, BreakerStatus, PumpStatus, PumpDryStatus, PumpOverloadS
# Reward / objective helpers # Reward / objective helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _alarm_penalty(obs):
"""Penalty proportional to number of active alarms. Only meaningful when running against the real game."""
raw = obs.get('ALARMS_ACTIVE', '')
if not raw or not raw.strip():
return 0.0
return -float(len(raw.split(',')))
Objectives = { Objectives = {
"null": lambda obs: 0, "null": lambda obs: 0,
"max_power": lambda obs: obs["GENERATOR_0_KW"] + obs["GENERATOR_1_KW"] + obs["GENERATOR_2_KW"], "max_power": lambda obs: obs["GENERATOR_0_KW"] + obs["GENERATOR_1_KW"] + obs["GENERATOR_2_KW"],
"episode_time": lambda obs: obs["EPISODE_TIME"], "episode_time": lambda obs: obs["EPISODE_TIME"],
"alarm_penalty": _alarm_penalty,
} }
def _uncertainty_penalty(start=0.3, scale=1.0, mode='l2'): def _uncertainty_penalty(start=0.3, scale=1.0, mode='l2'):
@ -43,8 +35,6 @@ Parameterized_Objectives = {
"target_gap": lambda goal_gap: lambda obs: -((obs["CORE_TEMP"] - obs["CORE_TEMP_MIN"] - goal_gap) ** 2), "target_gap": lambda goal_gap: lambda obs: -((obs["CORE_TEMP"] - obs["CORE_TEMP_MIN"] - goal_gap) ** 2),
"temp_below": lambda max_temp: lambda obs: -(np.clip(obs["CORE_TEMP"] - max_temp, 0, np.inf) ** 2), "temp_below": lambda max_temp: lambda obs: -(np.clip(obs["CORE_TEMP"] - max_temp, 0, np.inf) ** 2),
"temp_above": lambda min_temp: lambda obs: -(np.clip(min_temp - obs["CORE_TEMP"], 0, np.inf) ** 2), "temp_above": lambda min_temp: lambda obs: -(np.clip(min_temp - obs["CORE_TEMP"], 0, np.inf) ** 2),
"temp_below_linear": lambda max_temp: lambda obs: -np.clip(obs["CORE_TEMP"] - max_temp, 0, np.inf),
"temp_above_linear": lambda min_temp: lambda obs: -np.clip(min_temp - obs["CORE_TEMP"], 0, np.inf),
"constant": lambda constant: lambda obs: constant, "constant": lambda constant: lambda obs: constant,
"uncertainty_penalty": _uncertainty_penalty, # (start, scale, mode) -> (obs) -> float "uncertainty_penalty": _uncertainty_penalty, # (start, scale, mode) -> (obs) -> float
} }
@ -294,10 +284,8 @@ class NuconGoalEnv(gym.Env):
additional_objectives=None, additional_objectives=None,
additional_objective_weights=None, additional_objective_weights=None,
obs_params=None, obs_params=None,
action_params=None,
init_states=None, init_states=None,
delta_action_scale=None, delta_action_scale=None,
goal_sampling_std=None,
): ):
super().__init__() super().__init__()
@ -365,17 +353,15 @@ class NuconGoalEnv(gym.Env):
'desired_goal': spaces.Box(low=0.0, high=1.0, shape=(n_goals,), dtype=np.float32), 'desired_goal': spaces.Box(low=0.0, high=1.0, shape=(n_goals,), dtype=np.float32),
}) })
# Action space: writable params within the obs param set, or an explicit override list. # Action space: writable params within the obs param set (flat Box for SB3 compatibility).
action_set = set(action_params) if action_params is not None else set(base_params)
self.action_space, self._action_params, self._action_lows, self._action_ranges = \ self.action_space, self._action_params, self._action_lows, self._action_ranges = \
_build_flat_action_space(self.nucon, action_set, delta_action_scale) _build_flat_action_space(self.nucon, set(base_params), delta_action_scale)
self._terminators = terminators or [] self._terminators = terminators or []
_objs = additional_objectives or [] _objs = additional_objectives or []
self._objectives = [Objectives[o] if isinstance(o, str) else o for o in _objs] self._objectives = [Objectives[o] if isinstance(o, str) else o for o in _objs]
self._objective_weights = additional_objective_weights or [1.0] * len(self._objectives) self._objective_weights = additional_objective_weights or [1.0] * len(self._objectives)
self._init_states = init_states # list of state dicts to sample on reset self._init_states = init_states # list of state dicts to sample on reset
self._goal_sampling_std = goal_sampling_std # Gaussian std in normalised goal space; None → uniform
self._desired_goal = np.zeros(n_goals, dtype=np.float32) self._desired_goal = np.zeros(n_goals, dtype=np.float32)
self._total_steps = 0 self._total_steps = 0
@ -437,6 +423,7 @@ class NuconGoalEnv(gym.Env):
super().reset(seed=seed) super().reset(seed=seed)
self._total_steps = 0 self._total_steps = 0
rng = np.random.default_rng(seed) rng = np.random.default_rng(seed)
self._desired_goal = rng.uniform(0.0, 1.0, size=len(self.goal_params)).astype(np.float32)
if self._init_states is not None and self.simulator is not None: if self._init_states is not None and self.simulator is not None:
state = self._init_states[rng.integers(len(self._init_states))] state = self._init_states[rng.integers(len(self._init_states))]
for k, v in state.items(): for k, v in state.items():
@ -444,28 +431,13 @@ class NuconGoalEnv(gym.Env):
self.simulator.set(k, v, force=True) self.simulator.set(k, v, force=True)
except Exception: except Exception:
pass pass
if self._goal_sampling_std is not None:
# Sample goal as Gaussian delta from current state — usually a small change,
# occasionally a large one.
current = np.array([
float(self.simulator.get(p) if self.simulator else 0.0)
for p in self.goal_params
], dtype=np.float32)
current_norm = np.clip((current - self._goal_low) / self._goal_range, 0.0, 1.0)
delta = rng.normal(0.0, self._goal_sampling_std, size=len(self.goal_params))
self._desired_goal = np.clip(current_norm + delta, 0.0, 1.0).astype(np.float32)
else:
self._desired_goal = rng.uniform(0.0, 1.0, size=len(self.goal_params)).astype(np.float32)
gym_obs, _ = self._read_obs() gym_obs, _ = self._read_obs()
return gym_obs, {} return gym_obs, {}
def step(self, action): def step(self, action):
flat = np.asarray(action, dtype=np.float32) flat = np.asarray(action, dtype=np.float32)
if self._delta_action_scale is not None: if self._delta_action_scale is not None:
# Compute absolute values from deltas, reading current state # Compute absolute values from deltas, reading current state directly if possible
if self.simulator is None:
raw_current = self.nucon._batch_query(self._action_params)
all_params = self.nucon.get_all_readable()
absolute = {} absolute = {}
for i, pid in enumerate(self._action_params): for i, pid in enumerate(self._action_params):
param = self.nucon._parameters[pid] param = self.nucon._parameters[pid]
@ -476,11 +448,7 @@ class NuconGoalEnv(gym.Env):
v = self.simulator.get(pid) v = self.simulator.get(pid)
current = float(v.value if isinstance(v, Enum) else v) if v is not None else 0.0 current = float(v.value if isinstance(v, Enum) else v) if v is not None else 0.0
else: else:
try: current = 0.0 # fallback; batch read not worth it for actions alone
v = self.nucon._parse_value(all_params[pid], raw_current.get(pid, '0'))
current = float(v.value if isinstance(v, Enum) else v)
except Exception:
current = 0.0
delta = float(flat[i]) * self._delta_action_scale * self._action_ranges[i] delta = float(flat[i]) * self._delta_action_scale * self._action_ranges[i]
absolute[pid] = float(np.clip(current + delta, absolute[pid] = float(np.clip(current + delta,
self._action_lows[i], self._action_lows[i],

View File

@ -271,7 +271,10 @@ class NuconSimulator:
# Forward pass # Forward pass
uncertainty = None uncertainty = None
if return_uncertainty: if isinstance(self.model, ReactorDynamicsModel):
with torch.no_grad():
next_state = self.model.forward(state, time_step)
elif return_uncertainty:
next_state, uncertainty = self.model.forward_with_uncertainty(state, time_step) next_state, uncertainty = self.model.forward_with_uncertainty(state, time_step)
else: else:
next_state = self.model.forward(state, time_step) next_state = self.model.forward(state, time_step)

View File

@ -5,16 +5,14 @@ Architecture:
- Rod PID: keeps CORE_TEMP at setpoint via ROD_BANK_POS_0_ORDERED - Rod PID: keeps CORE_TEMP at setpoint via ROD_BANK_POS_0_ORDERED
Per-train control (trains 1/2/3, 0-indexed as 0/1/2 in param names): Per-train control (trains 1/2/3, 0-indexed as 0/1/2 in param names):
- Primary pump: fixed 65% (higher than 50% improves heat transfer per manual) - Primary pump: fixed 50%; hill-climbs only when deficit > 5 MW
- MSCV PI: drives train power output, gated on steam availability - MSCV PI: drives train power output, gated on steam availability
- Secondary pump feedforward: half of steam outlet + level PID - Secondary pump feedforward: half of steam outlet + level PID
- Bypass: hold at 0 - Bypass: hold near 0
Auxiliary: Auxiliary:
- Vacuum pump: on continuously; turned off only during retention tank drain
- Condenser circulation pump: fixed 25% (prevents overcooling of return water)
- Retention tank: drain via ejector return valve when > 75%, stop at 50% - Retention tank: drain via ejector return valve when > 75%, stop at 50%
- Condenser fill: run FREIGHT_PUMP_CONDENSER below 45%, stop at 60% - Condenser fill: run FREIGHT_PUMP_CONDENSER below 33%, stop at 50%
Usage: Usage:
python3.14 scripts/reactor_control.py --trains 3 --target 50000 python3.14 scripts/reactor_control.py --trains 3 --target 50000
@ -128,11 +126,16 @@ class TrainController:
out_min=-2.0, out_max=2.0, integral_max=3.0) out_min=-2.0, out_max=2.0, integral_max=3.0)
self.sec_level_target = 25_000.0 self.sec_level_target = 25_000.0
self.prim_pump = 65.0 self.prim_pump = 50.0
self.mscv = 9.0 self.mscv = 9.0
self.sec_pump = 40.0 self.sec_pump = 40.0
set_param(f'COOLANT_CORE_CIRCULATION_PUMP_{self.i}_ORDERED_SPEED', self.prim_pump) self._pump_hill_cycle = 0
self._pump_hill_dir = +1.0
self._pump_hill_steam = None
self.PUMP_SWEEP_THRESHOLD = 5_000.0
self.sweeping = False
set_param(f'STEAM_TURBINE_{self.i}_BYPASS_ORDERED', 0.0) set_param(f'STEAM_TURBINE_{self.i}_BYPASS_ORDERED', 0.0)
self._params = [ self._params = [
@ -172,6 +175,24 @@ class TrainController:
self.mscv = float(np.clip(new_mscv, 0.5, 100.0)) self.mscv = float(np.clip(new_mscv, 0.5, 100.0))
set_param(f'MSCV_{self.i}_OPENING_ORDERED', self.mscv) set_param(f'MSCV_{self.i}_OPENING_ORDERED', self.mscv)
self.sweeping = power_error > self.PUMP_SWEEP_THRESHOLD
if self.sweeping:
self._pump_hill_cycle += 1
if self._pump_hill_cycle >= 12:
self._pump_hill_cycle = 0
if self._pump_hill_steam is not None:
if steam_out < self._pump_hill_steam - 0.2:
self._pump_hill_dir = -self._pump_hill_dir
self._pump_hill_steam = steam_out
self.prim_pump = float(np.clip(self.prim_pump + self._pump_hill_dir, 15.0, 90.0))
set_param(f'COOLANT_CORE_CIRCULATION_PUMP_{self.i}_ORDERED_SPEED', self.prim_pump)
else:
if self.prim_pump != 50.0:
self.prim_pump = 50.0
set_param(f'COOLANT_CORE_CIRCULATION_PUMP_{self.i}_ORDERED_SPEED', self.prim_pump)
self._pump_hill_cycle = 0
self._pump_hill_steam = None
sec_ff = steam_out / 2.0 sec_ff = steam_out / 2.0
level = s[f'COOLANT_SEC_{self.i}_LIQUID_VOLUME'] level = s[f'COOLANT_SEC_{self.i}_LIQUID_VOLUME']
level_error = self.sec_level_target - level level_error = self.sec_level_target - level
@ -207,7 +228,6 @@ core_params = [
'CORE_STATE_CRITICALITY', 'CORE_STATE_CRITICALITY',
'VACUUM_RETENTION_TANK_VOLUME', 'VACUUM_RETENTION_TANK_VOLUME',
'CONDENSER_VOLUME', 'CONDENSER_VAPOR_VOLUME', 'CONDENSER_VOLUME', 'CONDENSER_VAPOR_VOLUME',
'CONDENSER_VACUUM', # vacuum level % — monitor for pump health
'POWER_DEMAND_MW', 'POWER_DEMAND_MW',
'CORE_PRIMARY_CIRCUIT_COOLING_TANK_VOLUME', # pressurizer water volume 'CORE_PRIMARY_CIRCUIT_COOLING_TANK_VOLUME', # pressurizer water volume
'COOLANT_CORE_PRIMARY_LOOP_LEVEL', # overall primary loop fill % 'COOLANT_CORE_PRIMARY_LOOP_LEVEL', # overall primary loop fill %
@ -237,8 +257,6 @@ _init = read_state([
'STEAM_EJECTOR_CONDENSER_RETURN_VALVE_ACTUAL', 'STEAM_EJECTOR_CONDENSER_RETURN_VALVE_ACTUAL',
'CONDENSER_VOLUME', 'CONDENSER_VAPOR_VOLUME', 'CONDENSER_VOLUME', 'CONDENSER_VAPOR_VOLUME',
'FREIGHT_PUMP_CONDENSER_ACTIVE', 'FREIGHT_PUMP_CONDENSER_ACTIVE',
'CONDENSER_VACUUM_PUMP_ACTIVE',
'CONDENSER_CIRCULATION_PUMP_ACTIVE',
]) ])
_ret_vol_init = _init.get('VACUUM_RETENTION_TANK_VOLUME', 0.0) _ret_vol_init = _init.get('VACUUM_RETENTION_TANK_VOLUME', 0.0)
_ret_valve_init = _init.get('STEAM_EJECTOR_CONDENSER_RETURN_VALVE_ACTUAL', 0.0) _ret_valve_init = _init.get('STEAM_EJECTOR_CONDENSER_RETURN_VALVE_ACTUAL', 0.0)
@ -254,7 +272,7 @@ _cond_vap_init = _init.get('CONDENSER_VAPOR_VOLUME', 0.0)
_cond_tot_init = _cond_vol_init + _cond_vap_init _cond_tot_init = _cond_vol_init + _cond_vap_init
_cond_pct_init = (_cond_vol_init / _cond_tot_init * 100.0) if _cond_tot_init > 0 else 0.0 _cond_pct_init = (_cond_vol_init / _cond_tot_init * 100.0) if _cond_tot_init > 0 else 0.0
_cond_pump_init = bool(_init.get('FREIGHT_PUMP_CONDENSER_ACTIVE', False)) _cond_pump_init = bool(_init.get('FREIGHT_PUMP_CONDENSER_ACTIVE', False))
if _cond_pump_init and _cond_pct_init >= 60.0: if _cond_pump_init and _cond_pct_init >= 50.0:
nucon.set(nucon._parameters['FREIGHT_PUMP_CONDENSER_SWITCH'], False) nucon.set(nucon._parameters['FREIGHT_PUMP_CONDENSER_SWITCH'], False)
cond_pump_on = False cond_pump_on = False
elif not _cond_pump_init and _cond_pct_init < 45.0: elif not _cond_pump_init and _cond_pct_init < 45.0:
@ -263,20 +281,6 @@ elif not _cond_pump_init and _cond_pct_init < 45.0:
else: else:
cond_pump_on = _cond_pump_init cond_pump_on = _cond_pump_init
# Vacuum pump — keep on continuously; turn off only during retention tank drain.
# (Opening the return valve breaks the suction path so the pump has no effect.)
vac_pump_on = bool(_init.get('CONDENSER_VACUUM_PUMP_ACTIVE', False))
if not vac_pump_on:
nucon.set(nucon._parameters['CONDENSER_VACUUM_PUMP_START_STOP'], True)
vac_pump_on = True
# Condenser circulation pump — run at moderate speed to prevent overcooling
# (manual §Stabilization: "prevent excessive cooling of the coolant returning to the evaporator").
_cond_circ_on = bool(_init.get('CONDENSER_CIRCULATION_PUMP_ACTIVE', False))
if not _cond_circ_on:
nucon.set(nucon._parameters['CONDENSER_CIRCULATION_PUMP_SWITCH'], True)
set_param('CONDENSER_CIRCULATION_PUMP_ORDERED_SPEED', 25.0)
# Pressurizer spray valve — init from live state # Pressurizer spray valve — init from live state
_prsr_live = read_state(['CORE_PRIMARY_CIRCUIT_COOLING_TANK_VOLUME', 'COOLANT_CORE_PRIMARY_LOOP_LEVEL', 'FREIGHT_PUMP_FEEDWATER_ACTIVE']) _prsr_live = read_state(['CORE_PRIMARY_CIRCUIT_COOLING_TANK_VOLUME', 'COOLANT_CORE_PRIMARY_LOOP_LEVEL', 'FREIGHT_PUMP_FEEDWATER_ACTIVE'])
_prsr_level = _prsr_live.get('CORE_PRIMARY_CIRCUIT_COOLING_TANK_VOLUME', PRSR_VOL_MAX * 0.6) / PRSR_VOL_MAX * 100.0 _prsr_level = _prsr_live.get('CORE_PRIMARY_CIRCUIT_COOLING_TANK_VOLUME', PRSR_VOL_MAX * 0.6) / PRSR_VOL_MAX * 100.0
@ -323,7 +327,6 @@ def run_controller(stdscr):
global rod_pos, rod_cycle, rod_integral global rod_pos, rod_cycle, rod_integral
global ret_valve, ret_draining, ret_prev_vol, cond_pump_on global ret_valve, ret_draining, ret_prev_vol, cond_pump_on
global prsr_spraying, feedwater_on global prsr_spraying, feedwater_on
global vac_pump_on
global train_controllers, targets global train_controllers, targets
curses.curs_set(0) curses.curs_set(0)
@ -357,7 +360,6 @@ def run_controller(stdscr):
disp = dict(s={}, dynamic_setpoint=temp_setpoint, temp_auto=temp_auto, criticality=0.0, disp = dict(s={}, dynamic_setpoint=temp_setpoint, temp_auto=temp_auto, criticality=0.0,
ret_pct=0.0, ret_draining=False, ret_valve=0.0, ret_pct=0.0, ret_draining=False, ret_valve=0.0,
cond_pct=0.0, cond_pump_on=False, cond_pct=0.0, cond_pump_on=False,
vac_pump_on=vac_pump_on,
prsr_level=_prsr_level, prsr_spraying=prsr_spraying, prsr_level=_prsr_level, prsr_spraying=prsr_spraying,
prim_level=_prsr_live.get('COOLANT_CORE_PRIMARY_LOOP_LEVEL', 100.0), prim_level=_prsr_live.get('COOLANT_CORE_PRIMARY_LOOP_LEVEL', 100.0),
feedwater_on=feedwater_on, feedwater_on=feedwater_on,
@ -390,7 +392,7 @@ def run_controller(stdscr):
# Right/+ increase Left/- decrease # Right/+ increase Left/- decrease
elif key in (ord('+'), ord('='), curses.KEY_RIGHT): elif key in (ord('+'), ord('='), curses.KEY_RIGHT):
if selected_train == 0 and not temp_auto: if selected_train == 0 and not temp_auto:
temp_setpoint = min(round(temp_setpoint / 5.0) * 5.0 + 5.0, 375.0) temp_setpoint = min(round(temp_setpoint / 5.0) * 5.0 + 5.0, 410.0)
elif selected_train == 4: elif selected_train == 4:
args.grid_buffer = min(args.grid_buffer + 1.0, 100.0) args.grid_buffer = min(args.grid_buffer + 1.0, 100.0)
elif selected_train in train_controllers: elif selected_train in train_controllers:
@ -469,7 +471,7 @@ def run_controller(stdscr):
_safe_addstr(stdscr, row, 10, f'[{auto_str}]', auto_color | BOLD) _safe_addstr(stdscr, row, 10, f'[{auto_str}]', auto_color | BOLD)
_safe_addstr(stdscr, row, 16, 'Temp: ', BOLD) _safe_addstr(stdscr, row, 16, 'Temp: ', BOLD)
_safe_addstr(stdscr, row, 22, f'{core_temp:6.1f}°C', temp_color | BOLD) _safe_addstr(stdscr, row, 22, f'{core_temp:6.1f}°C', temp_color | BOLD)
sp_color = RED if dynamic_setpoint < 306 or dynamic_setpoint > 375 else 0 sp_color = RED if dynamic_setpoint < 306 or dynamic_setpoint > 360 else 0
_safe_addstr(stdscr, row, 32, f'sp=', 0) _safe_addstr(stdscr, row, 32, f'sp=', 0)
_safe_addstr(stdscr, row, 35, f'{dynamic_setpoint:.0f}°C', sp_color | BOLD) _safe_addstr(stdscr, row, 35, f'{dynamic_setpoint:.0f}°C', sp_color | BOLD)
_safe_addstr(stdscr, row, 40, _safe_addstr(stdscr, row, 40,
@ -499,9 +501,10 @@ def run_controller(stdscr):
_safe_addstr(stdscr, row, 32, _safe_addstr(stdscr, row, 32,
f'[{_bar(pwr_pct, 14)}] {power_error/1000:+5.1f}MW {tgt_str}') f'[{_bar(pwr_pct, 14)}] {power_error/1000:+5.1f}MW {tgt_str}')
row += 1 row += 1
sweep_ind = '~' if tc.sweeping else ' '
_safe_addstr(stdscr, row, 16, _safe_addstr(stdscr, row, 16,
f'Steam: {steam_out:5.1f} MSCV: {tc.mscv:4.1f} ' f'Steam: {steam_out:5.1f} MSCV: {tc.mscv:4.1f} '
f'Prim: {tc.prim_pump:3.0f}% Sec: {tc.sec_pump:3.0f}% ' f'Prim: {tc.prim_pump:3.0f}%{sweep_ind} Sec: {tc.sec_pump:3.0f}% '
f'Lvl: {level:.0f}{level_error:+.0f})') f'Lvl: {level:.0f}{level_error:+.0f})')
elif not is_active: elif not is_active:
hint = ' (+/Up to add)' if is_sel else '' hint = ' (+/Up to add)' if is_sel else ''
@ -537,18 +540,12 @@ def run_controller(stdscr):
f' DRAINING valve={ret_valve:.0f}%' if ret_draining else ' OK', f' DRAINING valve={ret_valve:.0f}%' if ret_draining else ' OK',
YELLOW if ret_draining else GREEN) YELLOW if ret_draining else GREEN)
row += 1 row += 1
cond_vac_ = s.get('CONDENSER_VACUUM', 0.0)
cond_color = RED if cond_pct < 25 else YELLOW if cond_pct < 40 else GREEN cond_color = RED if cond_pct < 25 else YELLOW if cond_pct < 40 else GREEN
vac_on_ = disp.get('vac_pump_on', True)
vac_color = (RED if cond_vac_ < 50 else YELLOW if cond_vac_ < 80 else GREEN) if vac_on_ else YELLOW
_safe_addstr(stdscr, row, 2, '◆ CONDENSER FILL ', BOLD) _safe_addstr(stdscr, row, 2, '◆ CONDENSER FILL ', BOLD)
_safe_addstr(stdscr, row, 20, f'[{_bar(cond_pct, 20)}]', cond_color) _safe_addstr(stdscr, row, 20, f'[{_bar(cond_pct, 20)}]', cond_color)
_safe_addstr(stdscr, row, 43, f' {cond_pct:4.0f}%') _safe_addstr(stdscr, row, 43, f' {cond_pct:4.0f}%')
_safe_addstr(stdscr, row, 49, ' PUMP ON' if cond_pump_on else ' OK', _safe_addstr(stdscr, row, 49, ' PUMP ON' if cond_pump_on else ' OK',
YELLOW if cond_pump_on else GREEN) YELLOW if cond_pump_on else GREEN)
_safe_addstr(stdscr, row, 60,
f' VAC:{"OFF" if not vac_on_ else f"{cond_vac_:.0f}%"}',
vac_color)
row += 1 row += 1
prsr_level_ = disp['prsr_level'] prsr_level_ = disp['prsr_level']
prsr_spray_ = disp['prsr_spraying'] prsr_spray_ = disp['prsr_spraying']
@ -639,7 +636,7 @@ def run_controller(stdscr):
if temp_auto and train_data: if temp_auto and train_data:
total_error = sum(train_data[t][1] for t in train_data) # sum of power_errors total_error = sum(train_data[t][1] for t in train_data) # sum of power_errors
sp_delta = float(np.clip(total_error * 0.00002, -0.5, 0.5)) sp_delta = float(np.clip(total_error * 0.00002, -0.5, 0.5))
temp_setpoint = float(np.clip(temp_setpoint + sp_delta, 306.0, 375.0)) temp_setpoint = float(np.clip(temp_setpoint + sp_delta, 306.0, 360.0))
# ---- Aux: retention tank ---- # ---- Aux: retention tank ----
ret_vol = s.get('VACUUM_RETENTION_TANK_VOLUME', 0.0) ret_vol = s.get('VACUUM_RETENTION_TANK_VOLUME', 0.0)
@ -648,17 +645,7 @@ def run_controller(stdscr):
ret_draining = False ret_draining = False
ret_valve = 0.0 ret_valve = 0.0
set_param('STEAM_EJECTOR_CONDENSER_RETURN_VALVE', 0.0) set_param('STEAM_EJECTOR_CONDENSER_RETURN_VALVE', 0.0)
# Drain complete — restart vacuum pump
if not vac_pump_on:
nucon.set(nucon._parameters['CONDENSER_VACUUM_PUMP_START_STOP'], True)
vac_pump_on = True
elif ret_vol > RETENTION_HI: elif ret_vol > RETENTION_HI:
if not ret_draining:
# Starting drain — stop vacuum pump.
# The ejector return valve bypasses the suction path so the pump has no effect
# and wastes power; turn it off for the duration of the drain.
nucon.set(nucon._parameters['CONDENSER_VACUUM_PUMP_START_STOP'], False)
vac_pump_on = False
ret_draining = True ret_draining = True
if ret_prev_vol is not None and ret_vol >= ret_prev_vol - 50.0: if ret_prev_vol is not None and ret_vol >= ret_prev_vol - 50.0:
ret_valve = min(ret_valve + 1.0, 50.0) ret_valve = min(ret_valve + 1.0, 50.0)
@ -675,7 +662,7 @@ def run_controller(stdscr):
if not cond_pump_on and cond_pct < 45.0: if not cond_pump_on and cond_pct < 45.0:
cond_pump_on = True cond_pump_on = True
nucon.set(nucon._parameters['FREIGHT_PUMP_CONDENSER_SWITCH'], True) nucon.set(nucon._parameters['FREIGHT_PUMP_CONDENSER_SWITCH'], True)
elif cond_pump_on and cond_pct >= 60.0: elif cond_pump_on and cond_pct >= 50.0:
cond_pump_on = False cond_pump_on = False
nucon.set(nucon._parameters['FREIGHT_PUMP_CONDENSER_SWITCH'], False) nucon.set(nucon._parameters['FREIGHT_PUMP_CONDENSER_SWITCH'], False)
@ -707,7 +694,6 @@ def run_controller(stdscr):
criticality=criticality, criticality=criticality,
ret_pct=ret_pct, ret_draining=ret_draining, ret_valve=ret_valve, ret_pct=ret_pct, ret_draining=ret_draining, ret_valve=ret_valve,
cond_pct=cond_pct, cond_pump_on=cond_pump_on, cond_pct=cond_pct, cond_pump_on=cond_pump_on,
vac_pump_on=vac_pump_on,
prsr_level=prsr_level, prsr_spraying=prsr_spraying, prsr_level=prsr_level, prsr_spraying=prsr_spraying,
prim_level=prim_level, feedwater_on=feedwater_on, prim_level=prim_level, feedwater_on=feedwater_on,
grid_follow=grid_follow, grid_demand_kw=grid_demand_kw) grid_follow=grid_follow, grid_demand_kw=grid_demand_kw)

View File

@ -11,43 +11,26 @@ Requirements:
""" """
import argparse import argparse
import pickle import pickle
import torch
from gymnasium.wrappers import TimeLimit from gymnasium.wrappers import TimeLimit
from stable_baselines3 import SAC from stable_baselines3 import SAC
from stable_baselines3.her.her_replay_buffer import HerReplayBuffer from stable_baselines3.her.her_replay_buffer import HerReplayBuffer
from stable_baselines3.common.callbacks import CheckpointCallback
from nucon.sim import NuconSimulator from nucon.sim import NuconSimulator
from nucon.model import ReactorDynamicsModel, MixtureModel
from nucon.rl import NuconGoalEnv, Parameterized_Objectives, Parameterized_Terminators from nucon.rl import NuconGoalEnv, Parameterized_Objectives, Parameterized_Terminators
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--load', default=None, help='Path to existing model to hot-start from') parser.add_argument('--load', default=None, help='Path to existing model to hot-start from')
parser.add_argument('--steps', type=int, default=50_000, help='Total timesteps (default: 50000)') parser.add_argument('--steps', type=int, default=50_000, help='Total timesteps (default: 50000)')
parser.add_argument('--out', default='/tmp/sac_nucon_knn', help='Output path for saved model') parser.add_argument('--out', default='/tmp/sac_nucon_knn', help='Output path for saved model')
parser.add_argument('--model', default='/tmp/reactor_knn.pkl', help='Dynamics model (.pkl for kNN, .pt for NN)')
parser.add_argument('--model2', default=None, help='Second dynamics model for mixture (optional)')
parser.add_argument('--dataset', default='/tmp/nucon_dataset.pkl', help='Dataset for init states')
args = parser.parse_args() args = parser.parse_args()
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Load dynamics model(s) and dataset # Load model and dataset
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _load_model(path): with open('/tmp/reactor_knn.pkl', 'rb') as f:
if path.endswith('.pt'): knn_model = pickle.load(f)
ckpt = torch.load(path, weights_only=False)
m = ReactorDynamicsModel(ckpt['input_params'], ckpt['output_params'])
m.load_state_dict(ckpt['state_dict'])
m.eval()
return m
with open(path, 'rb') as f:
return pickle.load(f)
dynamics_model = _load_model(args.model) with open('/tmp/nucon_dataset.pkl', 'rb') as f:
if args.model2:
dynamics_model = MixtureModel(dynamics_model, _load_model(args.model2))
with open(args.dataset, 'rb') as f:
dataset = pickle.load(f) dataset = pickle.load(f)
# Seed resets to in-distribution states from dataset # Seed resets to in-distribution states from dataset
@ -57,40 +40,23 @@ init_states = [s for _, _, s, _ in dataset]
# Build sim + env # Build sim + env
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
sim = NuconSimulator(port=8786) sim = NuconSimulator(port=8786)
sim.set_model(dynamics_model) sim.set_model(knn_model)
BATCH_SIZE = 2048 BATCH_SIZE = 2048
MAX_EPISODE_STEPS = 200 MAX_EPISODE_STEPS = 200
GENERATORS = ['GENERATOR_0_KW', 'GENERATOR_1_KW', 'GENERATOR_2_KW']
POWER_RANGE = {g: (0.0, 100_000.0) for g in GENERATORS} # per-generator kW; ~100 MW upper bound
# Curated obs: physically relevant features for power control (~25 dims vs ~260 full)
OBS_PARAMS = [
'CORE_TEMP', 'CORE_PRESSURE', 'CORE_STATE_CRITICALITY', 'CORE_WEAR', 'CORE_INTEGRITY',
'ROD_BANK_POS_0_ACTUAL', 'ROD_BANK_POS_0_ORDERED',
'COOLANT_CORE_FLOW_SPEED', 'COOLANT_CORE_VESSEL_TEMPERATURE',
'COOLANT_CORE_PRESSURE', 'COOLANT_CORE_QUANTITY_IN_VESSEL',
'STEAM_TURBINE_0_RPM', 'STEAM_TURBINE_0_TEMPERATURE', 'STEAM_TURBINE_0_PRESSURE',
'STEAM_TURBINE_1_RPM', 'STEAM_TURBINE_1_TEMPERATURE', 'STEAM_TURBINE_1_PRESSURE',
'STEAM_TURBINE_2_RPM', 'STEAM_TURBINE_2_TEMPERATURE', 'STEAM_TURBINE_2_PRESSURE',
'GENERATOR_0_V', 'GENERATOR_1_V', 'GENERATOR_2_V',
]
env = NuconGoalEnv( env = NuconGoalEnv(
goal_params=GENERATORS, goal_params=['CORE_TEMP'],
goal_range=POWER_RANGE, goal_range={'CORE_TEMP': (55.0, 550.0)},
tolerance=0.05,
seconds_per_step=10, seconds_per_step=10,
simulator=sim, simulator=sim,
obs_params=OBS_PARAMS,
additional_objectives=[ additional_objectives=[
Parameterized_Objectives['uncertainty_penalty'](start=0.3), Parameterized_Objectives['uncertainty_penalty'](start=0.3),
Parameterized_Objectives['temp_below_linear'](max_temp=420),
], ],
additional_objective_weights=[1.0, 0.01], additional_objective_weights=[1.0],
init_states=init_states, init_states=init_states,
delta_action_scale=0.05, delta_action_scale=0.05,
goal_sampling_std=0.15, # Gaussian delta in normalised space (~180 kW typical)
) )
env = TimeLimit(env, max_episode_steps=MAX_EPISODE_STEPS) env = TimeLimit(env, max_episode_steps=MAX_EPISODE_STEPS)
@ -107,8 +73,7 @@ if args.load:
custom_objects={'learning_rate': 3e-4, 'batch_size': BATCH_SIZE, custom_objects={'learning_rate': 3e-4, 'batch_size': BATCH_SIZE,
'tau': 0.005, 'gamma': 0.98, 'tau': 0.005, 'gamma': 0.98,
'train_freq': 64, 'gradient_steps': 8, 'train_freq': 64, 'gradient_steps': 8,
'learning_starts': MAX_EPISODE_STEPS, 'learning_starts': 0})
'ent_coef': 0.1})
else: else:
model = SAC( model = SAC(
'MultiInputPolicy', 'MultiInputPolicy',
@ -126,26 +91,9 @@ else:
train_freq=64, train_freq=64,
gradient_steps=8, gradient_steps=8,
learning_starts=BATCH_SIZE, learning_starts=BATCH_SIZE,
ent_coef=0.1, # fixed; auto-tuning diverges on this many action dims
device='auto', device='auto',
) )
checkpoint_cb = CheckpointCallback( model.learn(total_timesteps=args.steps)
save_freq=10_000,
save_path=args.out + '_checkpoints/',
name_prefix='sac',
)
import json, os
config = {'obs_params': OBS_PARAMS}
for save_dir in [args.out + '_checkpoints/', os.path.dirname(args.out) or '.']:
os.makedirs(save_dir, exist_ok=True)
with open(os.path.join(save_dir, 'config.json'), 'w') as f:
json.dump(config, f)
model.learn(total_timesteps=args.steps, callback=checkpoint_cb)
model.save(args.out) model.save(args.out)
with open(args.out + '.json', 'w') as f:
json.dump(config, f)
print(f"Saved to {args.out}.zip") print(f"Saved to {args.out}.zip")