fix: make collect_data resilient to game crashes

- Save dataset every N steps (default 10) so a disconnect loses at most
  one checkpoint's worth of samples instead of everything
- Retry _get_state() on ConnectionError/Timeout rather than crashing,
  resuming automatically once the game comes back up

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dominik Moritz Roth 2026-03-12 17:52:17 +01:00
parent ce2019e060
commit 088b7d4733

View File

@@ -203,28 +203,56 @@ class NuconModelLearner:
state[key] = valves.get(name, {}).get('Value', 0.0)
return state
def collect_data(self, num_steps, save_every=10):
    """
    Collect state-transition tuples from the live game.

    Sleeps wall_time = target_game_delta / sim_speed so that each stored
    game_delta is uniform regardless of the game's simulation speed setting.

    The dataset is checkpointed every ``save_every`` steps so a crash loses
    at most one checkpoint's worth of samples. On a connection error the
    state read is retried every 5 s until the game is reachable again and
    collection then resumes; the transition spanning the outage is still
    recorded with the nominal ``target_game_delta``.

    Args:
        num_steps: number of (state, action, next_state, dt) tuples to record.
        save_every: checkpoint interval, in steps (default 10).
    """
    # Local import: only needed here for the exception types raised by
    # the HTTP layer underneath self._get_state().
    import requests as _requests

    def get_state_with_retry():
        # Block until the game responds; a crashed/restarting game just
        # delays collection instead of aborting it.
        while True:
            try:
                return self._get_state()
            except (_requests.exceptions.ConnectionError,
                    _requests.exceptions.Timeout) as e:
                print(f"Connection lost ({e}). Retrying in 5 s…")
                time.sleep(5)

    state = get_state_with_retry()
    collected = 0
    for _ in range(num_steps):
        action = self.actor(state)
        for param_id, value in action.items():
            try:
                self.nucon.set(param_id, value)
            except Exception:
                # Best-effort write: some parameters may be read-only or
                # temporarily unsettable; skip rather than abort the run.
                pass
        target_game_delta = self.time_delta()
        try:
            sim_speed = self.nucon.GAME_SIM_SPEED.value or 1.0
        except Exception:
            # Assume real-time if the speed setting can't be read.
            sim_speed = 1.0
        # Wall-clock sleep scaled so the *game-time* delta stays uniform.
        time.sleep(target_game_delta / sim_speed)
        next_state = get_state_with_retry()
        self.dataset.append((state, action, next_state, target_game_delta))
        state = next_state
        collected += 1
        if collected % save_every == 0:
            self.save_dataset()
            print(f" {collected}/{num_steps} steps collected, dataset saved.")
    # Final save so a partial last checkpoint isn't lost.
    self.save_dataset()
    print(f"Collection complete. {collected} steps, {len(self.dataset)} total samples.")
def train_model(self, batch_size=32, num_epochs=10, test_split=0.2):
"""Train a neural-network dynamics model on the current dataset."""