fix: make collect_data resilient to game crashes
- Save dataset every N steps (default 10) so a disconnect loses at most one checkpoint's worth of samples instead of everything.
- Retry _get_state() on ConnectionError/Timeout rather than crashing, resuming automatically once the game comes back up.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ce2019e060
commit
088b7d4733
@ -203,28 +203,56 @@ class NuconModelLearner:
|
|||||||
state[key] = valves.get(name, {}).get('Value', 0.0)
|
state[key] = valves.get(name, {}).get('Value', 0.0)
|
||||||
return state
|
return state
|
||||||
|
|
||||||
def collect_data(self, num_steps):
|
def collect_data(self, num_steps, save_every=10):
|
||||||
"""
|
"""
|
||||||
Collect state-transition tuples from the live game.
|
Collect state-transition tuples from the live game.
|
||||||
|
|
||||||
Sleeps wall_time = target_game_delta / sim_speed so that each stored
|
Sleeps wall_time = target_game_delta / sim_speed so that each stored
|
||||||
game_delta is uniform regardless of the game's simulation speed setting.
|
game_delta is uniform regardless of the game's simulation speed setting.
|
||||||
|
|
||||||
|
Saves the dataset every ``save_every`` steps so a crash doesn't lose
|
||||||
|
everything. On a connection error the step is skipped and collection
|
||||||
|
resumes once the game is reachable again (retries every 5 s).
|
||||||
"""
|
"""
|
||||||
state = self._get_state()
|
import requests as _requests
|
||||||
for _ in range(num_steps):
|
|
||||||
|
def get_state_with_retry():
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
return self._get_state()
|
||||||
|
except (_requests.exceptions.ConnectionError,
|
||||||
|
_requests.exceptions.Timeout) as e:
|
||||||
|
print(f"Connection lost ({e}). Retrying in 5 s…")
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
state = get_state_with_retry()
|
||||||
|
collected = 0
|
||||||
|
for i in range(num_steps):
|
||||||
action = self.actor(state)
|
action = self.actor(state)
|
||||||
for param_id, value in action.items():
|
for param_id, value in action.items():
|
||||||
self.nucon.set(param_id, value)
|
try:
|
||||||
|
self.nucon.set(param_id, value)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
target_game_delta = self.time_delta()
|
target_game_delta = self.time_delta()
|
||||||
sim_speed = self.nucon.GAME_SIM_SPEED.value or 1.0
|
try:
|
||||||
|
sim_speed = self.nucon.GAME_SIM_SPEED.value or 1.0
|
||||||
|
except Exception:
|
||||||
|
sim_speed = 1.0
|
||||||
time.sleep(target_game_delta / sim_speed)
|
time.sleep(target_game_delta / sim_speed)
|
||||||
next_state = self._get_state()
|
|
||||||
|
|
||||||
|
next_state = get_state_with_retry()
|
||||||
self.dataset.append((state, action, next_state, target_game_delta))
|
self.dataset.append((state, action, next_state, target_game_delta))
|
||||||
state = next_state
|
state = next_state
|
||||||
|
collected += 1
|
||||||
|
|
||||||
|
if collected % save_every == 0:
|
||||||
|
self.save_dataset()
|
||||||
|
print(f" {collected}/{num_steps} steps collected, dataset saved.")
|
||||||
|
|
||||||
self.save_dataset()
|
self.save_dataset()
|
||||||
|
print(f"Collection complete. {collected} steps, {len(self.dataset)} total samples.")
|
||||||
|
|
||||||
def train_model(self, batch_size=32, num_epochs=10, test_split=0.2):
|
def train_model(self, batch_size=32, num_epochs=10, test_split=0.2):
|
||||||
"""Train a neural-network dynamics model on the current dataset."""
|
"""Train a neural-network dynamics model on the current dataset."""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user