"""Collect a dynamics dataset from the running Nucleares game.

Play the game normally while this script runs in the background. It records
state transitions every `time_delta` game-seconds and saves them incrementally
so nothing is lost if you quit early.

Usage:
    python scripts/collect_dataset.py                         # default settings
    python scripts/collect_dataset.py --steps 2000 --delta 5  # faster sampling
    python scripts/collect_dataset.py --out my_dataset.pkl

The saved dataset is a list of (state_before, action_dict, state_after,
time_delta) tuples compatible with NuconModelLearner.fit_knn() and
train_model().

Tips for good data:
  - Cover a range of operating states: startup, ramp, steady-state, shutdown.
  - Vary individual rod bank positions, pump speeds, and MSCV setpoints.
  - Collect at least 500 samples for kNN-GP; 5000+ for the NN backend.
  - Merge multiple sessions with NuconModelLearner.merge_datasets().
"""

import argparse
import pickle

from nucon.model import NuconModelLearner

# --- Command-line interface -------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument(
    '--steps',
    type=int,
    default=1000,
    help='Number of samples to collect (default: 1000)',
)
parser.add_argument(
    '--delta',
    type=float,
    default=10.0,
    help='Game-seconds between samples (default: 10.0)',
)
parser.add_argument(
    '--out',
    default='reactor_dataset.pkl',
    help='Output path for dataset (default: reactor_dataset.pkl)',
)
parser.add_argument(
    '--merge',
    default=None,
    help='Existing dataset to merge into before saving',
)
args = parser.parse_args()

# --- Learner setup ----------------------------------------------------------
# The learner is given the sampling interval and the path of the dataset file.
learner = NuconModelLearner(time_delta=args.delta, dataset_path=args.out)

# Optionally fold a previously recorded dataset in before collecting new data.
if args.merge:
    learner.merge_datasets(args.merge)
    print(f"Merged existing dataset from {args.merge} ({len(learner.dataset)} samples)")

# --- Collection -------------------------------------------------------------
print(f"Collecting {args.steps} samples (Δt={args.delta}s each) → {args.out}")
print("Play the game — vary rod positions, pump speeds, and operating states.")
print("Press Ctrl-C to stop early; data collected so far will be saved.")

try:
    learner.collect_data(num_steps=args.steps)
except KeyboardInterrupt:
    # Ctrl-C is an expected way to end the session: report it and fall
    # through to the save below instead of exiting with a traceback.
    print("\nInterrupted — saving collected data...")

# Reached both after a full run and after an interrupt.
learner.save_dataset(args.out)
print(f"Saved {len(learner.dataset)} samples to {args.out}")