From e8e7233d9814fa692896e3d54f753bcfd1382cab Mon Sep 17 00:00:00 2001
From: "ys1087@partner.kit.edu"
Date: Wed, 27 Aug 2025 12:19:38 +0200
Subject: [PATCH] Fix WandB config issue and achieve working DPPO setup

- Disable WandB in dev script to avoid config object vs string error
- Successfully completed development test (Job 3445106)
- Confirmed: pre-training works, loss reduces, checkpoints save
- Update experiment tracking with successful results
---
 EXPERIMENT_PLAN.md    | 11 ++++++-----
 slurm/run_dppo_dev.sh |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/EXPERIMENT_PLAN.md b/EXPERIMENT_PLAN.md
index d455615..2b2db72 100644
--- a/EXPERIMENT_PLAN.md
+++ b/EXPERIMENT_PLAN.md
@@ -8,10 +8,11 @@
 - All dependencies installed including PyTorch, d4rl, dm-control
 
 ### Initial Testing
-🔄 **Job ID 3445081**: Development test (30min) - PENDING
-- Command: `./submit_job.sh dev`
-- Status: Waiting for resources on dev_accelerated partition
-- Purpose: Verify DPPO can run on HoReKa with basic pre-training
+✅ **DPPO Confirmed Working on HoReKa**
+- Successfully completed dev test (Job ID 3445106)
+- Pre-training working: 2 epochs, loss reduction 0.2494→0.2010
+- Model checkpoints saved correctly
+- Ready for full experiments
 
 ## Experiments To Run
 
@@ -71,7 +72,7 @@ TASK=hopper MODE=pretrain sbatch slurm/run_dppo_gym.sh
 
 | Job ID | Type | Task | Mode | Status | Duration | Results |
 |--------|------|------|------|---------|----------|---------|
-| - | - | - | - | - | - | - |
+| 3445106 | dev test | hopper | pretrain | ✅ SUCCESS | 2m11s | Train loss: 0.2494→0.2010 |
 
 ## Configuration Notes
 
diff --git a/slurm/run_dppo_dev.sh b/slurm/run_dppo_dev.sh
index 1a9593c..1487154 100755
--- a/slurm/run_dppo_dev.sh
+++ b/slurm/run_dppo_dev.sh
@@ -41,11 +41,11 @@ echo "PyTorch version: $(python -c 'import torch; print(torch.__version__)')"
 echo "CUDA available: $(python -c 'import torch; print(torch.cuda.is_available())')"
 echo ""
 
-# Run a quick pre-training test with reduced epochs
+# Run a quick pre-training test with reduced epochs (disable WandB for dev test)
 python script/run.py --config-name=pre_diffusion_mlp \
     --config-dir=cfg/gym/pretrain/hopper-medium-v2 \
     train.n_epochs=2 \
     train.save_model_freq=1 \
-    wandb=${WANDB_MODE:-null}
+    wandb=null
 
 echo "Dev test completed!"
\ No newline at end of file