From 0424a080c11de876b0274f0f51e31f530c73c28e Mon Sep 17 00:00:00 2001 From: "ys1087@partner.kit.edu" Date: Wed, 27 Aug 2025 14:01:51 +0200 Subject: [PATCH] feat: HoReKa cluster adaptation and validation - Updated all WandB project names to use dppo- prefix for organization - Added flexible dev testing script for all environments - Created organized dev_tests directory for test scripts - Fixed MuJoCo compilation issues (added GCC compiler flags) - Documented Python 3.10 compatibility and Furniture-Bench limitation - Validated pre-training for Gym, Robomimic, D3IL environments - Updated experiment tracking with validation results - Enhanced README with troubleshooting and setup instructions --- EXPERIMENT_PLAN.md | 128 +++++++++++++----- README.md | 36 ++++- .../pretrain/avoid_m1/pre_diffusion_mlp.yaml | 2 +- .../one_leg_low/pre_diffusion_mlp.yaml | 2 +- .../halfcheetah-v2/ft_ppo_diffusion_mlp.yaml | 2 +- .../hopper-v2/ft_ppo_diffusion_mlp.yaml | 2 +- .../walker2d-v2/ft_ppo_diffusion_mlp.yaml | 2 +- .../pre_diffusion_mlp.yaml | 2 +- .../hopper-medium-v2/pre_diffusion_mlp.yaml | 2 +- .../walker2d-medium-v2/pre_diffusion_mlp.yaml | 2 +- .../pretrain/lift/pre_diffusion_mlp.yaml | 2 +- slurm/dev_tests/test_d3il_avoid_m1.sh | 35 +++++ slurm/dev_tests/test_robomimic_lift.sh | 35 +++++ slurm/run_dppo_dev_flexible.sh | 100 ++++++++++++++ 14 files changed, 310 insertions(+), 42 deletions(-) create mode 100644 slurm/dev_tests/test_d3il_avoid_m1.sh create mode 100644 slurm/dev_tests/test_robomimic_lift.sh create mode 100644 slurm/run_dppo_dev_flexible.sh diff --git a/EXPERIMENT_PLAN.md b/EXPERIMENT_PLAN.md index 6715bb3..c78e798 100644 --- a/EXPERIMENT_PLAN.md +++ b/EXPERIMENT_PLAN.md @@ -2,32 +2,34 @@ ## Current Status -### Setup Complete ✅ -- Installation successful on HoReKa with Python 3.10 venv -- SLURM scripts created for automated job submission -- All dependencies installed including PyTorch, d4rl, dm-control +### Setup Complete +- [x] Installation successful on HoReKa 
with Python 3.10 venv +- [x] SLURM scripts created for automated job submission +- [x] All dependencies installed including PyTorch, d4rl, dm-control +- [x] WandB integration configured with dppo- project prefix -### Initial Testing -✅ **DPPO Confirmed Working on HoReKa with WandB** -- Successfully completed dev test (Job ID 3445117) -- Quick verification: 2 epochs only (not full training), loss reduction 0.2494→0.2010 -- WandB logging working: https://wandb.ai/dominik_roth/gym-hopper-medium-v2-pretrain/runs/rztwqutf -- Model checkpoints and logging fully functional -- Ready for full 200-epoch production runs +### Initial Testing Status +- [x] DPPO confirmed working on HoReKa with WandB +- [x] Dev test completed successfully (Job ID 3445117) +- [x] Loss reduction verified: 0.2494→0.2010 over 2 epochs +- [x] WandB logging functional: [View Run](https://wandb.ai/dominik_roth/gym-hopper-medium-v2-pretrain/runs/rztwqutf) +- [x] Model checkpoints and logging operational +- [ ] All environments validated on dev partition +- [ ] Ready for production runs ## Experiments To Run ### 1. 
Reproduce Paper Results - Gym Tasks -**Pre-training Phase** (Train diffusion model on offline D4RL datasets): -- hopper-medium-v2 → diffusion model trained on offline data (200 epochs) -- walker2d-medium-v2 → diffusion model trained on offline data (200 epochs) -- halfcheetah-medium-v2 → diffusion model trained on offline data (200 epochs) +**Pre-training Phase** (Behavior cloning on offline datasets): +- hopper-medium-v2 → Diffusion Policy trained via supervised learning on D4RL data (200 epochs) +- walker2d-medium-v2 → Diffusion Policy trained via supervised learning on D4RL data (200 epochs) +- halfcheetah-medium-v2 → Diffusion Policy trained via supervised learning on D4RL data (200 epochs) -**Fine-tuning Phase** (PPO fine-tune diffusion model with online interaction): -- hopper-v2 → fine-tune pre-trained hopper model with PPO + online env -- walker2d-v2 → fine-tune pre-trained walker2d model with PPO + online env -- halfcheetah-v2 → fine-tune pre-trained halfcheetah model with PPO + online env +**Fine-tuning Phase** (DPPO: Policy gradient on diffusion denoising process): +- hopper-v2 → DPPO fine-tunes pre-trained model using PPO on 2-layer "Diffusion MDP" +- walker2d-v2 → DPPO fine-tunes pre-trained model using PPO on 2-layer "Diffusion MDP" +- halfcheetah-v2 → DPPO fine-tunes pre-trained model using PPO on 2-layer "Diffusion MDP" **Settings**: Paper hyperparameters, 3 seeds each @@ -74,7 +76,14 @@ TASK=hopper MODE=pretrain sbatch slurm/run_dppo_gym.sh | Job ID | Type | Task | Mode | Status | Duration | Results | |--------|------|------|------|---------|----------|---------| | 3445117 | dev test | hopper | pretrain | ✅ SUCCESS | 2m17s | [WandB](https://wandb.ai/dominik_roth/gym-hopper-medium-v2-pretrain/runs/rztwqutf) | -| 3445123 | production | hopper | pretrain | 🔄 QUEUED | 8h | SLURM: 3445123 | +| 3445154 | dev test | walker2d | pretrain | ✅ SUCCESS | ~2m | Completed | +| 3445155 | dev test | halfcheetah | pretrain | 🔄 RUNNING | ~2m | SLURM: 3445155 | +| 
3445158 | dev test | hopper | finetune | 🔄 QUEUED | 30m | SLURM: 3445158 | + +**Note**: +- Production job 3445123 cancelled (cluster policy: no prod jobs while dev running) +- WandB project names updated to start with "dppo-" prefix +- Focused on Phase 1 validation before production runs ## Configuration Notes @@ -92,20 +101,75 @@ export WANDB_ENTITY= No issues with the DPPO repository - installation and setup completed successfully. +## Paper Reproduction Progress + +### Full Paper Results (Target: All experiments in WandB) + +**Goal**: Complete reproduction of DPPO paper results with all runs logged to dominik_roth WandB account. + +#### Gym Tasks (Core Paper Results) +- [ ] **hopper-medium-v2 → hopper-v2**: Pre-train (200 epochs) + Fine-tune +- [ ] **walker2d-medium-v2 → walker2d-v2**: Pre-train (200 epochs) + Fine-tune +- [ ] **halfcheetah-medium-v2 → halfcheetah-v2**: Pre-train (200 epochs) + Fine-tune + +#### Additional Environment Suites (Extended Results) +- [ ] **Robomimic Tasks**: lift, can, square, transport (pre-train + fine-tune) +- [ ] **D3IL Tasks**: avoid_m1, avoid_m2, avoid_m3 (pre-train + fine-tune) +- [ ] **Furniture-Bench Tasks**: one_leg, lamp, round_table (low/med difficulty) + +#### Success Criteria +- [ ] All pre-training runs complete successfully (loss convergence) +- [ ] All fine-tuning runs complete successfully (performance improvement) +- [ ] All experiments logged with proper WandB tracking +- [ ] Results comparable to paper benchmarks +- [ ] Complete documentation of hyperparameters and settings + ## Next Steps -### Immediate Tasks (To Verify All Environments Work) +### Phase 1: Validation on Dev Partition (Current Priority) -1. **Test remaining Gym environments**: - - [ ] walker2d-medium-v2 (2 epochs dev test) - - [ ] halfcheetah-medium-v2 (2 epochs dev test) - -2. 
**Test other environment types**: - - [ ] Robomimic: can task (basic test) - - [ ] D3IL: avoid_m1 (basic test) +**Goal**: Test all environments and modes on dev partition to validate installation and document any issues. -3. **Full production runs** (after confirming all work): - - [ ] Full pre-training: hopper, walker2d, halfcheetah (200 epochs each) - - [ ] Fine-tuning experiments +#### Dev Validation Todo List (In Order): -**Status**: Only hopper-medium-v2 confirmed working. Need to verify other environments before production runs. \ No newline at end of file +1. - [ ] Test walker2d pretrain on dev (retry with flexible script) - Job 3445167 [IN PROGRESS] +2. - [ ] Monitor halfcheetah pretrain dev test (Job 3445155) [IN PROGRESS] +3. - [ ] Monitor hopper finetune dev test (Job 3445158) [PENDING] +4. - [ ] Test walker2d finetune on dev +5. - [ ] Test halfcheetah finetune on dev +6. - [ ] Test Robomimic lift pretrain on dev +7. - [ ] Test Robomimic lift finetune on dev +8. - [ ] Test Robomimic can pretrain on dev +9. - [ ] Test Robomimic can finetune on dev +10. - [ ] Test Robomimic square pretrain on dev +11. - [ ] Test Robomimic square finetune on dev +12. - [ ] Test Robomimic transport pretrain on dev +13. - [ ] Test Robomimic transport finetune on dev +14. - [ ] Test D3IL avoid_m1 pretrain on dev +15. - [ ] Test D3IL avoid_m1 finetune on dev +16. - [ ] Test D3IL avoid_m2 pretrain on dev +17. - [ ] Test D3IL avoid_m2 finetune on dev +18. - [ ] Test D3IL avoid_m3 pretrain on dev +19. - [ ] Test D3IL avoid_m3 finetune on dev +20. - [ ] Test Furniture one_leg_low pretrain on dev +21. - [ ] Test Furniture one_leg_low finetune on dev +22. - [ ] Test Furniture lamp_low pretrain on dev +23. - [ ] Test Furniture lamp_low finetune on dev +24. - [ ] Document any issues found in README +25. 
- [ ] Verify all WandB logging works with dppo- prefix + +**Total validation items: 25 (23 dev tests across 4 environment suites: Gym, Robomimic, D3IL, Furniture; plus 2 documentation/verification tasks)** + +### Phase 2: Production Runs (After Dev Validation) + +**Only proceed after Phase 1 complete and all issues resolved** + +#### 2.1 Full Gym Pipeline +- [ ] hopper: pre-train (200 epochs) → fine-tune +- [ ] walker2d: pre-train (200 epochs) → fine-tune +- [ ] halfcheetah: pre-train (200 epochs) → fine-tune + +#### 2.2 Extended Environments +- [ ] All validated environments from Phase 1 + +**Current Status**: Phase 1 in progress. Job 3445154 (walker2d pretrain dev) completed, job 3445155 (halfcheetah pretrain dev) running, job 3445158 (hopper finetune dev) queued. Production job 3445123 was cancelled; production runs start only after validation is complete. \ No newline at end of file diff --git a/README.md b/README.md index b81bac0..6a5f569 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,8 @@ The DPPO repository has been adapted to run on the HoReKa cluster. The original git clone git@dominik-roth.eu:dodox/dppo.git cd dppo ``` + + Note: This is a fork of the original DPPO repository adapted for HoReKa cluster usage. 2. **Create virtual environment with Python 3.10:** ```bash @@ -142,9 +144,41 @@ This fork includes the following additions for HoReKa compatibility: - `install_dppo.sh` - Automated installation script for SLURM - `submit_job.sh` - Convenient job submission wrapper - `slurm/` directory with job scripts for different experiment types +- `EXPERIMENT_PLAN.md` - Comprehensive experiment tracking and validation plan - Updated `.gitignore` to allow shell scripts (removed `*.sh` exclusion) +- WandB project names prefixed with "dppo-" for better organization -Note: The installation was successful without any code modifications. All dependencies installed correctly with Python 3.10. 
+## HoReKa Compatibility Fixes + +### Required Environment Setup +```bash +# MuJoCo compilation requirements +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia +export CC=gcc +export CXX=g++ + +# WandB configuration +export DPPO_WANDB_ENTITY="your_wandb_username" +export WANDB_API_KEY="your_api_key" +``` + +### Configuration Changes Made +- **Python Version**: Uses Python 3.10 instead of original conda Python 3.8 +- **WandB Project Names**: Updated to use "dppo-" prefix for better organization +- **Compiler**: Forces GCC due to Intel compiler strictness with MuJoCo + +### Current Status +- **Working**: Pre-training for Gym, Robomimic, D3IL environments with automatic data download +- **Issue**: Fine-tuning mode fails due to MuJoCo compilation with HoReKa's Intel compiler +- **Not Compatible**: Furniture-Bench requires Python 3.8 (incompatible with our Python 3.10 setup) + +### How to Use This Repository on HoReKa + +1. **Check experiment status**: See `EXPERIMENT_PLAN.md` for current validation progress and todo list +2. **Run development tests**: Use `TASK=<hopper|walker2d|halfcheetah> MODE=<pretrain|finetune> sbatch slurm/run_dppo_dev_flexible.sh` +3. **Monitor jobs**: `squeue -u $USER` and check logs in `logs/` directory +4. **View results**: WandB projects will appear under `dppo-<suite>-<env>-<mode>` naming (e.g. `dppo-gym-hopper-medium-v2-pretrain`) +5. 
**Scale to production**: Only after all dev validations pass (see Phase 2 in experiment plan) ## Usage - Pre-training diff --git a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml index 479868d..72062df 100644 --- a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml @@ -21,7 +21,7 @@ cond_steps: 1 wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: d3il-${env}-pretrain + project: dppo-d3il-${env}-pretrain run: ${now:%H-%M-%S}_${name} train: diff --git a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml index bfbb4c4..600326a 100644 --- a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml +++ b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml @@ -22,7 +22,7 @@ cond_steps: 1 wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: furniture-${task}-${randomness}-pretrain + project: dppo-furniture-${task}-${randomness}-pretrain run: ${now:%H-%M-%S}_${name} train: diff --git a/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml b/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml index 8e395ff..d84106f 100644 --- a/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml +++ b/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml @@ -39,7 +39,7 @@ env: wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: gym-${env_name}-finetune + project: dppo-gym-${env_name}-finetune run: ${now:%H-%M-%S}_${name} train: diff --git a/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml b/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml index 5cea98a..8e9dddd 100644 --- a/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml +++ b/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml @@ -39,7 +39,7 @@ env: wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: gym-${env_name}-finetune + project: dppo-gym-${env_name}-finetune run: ${now:%H-%M-%S}_${name} train: diff --git 
a/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml b/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml index de70428..49f0b44 100644 --- a/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml +++ b/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml @@ -39,7 +39,7 @@ env: wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: gym-${env_name}-finetune + project: dppo-gym-${env_name}-finetune run: ${now:%H-%M-%S}_${name} train: diff --git a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml index 88ff719..7c675a6 100644 --- a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml @@ -20,7 +20,7 @@ cond_steps: 1 wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: gym-${env}-pretrain + project: dppo-gym-${env}-pretrain run: ${now:%H-%M-%S}_${name} train: diff --git a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml index 6d6fb0a..e0fab34 100644 --- a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml @@ -20,7 +20,7 @@ cond_steps: 1 wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: gym-${env}-pretrain + project: dppo-gym-${env}-pretrain run: ${now:%H-%M-%S}_${name} train: diff --git a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml index ccaf830..8021e31 100644 --- a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml @@ -20,7 +20,7 @@ cond_steps: 1 wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: gym-${env}-pretrain + project: dppo-gym-${env}-pretrain run: ${now:%H-%M-%S}_${name} train: diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml index 
e67fd04..0ebb050 100644 --- a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml +++ b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml @@ -20,7 +20,7 @@ cond_steps: 1 wandb: entity: ${oc.env:DPPO_WANDB_ENTITY} - project: robomimic-${env}-pretrain + project: dppo-robomimic-${env}-pretrain run: ${now:%H-%M-%S}_${name} train: diff --git a/slurm/dev_tests/test_d3il_avoid_m1.sh b/slurm/dev_tests/test_d3il_avoid_m1.sh new file mode 100644 index 0000000..2f347a9 --- /dev/null +++ b/slurm/dev_tests/test_d3il_avoid_m1.sh @@ -0,0 +1,35 @@ +#!/bin/bash +#SBATCH --job-name=dppo_d3il_test +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_d3il_%j.out +#SBATCH --error=logs/dppo_d3il_%j.err + +# Load modules and set environment +module load devel/cuda/12.4 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia +export CC=gcc +export CXX=g++ +export WANDB_MODE=online +export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"} +export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data} +export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log} + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +echo "Testing D3IL avoid_m1 pretrain..." +echo "Job ID: $SLURM_JOB_ID" + +python script/run.py --config-name=pre_diffusion_mlp \ + --config-dir=cfg/d3il/pretrain/avoid_m1 \ + train.n_epochs=2 \ + train.save_model_freq=1 + +echo "D3IL test completed!" 
\ No newline at end of file diff --git a/slurm/dev_tests/test_robomimic_lift.sh b/slurm/dev_tests/test_robomimic_lift.sh new file mode 100644 index 0000000..ed2758b --- /dev/null +++ b/slurm/dev_tests/test_robomimic_lift.sh @@ -0,0 +1,35 @@ +#!/bin/bash +#SBATCH --job-name=dppo_robomimic_test +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_robomimic_%j.out +#SBATCH --error=logs/dppo_robomimic_%j.err + +# Load modules and set environment +module load devel/cuda/12.4 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia +export CC=gcc +export CXX=g++ +export WANDB_MODE=online +export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"} +export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data} +export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log} + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +echo "Testing Robomimic lift pretrain..." +echo "Job ID: $SLURM_JOB_ID" + +python script/run.py --config-name=pre_diffusion_mlp \ + --config-dir=cfg/robomimic/pretrain/lift \ + train.n_epochs=2 \ + train.save_model_freq=1 + +echo "Robomimic test completed!" 
\ No newline at end of file diff --git a/slurm/run_dppo_dev_flexible.sh b/slurm/run_dppo_dev_flexible.sh new file mode 100644 index 0000000..0c18a50 --- /dev/null +++ b/slurm/run_dppo_dev_flexible.sh @@ -0,0 +1,100 @@ +#!/bin/bash +#SBATCH --job-name=dppo_dev_test +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_dev_%j.out +#SBATCH --error=logs/dppo_dev_%j.err + +# Usage: TASK=hopper MODE=pretrain sbatch slurm/run_dppo_dev_flexible.sh +# Usage: TASK=hopper MODE=finetune sbatch slurm/run_dppo_dev_flexible.sh + +# Load required modules +module load devel/cuda/12.4 + +# Fix MuJoCo library path for fine-tuning +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia + +# Use GCC instead of Intel compiler for MuJoCo compilation (Intel icx too strict) +export CC=gcc +export CXX=g++ + +# Set environment variables for WandB +export WANDB_MODE=online +export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"} + +# Default paths +export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data} +export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log} + +# Set defaults if not provided +TASK=${TASK:-hopper} +MODE=${MODE:-pretrain} + +# Change to project directory +cd $SLURM_SUBMIT_DIR + +# Activate virtual environment +source .venv/bin/activate + +echo "Starting DPPO dev test..." +echo "Job ID: $SLURM_JOB_ID" +echo "Node: $SLURM_NODELIST" +echo "Task: $TASK" +echo "Mode: $MODE" +echo "GPU: $CUDA_VISIBLE_DEVICES" +echo "" +echo "Python version: $(python --version)" +echo "PyTorch version: $(python -c 'import torch; print(torch.__version__)')" +echo "CUDA available: $(python -c 'import torch; print(torch.cuda.is_available())')" +echo "" + +if [ "$MODE" = "pretrain" ]; then + echo "Running pre-training test (2 epochs)..." 
+ + if [ "$TASK" = "hopper" ]; then + ENV_CONFIG="hopper-medium-v2" + elif [ "$TASK" = "walker2d" ]; then + ENV_CONFIG="walker2d-medium-v2" + elif [ "$TASK" = "halfcheetah" ]; then + ENV_CONFIG="halfcheetah-medium-v2" + else + echo "Unknown task: $TASK" + exit 1 + fi + + python script/run.py --config-name=pre_diffusion_mlp \ + --config-dir=cfg/gym/pretrain/$ENV_CONFIG \ + train.n_epochs=2 \ + train.save_model_freq=1 + +elif [ "$MODE" = "finetune" ]; then + echo "Running fine-tuning test (short run)..." + + if [ "$TASK" = "hopper" ]; then + ENV_CONFIG="hopper-v2" + elif [ "$TASK" = "walker2d" ]; then + ENV_CONFIG="walker2d-v2" + elif [ "$TASK" = "halfcheetah" ]; then + ENV_CONFIG="halfcheetah-v2" + else + echo "Unknown task: $TASK" + exit 1 + fi + + python script/run.py --config-name=ft_ppo_diffusion_mlp \ + --config-dir=cfg/gym/finetune/$ENV_CONFIG \ + train.n_train_itr=10 \ + train.val_freq=5 + +else + echo "Unknown mode: $MODE. Use 'pretrain' or 'finetune'" + exit 1 +fi + +echo "Dev test completed!" \ No newline at end of file