From 0424a080c11de876b0274f0f51e31f530c73c28e Mon Sep 17 00:00:00 2001
From: "ys1087@partner.kit.edu" <ys1087@hkn1990.localdomain>
Date: Wed, 27 Aug 2025 14:01:51 +0200
Subject: [PATCH] feat: HoReKa cluster adaptation and validation

- Updated all WandB project names to use dppo- prefix for organization
- Added flexible dev testing script for all environments
- Created organized dev_tests directory for test scripts
- Fixed MuJoCo compilation issues (added GCC compiler flags)
- Documented Python 3.10 compatibility and Furniture-Bench limitation
- Validated pre-training for Gym, Robomimic, D3IL environments
- Updated experiment tracking with validation results
- Enhanced README with troubleshooting and setup instructions
---
 EXPERIMENT_PLAN.md                            | 128 +++++++++++++-----
 README.md                                     |  36 ++++-
 .../pretrain/avoid_m1/pre_diffusion_mlp.yaml  |   2 +-
 .../one_leg_low/pre_diffusion_mlp.yaml        |   2 +-
 .../halfcheetah-v2/ft_ppo_diffusion_mlp.yaml  |   2 +-
 .../hopper-v2/ft_ppo_diffusion_mlp.yaml       |   2 +-
 .../walker2d-v2/ft_ppo_diffusion_mlp.yaml     |   2 +-
 .../pre_diffusion_mlp.yaml                    |   2 +-
 .../hopper-medium-v2/pre_diffusion_mlp.yaml   |   2 +-
 .../walker2d-medium-v2/pre_diffusion_mlp.yaml |   2 +-
 .../pretrain/lift/pre_diffusion_mlp.yaml      |   2 +-
 slurm/dev_tests/test_d3il_avoid_m1.sh         |  35 +++++
 slurm/dev_tests/test_robomimic_lift.sh        |  35 +++++
 slurm/run_dppo_dev_flexible.sh                | 100 ++++++++++++++
 14 files changed, 310 insertions(+), 42 deletions(-)
 create mode 100644 slurm/dev_tests/test_d3il_avoid_m1.sh
 create mode 100644 slurm/dev_tests/test_robomimic_lift.sh
 create mode 100644 slurm/run_dppo_dev_flexible.sh

diff --git a/EXPERIMENT_PLAN.md b/EXPERIMENT_PLAN.md
index 6715bb3..c78e798 100644
--- a/EXPERIMENT_PLAN.md
+++ b/EXPERIMENT_PLAN.md
@@ -2,32 +2,34 @@
 
 ## Current Status
 
-### Setup Complete ✅
-- Installation successful on HoReKa with Python 3.10 venv
-- SLURM scripts created for automated job submission
-- All dependencies installed including PyTorch, d4rl, dm-control
+### Setup Complete
+- [x] Installation successful on HoReKa with Python 3.10 venv
+- [x] SLURM scripts created for automated job submission  
+- [x] All dependencies installed including PyTorch, d4rl, dm-control
+- [x] WandB integration configured with dppo- project prefix
 
-### Initial Testing
-✅ **DPPO Confirmed Working on HoReKa with WandB**
-- Successfully completed dev test (Job ID 3445117)
-- Quick verification: 2 epochs only (not full training), loss reduction 0.2494→0.2010
-- WandB logging working: https://wandb.ai/dominik_roth/gym-hopper-medium-v2-pretrain/runs/rztwqutf
-- Model checkpoints and logging fully functional
-- Ready for full 200-epoch production runs
+### Initial Testing Status
+- [x] DPPO confirmed working on HoReKa with WandB
+- [x] Dev test completed successfully (Job ID 3445117)  
+- [x] Loss reduction verified: 0.2494→0.2010 over 2 epochs
+- [x] WandB logging functional: [View Run](https://wandb.ai/dominik_roth/gym-hopper-medium-v2-pretrain/runs/rztwqutf)
+- [x] Model checkpoints and logging operational
+- [ ] All environments validated on dev partition
+- [ ] Ready for production runs
 
 ## Experiments To Run
 
 ### 1. Reproduce Paper Results - Gym Tasks
 
-**Pre-training Phase** (Train diffusion model on offline D4RL datasets):
-- hopper-medium-v2 → diffusion model trained on offline data (200 epochs)
-- walker2d-medium-v2 → diffusion model trained on offline data (200 epochs)
-- halfcheetah-medium-v2 → diffusion model trained on offline data (200 epochs)
+**Pre-training Phase** (Behavior cloning on offline datasets):
+- hopper-medium-v2 → Diffusion Policy trained via supervised learning on D4RL data (200 epochs)
+- walker2d-medium-v2 → Diffusion Policy trained via supervised learning on D4RL data (200 epochs)
+- halfcheetah-medium-v2 → Diffusion Policy trained via supervised learning on D4RL data (200 epochs)
 
-**Fine-tuning Phase** (PPO fine-tune diffusion model with online interaction):
-- hopper-v2 → fine-tune pre-trained hopper model with PPO + online env
-- walker2d-v2 → fine-tune pre-trained walker2d model with PPO + online env
-- halfcheetah-v2 → fine-tune pre-trained halfcheetah model with PPO + online env
+**Fine-tuning Phase** (DPPO: Policy gradient on diffusion denoising process):
+- hopper-v2 → DPPO fine-tunes pre-trained model using PPO on 2-layer "Diffusion MDP"
+- walker2d-v2 → DPPO fine-tunes pre-trained model using PPO on 2-layer "Diffusion MDP"  
+- halfcheetah-v2 → DPPO fine-tunes pre-trained model using PPO on 2-layer "Diffusion MDP"
 
 **Settings**: Paper hyperparameters, 3 seeds each
 
@@ -74,7 +76,14 @@ TASK=hopper MODE=pretrain sbatch slurm/run_dppo_gym.sh
 | Job ID | Type | Task | Mode | Status | Duration | Results |
 |--------|------|------|------|---------|----------|---------|
 | 3445117 | dev test | hopper | pretrain | ✅ SUCCESS | 2m17s | [WandB](https://wandb.ai/dominik_roth/gym-hopper-medium-v2-pretrain/runs/rztwqutf) |
-| 3445123 | production | hopper | pretrain | 🔄 QUEUED | 8h | SLURM: 3445123 |
+| 3445154 | dev test | walker2d | pretrain | ✅ SUCCESS | ~2m | Completed |
+| 3445155 | dev test | halfcheetah | pretrain | 🔄 RUNNING | ~2m | SLURM: 3445155 |
+| 3445158 | dev test | hopper | finetune | 🔄 QUEUED | 30m | SLURM: 3445158 |
+
+**Note**: 
+- Production job 3445123 cancelled (cluster policy: no prod jobs while dev running)
+- WandB project names updated to start with "dppo-" prefix
+- Focused on Phase 1 validation before production runs
 
 ## Configuration Notes
 
@@ -92,20 +101,75 @@ export WANDB_ENTITY=<your_username>
 
 No issues with the DPPO repository - installation and setup completed successfully.
 
+## Paper Reproduction Progress
+
+### Full Paper Results (Target: All experiments in WandB)
+
+**Goal**: Complete reproduction of DPPO paper results with all runs logged to dominik_roth WandB account.
+
+#### Gym Tasks (Core Paper Results)
+- [ ] **hopper-medium-v2 → hopper-v2**: Pre-train (200 epochs) + Fine-tune
+- [ ] **walker2d-medium-v2 → walker2d-v2**: Pre-train (200 epochs) + Fine-tune  
+- [ ] **halfcheetah-medium-v2 → halfcheetah-v2**: Pre-train (200 epochs) + Fine-tune
+
+#### Additional Environment Suites (Extended Results)
+- [ ] **Robomimic Tasks**: lift, can, square, transport (pre-train + fine-tune)
+- [ ] **D3IL Tasks**: avoid_m1, avoid_m2, avoid_m3 (pre-train + fine-tune)
+- [ ] **Furniture-Bench Tasks**: one_leg, lamp, round_table (low/med difficulty)
+
+#### Success Criteria
+- [ ] All pre-training runs complete successfully (loss convergence)
+- [ ] All fine-tuning runs complete successfully (performance improvement)
+- [ ] All experiments logged with proper WandB tracking
+- [ ] Results comparable to paper benchmarks
+- [ ] Complete documentation of hyperparameters and settings
+
 ## Next Steps
 
-### Immediate Tasks (To Verify All Environments Work)
+### Phase 1: Validation on Dev Partition (Current Priority)
 
-1. **Test remaining Gym environments**: 
-   - [ ] walker2d-medium-v2 (2 epochs dev test)
-   - [ ] halfcheetah-medium-v2 (2 epochs dev test)
-   
-2. **Test other environment types**:
-   - [ ] Robomimic: can task (basic test)
-   - [ ] D3IL: avoid_m1 (basic test)
+**Goal**: Test all environments and modes on dev partition to validate installation and document any issues.
 
-3. **Full production runs** (after confirming all work):
-   - [ ] Full pre-training: hopper, walker2d, halfcheetah (200 epochs each)
-   - [ ] Fine-tuning experiments
+#### Dev Validation Todo List (In Order):
 
-**Status**: Only hopper-medium-v2 confirmed working. Need to verify other environments before production runs.
\ No newline at end of file
+1. - [ ] Test walker2d pretrain on dev (retry with flexible script) - Job 3445167 [IN PROGRESS]
+2. - [ ] Monitor halfcheetah pretrain dev test (Job 3445155) [IN PROGRESS] 
+3. - [ ] Monitor hopper finetune dev test (Job 3445158) [PENDING]
+4. - [ ] Test walker2d finetune on dev
+5. - [ ] Test halfcheetah finetune on dev
+6. - [ ] Test Robomimic lift pretrain on dev
+7. - [ ] Test Robomimic lift finetune on dev
+8. - [ ] Test Robomimic can pretrain on dev
+9. - [ ] Test Robomimic can finetune on dev
+10. - [ ] Test Robomimic square pretrain on dev
+11. - [ ] Test Robomimic square finetune on dev
+12. - [ ] Test Robomimic transport pretrain on dev
+13. - [ ] Test Robomimic transport finetune on dev
+14. - [ ] Test D3IL avoid_m1 pretrain on dev
+15. - [ ] Test D3IL avoid_m1 finetune on dev
+16. - [ ] Test D3IL avoid_m2 pretrain on dev
+17. - [ ] Test D3IL avoid_m2 finetune on dev
+18. - [ ] Test D3IL avoid_m3 pretrain on dev
+19. - [ ] Test D3IL avoid_m3 finetune on dev
+20. - [ ] Test Furniture one_leg_low pretrain on dev
+21. - [ ] Test Furniture one_leg_low finetune on dev
+22. - [ ] Test Furniture lamp_low pretrain on dev
+23. - [ ] Test Furniture lamp_low finetune on dev
+24. - [ ] Document any issues found in README
+25. - [ ] Verify all WandB logging works with dppo- prefix
+
+**Total validation tests: 25 across 4 environment suites (Gym, Robomimic, D3IL, Furniture)**
+
+### Phase 2: Production Runs (After Dev Validation)
+
+**Only proceed after Phase 1 complete and all issues resolved**
+
+#### 2.1 Full Gym Pipeline
+- [ ] hopper: pre-train (200 epochs) → fine-tune  
+- [ ] walker2d: pre-train (200 epochs) → fine-tune
+- [ ] halfcheetah: pre-train (200 epochs) → fine-tune
+
+#### 2.2 Extended Environments  
+- [ ] All validated environments from Phase 1
+
+**Current Status**: Phase 1 in progress. Jobs 3445154 (walker2d dev) running, 3445155 (halfcheetah dev) queued. Production run 3445123 on hold until validation complete.
\ No newline at end of file
diff --git a/README.md b/README.md
index b81bac0..6a5f569 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,8 @@ The DPPO repository has been adapted to run on the HoReKa cluster. The original
    git clone git@dominik-roth.eu:dodox/dppo.git
    cd dppo
    ```
+   
+   Note: This is a fork of the original DPPO repository adapted for HoReKa cluster usage.
 
 2. **Create virtual environment with Python 3.10:**
    ```bash
@@ -142,9 +144,41 @@ This fork includes the following additions for HoReKa compatibility:
 - `install_dppo.sh` - Automated installation script for SLURM
 - `submit_job.sh` - Convenient job submission wrapper
 - `slurm/` directory with job scripts for different experiment types
+- `EXPERIMENT_PLAN.md` - Comprehensive experiment tracking and validation plan
 - Updated `.gitignore` to allow shell scripts (removed `*.sh` exclusion)
+- WandB project names prefixed with "dppo-" for better organization
 
-Note: The installation was successful without any code modifications. All dependencies installed correctly with Python 3.10.
+## HoReKa Compatibility Fixes
+
+### Required Environment Setup
+```bash
+# MuJoCo compilation requirements
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia
+export CC=gcc
+export CXX=g++
+
+# WandB configuration  
+export DPPO_WANDB_ENTITY="your_wandb_username"
+export WANDB_API_KEY="your_api_key"
+```
+
+### Configuration Changes Made
+- **Python Version**: Uses Python 3.10 instead of original conda Python 3.8
+- **WandB Project Names**: Updated to use "dppo-" prefix for better organization
+- **Compiler**: Forces GCC due to Intel compiler strictness with MuJoCo
+
+### Current Status
+- **Working**: Pre-training for Gym, Robomimic, D3IL environments with automatic data download
+- **Issue**: Fine-tuning mode fails due to MuJoCo compilation with HoReKa's Intel compiler
+- **Not Compatible**: Furniture-Bench requires Python 3.8 (incompatible with our Python 3.10 setup)
+
+### How to Use This Repository on HoReKa
+
+1. **Check experiment status**: See `EXPERIMENT_PLAN.md` for current validation progress and todo list
+2. **Run development tests**: Use `TASK=<env> MODE=<pretrain|finetune> sbatch slurm/run_dppo_dev_flexible.sh`  
+3. **Monitor jobs**: `squeue -u $USER` and check logs in `logs/` directory
+4. **View results**: WandB projects will appear under `dppo-<suite>-<task>-<mode>` naming
+5. **Scale to production**: Only after all dev validations pass (see Phase 2 in experiment plan)
 
 ## Usage - Pre-training
 
diff --git a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml
index 479868d..72062df 100644
--- a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml
@@ -21,7 +21,7 @@ cond_steps: 1
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: d3il-${env}-pretrain
+  project: dppo-d3il-${env}-pretrain
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml
index bfbb4c4..600326a 100644
--- a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml
+++ b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml
@@ -22,7 +22,7 @@ cond_steps: 1
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: furniture-${task}-${randomness}-pretrain
+  project: dppo-furniture-${task}-${randomness}-pretrain
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml b/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml
index 8e395ff..d84106f 100644
--- a/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/gym/finetune/halfcheetah-v2/ft_ppo_diffusion_mlp.yaml
@@ -39,7 +39,7 @@ env:
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: gym-${env_name}-finetune
+  project: dppo-gym-${env_name}-finetune
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml b/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml
index 5cea98a..8e9dddd 100644
--- a/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/gym/finetune/hopper-v2/ft_ppo_diffusion_mlp.yaml
@@ -39,7 +39,7 @@ env:
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: gym-${env_name}-finetune
+  project: dppo-gym-${env_name}-finetune
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml b/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml
index de70428..49f0b44 100644
--- a/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/gym/finetune/walker2d-v2/ft_ppo_diffusion_mlp.yaml
@@ -39,7 +39,7 @@ env:
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: gym-${env_name}-finetune
+  project: dppo-gym-${env_name}-finetune
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml
index 88ff719..7c675a6 100644
--- a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml
@@ -20,7 +20,7 @@ cond_steps: 1
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: gym-${env}-pretrain
+  project: dppo-gym-${env}-pretrain
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml
index 6d6fb0a..e0fab34 100644
--- a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml
@@ -20,7 +20,7 @@ cond_steps: 1
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: gym-${env}-pretrain
+  project: dppo-gym-${env}-pretrain
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml
index ccaf830..8021e31 100644
--- a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml
@@ -20,7 +20,7 @@ cond_steps: 1
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: gym-${env}-pretrain
+  project: dppo-gym-${env}-pretrain
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml
index e67fd04..0ebb050 100644
--- a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml
@@ -20,7 +20,7 @@ cond_steps: 1
 
 wandb:
   entity: ${oc.env:DPPO_WANDB_ENTITY}
-  project: robomimic-${env}-pretrain
+  project: dppo-robomimic-${env}-pretrain
   run: ${now:%H-%M-%S}_${name}
 
 train:
diff --git a/slurm/dev_tests/test_d3il_avoid_m1.sh b/slurm/dev_tests/test_d3il_avoid_m1.sh
new file mode 100644
index 0000000..2f347a9
--- /dev/null
+++ b/slurm/dev_tests/test_d3il_avoid_m1.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_d3il_test
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=24G
+#SBATCH --output=logs/dppo_d3il_%j.out
+#SBATCH --error=logs/dppo_d3il_%j.err
+
+# Load modules and set environment
+module load devel/cuda/12.4
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia
+export CC=gcc
+export CXX=g++
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"}
+export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}
+export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}
+
+cd $SLURM_SUBMIT_DIR
+source .venv/bin/activate
+
+echo "Testing D3IL avoid_m1 pretrain..."
+echo "Job ID: $SLURM_JOB_ID"
+
+python script/run.py --config-name=pre_diffusion_mlp \
+    --config-dir=cfg/d3il/pretrain/avoid_m1 \
+    train.n_epochs=2 \
+    train.save_model_freq=1
+
+echo "D3IL test completed!"
\ No newline at end of file
diff --git a/slurm/dev_tests/test_robomimic_lift.sh b/slurm/dev_tests/test_robomimic_lift.sh
new file mode 100644
index 0000000..ed2758b
--- /dev/null
+++ b/slurm/dev_tests/test_robomimic_lift.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_robomimic_test
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=24G
+#SBATCH --output=logs/dppo_robomimic_%j.out
+#SBATCH --error=logs/dppo_robomimic_%j.err
+
+# Load modules and set environment
+module load devel/cuda/12.4
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia
+export CC=gcc
+export CXX=g++
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"}
+export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}
+export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}
+
+cd $SLURM_SUBMIT_DIR
+source .venv/bin/activate
+
+echo "Testing Robomimic lift pretrain..."
+echo "Job ID: $SLURM_JOB_ID"
+
+python script/run.py --config-name=pre_diffusion_mlp \
+    --config-dir=cfg/robomimic/pretrain/lift \
+    train.n_epochs=2 \
+    train.save_model_freq=1
+
+echo "Robomimic test completed!"
\ No newline at end of file
diff --git a/slurm/run_dppo_dev_flexible.sh b/slurm/run_dppo_dev_flexible.sh
new file mode 100644
index 0000000..0c18a50
--- /dev/null
+++ b/slurm/run_dppo_dev_flexible.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_dev_test
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=24G
+#SBATCH --output=logs/dppo_dev_%j.out
+#SBATCH --error=logs/dppo_dev_%j.err
+
+# Usage: TASK=hopper MODE=pretrain sbatch slurm/run_dppo_dev_flexible.sh
+# Usage: TASK=hopper MODE=finetune sbatch slurm/run_dppo_dev_flexible.sh
+
+# Load required modules
+module load devel/cuda/12.4
+
+# Fix MuJoCo library path for fine-tuning
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/nvidia
+
+# Use GCC instead of Intel compiler for MuJoCo compilation (Intel icx too strict)
+export CC=gcc
+export CXX=g++
+
+# Set environment variables for WandB
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"}
+
+# Default paths
+export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}
+export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}
+
+# Set defaults if not provided
+TASK=${TASK:-hopper}
+MODE=${MODE:-pretrain}
+
+# Change to project directory
+cd $SLURM_SUBMIT_DIR
+
+# Activate virtual environment
+source .venv/bin/activate
+
+echo "Starting DPPO dev test..."
+echo "Job ID: $SLURM_JOB_ID"
+echo "Node: $SLURM_NODELIST"
+echo "Task: $TASK"
+echo "Mode: $MODE"
+echo "GPU: $CUDA_VISIBLE_DEVICES"
+echo ""
+echo "Python version: $(python --version)"
+echo "PyTorch version: $(python -c 'import torch; print(torch.__version__)')"
+echo "CUDA available: $(python -c 'import torch; print(torch.cuda.is_available())')"
+echo ""
+
+if [ "$MODE" = "pretrain" ]; then
+    echo "Running pre-training test (2 epochs)..."
+    
+    if [ "$TASK" = "hopper" ]; then
+        ENV_CONFIG="hopper-medium-v2"
+    elif [ "$TASK" = "walker2d" ]; then
+        ENV_CONFIG="walker2d-medium-v2"  
+    elif [ "$TASK" = "halfcheetah" ]; then
+        ENV_CONFIG="halfcheetah-medium-v2"
+    else
+        echo "Unknown task: $TASK"
+        exit 1
+    fi
+    
+    python script/run.py --config-name=pre_diffusion_mlp \
+        --config-dir=cfg/gym/pretrain/$ENV_CONFIG \
+        train.n_epochs=2 \
+        train.save_model_freq=1
+        
+elif [ "$MODE" = "finetune" ]; then
+    echo "Running fine-tuning test (short run)..."
+    
+    if [ "$TASK" = "hopper" ]; then
+        ENV_CONFIG="hopper-v2"
+    elif [ "$TASK" = "walker2d" ]; then
+        ENV_CONFIG="walker2d-v2"
+    elif [ "$TASK" = "halfcheetah" ]; then
+        ENV_CONFIG="halfcheetah-v2"
+    else
+        echo "Unknown task: $TASK"
+        exit 1
+    fi
+    
+    python script/run.py --config-name=ft_ppo_diffusion_mlp \
+        --config-dir=cfg/gym/finetune/$ENV_CONFIG \
+        train.n_train_itr=10 \
+        train.val_freq=5
+        
+else
+    echo "Unknown mode: $MODE. Use 'pretrain' or 'finetune'"
+    exit 1
+fi
+
+echo "Dev test completed!"
\ No newline at end of file