dppo/slurm/run_dppo_dev_flexible.sh
ys1087@partner.kit.edu 0424a080c1 feat: HoReKa cluster adaptation and validation
- Updated all WandB project names to use dppo- prefix for organization
- Added flexible dev testing script for all environments
- Created organized dev_tests directory for test scripts
- Fixed MuJoCo compilation issues (added GCC compiler flags)
- Documented Python 3.10 compatibility and Furniture-Bench limitation
- Validated pre-training for Gym, Robomimic, D3IL environments
- Updated experiment tracking with validation results
- Enhanced README with troubleshooting and setup instructions
2025-08-27 14:01:51 +02:00

100 lines
2.8 KiB
Bash

#!/bin/bash
#SBATCH --job-name=dppo_dev_test
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=dev_accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --time=00:30:00
#SBATCH --mem=24G
#SBATCH --output=logs/dppo_dev_%j.out
#SBATCH --error=logs/dppo_dev_%j.err
# Short (30 min) single-GPU dev/smoke test for DPPO pre-training or
# fine-tuning on the HoReKa cluster (dev_accelerated partition).
# %j in the output/error paths expands to the SLURM job ID.
# NOTE(review): the logs/ directory must exist at submission time,
# otherwise SLURM cannot create the output/error files — confirm it is
# created before running sbatch.
# Usage: TASK=hopper MODE=pretrain sbatch slurm/run_dppo_dev_flexible.sh
# Usage: TASK=hopper MODE=finetune sbatch slurm/run_dppo_dev_flexible.sh
# --- Environment setup --------------------------------------------------
# Fail fast: abort on command errors, unset variables, and failed pipeline
# stages, so a broken setup step does not silently waste the GPU allocation.
set -euo pipefail

# Load required modules
module load devel/cuda/12.4

# Fix MuJoCo library path for fine-tuning.
# :- guard keeps this valid under `set -u` when LD_LIBRARY_PATH is unset.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:/usr/lib/nvidia"

# Use GCC instead of Intel compiler for MuJoCo compilation (Intel icx too strict)
export CC=gcc
export CXX=g++

# Set environment variables for WandB (entity overridable by the caller)
export WANDB_MODE=online
export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"}

# Default data/log paths, relative to the directory sbatch was run from
export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}
export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}

# Set defaults if not provided (see usage lines in the header)
TASK=${TASK:-hopper}
MODE=${MODE:-pretrain}

# Change to project directory; abort rather than run in the wrong cwd
cd "$SLURM_SUBMIT_DIR" || { echo "cannot cd to $SLURM_SUBMIT_DIR" >&2; exit 1; }

# Activate virtual environment.
# Older venv activate scripts reference unset vars (e.g. PS1), so relax
# nounset just around the source.
set +u
source .venv/bin/activate
set -u
# --- Diagnostic banner: job context and software stack -------------------
printf '%s\n' "Starting DPPO dev test..."
printf '%s\n' "Job ID: $SLURM_JOB_ID"
printf '%s\n' "Node: $SLURM_NODELIST"
printf '%s\n' "Task: $TASK"
printf '%s\n' "Mode: $MODE"
printf '%s\n' "GPU: $CUDA_VISIBLE_DEVICES"
printf '%s\n' ""
printf '%s\n' "Python version: $(python --version)"
printf '%s\n' "PyTorch version: $(python -c 'import torch; print(torch.__version__)')"
printf '%s\n' "CUDA available: $(python -c 'import torch; print(torch.cuda.is_available())')"
printf '%s\n' ""
# --- Dispatch: run a short pre-training or fine-tuning smoke test --------

#######################################
# Map a task name to its config directory name.
# Arguments: $1 - task (hopper|walker2d|halfcheetah)
#            $2 - config suffix ("-medium-v2" for pretrain, "-v2" for finetune)
# Outputs:   config dir name on stdout
# Returns:   1 (with message on stderr) for unknown tasks
#######################################
env_config_for() {
  case "$1" in
    hopper|walker2d|halfcheetah) printf '%s%s\n' "$1" "$2" ;;
    *)
      echo "Unknown task: $1" >&2
      return 1
      ;;
  esac
}

if [ "$MODE" = "pretrain" ]; then
  echo "Running pre-training test (2 epochs)..."
  # Pre-training configs are named after the D4RL datasets (*-medium-v2)
  ENV_CONFIG=$(env_config_for "$TASK" "-medium-v2") || exit 1
  python script/run.py --config-name=pre_diffusion_mlp \
    --config-dir="cfg/gym/pretrain/$ENV_CONFIG" \
    train.n_epochs=2 \
    train.save_model_freq=1
elif [ "$MODE" = "finetune" ]; then
  echo "Running fine-tuning test (short run)..."
  # Fine-tuning configs are named after the plain Gym envs (*-v2)
  ENV_CONFIG=$(env_config_for "$TASK" "-v2") || exit 1
  python script/run.py --config-name=ft_ppo_diffusion_mlp \
    --config-dir="cfg/gym/finetune/$ENV_CONFIG" \
    train.n_train_itr=10 \
    train.val_freq=5
else
  echo "Unknown mode: $MODE. Use 'pretrain' or 'finetune'" >&2
  exit 1
fi

echo "Dev test completed!"