#!/bin/bash
#SBATCH --job-name=dppo_gym
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=40
#SBATCH --time=08:00:00
#SBATCH --mem=32G
#SBATCH --output=logs/dppo_gym_%j.out
#SBATCH --error=logs/dppo_gym_%j.err
#
# SLURM batch script that launches a single DPPO Gym run through script/run.py.
#
# Usage:
#   sbatch <this-script> [TASK] [MODE] [CONFIG_TYPE]
#
# Arguments:
#   TASK         Gym task name used to build the config directory
#                (hopper, walker2d, halfcheetah). Default: hopper.
#   MODE         pretrain or finetune. Default: pretrain.
#   CONFIG_TYPE  Config name passed to --config-name. Default depends on MODE:
#                pre_diffusion_mlp (pretrain) or ft_ppo_diffusion_mlp (finetune).
#
# Environment (all optional):
#   DPPO_WANDB_ENTITY  WandB entity. Default: dominik_roth.
#   DPPO_DATA_DIR      Data directory. Default: <submit dir>/data.
#   DPPO_LOG_DIR       Log directory.  Default: <submit dir>/log.
#
# Exit status: 1 on invalid arguments or a broken environment, otherwise the
# exit status of script/run.py, so a failed run is reported as failed.
#
# NOTE(review): the --output/--error paths above point into logs/; that
# directory presumably has to exist before submission - confirm for your cluster.

# Print a message to stderr and abort the job.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Parse command line arguments
TASK=${1:-hopper}      # hopper, walker2d, halfcheetah
MODE=${2:-pretrain}    # pretrain or finetune
CONFIG_TYPE=${3:-}     # empty means "use the default for MODE"

# Select the config for the requested mode; validate before doing any setup.
case "$MODE" in
  pretrain)
    CONFIG_DIR="cfg/gym/pretrain/${TASK}-medium-v2"
    CONFIG_NAME=${CONFIG_TYPE:-pre_diffusion_mlp}
    ;;
  finetune)
    CONFIG_DIR="cfg/gym/finetune/${TASK}-v2"
    CONFIG_NAME=${CONFIG_TYPE:-ft_ppo_diffusion_mlp}
    ;;
  *)
    die "Invalid mode: $MODE. Use 'pretrain' or 'finetune'"
    ;;
esac

# Directory the job was submitted from; fall back to the current directory so
# the script can also be run directly with bash, where SLURM_SUBMIT_DIR is unset.
submit_dir=${SLURM_SUBMIT_DIR:-$PWD}

# Load required modules. A failure is only reported, not fatal, because the
# original script also continued when the module could not be loaded.
module load devel/cuda/12.4 \
  || echo "Warning: could not load module devel/cuda/12.4" >&2

# Set environment variables for WandB
export WANDB_MODE=online
export WANDB_PROJECT=dppo_gym
export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"}  # Use personal account

# Default paths (can be overridden by environment)
export DPPO_DATA_DIR=${DPPO_DATA_DIR:-${submit_dir}/data}
export DPPO_LOG_DIR=${DPPO_LOG_DIR:-${submit_dir}/log}

# Change to project directory; all relative paths below depend on it.
cd "$submit_dir" || die "Cannot change to project directory: $submit_dir"

# Activate virtual environment; abort instead of silently using another python.
# shellcheck disable=SC1091 -- the venv only exists on the cluster
source .venv/bin/activate \
  || die "Cannot activate virtual environment: ${submit_dir}/.venv"

# Fail early with a clear message when the task has no config directory.
[[ -d "$CONFIG_DIR" ]] || die "Config directory not found: $CONFIG_DIR"

echo "Starting DPPO Gym experiment..."
echo "Job ID: ${SLURM_JOB_ID:-}"
echo "Node: ${SLURM_NODELIST:-}"
echo "GPU: ${CUDA_VISIBLE_DEVICES:-}"
echo "Task: $TASK"
echo "Mode: $MODE"
echo ""

# Run experiment
# NOTE(review): WANDB_MODE is always exported above, so the ':-null' fallback
# never applies and this always passes wandb=online - confirm that the Hydra
# config accepts a plain string for the wandb key.
python script/run.py \
  --config-name="$CONFIG_NAME" \
  --config-dir="$CONFIG_DIR" \
  "wandb=${WANDB_MODE:-null}"
status=$?

# Propagate the run's exit status so SLURM does not record a crash as success.
if (( status != 0 )); then
  echo "Experiment failed with exit code $status" >&2
  exit "$status"
fi

echo "Experiment completed!"