dppo/slurm/run_dppo_gym.sh
ys1087@partner.kit.edu 5a458aac67 Configure personal WandB entity and clean up docs
Set DPPO_WANDB_ENTITY to dominik_roth for personal logging
Remove irrelevant implementation details from experiment plan
2025-08-27 12:24:39 +02:00

63 lines
1.7 KiB
Bash
Executable File

#!/bin/bash
#
# Slurm batch script: run a single DPPO Gym experiment (pretrain or finetune).
#
# Usage (submit from the project root, i.e. the directory that contains
# .venv/ and script/run.py):
#   sbatch <this script> [TASK] [MODE] [CONFIG_TYPE]
#     TASK         task name used to build the config directory
#                  (default: hopper; e.g. hopper, walker2d, halfcheetah)
#     MODE         pretrain | finetune (default: pretrain)
#     CONFIG_TYPE  value passed to --config-name; when omitted it defaults to
#                  pre_diffusion_mlp (pretrain) or ft_ppo_diffusion_mlp (finetune)
#
# Optional environment overrides:
#   DPPO_WANDB_ENTITY  WandB entity (default: dominik_roth)
#   DPPO_DATA_DIR      data directory (default: <project dir>/data)
#   DPPO_LOG_DIR       log directory  (default: <project dir>/log)
#
# Exit status: 1 on invalid MODE or failed environment setup, otherwise the
# exit status of script/run.py.
#
# NOTE(review): the output/error paths below point into logs/ relative to the
# submit directory; Slurm is not expected to create that directory, so confirm
# it exists before submitting.
#
#SBATCH --job-name=dppo_gym
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=40
#SBATCH --time=08:00:00
#SBATCH --mem=32G
#SBATCH --output=logs/dppo_gym_%j.out
#SBATCH --error=logs/dppo_gym_%j.err

# Parse command line arguments.
TASK=${1:-hopper}     # hopper, walker2d, halfcheetah
MODE=${2:-pretrain}   # pretrain or finetune
CONFIG_TYPE=${3:-}    # optional config name; empty means "use the mode default"

# Select the config for the requested mode. Done before any environment setup
# so that a typo in MODE fails immediately.
case "$MODE" in
  pretrain)
    CONFIG_DIR="cfg/gym/pretrain/${TASK}-medium-v2"
    CONFIG_NAME=${CONFIG_TYPE:-pre_diffusion_mlp}
    ;;
  finetune)
    CONFIG_DIR="cfg/gym/finetune/${TASK}-v2"
    CONFIG_NAME=${CONFIG_TYPE:-ft_ppo_diffusion_mlp}
    ;;
  *)
    echo "Invalid mode: $MODE. Use 'pretrain' or 'finetune'" >&2
    exit 1
    ;;
esac

# Project directory: the directory the job was submitted from. Fall back to the
# current directory when SLURM_SUBMIT_DIR is unset (script run outside Slurm),
# so that neither `cd` nor the default paths below silently resolve elsewhere.
PROJECT_DIR=${SLURM_SUBMIT_DIR:-$PWD}

# Load required modules. A failure is reported but not fatal, matching the
# original best-effort behaviour.
module load devel/cuda/12.4 \
  || echo "Warning: 'module load devel/cuda/12.4' failed" >&2

# Set environment variables for WandB.
export WANDB_MODE=online
export WANDB_PROJECT=dppo_gym
export DPPO_WANDB_ENTITY=${DPPO_WANDB_ENTITY:-"dominik_roth"}  # Use personal account

# Default paths (can be overridden by environment).
export DPPO_DATA_DIR=${DPPO_DATA_DIR:-$PROJECT_DIR/data}
export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$PROJECT_DIR/log}

# Change to project directory; the relative paths below depend on it.
cd "$PROJECT_DIR" || {
  echo "Cannot change to project directory: $PROJECT_DIR" >&2
  exit 1
}

# Activate virtual environment; abort rather than run with an arbitrary python.
source .venv/bin/activate || {
  echo "Cannot activate virtual environment: $PROJECT_DIR/.venv" >&2
  exit 1
}

echo "Starting DPPO Gym experiment..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "GPU: $CUDA_VISIBLE_DEVICES"
echo "Task: $TASK"
echo "Mode: $MODE"
echo ""

# Run experiment.
# NOTE(review): WANDB_MODE is exported as "online" above, so the last argument
# always expands to wandb=online and the ":-null" fallback is never taken.
# Confirm that the run config accepts a scalar value for `wandb`.
python script/run.py \
  --config-name="$CONFIG_NAME" \
  --config-dir="$CONFIG_DIR" \
  wandb="${WANDB_MODE:-null}"
status=$?

# Propagate a training failure so the job is not reported as successful.
if [ "$status" -ne 0 ]; then
  echo "Experiment failed with exit code $status" >&2
  exit "$status"
fi

echo "Experiment completed!"