reppo/slurm/run_reppo_brax.sh
ys1087@partner.kit.edu 137b9e80c9 Add HoReKa cluster support with SLURM scripts and wandb integration
- Add SLURM job scripts for ManiSkill and Brax environments
- Add job submission helper script with environment validation
- Update README with HoReKa installation and usage instructions
- Create logs directory structure
- Configure wandb integration (requires external API key setup)
2025-07-22 16:33:12 +02:00

52 lines
1.4 KiB
Bash
Executable File

#!/bin/bash
#SBATCH --job-name=reppo_brax
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --time=04:00:00
#SBATCH --mem=24G
#SBATCH --output=logs/reppo_brax_%j.out
#SBATCH --error=logs/reppo_brax_%j.err
#
# SLURM batch job: run REPPO training on a Brax environment with wandb logging.
#
# Usage:
#   sbatch run_reppo_brax.sh
#   ENV_NAME=<env> EXPERIMENT_TYPE=<override> sbatch run_reppo_brax.sh
#
# Environment variables read by this script:
#   ENV_NAME           Brax environment name            (default: ant)
#   EXPERIMENT_TYPE    value for experiment_override    (default: mjx_dmc_small_data)
#   WANDB_ENTITY       forwarded as wandb.entity        (warning if empty)
#   WANDB_API_KEY      not used directly here           (warning if empty)
#   REPPO_PROJECT_DIR  project checkout containing .venv and reppo_alg/
#                      (default: the path below)
#
# NOTE: the stdout/stderr paths in the #SBATCH directives are relative, so a
# `logs/` directory is expected to exist where the job is submitted from.
#
# The job exits with the training process's exit status so that a crashed run
# is not reported by SLURM as a successful one.
#
# Strict mode (`set -eu`) is intentionally not enabled: the `module` function
# and the venv activate script are sourced into this shell and are not under
# this script's control. Critical steps are checked explicitly instead.

# Print an error to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a non-fatal diagnostic to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Load required modules. Kept non-fatal (as before), but no longer silent.
module load devel/cuda/12.4 \
  || warn "could not load module devel/cuda/12.4; continuing without it"

# Set environment variables
export WANDB_MODE=online
export WANDB_PROJECT=reppo_brax

# Change to project directory; everything below uses paths relative to it,
# so a failed cd must stop the job.
project_dir="${REPPO_PROJECT_DIR:-/hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo}"
cd -- "$project_dir" || die "cannot change to project directory: $project_dir"

# Activate virtual environment; without it `python` would resolve to some
# other interpreter that may lack the project's dependencies.
# shellcheck source=/dev/null
source .venv/bin/activate \
  || die "cannot activate virtual environment: $project_dir/.venv"

# WANDB_API_KEY and WANDB_ENTITY are expected to be set before submission.
# Missing values are reported but not fatal, matching the previous behavior.
if [[ -z "${WANDB_ENTITY:-}" ]]; then
  warn "WANDB_ENTITY is not set; wandb.entity will be passed as an empty value"
fi
if [[ -z "${WANDB_API_KEY:-}" ]]; then
  warn "WANDB_API_KEY is not set; wandb credentials must be configured some other way"
fi

# Run REPPO with Brax environment
echo "Starting REPPO training with Brax..."
echo "Job ID: ${SLURM_JOB_ID:-}"
echo "Node: ${SLURM_NODELIST:-}"
echo "GPU: ${CUDA_VISIBLE_DEVICES:-}"

# Default environment: ant (can be overridden)
ENV_NAME=${ENV_NAME:-ant}
EXPERIMENT_TYPE=${EXPERIMENT_TYPE:-mjx_dmc_small_data}
echo "Environment: $ENV_NAME"
echo "Experiment type: $EXPERIMENT_TYPE"

# Build the override list as an array so each key=value stays a single
# argument even if a value contains whitespace or glob characters.
train_args=(
  env=brax
  "env_name=${ENV_NAME}"
  "experiment_override=${EXPERIMENT_TYPE}"
  wandb.mode=online
  "wandb.entity=${WANDB_ENTITY:-}"
  "wandb.project=${WANDB_PROJECT}"
  "wandb.name=reppo_${ENV_NAME}_${EXPERIMENT_TYPE}_${SLURM_JOB_ID:-}"
)

# Run the experiment and propagate its exit status to SLURM.
python reppo_alg/jaxrl/reppo.py "${train_args[@]}"
status=$?
if (( status != 0 )); then
  printf 'error: training failed with exit status %d\n' "$status" >&2
  exit "$status"
fi

echo "Training completed!"