FastTD3/run_fasttd3.slurm
ys1087@partner.kit.edu 336c96bb7b Add HoReKa cluster support with SLURM and wandb integration
- Add complete HoReKa installation guide without conda dependency
- Include SLURM job script with GPU configuration and account setup
- Add helper scripts for job submission and environment testing
- Integrate wandb logging with both online and offline modes
- Support MuJoCo Playground environments for humanoid control
- Update README with clear separation of added vs original content
2025-07-22 16:15:30 +02:00

44 lines
1.1 KiB
Bash

#!/bin/bash
#SBATCH --job-name=fasttd3_test
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=accelerated
#SBATCH --time=02:00:00
#SBATCH --gres=gpu:1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=32G
#SBATCH --output=fasttd3_%j.out
#SBATCH --error=fasttd3_%j.err

# FastTD3 smoke-test training job for the HoReKa cluster.
# Requires: project checkout with a .venv virtualenv in the submit directory.
# Optional: WANDB_API_KEY for online wandb logging (offline mode used below).

# Fail fast: abort on command errors, unset variables, and pipeline failures,
# so a broken module load or missing venv doesn't burn GPU hours silently.
set -euo pipefail

# Load necessary modules
module purge
module load toolkit/CUDA/12.4

# Navigate to the project directory; :? aborts with a message if the script
# is run outside of sbatch (where SLURM_SUBMIT_DIR is not set).
cd "${SLURM_SUBMIT_DIR:?SLURM_SUBMIT_DIR not set - submit this script via sbatch}"

# Activate the virtual environment (set -e aborts if activation fails)
source .venv/bin/activate

# GPU selection: SLURM_LOCALID is only defined for tasks launched with srun.
# In a plain batch step it is unset, and the original unconditional assignment
# would blank CUDA_VISIBLE_DEVICES and hide the allocated GPU. Fall back to
# the value SLURM already exported for --gres=gpu:1, then to device 0.
export CUDA_VISIBLE_DEVICES="${SLURM_LOCALID:-${CUDA_VISIBLE_DEVICES:-0}}"
export JAX_PLATFORMS="gpu,cpu"

# Ensure wandb is logged in (set WANDB_API_KEY environment variable)
# export WANDB_API_KEY=your_api_key_here
# For testing, use offline mode
export WANDB_MODE=offline

# Run FastTD3 training with MuJoCo Playground environment
python fast_td3/train.py \
  --env_name T1JoystickFlatTerrain \
  --exp_name FastTD3_HoReKa_Test \
  --seed 42 \
  --total_timesteps 25000 \
  --num_envs 1024 \
  --batch_size 4096 \
  --eval_interval 5000 \
  --render_interval 0 \
  --project FastTD3_HoReKa

echo "Job completed at $(date)"