#!/bin/bash
#SBATCH --job-name=reppo_prod
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --time=24:00:00
#SBATCH --mem=32G
#SBATCH --output=logs/reppo_prod_%j.out
#SBATCH --error=logs/reppo_prod_%j.err
#
# SLURM batch script: single-GPU REPPO production run on a Brax environment.
#
# Usage:
#   sbatch <this script>
#   ENV_NAME=humanoid SEED=3 sbatch <this script>
#
# Environment variables read:
#   ENV_NAME       Brax environment name passed as env.name (default: ant).
#   SEED           Random seed passed as seed (default: 0).
#   WANDB_API_KEY  Weights & Biases credential. Must come from the submitting
#                  environment (or a prior `wandb login`); it is deliberately
#                  NOT stored in this file.
#
# NOTE: the --output/--error paths above are relative, so the logs/ directory
# presumably has to exist in the submission directory before calling sbatch;
# creating it from inside this script would be too late.

# Abort on the first failing command so that a failed setup step or a crashed
# training run is reported as a failed job instead of a successful one.
# `-u` is intentionally not enabled: `module` and virtualenv `activate`
# scripts may reference unset variables.
set -eo pipefail

# Load required modules
module load devel/cuda/12.4

# Set environment variables
export WANDB_MODE=online
export WANDB_PROJECT=reppo_brax_production
export WANDB_ENTITY=dominik_roth

# Never hardcode the API key here. Any key that was previously committed in
# this script should be considered leaked and be revoked.
if [[ -z "${WANDB_API_KEY:-}" ]]; then
  echo "Warning: WANDB_API_KEY is not set; relying on existing 'wandb login' credentials." >&2
fi

# Change to project directory
project_dir=/hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo
cd "$project_dir" || {
  echo "Error: cannot change to project directory: $project_dir" >&2
  exit 1
}

# Activate virtual environment
source .venv/bin/activate

# Use paper hyperparameters for production runs
ENV_NAME=${ENV_NAME:-ant}
SEED=${SEED:-0}

echo "Starting REPPO production run..."
echo "Job ID: ${SLURM_JOB_ID:-}"
echo "Node: ${SLURM_NODELIST:-}"
echo "GPU: ${CUDA_VISIBLE_DEVICES:-}"
echo "Environment: $ENV_NAME"
echo "Seed: $SEED"

# Run the experiment with paper hyperparameters
python reppo_alg/jaxrl/reppo.py \
  env=brax \
  env.name="$ENV_NAME" \
  hyperparameters.num_envs=1024 \
  hyperparameters.num_steps=128 \
  hyperparameters.num_mini_batches=64 \
  hyperparameters.num_epochs=8 \
  hyperparameters.total_time_steps=50000000 \
  hyperparameters.lr=0.0003 \
  hyperparameters.lmbda=0.95 \
  hyperparameters.kl_bound=0.1 \
  seed="$SEED" \
  wandb.mode=online \
  wandb.entity="$WANDB_ENTITY" \
  wandb.project="$WANDB_PROJECT"

# Only reached when the training command exited successfully (see set -e).
echo "Production run completed!"