separate dmc setup...

This commit is contained in:
ys1087@partner.kit.edu 2025-07-29 14:58:43 +02:00
parent 30df18239a
commit a02e258f1c
3 changed files with 209 additions and 0 deletions

55
slurm/run_reppo_dmc_dev.sh Executable file
View File

@ -0,0 +1,55 @@
#!/bin/bash
#SBATCH --job-name=reppo_dmc_dev
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=dev_accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=4
#SBATCH --time=00:30:00
#SBATCH --mem=16G
#SBATCH --output=logs/reppo_dmc_dev_%j.out
#SBATCH --error=logs/reppo_dmc_dev_%j.err

# Short dev/smoke-test run of REPPO on a DMC (mujoco_playground) task.
#
# Optional variables, read from the environment of the job:
#   ENV_NAME         DMC task to run (default: CartpoleBalance)
#   EXPERIMENT_TYPE  label echoed to the log (default: mjx_dmc_small_data)
#
# NOTE: the logs/ directory used by --output/--error above is relative to the
# submission directory and has to exist before the job is submitted.

# Load required modules
module load devel/cuda/12.4

# Set environment variables
export WANDB_MODE=online
export WANDB_PROJECT=reppo_dmc_dev
export WANDB_ENTITY=dominik_roth

# WANDB_API_KEY is deliberately NOT set here: credentials must never be
# committed to the repository. Provide the key through the submitting
# environment or run `wandb login` once on the cluster. Any key that was
# previously committed in this file should be revoked.
if [ -z "${WANDB_API_KEY:-}" ] && [ ! -f "$HOME/.netrc" ]; then
    echo "Warning: no WANDB_API_KEY in the environment and no ~/.netrc found; W&B login may fail." >&2
fi

# Change to project directory; abort instead of running from the wrong place
cd /hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo || {
    echo "Error: cannot change to project directory" >&2
    exit 1
}

# Activate virtual environment
source .venv/bin/activate || {
    echo "Error: cannot activate virtual environment .venv" >&2
    exit 1
}

# Run DMC (mujoco_playground) test
echo "Starting REPPO dev test with DMC..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "GPU: $CUDA_VISIBLE_DEVICES"

# Default to CartpoleBalance for quick test
ENV_NAME=${ENV_NAME:-CartpoleBalance}
# NOTE(review): EXPERIMENT_TYPE is only echoed below and is not passed to
# reppo.py -- confirm whether it should select a config.
EXPERIMENT_TYPE=${EXPERIMENT_TYPE:-mjx_dmc_small_data}
echo "Environment: $ENV_NAME"
echo "Experiment type: $EXPERIMENT_TYPE"

# Run the experiment with mjx_dmc environment
python reppo_alg/jaxrl/reppo.py \
    env=mjx_dmc \
    env.name="$ENV_NAME" \
    hyperparameters.num_envs=256 \
    hyperparameters.num_steps=32 \
    hyperparameters.num_mini_batches=8 \
    hyperparameters.num_epochs=4 \
    hyperparameters.total_time_steps=1000000 \
    wandb.mode="$WANDB_MODE" \
    wandb.entity="$WANDB_ENTITY" \
    wandb.project="$WANDB_PROJECT"
status=$?

# Propagate a training failure so Slurm does not record the job as successful
if [ "$status" -ne 0 ]; then
    echo "DMC dev test failed with exit code $status" >&2
    exit "$status"
fi
echo "DMC dev test completed!"

56
slurm/run_reppo_dmc_prod.sh Executable file
View File

@ -0,0 +1,56 @@
#!/bin/bash
#SBATCH --job-name=reppo_dmc_prod
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --time=24:00:00
#SBATCH --mem=32G
#SBATCH --output=logs/reppo_dmc_prod_%j.out
#SBATCH --error=logs/reppo_dmc_prod_%j.err

# Production run of REPPO on a single DMC (mujoco_playground) task.
#
# Optional variables, read from the environment of the job:
#   ENV_NAME  DMC task to run (default: CartpoleBalance)
#   SEED      random seed passed to reppo.py (default: 0)
#
# NOTE: the logs/ directory used by --output/--error above is relative to the
# submission directory and has to exist before the job is submitted.

# Load required modules
module load devel/cuda/12.4

# Set environment variables
export WANDB_MODE=online
export WANDB_PROJECT=reppo_dmc_production
export WANDB_ENTITY=dominik_roth

# WANDB_API_KEY is deliberately NOT set here: credentials must never be
# committed to the repository. Provide the key through the submitting
# environment or run `wandb login` once on the cluster. Any key that was
# previously committed in this file should be revoked.
if [ -z "${WANDB_API_KEY:-}" ] && [ ! -f "$HOME/.netrc" ]; then
    echo "Warning: no WANDB_API_KEY in the environment and no ~/.netrc found; W&B login may fail." >&2
fi

# Change to project directory; abort instead of running from the wrong place
cd /hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo || {
    echo "Error: cannot change to project directory" >&2
    exit 1
}

# Activate virtual environment
source .venv/bin/activate || {
    echo "Error: cannot activate virtual environment .venv" >&2
    exit 1
}

# Run DMC experiment
echo "Starting REPPO production run with DMC..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "GPU: $CUDA_VISIBLE_DEVICES"

# Environment name and seed passed as variables
ENV_NAME=${ENV_NAME:-CartpoleBalance}
SEED=${SEED:-0}
echo "Environment: $ENV_NAME"
echo "Seed: $SEED"

# Run the experiment with full 50M steps
python reppo_alg/jaxrl/reppo.py \
    env=mjx_dmc \
    env.name="$ENV_NAME" \
    hyperparameters.num_envs=1024 \
    hyperparameters.num_steps=128 \
    hyperparameters.num_mini_batches=128 \
    hyperparameters.num_epochs=4 \
    hyperparameters.total_time_steps=50000000 \
    seed="$SEED" \
    wandb.mode="$WANDB_MODE" \
    wandb.entity="$WANDB_ENTITY" \
    wandb.project="$WANDB_PROJECT"
status=$?

# Propagate a training failure so Slurm does not record the job as successful
if [ "$status" -ne 0 ]; then
    echo "Training failed with exit code $status" >&2
    exit "$status"
fi
echo "Training completed!"

98
submit_dmc_experiments.py Executable file
View File

@ -0,0 +1,98 @@
#!/usr/bin/env python3
"""
Submit DMC (mujoco_playground) experiments for REPPO
"""
import argparse
import os
import subprocess
import time
# DMC (mujoco_playground) task names from the experiment plan; used as the
# default value of the --tasks command-line option.
# NOTE(review): the plan reportedly has 23 tasks, but 24 names are listed
# here -- confirm which entry (if any) should be removed.
DMC_TASKS = [
"AcrobotSwingup",
"CartpoleBalance",
"CartpoleSwingup",
"CheetahRun",
"FingerSpin",
"FingerTurnEasy",
"FingerTurnHard",
"FishUpright",
"FishSwim",
"HopperStand",
"HopperHop",
"HumanoidStand",
"HumanoidWalk",
"HumanoidRun",
"ManipulatorBringBall",
"PendulumSwingup",
"PointMassEasy",
"ReacherEasy",
"ReacherHard",
"SwimmerSwimmer6",
"SwimmerSwimmer15",
"WalkerStand",
"WalkerWalk",
"WalkerRun"
]
def submit_job(env_name, seed=0):
    """Submit a single DMC job to Slurm via ``sbatch``.

    The task name and seed are handed to ``slurm/run_reppo_dmc_prod.sh``
    through the ``ENV_NAME`` and ``SEED`` environment variables of the
    ``sbatch`` process, added on top of the current environment.

    Args:
        env_name: DMC task name, e.g. ``"CartpoleBalance"``.
        seed: Seed for the run; converted to a string for ``SEED``.

    Returns:
        The last whitespace-separated field of ``sbatch``'s stdout (the job
        ID) as a string, or ``None`` if the submission failed: ``sbatch``
        exited non-zero, could not be started, or printed no output.
    """
    cmd = [
        "sbatch",
        f"--job-name=reppo_dmc_{env_name}_seed{seed}",
        "slurm/run_reppo_dmc_prod.sh",
    ]
    env_vars = {
        "ENV_NAME": env_name,
        "SEED": str(seed),
    }
    print(f"Submitting {env_name} (seed {seed})...")
    try:
        result = subprocess.run(
            cmd,
            env={**os.environ, **env_vars},
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f" -> Error: {e}")
        print(f" -> Stdout: {e.stdout}")
        print(f" -> Stderr: {e.stderr}")
        return None
    except OSError as e:
        # Raised when sbatch itself cannot be executed, e.g. it is not on PATH.
        print(f" -> Error: {e}")
        return None

    fields = result.stdout.split()
    if not fields:
        # Without output there is no job ID to report; treat it as a failure
        # instead of crashing with an IndexError.
        print(" -> Error: sbatch produced no output; cannot determine job ID")
        return None

    job_id = fields[-1]
    print(f" -> Job ID: {job_id}")
    return job_id
def main(argv=None):
    """Parse command-line options and submit one Slurm job per (task, seed).

    Args:
        argv: Optional list of command-line arguments; when ``None``,
            argparse reads ``sys.argv[1:]``.

    Returns:
        ``0`` if every submission returned a job ID, ``1`` if at least one
        submission failed.
    """
    parser = argparse.ArgumentParser(description="Submit DMC experiments")
    parser.add_argument("--seeds", type=int, default=5, help="Number of seeds to run")
    parser.add_argument("--tasks", nargs="+", default=DMC_TASKS,
                        help="List of tasks to run")
    parser.add_argument("--delay", type=float, default=1.0,
                        help="Delay between submissions (seconds)")
    args = parser.parse_args(argv)

    if args.seeds < 0:
        parser.error("--seeds must be non-negative")
    if args.delay < 0:
        # time.sleep() raises ValueError on negative values.
        parser.error("--delay must be non-negative")

    # The Slurm script writes its stdout/stderr into logs/ relative to the
    # submission directory; make sure it exists before submitting anything.
    os.makedirs("logs", exist_ok=True)

    jobs = [(task, seed) for task in args.tasks for seed in range(args.seeds)]
    print(f"Submitting {len(args.tasks)} DMC tasks with {args.seeds} seeds each")
    print(f"Total jobs: {len(jobs)}")
    print()

    job_ids = []
    for index, (task, seed) in enumerate(jobs):
        if index:
            # Delay between submissions to avoid overwhelming the scheduler;
            # nothing to wait for before the first or after the last job.
            time.sleep(args.delay)
        job_id = submit_job(task, seed)
        if job_id:
            job_ids.append(job_id)

    print(f"\nSubmitted {len(job_ids)} jobs successfully:")
    for number, job_id in enumerate(job_ids, start=1):
        print(f" {number}: {job_id}")

    failed = len(jobs) - len(job_ids)
    if failed:
        print(f"\n{failed} submission(s) failed; see the errors above.")

    print("\nMonitor with: squeue -u $USER")
    print("Check logs in: logs/")
    return 1 if failed else 0


if __name__ == "__main__":
    # Exit non-zero when any submission failed so callers can detect it.
    raise SystemExit(main())