diff --git a/slurm/run_reppo_dmc_dev.sh b/slurm/run_reppo_dmc_dev.sh
new file mode 100755
index 0000000..9956465
--- /dev/null
+++ b/slurm/run_reppo_dmc_dev.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#SBATCH --job-name=reppo_dmc_dev
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=4
+#SBATCH --time=00:30:00
+#SBATCH --mem=16G
+#SBATCH --output=logs/reppo_dmc_dev_%j.out
+#SBATCH --error=logs/reppo_dmc_dev_%j.err
+
+# Short REPPO smoke test on a single DMC task.
+#
+# Submit from the repository root, after creating the log directory:
+#   mkdir -p logs && ENV_NAME=CartpoleBalance sbatch slurm/run_reppo_dmc_dev.sh
+#
+# Slurm opens the output and error files before this script runs, so logs/
+# has to exist at submission time.
+#
+# No W&B API key is stored in this file. Export WANDB_API_KEY in the
+# submitting shell, or run "wandb login" once on the cluster.
+
+# Stop at the first failing command so a broken setup is not reported as a
+# completed run. -u is left off because the module command and the venv
+# activate script are not part of this file and may read unset variables.
+set -eo pipefail
+
+# Load required modules
+module load devel/cuda/12.4
+
+# Set environment variables
+export WANDB_MODE=online
+export WANDB_PROJECT=reppo_dmc_dev
+export WANDB_ENTITY=dominik_roth
+
+# Change to project directory; REPPO_DIR overrides the default checkout
+REPPO_DIR=${REPPO_DIR:-/hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo}
+cd "$REPPO_DIR" || { echo "Cannot cd to $REPPO_DIR" >&2; exit 1; }
+
+# Activate virtual environment
+source .venv/bin/activate
+
+# Run DMC (mujoco_playground) test
+echo "Starting REPPO dev test with DMC..."
+echo "Job ID: $SLURM_JOB_ID"
+echo "Node: $SLURM_NODELIST"
+echo "GPU: $CUDA_VISIBLE_DEVICES"
+
+# Default to CartpoleBalance for quick test
+ENV_NAME=${ENV_NAME:-CartpoleBalance}
+# NOTE(review): EXPERIMENT_TYPE is only echoed below; nothing in this script
+# forwards it to reppo.py.
+EXPERIMENT_TYPE=${EXPERIMENT_TYPE:-mjx_dmc_small_data}
+
+echo "Environment: $ENV_NAME"
+echo "Experiment type: $EXPERIMENT_TYPE"
+
+# Run the experiment with mjx_dmc environment
+python reppo_alg/jaxrl/reppo.py \
+  env=mjx_dmc \
+  env.name="$ENV_NAME" \
+  hyperparameters.num_envs=256 \
+  hyperparameters.num_steps=32 \
+  hyperparameters.num_mini_batches=8 \
+  hyperparameters.num_epochs=4 \
+  hyperparameters.total_time_steps=1000000 \
+  wandb.mode="$WANDB_MODE" \
+  wandb.entity="$WANDB_ENTITY" \
+  wandb.project="$WANDB_PROJECT"
+
+echo "DMC dev test completed!"
\ No newline at end of file
diff --git a/slurm/run_reppo_dmc_prod.sh b/slurm/run_reppo_dmc_prod.sh
new file mode 100755
index 0000000..5f9a293
--- /dev/null
+++ b/slurm/run_reppo_dmc_prod.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#SBATCH --job-name=reppo_dmc_prod
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=24:00:00
+#SBATCH --mem=32G
+#SBATCH --output=logs/reppo_dmc_prod_%j.out
+#SBATCH --error=logs/reppo_dmc_prod_%j.err
+
+# Full-length REPPO training run on a single DMC task.
+#
+# Submit from the repository root, after creating the log directory:
+#   mkdir -p logs && ENV_NAME=WalkerRun SEED=3 sbatch slurm/run_reppo_dmc_prod.sh
+#
+# Slurm opens the output and error files before this script runs, so logs/
+# has to exist at submission time.
+#
+# No W&B API key is stored in this file. Export WANDB_API_KEY in the
+# submitting shell, or run "wandb login" once on the cluster.
+
+# Stop at the first failing command so a broken setup is not reported as a
+# completed run. -u is left off because the module command and the venv
+# activate script are not part of this file and may read unset variables.
+set -eo pipefail
+
+# Load required modules
+module load devel/cuda/12.4
+
+# Set environment variables
+export WANDB_MODE=online
+export WANDB_PROJECT=reppo_dmc_production
+export WANDB_ENTITY=dominik_roth
+
+# Change to project directory; REPPO_DIR overrides the default checkout
+REPPO_DIR=${REPPO_DIR:-/hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo}
+cd "$REPPO_DIR" || { echo "Cannot cd to $REPPO_DIR" >&2; exit 1; }
+
+# Activate virtual environment
+source .venv/bin/activate
+
+# Run DMC experiment
+echo "Starting REPPO production run with DMC..."
+echo "Job ID: $SLURM_JOB_ID"
+echo "Node: $SLURM_NODELIST"
+echo "GPU: $CUDA_VISIBLE_DEVICES"
+
+# Task and seed come from the submission environment
+ENV_NAME=${ENV_NAME:-CartpoleBalance}
+SEED=${SEED:-0}
+
+echo "Environment: $ENV_NAME"
+echo "Seed: $SEED"
+
+# Run the experiment with full 50M steps
+python reppo_alg/jaxrl/reppo.py \
+  env=mjx_dmc \
+  env.name="$ENV_NAME" \
+  hyperparameters.num_envs=1024 \
+  hyperparameters.num_steps=128 \
+  hyperparameters.num_mini_batches=128 \
+  hyperparameters.num_epochs=4 \
+  hyperparameters.total_time_steps=50000000 \
+  seed="$SEED" \
+  wandb.mode="$WANDB_MODE" \
+  wandb.entity="$WANDB_ENTITY" \
+  wandb.project="$WANDB_PROJECT"
+
+echo "Training completed!"
\ No newline at end of file
diff --git a/submit_dmc_experiments.py b/submit_dmc_experiments.py
new file mode 100755
index 0000000..4923880
--- /dev/null
+++ b/submit_dmc_experiments.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""
+Submit DMC (mujoco_playground) experiments for REPPO
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+import time
+
+# DMC tasks from the experiment plan.
+# NOTE(review): the list below has 24 entries, but it was originally described
+# as 23 tasks -- confirm against the plan.
+DMC_TASKS = [
+    "AcrobotSwingup",
+    "CartpoleBalance",
+    "CartpoleSwingup",
+    "CheetahRun",
+    "FingerSpin",
+    "FingerTurnEasy",
+    "FingerTurnHard",
+    "FishUpright",
+    "FishSwim",
+    "HopperStand",
+    "HopperHop",
+    "HumanoidStand",
+    "HumanoidWalk",
+    "HumanoidRun",
+    "ManipulatorBringBall",
+    "PendulumSwingup",
+    "PointMassEasy",
+    "ReacherEasy",
+    "ReacherHard",
+    "SwimmerSwimmer6",
+    "SwimmerSwimmer15",
+    "WalkerStand",
+    "WalkerWalk",
+    "WalkerRun",
+]
+
+# Batch script handed to sbatch, relative to the directory this is run from.
+JOB_SCRIPT = "slurm/run_reppo_dmc_prod.sh"
+# Directory used by the --output/--error paths of the batch script.
+LOG_DIR = "logs"
+
+
+def submit_job(env_name, seed=0):
+    """Submit one DMC training job through sbatch.
+
+    Args:
+        env_name: Task name, passed to the job as the ENV_NAME variable.
+        seed: Random seed, passed to the job as the SEED variable.
+
+    Returns:
+        The job id reported by sbatch as a string, or None if the
+        submission failed.
+    """
+    cmd = [
+        "sbatch",
+        f"--job-name=reppo_dmc_{env_name}_seed{seed}",
+        JOB_SCRIPT,
+    ]
+    # The batch script reads its task and seed from the environment.
+    job_env = {**os.environ, "ENV_NAME": env_name, "SEED": str(seed)}
+
+    print(f"Submitting {env_name} (seed {seed})...")
+
+    try:
+        result = subprocess.run(cmd, env=job_env, capture_output=True,
+                                text=True, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f" -> Error: {e}")
+        print(f" -> Stdout: {e.stdout}")
+        print(f" -> Stderr: {e.stderr}")
+        return None
+    except OSError as e:
+        # sbatch is missing or not executable on this machine.
+        print(f" -> Error: could not run sbatch: {e}")
+        return None
+
+    # sbatch prints "Submitted batch job N"; the id is the last word.
+    words = result.stdout.split()
+    if not words:
+        print(" -> Error: sbatch produced no output")
+        return None
+    job_id = words[-1]
+    print(f" -> Job ID: {job_id}")
+    return job_id
+
+
+def main():
+    """Submit every requested task/seed pair and print a summary.
+
+    Returns:
+        0 if every submission succeeded, 1 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Submit DMC experiments")
+    parser.add_argument("--seeds", type=int, default=5,
+                        help="Number of seeds to run")
+    parser.add_argument("--tasks", nargs="+", default=DMC_TASKS,
+                        help="List of tasks to run")
+    parser.add_argument("--delay", type=float, default=1.0,
+                        help="Delay between submissions (seconds)")
+
+    args = parser.parse_args()
+
+    total = len(args.tasks) * max(args.seeds, 0)
+    print(f"Submitting {len(args.tasks)} DMC tasks with {args.seeds} seeds each")
+    print(f"Total jobs: {total}")
+    print()
+
+    # Slurm does not create the directory for --output/--error files.
+    os.makedirs(LOG_DIR, exist_ok=True)
+
+    job_ids = []
+
+    for task in args.tasks:
+        for seed in range(args.seeds):
+            job_id = submit_job(task, seed)
+            if job_id:
+                job_ids.append(job_id)
+
+            # Add delay to avoid overwhelming the scheduler
+            time.sleep(args.delay)
+
+    print(f"\nSubmitted {len(job_ids)} jobs successfully:")
+    for i, job_id in enumerate(job_ids, start=1):
+        print(f" {i}: {job_id}")
+
+    failed = total - len(job_ids)
+    if failed:
+        print(f"\n{failed} submissions failed")
+
+    print("\nMonitor with: squeue -u $USER")
+    print(f"Check logs in: {LOG_DIR}/")
+
+    return 1 if failed else 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file