separate dmc setup...
This commit is contained in:
parent
30df18239a
commit
a02e258f1c
55
slurm/run_reppo_dmc_dev.sh
Executable file
55
slurm/run_reppo_dmc_dev.sh
Executable file
@ -0,0 +1,55 @@
|
|||||||
|
#!/bin/bash
#SBATCH --job-name=reppo_dmc_dev
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=dev_accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=4
#SBATCH --time=00:30:00
#SBATCH --mem=16G
#SBATCH --output=logs/reppo_dmc_dev_%j.out
#SBATCH --error=logs/reppo_dmc_dev_%j.err
#
# Short REPPO dev test on a DMC (mujoco_playground) task.
#
# Optional environment variables (read from the job environment):
#   ENV_NAME         task passed to reppo.py as env.name
#                    (default: CartpoleBalance)
#   EXPERIMENT_TYPE  label that is only echoed to the log; it is not passed
#                    to reppo.py (default: mjx_dmc_small_data)
#   WANDB_API_KEY    Weights & Biases credential. Never commit it to the
#                    repository; export it in the submitting shell instead.
#
# NOTE(review): stdout/stderr go to logs/ relative to the submit directory;
# presumably that directory has to exist before calling sbatch -- confirm.

# Print an error message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Load required modules. Kept best-effort: a failure is reported but does not
# abort, matching the previous behaviour of continuing regardless.
module load devel/cuda/12.4 \
  || printf 'warning: could not load module devel/cuda/12.4\n' >&2

# Set environment variables
export WANDB_MODE=online
export WANDB_PROJECT=reppo_dmc_dev
export WANDB_ENTITY=dominik_roth

# The API key must come from the environment instead of being hard-coded.
if [[ -n "${WANDB_API_KEY:-}" ]]; then
  export WANDB_API_KEY
else
  # NOTE(review): wandb presumably falls back to credentials stored by
  # `wandb login` when no key is exported -- confirm for this cluster.
  printf 'warning: WANDB_API_KEY is not set\n' >&2
fi

# Change to project directory; everything below uses relative paths.
project_dir=/hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo
cd "$project_dir" || die "cannot cd to $project_dir"

# Activate virtual environment
# shellcheck disable=SC1091
source .venv/bin/activate || die "cannot activate .venv in $project_dir"

# Run DMC (mujoco_playground) test
echo "Starting REPPO dev test with DMC..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "GPU: $CUDA_VISIBLE_DEVICES"

# Default to CartpoleBalance for quick test
ENV_NAME=${ENV_NAME:-CartpoleBalance}
EXPERIMENT_TYPE=${EXPERIMENT_TYPE:-mjx_dmc_small_data}

echo "Environment: $ENV_NAME"
echo "Experiment type: $EXPERIMENT_TYPE"

# Run the experiment with mjx_dmc environment
python reppo_alg/jaxrl/reppo.py \
  env=mjx_dmc \
  env.name="$ENV_NAME" \
  hyperparameters.num_envs=256 \
  hyperparameters.num_steps=32 \
  hyperparameters.num_mini_batches=8 \
  hyperparameters.num_epochs=4 \
  hyperparameters.total_time_steps=1000000 \
  wandb.mode=online \
  wandb.entity="$WANDB_ENTITY" \
  wandb.project="$WANDB_PROJECT"
rc=$?

# Propagate a training failure so the scheduler does not record the job as
# successful.
if (( rc != 0 )); then
  printf 'error: reppo.py exited with status %d\n' "$rc" >&2
  exit "$rc"
fi

echo "DMC dev test completed!"
|
56
slurm/run_reppo_dmc_prod.sh
Executable file
56
slurm/run_reppo_dmc_prod.sh
Executable file
@ -0,0 +1,56 @@
|
|||||||
|
#!/bin/bash
#SBATCH --job-name=reppo_dmc_prod
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --time=24:00:00
#SBATCH --mem=32G
#SBATCH --output=logs/reppo_dmc_prod_%j.out
#SBATCH --error=logs/reppo_dmc_prod_%j.err
#
# Production REPPO run (50M time steps) on a DMC (mujoco_playground) task.
#
# Optional environment variables (read from the job environment):
#   ENV_NAME       task passed to reppo.py as env.name
#                  (default: CartpoleBalance)
#   SEED           value passed to reppo.py as seed (default: 0)
#   WANDB_API_KEY  Weights & Biases credential. Never commit it to the
#                  repository; export it in the submitting shell instead.
#
# NOTE(review): stdout/stderr go to logs/ relative to the submit directory;
# presumably that directory has to exist before calling sbatch -- confirm.

# Print an error message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Load required modules. Kept best-effort: a failure is reported but does not
# abort, matching the previous behaviour of continuing regardless.
module load devel/cuda/12.4 \
  || printf 'warning: could not load module devel/cuda/12.4\n' >&2

# Set environment variables
export WANDB_MODE=online
export WANDB_PROJECT=reppo_dmc_production
export WANDB_ENTITY=dominik_roth

# The API key must come from the environment instead of being hard-coded.
if [[ -n "${WANDB_API_KEY:-}" ]]; then
  export WANDB_API_KEY
else
  # NOTE(review): wandb presumably falls back to credentials stored by
  # `wandb login` when no key is exported -- confirm for this cluster.
  printf 'warning: WANDB_API_KEY is not set\n' >&2
fi

# Change to project directory; everything below uses relative paths.
project_dir=/hkfs/home/project/hk-project-robolear/ys1087/Projects/reppo
cd "$project_dir" || die "cannot cd to $project_dir"

# Activate virtual environment
# shellcheck disable=SC1091
source .venv/bin/activate || die "cannot activate .venv in $project_dir"

# Run DMC experiment
echo "Starting REPPO production run with DMC..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "GPU: $CUDA_VISIBLE_DEVICES"

# Environment name and seed passed as variables
ENV_NAME=${ENV_NAME:-CartpoleBalance}
SEED=${SEED:-0}

echo "Environment: $ENV_NAME"
echo "Seed: $SEED"

# Run the experiment with full 50M steps
python reppo_alg/jaxrl/reppo.py \
  env=mjx_dmc \
  env.name="$ENV_NAME" \
  hyperparameters.num_envs=1024 \
  hyperparameters.num_steps=128 \
  hyperparameters.num_mini_batches=128 \
  hyperparameters.num_epochs=4 \
  hyperparameters.total_time_steps=50000000 \
  seed="$SEED" \
  wandb.mode=online \
  wandb.entity="$WANDB_ENTITY" \
  wandb.project="$WANDB_PROJECT"
rc=$?

# Propagate a training failure so the scheduler does not record the job as
# successful.
if (( rc != 0 )); then
  printf 'error: reppo.py exited with status %d\n' "$rc" >&2
  exit "$rc"
fi

echo "Training completed!"
|
98
submit_dmc_experiments.py
Executable file
98
submit_dmc_experiments.py
Executable file
@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Submit DMC (mujoco_playground) experiments for REPPO
|
||||||
|
"""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# List of DMC tasks from experiment plan (NOTE: the plan says 23, but 24 are listed here -- verify)
|
||||||
|
# Task names used as the default for the --tasks option; one job is submitted
# per (task, seed) pair. Entries are grouped by name prefix, order unchanged.
DMC_TASKS = [
    "AcrobotSwingup",
    "CartpoleBalance", "CartpoleSwingup",
    "CheetahRun",
    "FingerSpin", "FingerTurnEasy", "FingerTurnHard",
    "FishUpright", "FishSwim",
    "HopperStand", "HopperHop",
    "HumanoidStand", "HumanoidWalk", "HumanoidRun",
    "ManipulatorBringBall",
    "PendulumSwingup",
    "PointMassEasy",
    "ReacherEasy", "ReacherHard",
    "SwimmerSwimmer6", "SwimmerSwimmer15",
    "WalkerStand", "WalkerWalk", "WalkerRun",
]
|
||||||
|
|
||||||
|
def submit_job(env_name, seed=0):
    """Submit a single DMC job via ``sbatch``.

    The task name and seed are handed to ``slurm/run_reppo_dmc_prod.sh``
    through the ``ENV_NAME`` and ``SEED`` environment variables, layered on
    top of the current process environment.

    Args:
        env_name: DMC task name; also embedded in the Slurm job name.
        seed: Seed for the run; converted to ``str`` for the environment.

    Returns:
        The job ID parsed from sbatch's stdout as a string, or ``None`` when
        sbatch could not be run, exited non-zero, or printed no job ID.
    """
    # Local import: use os.environ directly instead of reaching through the
    # undocumented ``subprocess.os`` attribute.
    import os

    cmd = [
        "sbatch",
        f"--job-name=reppo_dmc_{env_name}_seed{seed}",
        "slurm/run_reppo_dmc_prod.sh",
    ]
    job_env = {**os.environ, "ENV_NAME": env_name, "SEED": str(seed)}

    print(f"Submitting {env_name} (seed {seed})...")

    try:
        result = subprocess.run(cmd, env=job_env,
                                capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f" -> Error: {e}")
        print(f" -> Stdout: {e.stdout}")
        print(f" -> Stderr: {e.stderr}")
        return None
    except OSError as e:
        # e.g. sbatch is not installed / not on PATH on this machine.
        print(f" -> Error: could not run sbatch: {e}")
        return None

    # Take the first purely numeric token so that any text sbatch prints
    # after the ID is not mistaken for it; empty output yields None rather
    # than an IndexError.
    job_id = next((tok for tok in result.stdout.split() if tok.isdigit()), None)
    if job_id is None:
        print(f" -> Error: could not parse job ID from sbatch output: {result.stdout!r}")
        return None

    print(f" -> Job ID: {job_id}")
    return job_id
|
||||||
|
|
||||||
|
def main():
    """Parse command-line options and submit one job per (task, seed) pair.

    Options: ``--seeds`` (number of seeds, default 5), ``--tasks`` (task
    names, default ``DMC_TASKS``) and ``--delay`` (seconds to sleep after
    each submission, default 1.0). Prints a summary of the job IDs that
    ``submit_job`` returned.
    """
    cli = argparse.ArgumentParser(description="Submit DMC experiments")
    cli.add_argument("--seeds", type=int, default=5, help="Number of seeds to run")
    cli.add_argument("--tasks", nargs="+", default=DMC_TASKS,
                     help="List of tasks to run")
    cli.add_argument("--delay", type=float, default=1.0,
                     help="Delay between submissions (seconds)")
    opts = cli.parse_args()

    n_tasks = len(opts.tasks)
    print(f"Submitting {n_tasks} DMC tasks with {opts.seeds} seeds each")
    print(f"Total jobs: {n_tasks * opts.seeds}")
    print()

    submitted = []
    for task_name in opts.tasks:
        for seed_idx in range(opts.seeds):
            jid = submit_job(task_name, seed_idx)
            if jid:
                submitted.append(jid)

            # Pause after every attempt so the scheduler is not flooded.
            time.sleep(opts.delay)

    print(f"\nSubmitted {len(submitted)} jobs successfully:")
    for position, jid in enumerate(submitted, start=1):
        print(f" {position}: {jid}")

    print("\nMonitor with: squeue -u $USER")
    print("Check logs in: logs/")


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user