From bda37869e121b4db19f584d81cf160ced7450abe Mon Sep 17 00:00:00 2001
From: "ys1087@partner.kit.edu"
Date: Wed, 27 Aug 2025 21:06:44 +0200
Subject: [PATCH] Add remaining validation test scripts and D3IL installer

- Additional robomimic fine-tuning tests: can, square
- D3IL avoid_m2 and avoid_m3 validation scripts
- D3IL installation script for SLURM
- Add d3il_repo/ to gitignore
- Comprehensive test coverage for all environment types
---
Reviewer note (ignored by git am): the script bodies below were amended after
export -- fail-fast mode, quoted $SLURM_SUBMIT_DIR, idempotent clone, safe
LD_LIBRARY_PATH append, trailing newlines. Hunk sizes and the diffstat were
updated to match; the blob ids on the "index" lines of the new files are stale
and only matter for `git am -3`. The blank context line in the .gitignore hunk
was restored by inference -- confirm against the target tree.

 .gitignore                                    |  1 +
 install_d3il.sh                               | 42 ++++++++++++++++++
 .../dev_tests/test_d3il_avoid_m2_finetune.sh  | 35 +++++++++++++++
 .../dev_tests/test_d3il_avoid_m2_pretrain.sh  | 35 +++++++++++++++
 .../dev_tests/test_d3il_avoid_m3_pretrain.sh  | 35 +++++++++++++++
 .../dev_tests/test_robomimic_can_finetune.sh  | 45 +++++++++++++++++++
 .../test_robomimic_square_finetune.sh         | 45 +++++++++++++++++++
 7 files changed, 238 insertions(+)
 create mode 100644 install_d3il.sh
 create mode 100644 slurm/dev_tests/test_d3il_avoid_m2_finetune.sh
 create mode 100644 slurm/dev_tests/test_d3il_avoid_m2_pretrain.sh
 create mode 100644 slurm/dev_tests/test_d3il_avoid_m3_pretrain.sh
 create mode 100644 slurm/dev_tests/test_robomimic_can_finetune.sh
 create mode 100644 slurm/dev_tests/test_robomimic_square_finetune.sh

diff --git a/.gitignore b/.gitignore
index f145a12..237d8bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -144,3 +144,4 @@ dmypy.json
 
 cfg/robomimic/*.sh
 *.out
+d3il_repo/
diff --git a/install_d3il.sh b/install_d3il.sh
new file mode 100644
index 0000000..56df416
--- /dev/null
+++ b/install_d3il.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_d3il_install
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=4
+#SBATCH --time=00:20:00
+#SBATCH --mem=16G
+#SBATCH --output=logs/dppo_d3il_install_%j.out
+#SBATCH --error=logs/dppo_d3il_install_%j.err
+#
+# Clones the D3IL fork into d3il_repo/ and installs it into the project venv.
+# Submit from the directory that contains .venv/ and an existing logs/
+# (Slurm does not create the --output/--error directory).
+
+# Abort on the first failing step so a broken install is reported as FAILED.
+set -eo pipefail
+
+module load devel/cuda/12.4
+
+echo "Installing D3IL for DPPO..."
+cd "$SLURM_SUBMIT_DIR"
+source .venv/bin/activate
+
+# Install D3IL fork (skip the clone on re-runs; git refuses an existing target)
+if [[ ! -d d3il_repo/.git ]]; then
+    echo "Cloning D3IL repository..."
+    git clone https://github.com/allenzren/d3il d3il_repo
+fi
+
+echo "Installing d3il environments..."
+pip install -e d3il_repo/environments/d3il
+pip install -e d3il_repo/environments/d3il/envs/gym_avoiding_env/
+
+echo "D3IL installation completed!"
+
+# Test import
+# NOTE(review): confirm 'd3il_benchmark' is the module name the packages above provide.
+echo "Testing D3IL import..."
+python -c "import d3il_benchmark; print('D3IL imported successfully!')"
diff --git a/slurm/dev_tests/test_d3il_avoid_m2_finetune.sh b/slurm/dev_tests/test_d3il_avoid_m2_finetune.sh
new file mode 100644
index 0000000..8cde6cd
--- /dev/null
+++ b/slurm/dev_tests/test_d3il_avoid_m2_finetune.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_d3il_avoid_m2_ft
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=16G
+#SBATCH --output=logs/dppo_d3il_avoid_m2_ft_%j.out
+#SBATCH --error=logs/dppo_d3il_avoid_m2_ft_%j.err
+#
+# Short D3IL avoid_m2 fine-tuning run (50 training iterations).
+# Submit from the directory that contains .venv/ and an existing logs/.
+
+# Abort on the first failing step so Slurm reports the job as FAILED.
+set -eo pipefail
+
+module load devel/cuda/12.4
+
+# Environment variables
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY="${DPPO_WANDB_ENTITY:-dominik_roth}"
+export DPPO_DATA_DIR="${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}"
+export DPPO_LOG_DIR="${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}"
+
+cd "$SLURM_SUBMIT_DIR"
+source .venv/bin/activate
+
+echo "Testing D3IL avoid_m2 fine-tuning..."
+python script/run.py --config-name=ft_ppo_diffusion_mlp \
+    --config-dir=cfg/d3il/finetune/avoid_m2 \
+    train.n_train_itr=50 \
+    train.save_model_freq=25
diff --git a/slurm/dev_tests/test_d3il_avoid_m2_pretrain.sh b/slurm/dev_tests/test_d3il_avoid_m2_pretrain.sh
new file mode 100644
index 0000000..ba33714
--- /dev/null
+++ b/slurm/dev_tests/test_d3il_avoid_m2_pretrain.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_d3il_avoid_m2_pre
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=16G
+#SBATCH --output=logs/dppo_d3il_avoid_m2_pre_%j.out
+#SBATCH --error=logs/dppo_d3il_avoid_m2_pre_%j.err
+#
+# Short D3IL avoid_m2 pre-training run (50 epochs).
+# Submit from the directory that contains .venv/ and an existing logs/.
+
+# Abort on the first failing step so Slurm reports the job as FAILED.
+set -eo pipefail
+
+module load devel/cuda/12.4
+
+# Environment variables
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY="${DPPO_WANDB_ENTITY:-dominik_roth}"
+export DPPO_DATA_DIR="${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}"
+export DPPO_LOG_DIR="${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}"
+
+cd "$SLURM_SUBMIT_DIR"
+source .venv/bin/activate
+
+echo "Testing D3IL avoid_m2 pre-training..."
+python script/run.py --config-name=pre_diffusion_mlp \
+    --config-dir=cfg/d3il/pretrain/avoid_m2 \
+    train.n_epochs=50 \
+    train.save_model_freq=25
diff --git a/slurm/dev_tests/test_d3il_avoid_m3_pretrain.sh b/slurm/dev_tests/test_d3il_avoid_m3_pretrain.sh
new file mode 100644
index 0000000..4e617fa
--- /dev/null
+++ b/slurm/dev_tests/test_d3il_avoid_m3_pretrain.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_d3il_avoid_m3_pre
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=16G
+#SBATCH --output=logs/dppo_d3il_avoid_m3_pre_%j.out
+#SBATCH --error=logs/dppo_d3il_avoid_m3_pre_%j.err
+#
+# Short D3IL avoid_m3 pre-training run (50 epochs).
+# Submit from the directory that contains .venv/ and an existing logs/.
+
+# Abort on the first failing step so Slurm reports the job as FAILED.
+set -eo pipefail
+
+module load devel/cuda/12.4
+
+# Environment variables
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY="${DPPO_WANDB_ENTITY:-dominik_roth}"
+export DPPO_DATA_DIR="${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}"
+export DPPO_LOG_DIR="${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}"
+
+cd "$SLURM_SUBMIT_DIR"
+source .venv/bin/activate
+
+echo "Testing D3IL avoid_m3 pre-training..."
+python script/run.py --config-name=pre_diffusion_mlp \
+    --config-dir=cfg/d3il/pretrain/avoid_m3 \
+    train.n_epochs=50 \
+    train.save_model_freq=25
diff --git a/slurm/dev_tests/test_robomimic_can_finetune.sh b/slurm/dev_tests/test_robomimic_can_finetune.sh
new file mode 100644
index 0000000..bb7f1e3
--- /dev/null
+++ b/slurm/dev_tests/test_robomimic_can_finetune.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_can_ft
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=24G
+#SBATCH --output=logs/dppo_can_ft_%j.out
+#SBATCH --error=logs/dppo_can_ft_%j.err
+#
+# Short robomimic can fine-tuning run (50 training iterations).
+# Submit from the directory that contains .venv/ and an existing logs/.
+
+# Abort on the first failing step so Slurm reports the job as FAILED.
+set -eo pipefail
+
+module load devel/cuda/12.4
+
+# MuJoCo environment for fine-tuning
+export MUJOCO_PY_MUJOCO_PATH="$HOME/.mujoco/mujoco210"
+# Append without leaving an empty entry (= current directory) if the variable starts unset.
+export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$HOME/.mujoco/mujoco210/bin:/usr/lib/nvidia"
+export MUJOCO_GL=egl
+
+# Environment variables
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY="${DPPO_WANDB_ENTITY:-dominik_roth}"
+export DPPO_DATA_DIR="${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}"
+export DPPO_LOG_DIR="${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}"
+
+cd "$SLURM_SUBMIT_DIR"
+source .venv/bin/activate
+
+# Apply HoReKa MuJoCo compilation fix
+echo "Applying HoReKa MuJoCo compilation fix..."
+python -c "exec(open('fix_mujoco_compilation.py').read()); apply_mujoco_fix(); import mujoco_py; print('MuJoCo ready!')"
+
+echo "Testing robomimic can fine-tuning..."
+python script/run.py --config-name=ft_ppo_diffusion_mlp \
+    --config-dir=cfg/robomimic/finetune/can \
+    train.n_train_itr=50 \
+    train.save_model_freq=25
diff --git a/slurm/dev_tests/test_robomimic_square_finetune.sh b/slurm/dev_tests/test_robomimic_square_finetune.sh
new file mode 100644
index 0000000..4aa3107
--- /dev/null
+++ b/slurm/dev_tests/test_robomimic_square_finetune.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH --job-name=dppo_square_ft
+#SBATCH --account=hk-project-p0022232
+#SBATCH --partition=dev_accelerated
+#SBATCH --gres=gpu:1
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=00:30:00
+#SBATCH --mem=24G
+#SBATCH --output=logs/dppo_square_ft_%j.out
+#SBATCH --error=logs/dppo_square_ft_%j.err
+#
+# Short robomimic square fine-tuning run (50 training iterations).
+# Submit from the directory that contains .venv/ and an existing logs/.
+
+# Abort on the first failing step so Slurm reports the job as FAILED.
+set -eo pipefail
+
+module load devel/cuda/12.4
+
+# MuJoCo environment for fine-tuning
+export MUJOCO_PY_MUJOCO_PATH="$HOME/.mujoco/mujoco210"
+# Append without leaving an empty entry (= current directory) if the variable starts unset.
+export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$HOME/.mujoco/mujoco210/bin:/usr/lib/nvidia"
+export MUJOCO_GL=egl
+
+# Environment variables
+export WANDB_MODE=online
+export DPPO_WANDB_ENTITY="${DPPO_WANDB_ENTITY:-dominik_roth}"
+export DPPO_DATA_DIR="${DPPO_DATA_DIR:-$SLURM_SUBMIT_DIR/data}"
+export DPPO_LOG_DIR="${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log}"
+
+cd "$SLURM_SUBMIT_DIR"
+source .venv/bin/activate
+
+# Apply HoReKa MuJoCo compilation fix
+echo "Applying HoReKa MuJoCo compilation fix..."
+python -c "exec(open('fix_mujoco_compilation.py').read()); apply_mujoco_fix(); import mujoco_py; print('MuJoCo ready!')"
+
+echo "Testing robomimic square fine-tuning..."
+python script/run.py --config-name=ft_ppo_diffusion_mlp \
+    --config-dir=cfg/robomimic/finetune/square \
+    train.n_train_itr=50 \
+    train.save_model_freq=25