- Add GCC wrapper script to filter Intel compiler flags
- Download missing mujoco-py generated files automatically
- Update installer with comprehensive MuJoCo fixes
- Document complete solution in README and EXPERIMENT_PLAN
- Hopper fine-tuning validated with reward 1415.8471
- All pre-training environments working
- DPPO is now production-ready on HoReKa
143 lines
4.9 KiB
Bash
Executable File
143 lines
4.9 KiB
Bash
Executable File
#!/bin/bash
#SBATCH --job-name=dppo_install
#SBATCH --account=hk-project-p0022232
#SBATCH --partition=dev_accelerated
#SBATCH --gres=gpu:1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=4
#SBATCH --time=00:30:00
#SBATCH --mem=16G
#SBATCH --output=logs/dppo_install_%j.out
#SBATCH --error=logs/dppo_install_%j.err
#
# DPPO installation job: builds a Python 3.10 virtual environment in the
# submission directory, installs the package, and applies the HoReKa
# mujoco-py build workarounds.
#
# NOTE(review): the --output/--error paths point into logs/; Slurm is not
# expected to create that directory, so it presumably has to exist before
# `sbatch` is run -- confirm against the cluster documentation.

# Load CUDA module (required for PyTorch)
module load devel/cuda/12.4

# Print job info
echo "Starting DPPO installation..."
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURM_NODELIST"
echo "GPU: $CUDA_VISIBLE_DEVICES"

# Navigate to dppo directory (uses current directory).
# The path is quoted so directories containing spaces work, and falls back to
# the current directory when the script is run outside Slurm: a bare `cd` with
# an empty argument would silently jump to $HOME. Abort if the directory
# cannot be entered, otherwise the venv would be created in the wrong place.
cd "${SLURM_SUBMIT_DIR:-$PWD}" || {
    echo "ERROR: cannot change to submit directory '${SLURM_SUBMIT_DIR:-$PWD}'" >&2
    exit 1
}
|
|
|
|
# Create and activate virtual environment with Python 3.10.
# Both steps are checked: if either fails, the pip commands below would
# otherwise install into whatever Python environment happens to be on PATH.
python3.10 -m venv .venv || {
    echo "ERROR: failed to create virtual environment .venv" >&2
    exit 1
}
source .venv/bin/activate || {
    echo "ERROR: failed to activate virtual environment .venv" >&2
    exit 1
}

# Upgrade pip
pip install --upgrade pip

# Install base package
pip install -e .

# Install ALL optional dependencies (except Kitchen which has conflicts).
# The requirement is quoted because an unquoted .[all] is a shell glob that
# would expand to a file named ".a" or ".l" if one exists in this directory.
pip install -e '.[all]'
|
|
|
|
# HoReKa-specific MuJoCo compilation fix: announce the step (blank line first,
# then the two banner lines).
printf '\n%s\n%s\n' \
    "=== HoReKa Cluster MuJoCo Fix ===" \
    "Applying Intel compiler compatibility fix for mujoco-py..."

# Pin compatible Cython version first; --force-reinstall replaces whatever
# Cython version the earlier dependency installation pulled in.
pip install --force-reinstall 'Cython==0.29.37'
|
|
|
|
# Create GCC wrapper that filters Intel compiler flags.
# The heredoc delimiter is quoted, so the wrapper body is written verbatim
# (no variable expansion happens at install time).
cat > gcc_wrapper.sh << 'EOF'
#!/bin/bash
# GCC wrapper that filters out Intel compiler flags.
#
# Every argument is forwarded unchanged to the real compiler except the
# Intel-only options listed in the case pattern below.
# The real compiler defaults to /usr/bin/gcc and can be overridden by
# exporting MUJOCO_REAL_GCC (e.g. to use a GCC provided by a module).
args=()
for arg in "$@"; do
    case "$arg" in
        -xCORE-AVX2|--xCORE-AVX2|-xHost|--xHost)
            # Skip Intel-specific flags
            ;;
        *)
            args+=("$arg")
            ;;
    esac
done
exec "${MUJOCO_REAL_GCC:-/usr/bin/gcc}" "${args[@]}"
EOF
chmod +x gcc_wrapper.sh
|
|
|
|
# Create MuJoCo compilation fix script.
# The heredoc delimiter is quoted, so the Python source is written verbatim.
cat > fix_mujoco_compilation.py << 'EOF'
"""HoReKa Intel compiler compatibility fix for building mujoco-py.

Call apply_mujoco_fix() in the Python process that builds mujoco-py; the
changes it makes (environment variables and monkey patches) only affect the
process that calls it.
"""
import os
import sysconfig

_WRAPPER_NAME = 'gcc_wrapper.sh'
_GXX_PATH = '/usr/bin/g++'
_C_FLAGS = '-std=c99 -O2 -fPIC -w'
_CXX_FLAGS = '-std=c++11 -O2 -fPIC -w'


def _locate_gcc_wrapper():
    """Return the absolute path of the GCC wrapper script.

    The current working directory is checked first (the historical
    behaviour). If the wrapper is not there, the directory containing this
    script is tried, so the fix also works when it is used from another
    working directory. When neither location has the file, the
    working-directory path is returned unchanged.
    """
    cwd_candidate = os.path.abspath(_WRAPPER_NAME)
    if os.path.isfile(cwd_candidate):
        return cwd_candidate

    # __file__ is absent when the source is run through exec().
    script_file = globals().get('__file__')
    if script_file:
        script_dir = os.path.dirname(os.path.abspath(script_file))
        sibling = os.path.join(script_dir, _WRAPPER_NAME)
        if os.path.isfile(sibling):
            return sibling

    return cwd_candidate


def apply_mujoco_fix():
    """Apply HoReKa Intel compiler compatibility fix for mujoco-py"""

    # Override compiler settings with wrapper that filters Intel flags
    wrapper_path = _locate_gcc_wrapper()
    os.environ['CC'] = wrapper_path
    os.environ['CXX'] = _GXX_PATH
    os.environ['CFLAGS'] = _C_FLAGS
    os.environ['CXXFLAGS'] = _CXX_FLAGS

    # Patch sysconfig to remove Intel compiler flags. The hasattr guard keeps
    # the patch idempotent: re-applying it would otherwise save the patched
    # function as the "original" and recurse forever.
    if not hasattr(sysconfig, '_original_get_config_var'):
        overrides = {
            'CFLAGS': _C_FLAGS,
            'BASECFLAGS': _C_FLAGS,
            'PY_CFLAGS': _C_FLAGS,
            'PY_CORE_CFLAGS': _C_FLAGS,
            'CCSHARED': _C_FLAGS,
            'OPT': _C_FLAGS,
            'CXXFLAGS': _CXX_FLAGS,
            'CC': wrapper_path,
            'CXX': _GXX_PATH,
        }

        def patched_get_config_var(name):
            if name in overrides:
                return overrides[name]
            return sysconfig._original_get_config_var(name)

        sysconfig._original_get_config_var = sysconfig.get_config_var
        sysconfig.get_config_var = patched_get_config_var

    # Also patch distutils directly. distutils is no longer part of the
    # standard library from Python 3.12 on, so a missing module is reported
    # instead of aborting the fixes applied above.
    try:
        import distutils.ccompiler
    except ImportError:
        print("distutils is not available; skipping distutils patch")
    else:
        def patched_customize_compiler(compiler):
            compiler.set_executable('compiler_so', wrapper_path)
            compiler.set_executable('compiler_cxx', _GXX_PATH)
            compiler.set_executable('linker_so', wrapper_path + ' -shared')
            return compiler

        # Override customize_compiler function.
        # NOTE(review): distutils defines customize_compiler in
        # distutils.sysconfig, and callers usually import it by name, so
        # this assignment on distutils.ccompiler may never be picked up.
        # Left unchanged on purpose: making it effective would replace the
        # stock compiler setup (including its flags) -- confirm before
        # changing.
        distutils.ccompiler.customize_compiler = patched_customize_compiler

    print("Applied HoReKa MuJoCo compilation fix")


if __name__ == "__main__":
    apply_mujoco_fix()
EOF

echo "Created MuJoCo compilation fix script"
|
|
|
|
# Download missing mujoco-py generated files (common issue on HoReKa)
echo "Downloading missing mujoco-py generated files..."

GENERATED_DIR=".venv/lib/python3.10/site-packages/mujoco_py/generated"
GENERATED_URL="https://raw.githubusercontent.com/openai/mujoco-py/master/mujoco_py/generated"
mkdir -p "$GENERATED_DIR"

download_failed=0
for generated_file in wrappers.pxi __init__.py const.py wrappers.py; do
    # -f makes curl return an error on HTTP failures instead of saving the
    # server's error page as if it were the requested source file; -S keeps
    # the error message visible despite -s. The file is fetched to a
    # temporary name and only moved into place on success, so a failed
    # transfer never leaves a truncated or bogus file behind.
    if curl -fsS "$GENERATED_URL/$generated_file" -o "$GENERATED_DIR/$generated_file.tmp"; then
        mv "$GENERATED_DIR/$generated_file.tmp" "$GENERATED_DIR/$generated_file"
    else
        rm -f "$GENERATED_DIR/$generated_file.tmp"
        echo "WARNING: failed to download $generated_file" >&2
        download_failed=1
    fi
done

# Report success only when every file actually arrived; the installation
# continues either way, as before.
if [ "$download_failed" -eq 0 ]; then
    echo "Downloaded missing generated files"
else
    echo "WARNING: some mujoco-py generated files could not be downloaded" >&2
fi
echo ""
|
|
|
|
# Final report: tool versions first (these need command substitution) ...
printf '%s\n' "Installation completed!"
printf 'Python version: %s\n' "$(python --version)"
printf 'Pip version: %s\n' "$(pip --version)"

# ... then the static follow-up instructions. The delimiter is quoted so the
# $HOME / $LD_LIBRARY_PATH references are printed literally for the user to
# copy, not expanded here.
cat << 'EOF'

=== IMPORTANT: MuJoCo Setup for Fine-tuning ===
1. Install MuJoCo 2.1.0: https://github.com/openai/mujoco-py#install-mujoco
2. Add these environment variables to your SLURM scripts:
export MUJOCO_PY_MUJOCO_PATH=$HOME/.mujoco/mujoco210
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin:/usr/lib/nvidia
export MUJOCO_GL=egl

Pre-training works without MuJoCo setup.

Installed packages:
EOF

# List everything that ended up in the environment.
pip list