From 826b55a2d2260604e39c815cdf0ae5772eea7589 Mon Sep 17 00:00:00 2001 From: "ys1087@partner.kit.edu" Date: Wed, 27 Aug 2025 16:09:13 +0200 Subject: [PATCH] Integrate HoReKa Intel compiler fix for mujoco-py - Add HoReKa-specific MuJoCo compilation fix to install script - Pin compatible Cython version (0.29.37) - Create fix_mujoco_compilation.py helper script - Document Intel compiler override in README - Update test script to use integrated fix - Addresses Intel OneAPI compiler flag incompatibility with GCC --- README.md | 12 +++ fix_mujoco_env.py | 37 --------- install_dppo.sh | 47 +++++++++++ slurm/dev_tests/test_hopper_finetune_v2.sh | 6 +- slurm/dev_tests/test_mujoco_clean.sh | 53 +++++++++++++ slurm/dev_tests/test_mujoco_cython_fix.sh | 47 +++++++++++ slurm/dev_tests/test_mujoco_final.sh | 50 ++++++++++++ slurm/dev_tests/test_mujoco_intel_compat.sh | 46 +++++++++++ slurm/dev_tests/test_mujoco_isolated.sh | 57 +++++++++++++ slurm/dev_tests/test_mujoco_override.sh | 88 +++++++++++++++++++++ slurm/dev_tests/test_mujoco_success.sh | 70 ++++++++++++++++ 11 files changed, 475 insertions(+), 38 deletions(-) delete mode 100644 fix_mujoco_env.py create mode 100644 slurm/dev_tests/test_mujoco_clean.sh create mode 100644 slurm/dev_tests/test_mujoco_cython_fix.sh create mode 100644 slurm/dev_tests/test_mujoco_final.sh create mode 100644 slurm/dev_tests/test_mujoco_intel_compat.sh create mode 100644 slurm/dev_tests/test_mujoco_isolated.sh create mode 100644 slurm/dev_tests/test_mujoco_override.sh create mode 100644 slurm/dev_tests/test_mujoco_success.sh diff --git a/README.md b/README.md index edf2977..509d28d 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,18 @@ The DPPO repository has been adapted to run on the HoReKa cluster. The original export MUJOCO_GL=egl ``` + c) **HoReKa Intel Compiler Fix**: Due to Intel OneAPI on HoReKa, mujoco-py compilation may fail. 
Use the provided fix: + ```python + # In Python scripts that use MuJoCo, run this before importing mujoco_py: + exec(open('fix_mujoco_compilation.py').read()) + apply_mujoco_fix() + + # Then import mujoco_py normally + import mujoco_py + ``` + + The fix overrides the Intel compiler flags so that mujoco-py is compiled with GCC. The helper script `fix_mujoco_compilation.py` is generated automatically by `install_dppo.sh`. + ### Running on HoReKa The repository includes pre-configured SLURM scripts for job submission: diff --git a/fix_mujoco_env.py b/fix_mujoco_env.py deleted file mode 100644 index e8249a8..0000000 --- a/fix_mujoco_env.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -import os -import subprocess -import sys - -# Set up clean compilation environment -os.environ['CC'] = '/usr/bin/gcc' -os.environ['CXX'] = '/usr/bin/g++' - -# Unset Intel compiler variables that may interfere -for var in ['ICC_PATH', 'INTEL_COMPILER_HOME', 'INTEL_LICENSE_FILE']: - if var in os.environ: - del os.environ[var] - -# Set MuJoCo environment -os.environ['MUJOCO_PY_MUJOCO_PATH'] = '/home/hk-project-robolear/ys1087/.mujoco/mujoco210' -os.environ['LD_LIBRARY_PATH'] = os.environ.get('LD_LIBRARY_PATH', '') + ':/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia' -os.environ['MUJOCO_GL'] = 'egl' - -# Clear any cached compilation -import shutil -cache_dir = '.venv/lib/python3.10/site-packages/mujoco_py/generated' -if os.path.exists(cache_dir): - shutil.rmtree(cache_dir) - print(f"Cleared cache: {cache_dir}") - -print("Environment configured for GCC compilation") -print(f"CC: {os.environ.get('CC')}") -print(f"CXX: {os.environ.get('CXX')}") - -# Test import -try: - import mujoco_py - print("SUCCESS: mujoco_py imported successfully!") -except Exception as e: - print(f"FAILED: {e}") - sys.exit(1) \ No newline at end of file diff --git a/install_dppo.sh b/install_dppo.sh index 870a480..3cf3ac5 100755 --- a/install_dppo.sh +++ b/install_dppo.sh @@ -36,6 +36,53 @@ pip install -e . 
# Install ALL optional dependencies (except Kitchen which has conflicts)
 pip install -e .[all]
 
+# HoReKa-specific MuJoCo compilation fix (Intel OneAPI flags vs. GCC)
+echo ""
+echo "=== HoReKa Cluster MuJoCo Fix ==="
+echo "Applying Intel compiler compatibility fix for mujoco-py..."
+
+# Pin Cython to 0.29.37 after the installs above; --force-reinstall reinstalls it even if already present
+pip install 'Cython==0.29.37' --force-reinstall
+
+# Write the helper to ./fix_mujoco_compilation.py (quoted 'EOF': the shell expands nothing inside)
+cat > fix_mujoco_compilation.py << 'EOF'
+import os
+import sysconfig
+
+def apply_mujoco_fix():
+    """Force GCC for mujoco-py builds: set CC/CXX/CFLAGS/CXXFLAGS and patch sysconfig.get_config_var once."""
+
+    # Environment overrides: system gcc/g++ with GCC-compatible flags (-w suppresses warnings)
+    os.environ['CC'] = '/usr/bin/gcc'
+    os.environ['CXX'] = '/usr/bin/g++'
+    os.environ['CFLAGS'] = '-std=c99 -O2 -fPIC -w'
+    os.environ['CXXFLAGS'] = '-std=c++11 -O2 -fPIC -w'
+
+    # Patch only once: the saved-original attribute doubles as the "already patched" marker
+    if not hasattr(sysconfig, '_original_get_config_var'):
+        def patched_get_config_var(name):
+            if name in ['CFLAGS', 'BASECFLAGS', 'PY_CFLAGS', 'PY_CORE_CFLAGS', 'CCSHARED']:
+                return '-std=c99 -O2 -fPIC -w'
+            elif name in ['CXXFLAGS']:
+                return '-std=c++11 -O2 -fPIC -w'
+            elif name == 'CC':
+                return '/usr/bin/gcc'
+            elif name == 'CXX':
+                return '/usr/bin/g++'
+            else:
+                return sysconfig._original_get_config_var(name)
+
+        sysconfig._original_get_config_var = sysconfig.get_config_var
+        sysconfig.get_config_var = patched_get_config_var
+        print("Applied HoReKa MuJoCo compilation fix")
+
+if __name__ == "__main__":
+    apply_mujoco_fix()
+EOF
+
+echo "Created MuJoCo compilation fix script"
+echo ""
+
 echo "Installation completed!"
echo "Python version: $(python --version)" echo "Pip version: $(pip --version)" diff --git a/slurm/dev_tests/test_hopper_finetune_v2.sh b/slurm/dev_tests/test_hopper_finetune_v2.sh index 5f116e2..23c5ec9 100644 --- a/slurm/dev_tests/test_hopper_finetune_v2.sh +++ b/slurm/dev_tests/test_hopper_finetune_v2.sh @@ -32,7 +32,11 @@ export DPPO_LOG_DIR=${DPPO_LOG_DIR:-$SLURM_SUBMIT_DIR/log} cd $SLURM_SUBMIT_DIR source .venv/bin/activate -echo "Testing hopper finetune v2 with stdio.h fix and cleared cache..." +# Apply HoReKa MuJoCo compilation fix +echo "Applying HoReKa MuJoCo compilation fix..." +python -c "exec(open('fix_mujoco_compilation.py').read()); apply_mujoco_fix(); import mujoco_py; print('MuJoCo ready!')" + +echo "Testing hopper finetune v2 with HoReKa MuJoCo fix..." python script/run.py --config-name=ft_ppo_diffusion_mlp \ --config-dir=cfg/gym/finetune/hopper-v2 \ train.n_train_itr=10 \ diff --git a/slurm/dev_tests/test_mujoco_clean.sh b/slurm/dev_tests/test_mujoco_clean.sh new file mode 100644 index 0000000..a47f6b9 --- /dev/null +++ b/slurm/dev_tests/test_mujoco_clean.sh @@ -0,0 +1,53 @@ +#!/bin/bash +#SBATCH --job-name=dppo_mujoco_clean +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_mujoco_clean_%j.out +#SBATCH --error=logs/dppo_mujoco_clean_%j.err + +# Load only CUDA, avoid Intel modules completely +module purge +module load devel/cuda/12.4 + +# Clean environment - remove any Intel compiler variables +unset CFLAGS +unset CXXFLAGS +unset LDFLAGS +unset CC +unset CXX + +# Force pure GCC environment +export CC=/usr/bin/gcc +export CXX=/usr/bin/g++ +export CFLAGS="-std=c99" +export CXXFLAGS="-std=c++11" + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +# Complete mujoco-py cleanup +echo "Removing all mujoco-py build artifacts..." 
+rm -rf .venv/lib/python3.10/site-packages/mujoco_py/generated/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/.eggs/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/build/ +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.so" -delete +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.pyc" -delete + +# Fresh mujoco-py install +echo "Reinstalling mujoco-py with clean GCC environment..." +pip uninstall mujoco-py -y +pip install 'mujoco-py<2.2,>=2.1' + +# MuJoCo environment +export MUJOCO_PY_MUJOCO_PATH=/home/hk-project-robolear/ys1087/.mujoco/mujoco210 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia +export MUJOCO_GL=egl + +echo "Testing mujoco-py import with clean environment..." +python -c "import mujoco_py; print('SUCCESS: mujoco_py works with clean GCC compilation!')" \ No newline at end of file diff --git a/slurm/dev_tests/test_mujoco_cython_fix.sh b/slurm/dev_tests/test_mujoco_cython_fix.sh new file mode 100644 index 0000000..f52fcf0 --- /dev/null +++ b/slurm/dev_tests/test_mujoco_cython_fix.sh @@ -0,0 +1,47 @@ +#!/bin/bash +#SBATCH --job-name=dppo_mujoco_cython +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_mujoco_cython_%j.out +#SBATCH --error=logs/dppo_mujoco_cython_%j.err + +# Load Intel OneAPI for Python support but use GCC for compilation +module load devel/cuda/12.4 + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +# Force GCC compilation +export CC=/usr/bin/gcc +export CXX=/usr/bin/g++ +export CFLAGS="-std=c99 -O2 -fPIC" +export CXXFLAGS="-std=c++11 -O2 -fPIC" + +# Clean mujoco-py completely +echo "Removing all mujoco-py build artifacts..." 
+rm -rf .venv/lib/python3.10/site-packages/mujoco_py/generated/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/.eggs/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/build/ +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.so" -delete +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.pyc" -delete + +echo "Downgrading Cython to compatible version..." +pip install 'Cython<3.0,>=0.29.20' --force-reinstall + +echo "Reinstalling mujoco-py with compatible Cython..." +pip uninstall mujoco-py -y +pip install 'mujoco-py<2.2,>=2.1' --no-cache-dir --force-reinstall + +# MuJoCo environment +export MUJOCO_PY_MUJOCO_PATH=/home/hk-project-robolear/ys1087/.mujoco/mujoco210 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia +export MUJOCO_GL=egl + +echo "Testing mujoco-py import with compatible Cython..." +python -c "import mujoco_py; print('SUCCESS: mujoco_py compiled with compatible Cython and GCC!')" \ No newline at end of file diff --git a/slurm/dev_tests/test_mujoco_final.sh b/slurm/dev_tests/test_mujoco_final.sh new file mode 100644 index 0000000..7a4da1f --- /dev/null +++ b/slurm/dev_tests/test_mujoco_final.sh @@ -0,0 +1,50 @@ +#!/bin/bash +#SBATCH --job-name=dppo_mujoco_final +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_mujoco_final_%j.out +#SBATCH --error=logs/dppo_mujoco_final_%j.err + +# Load Intel OneAPI for Python support but use GCC for compilation +module load devel/cuda/12.4 + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +# Force GCC compilation +export CC=/usr/bin/gcc +export CXX=/usr/bin/g++ +export CFLAGS="-std=c99 -O2 -fPIC" +export CXXFLAGS="-std=c++11 -O2 -fPIC" + +# Clean mujoco-py completely +echo "Removing all mujoco-py build artifacts..." 
+rm -rf .venv/lib/python3.10/site-packages/mujoco_py/generated/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/.eggs/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/build/ +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.so" -delete +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.pyc" -delete + +echo "Installing exact compatible versions..." +pip uninstall mujoco-py cython -y + +# Install compatible Cython first and pin it +pip install 'Cython==0.29.37' + +# Install mujoco-py without allowing dependency upgrades +pip install 'mujoco-py<2.2,>=2.1' --no-deps +pip install 'glfw>=1.4.0' 'numpy>=1.11,<2.0' 'imageio>=2.1.2' 'cffi>=1.10' 'fasteners~=0.15' + +# MuJoCo environment +export MUJOCO_PY_MUJOCO_PATH=/home/hk-project-robolear/ys1087/.mujoco/mujoco210 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia +export MUJOCO_GL=egl + +echo "Testing mujoco-py import with pinned compatible versions..." 
+python -c "import mujoco_py; print('SUCCESS: mujoco_py compiled successfully!')" \ No newline at end of file diff --git a/slurm/dev_tests/test_mujoco_intel_compat.sh b/slurm/dev_tests/test_mujoco_intel_compat.sh new file mode 100644 index 0000000..4d8f39a --- /dev/null +++ b/slurm/dev_tests/test_mujoco_intel_compat.sh @@ -0,0 +1,46 @@ +#!/bin/bash +#SBATCH --job-name=dppo_mujoco_intel +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_mujoco_intel_%j.out +#SBATCH --error=logs/dppo_mujoco_intel_%j.err + +# Load Intel OneAPI properly with compatibility mode +module load devel/cuda/12.4 + +# Force compatibility mode for Intel compiler +export CFLAGS="-std=c99 -w" +export CXXFLAGS="-std=c++11 -w" +export CC=gcc +export CXX=g++ + +# Completely remove any cached builds +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +# Remove ALL mujoco_py cache and build artifacts +echo "Removing all mujoco-py build artifacts..." +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/generated/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/.eggs/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/build/ +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.so" -delete +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.pyc" -delete + +# Reinstall mujoco-py from scratch +echo "Reinstalling mujoco-py..." +pip uninstall mujoco-py -y +pip install 'mujoco-py<2.2,>=2.1' + +# Set MuJoCo environment +export MUJOCO_PY_MUJOCO_PATH=/home/hk-project-robolear/ys1087/.mujoco/mujoco210 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia +export MUJOCO_GL=egl + +echo "Testing mujoco-py import after fresh install..." 
+python -c "import mujoco_py; print('SUCCESS: mujoco_py works!')" \ No newline at end of file diff --git a/slurm/dev_tests/test_mujoco_isolated.sh b/slurm/dev_tests/test_mujoco_isolated.sh new file mode 100644 index 0000000..bb6079a --- /dev/null +++ b/slurm/dev_tests/test_mujoco_isolated.sh @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=dppo_mujoco_isolated +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_mujoco_isolated_%j.out +#SBATCH --error=logs/dppo_mujoco_isolated_%j.err + +# Keep Intel OneAPI for Python but isolate compilation +module load devel/cuda/12.4 +module load compiler/intel/2023.2.1 + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +# Override all compiler settings to force pure GCC +export CC=/usr/bin/gcc +export CXX=/usr/bin/g++ + +# Clear all Intel compiler flags +unset CFLAGS +unset CXXFLAGS +unset FFLAGS +unset LDFLAGS + +# Set clean GCC-compatible flags +export CFLAGS="-std=c99 -O2 -fPIC" +export CXXFLAGS="-std=c++11 -O2 -fPIC" + +# Clean mujoco-py completely +echo "Removing all mujoco-py build artifacts..." +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/generated/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/.eggs/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/build/ +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.so" -delete +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.pyc" -delete + +# Force clean distutils cache +rm -rf ~/.cache/pip/ +python -c "import distutils.util; import shutil; shutil.rmtree(distutils.util.get_platform(), ignore_errors=True)" 2>/dev/null || true + +echo "Reinstalling mujoco-py with isolated GCC compilation..." 
+pip uninstall mujoco-py -y +pip install 'mujoco-py<2.2,>=2.1' --no-cache-dir --force-reinstall + +# MuJoCo environment +export MUJOCO_PY_MUJOCO_PATH=/home/hk-project-robolear/ys1087/.mujoco/mujoco210 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia +export MUJOCO_GL=egl + +echo "Testing mujoco-py import with isolated GCC compilation..." +python -c "import mujoco_py; print('SUCCESS: mujoco_py compiled with isolated GCC!')" \ No newline at end of file diff --git a/slurm/dev_tests/test_mujoco_override.sh b/slurm/dev_tests/test_mujoco_override.sh new file mode 100644 index 0000000..e2da150 --- /dev/null +++ b/slurm/dev_tests/test_mujoco_override.sh @@ -0,0 +1,88 @@ +#!/bin/bash +#SBATCH --job-name=dppo_mujoco_override +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_mujoco_override_%j.out +#SBATCH --error=logs/dppo_mujoco_override_%j.err + +# Load only CUDA for OpenGL support +module load devel/cuda/12.4 + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +# Override ALL compiler settings completely +export CC=/usr/bin/gcc +export CXX=/usr/bin/g++ +export CPP=/usr/bin/cpp + +# Clear ALL Intel-specific environment variables +unset CFLAGS +unset CXXFLAGS +unset FFLAGS +unset LDFLAGS +unset OPT + +# Set clean GCC-only flags that override everything +export CFLAGS="-std=c99 -O2 -fPIC -w" +export CXXFLAGS="-std=c++11 -O2 -fPIC -w" +export LDFLAGS="" + +# Clean mujoco-py completely first +echo "Cleaning all mujoco-py artifacts..." 
+rm -rf .venv/lib/python3.10/site-packages/mujoco_py/generated/
+rm -rf .venv/lib/python3.10/site-packages/mujoco_py/.eggs/
+rm -rf .venv/lib/python3.10/site-packages/mujoco_py/build/
+find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.so" -delete
+find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.pyc" -delete
+
+# Set MuJoCo environment
+export MUJOCO_PY_MUJOCO_PATH=/home/hk-project-robolear/ys1087/.mujoco/mujoco210
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia
+export MUJOCO_GL=egl
+
+# Test with direct Python override of distutils compiler flags
+echo "Testing mujoco-py with aggressive compiler flag override..."
+python -c "
+import os
+import distutils.util
+import sysconfig
+
+# Override Python's built-in compiler settings
+os.environ['CC'] = '/usr/bin/gcc'
+os.environ['CXX'] = '/usr/bin/g++'
+os.environ['CFLAGS'] = '-std=c99 -O2 -fPIC -w'
+os.environ['CXXFLAGS'] = '-std=c++11 -O2 -fPIC -w'
+
+# Hack: answer sysconfig.get_config_var with GCC compiler paths/flags instead of the Intel ones
+def patched_get_config_var(name):
+    if name in ['CFLAGS', 'BASECFLAGS', 'PY_CFLAGS', 'PY_CORE_CFLAGS', 'CCSHARED']:
+        return '-std=c99 -O2 -fPIC -w'
+    elif name == 'CXXFLAGS':
+        return '-std=c++11 -O2 -fPIC -w'
+    elif name == 'CC':
+        return '/usr/bin/gcc'
+    elif name == 'CXX':
+        return '/usr/bin/g++'
+    else:
+        # Call the original function for other variables
+        return sysconfig._original_get_config_var(name)
+
+# Keep the original reachable (the else branch above calls it), then install the patch
+sysconfig._original_get_config_var = sysconfig.get_config_var
+sysconfig.get_config_var = patched_get_config_var
+
+print('Attempting mujoco_py import with patched sysconfig...')
+try:
+    import mujoco_py
+except Exception as e:
+    print(f'FAILED: {e}')
+    raise  # re-raise: the traceback is printed and the script exits non-zero
+print('SUCCESS: mujoco_py compiled and imported successfully!')
+"
\ No newline at end of file
diff --git a/slurm/dev_tests/test_mujoco_success.sh b/slurm/dev_tests/test_mujoco_success.sh
new file mode 100644
index 0000000..8336a3e
--- 
/dev/null +++ b/slurm/dev_tests/test_mujoco_success.sh @@ -0,0 +1,70 @@ +#!/bin/bash +#SBATCH --job-name=dppo_mujoco_success +#SBATCH --account=hk-project-p0022232 +#SBATCH --partition=dev_accelerated +#SBATCH --gres=gpu:1 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=8 +#SBATCH --time=00:30:00 +#SBATCH --mem=24G +#SBATCH --output=logs/dppo_mujoco_success_%j.out +#SBATCH --error=logs/dppo_mujoco_success_%j.err + +# Load only CUDA +module load devel/cuda/12.4 + +cd $SLURM_SUBMIT_DIR +source .venv/bin/activate + +# Set MuJoCo environment first +export MUJOCO_PY_MUJOCO_PATH=/home/hk-project-robolear/ys1087/.mujoco/mujoco210 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hk-project-robolear/ys1087/.mujoco/mujoco210/bin:/usr/lib/nvidia +export MUJOCO_GL=egl + +# Clean only build cache, not generated files that need to be recreated +echo "Cleaning build cache..." +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/.eggs/ +rm -rf .venv/lib/python3.10/site-packages/mujoco_py/build/ +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.so" -delete +find .venv/lib/python3.10/site-packages/mujoco_py/ -name "*.pyc" -delete + +# Test with sysconfig override but let mujoco-py generate what it needs +echo "Testing mujoco-py with sysconfig override and proper generation..." 
+python -c "
+import os
+import sysconfig
+
+# Force system gcc/g++ with GCC-compatible flags for the mujoco-py build (-w suppresses warnings)
+os.environ['CC'] = '/usr/bin/gcc'
+os.environ['CXX'] = '/usr/bin/g++'
+os.environ['CFLAGS'] = '-std=c99 -O2 -fPIC -w'
+os.environ['CXXFLAGS'] = '-std=c++11 -O2 -fPIC -w'
+
+# Answer compiler/flag lookups with the GCC values; defer every other name to the original
+def patched_get_config_var(name):
+    if name in ['CFLAGS', 'BASECFLAGS', 'PY_CFLAGS', 'PY_CORE_CFLAGS', 'CCSHARED']:
+        return '-std=c99 -O2 -fPIC -w'
+    elif name in ['CXXFLAGS']:
+        return '-std=c++11 -O2 -fPIC -w'
+    elif name == 'CC':
+        return '/usr/bin/gcc'
+    elif name == 'CXX':
+        return '/usr/bin/g++'
+    else:
+        # Call original for other config vars
+        return sysconfig._original_get_config_var(name)
+
+# Keep the original reachable (the else branch above calls it), then install the patch
+sysconfig._original_get_config_var = sysconfig.get_config_var
+sysconfig.get_config_var = patched_get_config_var
+
+print('Testing mujoco_py import with sysconfig patch...')
+try:
+    import mujoco_py
+except Exception as e:
+    print(f'FAILED: {e}')
+    # Re-raise: the traceback is printed and the script exits non-zero instead of reporting success
+    raise
+print('SUCCESS: mujoco_py compiled and imported successfully with GCC override!')
+"
\ No newline at end of file