Here is how to build onnxruntime-gpu from source.
# Fetch the ONNX Runtime sources together with all nested submodules.
git clone --recurse-submodules https://github.com/microsoft/onnxruntime.git
cd onnxruntime
# Switch to the release tag you want to build. v1.23.1 was the most recent
# release when this was written; other tags are listed at
# https://github.com/microsoft/onnxruntime/releases
git checkout v1.23.1
# Re-sync the submodules to the commits pinned by the checked-out tag.
git submodule update --init --recursive
# Keep deprecated-declaration warnings from being promoted to errors in the
# host C++ compile.
export CXXFLAGS="-Wno-error=deprecated-declarations"
# Put the CUDA toolkit's cccl include directory first on the C++ header search
# path. The previous value is appended only when it is non-empty: a trailing
# ':' would be an empty path element, which GCC interprets as "search the
# current working directory".
export CPLUS_INCLUDE_PATH="/usr/local/cuda/targets/sbsa-linux/include/cccl${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}"
# Arguments for ONNX Runtime's build.sh, collected in an array so that every
# option can carry its own comment. The array expands to exactly the same
# argument list, in the same order, as a single backslash-continued command.
ort_build_args=(
  # Release configuration; run the update and build phases, 10 parallel jobs.
  --config Release --update --build --parallel 10
  # Generate Ninja build files and do not run the test suite.
  --cmake_generator Ninja --skip_tests
  # Build the Python bindings and package them as a wheel.
  --enable_pybind --build_wheel
  # CUDA support and where the CUDA / cuDNN installations are located.
  --use_cuda
  --cuda_home /usr/local/cuda
  --cudnn_home /usr/lib/aarch64-linux-gnu
  # TensorRT support and where TensorRT is located.
  --use_tensorrt
  --tensorrt_home /usr/lib/aarch64-linux-gnu
  # Target GPU architecture handed to CMake (adjust 110 to your device).
  --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=110
  # Extra nvcc flags: forward unknown options to the host compiler and
  # silence strict-aliasing / deprecated-declaration warnings there.
  --cmake_extra_defines CMAKE_CUDA_FLAGS="--forward-unknown-to-host-compiler -Xcompiler=-Wno-strict-aliasing -Xcompiler=-Wno-deprecated-declarations"
)
./build.sh "${ort_build_args[@]}"
# Install the wheel produced by the build. The glob picks up whichever
# ONNX Runtime version, CPython tag and platform tag were actually built,
# instead of assuming 1.23.1 / cp312 / linux_aarch64.
pip install build/Linux/Release/dist/onnxruntime_gpu-*.whl
If the build stops with a “warning treated as error” failure, run the following commands and then rerun the build.sh command above.
The errors are caused by CUDA 13 deprecating longlong4 (use longlong4_16a or longlong4_32a instead).
# Show, with line numbers and highlighted matches, every whole-word use of the
# deprecated 4-element vector types in the BERT CUDA sources and in the
# CUTLASS platform.h header fetched into the build tree.
grep -n -R -E --color=always \
  '\b(long4|ulong4|longlong4|ulonglong4|double4)\b' \
  onnxruntime/contrib_ops/cuda/bert \
  build/Linux/Release/_deps/cutlass-src/include/cutlass/platform/platform.h
# Back up and patch the BERT attention sources: replace the deprecated
# longlong4 type with longlong4_16a.
#
# patch_longlong4 FILE...
#   For every FILE that exists:
#     1. save a pristine copy as FILE.bak, but only if no backup exists yet,
#        so re-running never overwrites the original with a patched file;
#     2. rewrite every whole-word "longlong4" to "longlong4_16a" in place
#        (already patched names are not matched again, so this is repeatable).
#   Missing files are reported on stderr and skipped.
#   Returns non-zero if a backup copy or a sed call failed.
patch_longlong4() {
  local src rc=0
  for src in "$@"; do
    if [ ! -f "$src" ]; then
      printf 'patch_longlong4: skipping missing file: %s\n' "$src" >&2
      continue
    fi
    if [ ! -e "$src.bak" ]; then
      # Never patch a file we could not back up first.
      cp -- "$src" "$src.bak" || { rc=1; continue; }
    fi
    sed -i 's/\blonglong4\b/longlong4_16a/g' "$src" || rc=1
  done
  return "$rc"
}

patch_longlong4 \
  onnxruntime/contrib_ops/cuda/bert/attention_impl.h \
  onnxruntime/contrib_ops/cuda/bert/attention_transpose.cu \
  onnxruntime/contrib_ops/cuda/bert/attention.cu
# (call patch_longlong4 again for any other files the grep listed)
# Patch the CUTLASS platform.h specializations: append "_16a" to the deprecated
# vector type names long4, ulong4, longlong4, ulonglong4 and double4.
#
# patch_vec4_types HEADER
#   Saves a pristine copy as HEADER.bak (only if no backup exists yet, so a
#   re-run cannot replace the original with an already patched file), then
#   rewrites the whole-word type names in place. Names that already carry a
#   suffix are not matched again, so the patch can be applied repeatedly.
#   Returns non-zero if HEADER is missing or the copy/sed step fails.
patch_vec4_types() {
  local hdr=$1
  if [ ! -f "$hdr" ]; then
    # The header lives under the build tree, so it is only present once a
    # build.sh run has populated build/Linux/Release/_deps.
    printf 'patch_vec4_types: file not found: %s\n' "$hdr" >&2
    return 1
  fi
  if [ ! -e "$hdr.bak" ]; then
    cp -- "$hdr" "$hdr.bak" || return
  fi
  sed -i -E 's/\b(long4|ulong4|longlong4|ulonglong4|double4)\b/\1_16a/g' "$hdr"
}

patch_vec4_types build/Linux/Release/_deps/cutlass-src/include/cutlass/platform/platform.h