Issue using Onnxruntime with CUDAExecutionProvider on Orin

Hi,

We have confirmed that ONNXRuntime works on Orin after adding the sm_87 GPU architecture to the CUDA build flags.
Below are the details for your reference:

Install prerequisites

$ sudo apt install -y --no-install-recommends build-essential software-properties-common libopenblas-dev libpython3.8-dev python3-pip python3-dev python3-setuptools python3-wheel
$ sudo apt install -y protobuf-compiler libprotobuf-dev openssl libssl-dev libcurl4-openssl-dev
$ sudo apt install -y autoconf bc g++-8 gcc-8 clang-8 lld-8 gettext-base gfortran-8 iputils-ping libbz2-dev libc++-dev libcgal-dev libffi-dev libfreetype6-dev libhdf5-dev libjpeg-dev liblzma-dev libncurses5-dev libncursesw5-dev libpng-dev libreadline-dev libssl-dev libsqlite3-dev libxml2-dev libxslt-dev locales moreutils openssl python-openssl rsync scons
$ pip3 install wheel==0.35.1
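
Before starting the build, it may also help to confirm that the CUDA toolkit, cuDNN, and TensorRT from JetPack are present; the paths below match the defaults passed to build.sh later on:

$ /usr/local/cuda/bin/nvcc --version
$ dpkg -l | grep -E 'cudnn|tensorrt'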

Upgrade cmake

$ wget http://www.cmake.org/files/v3.18/cmake-3.18.0.tar.gz
$ tar xpvf cmake-3.18.0.tar.gz cmake-3.18.0/
$ cd cmake-3.18.0/
$ ./bootstrap --system-curl
$ make -j8
$ echo 'export PATH=/home/nvidia/topic_219457/cmake-3.18.0/bin/:$PATH' >> ~/.bashrc
$ source ~/.bashrc
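
As a quick sanity check that the new binary is picked up from the updated PATH, cmake should now report 3.18.0:

$ cmake --version
cmake version 3.18.0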

Build ONNXRuntime

$ git clone --recursive -b rel-1.12.0 https://github.com/microsoft/onnxruntime
$ cd onnxruntime/

Add sm_87 support

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index d591d1b8a..ac3271bab 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -1761,6 +1761,7 @@ if (onnxruntime_USE_CUDA)
       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_53,code=sm_53") # TX1, Nano
       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_62,code=sm_62") # TX2
       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_72,code=sm_72") # AGX Xavier, NX Xavier
+      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # Orin
     else()
       # the following compute capabilities are removed in CUDA 11 Toolkit
       if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
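
Before kicking off the build, you can optionally confirm that the nvcc shipped with JetPack already knows the Orin architecture; on CUDA 11.4 and newer, compute_87 should appear in the output:

$ /usr/local/cuda/bin/nvcc --list-gpu-arch
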
$ ./build.sh --config Release --update --build --parallel --build_wheel \
 --use_tensorrt --cuda_home /usr/local/cuda --cudnn_home /usr/lib/aarch64-linux-gnu \
 --tensorrt_home /usr/lib/aarch64-linux-gnu
$ sudo pip3 install build/Linux/Release/dist/onnxruntime_gpu-1.12.0-cp38-cp38-linux_aarch64.whl
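
After installing the wheel, a quick way to confirm that the CUDA and TensorRT execution providers were compiled in is to list the available providers; the output should look similar to the line below:

$ python3 -c "import onnxruntime as ort; print(ort.get_available_providers())"
['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']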

Test (same as yours)

$ wget https://raw.githubusercontent.com/microsoft/onnxruntime-inference-examples/main/python/api/onnxruntime-python-api.py
$ python3 onnxruntime-python-api.py
/home/nvidia/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1383: UserWarning: positional arguments and argument "destination" are deprecated. nn.Module.state_dict will not accept them in the future. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
  warnings.warn(
[5. 7. 9.]
[ 2.  4.  6.  8. 10.]
tensor([1.4077, 0.4510, 1.2116, 0.6008, 0.7652], device='cuda:0')
tensor([1, 1, 1, 1, 1], device='cuda:0')
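
If you want to pin a session to the CUDA execution provider explicitly rather than relying on the default provider order, a minimal sketch is below; note that "model.onnx" is a placeholder path, not part of the steps above, so substitute your own model file:

$ python3 - <<'EOF'
import onnxruntime as ort

# "model.onnx" is a placeholder; replace it with your model file.
sess = ort.InferenceSession(
    "model.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
print(sess.get_providers())  # CUDAExecutionProvider should come first
EOF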

Thanks.
