Issue using Onnxruntime with CUDAExecutionProvider on Orin

I am trying to run this sample onnxruntime code on my Orin:

I get the following error:

CUDA error cudaErrorNoKernelImageForDevice:no kernel image is available for execution on the device

I’ve tried the following:

Unfortunately, none of these solutions seem to work.


Thanks for reporting this to us.

Since there is no prebuilt package for JetPack 5 in the Jetson Zoo, you will need to build it from the source.
We are going to check why building the package with sm=87 is not working.

Will share more information with you later.


It seems that the GPU architecture for Orin has not been added to ONNXRuntime yet.
Could you add it to the CMakeLists.txt below and build from source again?


Yes, I tried that, I added

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # AGX Orin

after line 1772 of CMakeLists.txt, and built another wheel, but still had the same error message as before.

Thanks for testing.

We are trying to reproduce this internally.
Will share more information with you later.


We have confirmed that ONNXRuntime can work on Orin after adding the sm=87 GPU architecture.
Below are the details for your reference:

Install prerequisites

$ sudo apt install -y --no-install-recommends build-essential software-properties-common libopenblas-dev libpython3.8-dev python3-pip python3-dev python3-setuptools python3-wheel
$ sudo apt install -y protobuf-compiler libprotobuf-dev openssl libssl-dev libcurl4-openssl-dev
$ sudo apt install -y autoconf bc g++-8 gcc-8 clang-8 lld-8 gettext-base gfortran-8 iputils-ping libbz2-dev libc++-dev libcgal-dev libffi-dev libfreetype6-dev libhdf5-dev libjpeg-dev liblzma-dev libncurses5-dev libncursesw5-dev libpng-dev libreadline-dev libssl-dev libsqlite3-dev libxml2-dev libxslt-dev locales moreutils openssl python-openssl rsync scons
$ pip3 install wheel==0.35.1

Upgrade cmake

$ wget https://github.com/Kitware/CMake/releases/download/v3.18.0/cmake-3.18.0.tar.gz
$ tar xpvf cmake-3.18.0.tar.gz cmake-3.18.0/
$ cd cmake-3.18.0/
$ ./bootstrap --system-curl
$ make -j8
$ echo 'export PATH=/home/nvidia/topic_219457/cmake-3.18.0/bin/:$PATH' >> ~/.bashrc
$ source ~/.bashrc

Build ONNXRuntime

$ git clone --recursive -b rel-1.12.0 https://github.com/microsoft/onnxruntime
$ cd onnxruntime/

Add sm=87 support

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index d591d1b8a..ac3271bab 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -1761,6 +1761,7 @@ if (onnxruntime_USE_CUDA)
       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_53,code=sm_53") # TX1, Nano
       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_62,code=sm_62") # TX2
       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_72,code=sm_72") # AGX Xavier, NX Xavier
+      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # Orin
       # the following compute capabilities are removed in CUDA 11 Toolkit
$ ./build.sh --config Release --update --build --parallel --build_wheel \
 --use_tensorrt --cuda_home /usr/local/cuda --cudnn_home /usr/lib/aarch64-linux-gnu \
 --tensorrt_home /usr/lib/aarch64-linux-gnu
$ sudo pip3 install build/Linux/Release/dist/onnxruntime_gpu-1.12.0-cp38-cp38-linux_aarch64.whl

Test (same as yours)

$ wget
$ python3
/home/nvidia/.local/lib/python3.8/site-packages/torch/nn/modules/ UserWarning: positional arguments and argument "destination" are deprecated. nn.Module.state_dict will not accept them in the future. Refer to for details.
[5. 7. 9.]
[ 2.  4.  6.  8. 10.]
tensor([1.4077, 0.4510, 1.2116, 0.6008, 0.7652], device='cuda:0')
tensor([1, 1, 1, 1, 1], device='cuda:0')


