Hi,
I have a simple program using TensorRT 5.1.6 and CUDA 10 on arm64, both taken from JetPack 4.2.2:
nvidia@tegra-ubuntu:~/jetpack_4.2.2$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Mon_Mar_11_22:13:24_CDT_2019
Cuda compilation tools, release 10.0, V10.0.326
nvidia@tegra-ubuntu:~/jetpack_4.2.2$ dpkg -l | grep TensorRT
ii graphsurgeon-tf 5.1.6-1+cuda10.0 arm64 GraphSurgeon for TensorRT package
ii libnvinfer-dev 5.1.6-1+cuda10.0 arm64 TensorRT development libraries and headers
ii libnvinfer-samples 5.1.6-1+cuda10.0 all TensorRT samples and documentation
ii libnvinfer5 5.1.6-1+cuda10.0 arm64 TensorRT runtime libraries
iU python3-libnvinfer 5.1.6-1+cuda10.0 arm64 Python 3 bindings for TensorRT
iU python3-libnvinfer-dev 5.1.6-1+cuda10.0 arm64 Python 3 development package for TensorRT
ii tensorrt 5.1.6.1-1+cuda10.0 arm64 Meta package of TensorRT
ii uff-converter-tf 5.1.6-1+cuda10.0 arm64 UFF converter for TensorRT package
This is the code sample:
#include "NvInfer.h"
#include "NvUffParser.h"
#include "NvUtils.h"
#include "nvToolsExt.h"
#include <iostream>
#include <string>
#include "common.h"
using namespace nvuffparser;
using namespace nvinfer1;
Logger gLogger{Logger::Severity::kINFO};
ICudaEngine* loadModelAndCreateEngine(const std::string fileName)
{
    const std::string modelFileName = fileName;
    std::cout << "Building engine from model file: " << modelFileName << std::endl;
    const char* uffFile = modelFileName.c_str();
    int maxBatchSize = 2;

    auto parser = createUffParser();
    parser->registerInput("input_3", Dims3(3, 256, 256), UffInputOrder::kNCHW);
    parser->registerOutput("DetectionLayer/BiasAdd");

    // define builder
    IBuilder* builder = createInferBuilder(gLogger);
    //INetworkDefinition* network = builder->createNetwork();
    //network->destroy();
    builder->destroy();
    parser->destroy();

    ICudaEngine* engine = nullptr;
    return engine;
}

int main() {
    ICudaEngine* engine = loadModelAndCreateEngine("dummy.uff");
    return 0;
}
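For reference, once initialization works, the full version would replace the builder section above with the standard TensorRT 5 UFF sequence. This is only a rough sketch of what I intend (it never runs here, since createInferBuilder already fails); it reuses parser, uffFile, maxBatchSize and gLogger from the code above:

IBuilder* builder = createInferBuilder(gLogger);
INetworkDefinition* network = builder->createNetwork();
// populate the network from the UFF file, parsing weights as FP32
if (!parser->parse(uffFile, *network, DataType::kFLOAT))
    return nullptr;
builder->setMaxBatchSize(maxBatchSize);
builder->setMaxWorkspaceSize(1 << 20);  // scratch space for tactic selection
ICudaEngine* engine = builder->buildCudaEngine(*network);
network->destroy();
builder->destroy();
parser->destroy();
return engine;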
I build it using the following command:
nvcc -std=c++11 -O3 --compiler-options '-fPIC -Wfatal-errors' hello_trt.cpp -lnvinfer -lnvparsers -lnvinfer_plugin -lnvToolsExt -o hello_trt --verbose
Build output:
#$ SPACE=
#$ CUDART=cudart
#$ HERE=/usr/local/cuda-10.0/bin
#$ THERE=/usr/local/cuda-10.0/bin
#$ TARGET_SIZE=
#$ TARGET_DIR=
#$ TARGET_DIR=targets/aarch64-linux
#$ TOP=/usr/local/cuda-10.0/bin/..
#$ NVVMIR_LIBRARY_DIR=/usr/local/cuda-10.0/bin/../nvvm/libdevice
#$ LD_LIBRARY_PATH=/usr/local/cuda-10.0/bin/../lib:/usr/local/cuda-10.0/lib64:
#$ PATH=/usr/local/cuda-10.0/bin/../nvvm/bin:/usr/local/cuda-10.0/bin:/home/nvidia/bin:/usr/local/cuda-10.0/bin:/home/nvidia/swigtool/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/usr/lib/jvm/java-8-oracle/bin:/usr/lib/jvm/java-8-oracle/db/bin:/usr/lib/jvm/java-8-oracle/jre/bin
#$ INCLUDES="-I/usr/local/cuda-10.0/bin/../targets/aarch64-linux/include"
#$ LIBRARIES= "-L/usr/local/cuda-10.0/bin/../targets/aarch64-linux/lib/stubs" "-L/usr/local/cuda-10.0/bin/../targets/aarch64-linux/lib"
#$ CUDAFE_FLAGS=
#$ PTXAS_FLAGS=
#$ gcc -std=c++11 -c -x c++ -D__NVCC__ -fPIC -Wfatal-errors -O3 "-I/usr/local/cuda-10.0/bin/../targets/aarch64-linux/include" -D__CUDACC_VER_MAJOR__=10 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=326 -o "/tmp/tmpxft_00001063_00000000-4_hello_trt.o" "hello_trt.cpp"
#$ nvlink --arch=sm_30 --register-link-binaries="/tmp/tmpxft_00001063_00000000-2_hello_trt_dlink.reg.c" -m64 -lnvinfer -lnvparsers -lnvinfer_plugin -lnvToolsExt "-L/usr/local/cuda-10.0/bin/../targets/aarch64-linux/lib/stubs" "-L/usr/local/cuda-10.0/bin/../targets/aarch64-linux/lib" -cpu-arch=AARCH64 "/tmp/tmpxft_00001063_00000000-4_hello_trt.o" -o "/tmp/tmpxft_00001063_00000000-5_hello_trt_dlink.sm_30.cubin"
#$ fatbinary --create="/tmp/tmpxft_00001063_00000000-3_hello_trt_dlink.fatbin" -64 -no-asm -link "--image=profile=sm_30,file=/tmp/tmpxft_00001063_00000000-5_hello_trt_dlink.sm_30.cubin" --embedded-fatbin="/tmp/tmpxft_00001063_00000000-3_hello_trt_dlink.fatbin.c"
#$ rm /tmp/tmpxft_00001063_00000000-3_hello_trt_dlink.fatbin
#$ gcc -std=c++11 -c -x c++ -DFATBINFILE="\"/tmp/tmpxft_00001063_00000000-3_hello_trt_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"/tmp/tmpxft_00001063_00000000-2_hello_trt_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -fPIC -Wfatal-errors -O3 "-I/usr/local/cuda-10.0/bin/../targets/aarch64-linux/include" -D__CUDACC_VER_MAJOR__=10 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=326 -o "/tmp/tmpxft_00001063_00000000-6_hello_trt_dlink.o" "/usr/local/cuda-10.0/bin/crt/link.stub"
#$ g++ -fPIC -Wfatal-errors -O3 -o "hello_trt" -std=c++11 -Wl,--start-group "/tmp/tmpxft_00001063_00000000-6_hello_trt_dlink.o" "/tmp/tmpxft_00001063_00000000-4_hello_trt.o" -lnvinfer -lnvparsers -lnvinfer_plugin -lnvToolsExt "-L/usr/local/cuda-10.0/bin/../targets/aarch64-linux/lib/stubs" "-L/usr/local/cuda-10.0/bin/../targets/aarch64-linux/lib" -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group
When running it, I get CUDA error code 35 from TensorRT's createInferBuilder:
Building engine from model file: dummy.uff
ERROR: Cuda initialization failure with error 35. Please check cuda installation: Installation Guide Linux :: CUDA Toolkit Documentation.
Segmentation fault (core dumped)
Any ideas? Error 35 is cudaErrorInsufficientDriver, so this looks like a compatibility issue between the CUDA runtime and the driver, or between CUDA and TensorRT.
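To narrow it down, here is a minimal standalone check (my own sketch, independent of TensorRT) that asks the CUDA runtime which versions it and the driver report. Since error 35 is cudaErrorInsufficientDriver, a driver number lower than the runtime number (or 0) would explain the failure:

// check_versions.cpp -- build with: nvcc -std=c++11 check_versions.cpp -o check_versions
#include <cuda_runtime_api.h>
#include <iostream>

int main() {
    int driverVersion = 0, runtimeVersion = 0;
    cudaDriverGetVersion(&driverVersion);    // highest CUDA version the installed driver supports
    cudaRuntimeGetVersion(&runtimeVersion);  // CUDA version of the runtime this binary linked against
    std::cout << "driver:  " << driverVersion << std::endl;   // e.g. 10000 for CUDA 10.0
    std::cout << "runtime: " << runtimeVersion << std::endl;
    if (driverVersion < runtimeVersion)
        std::cout << "driver older than runtime -> would produce error 35" << std::endl;
    return 0;
}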