Unable to link nvinfer_lean_static.a and cuda kernels into same binary

Hi,

I would like to build a c++ application that links against nvinfer_lean_static.a.
So far no problem, but when I try to add cuda files I get linker errors like:

undefined reference to `__aarch64_swp4_sync'
undefined reference to `__aarch64_cas4_sync'
undefined reference to `__aarch64_swp4_sync'

Here is a small CMakeLists.txt to reproduce the problem:

# The REQUIRED option of find_library() was only added in CMake 3.18;
# with 3.17 the word REQUIRED is interpreted as an extra search-path hint
# and a missing library goes unnoticed until link time.
cmake_minimum_required(VERSION 3.18)

project(TestNVInfer LANGUAGES CXX CUDA)

# Locate the static TensorRT lean runtime and plugin archives, and the
# CUDA toolkit (provides the imported CUDA::* targets used below).
find_library(NVINFER_LIBRARY libnvinfer_lean_static.a REQUIRED)
find_library(NVINFERPLUGIN libnvinfer_plugin_static.a REQUIRED)
find_package(CUDAToolkit REQUIRED)

# Generate a minimal TensorRT host program.
# NOTE: inside a quoted CMake argument only \\, \", \$, \;, \t, \r and \n
# are valid escape sequences.  The original comment used "\>", which makes
# CMake abort at configure time with "Invalid character escape '\>'" -- a
# plain ">=" needs no escaping and is written directly below.
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/main.cpp "
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <iostream>
#include <memory>
#include <vector>
#include <string>

// Simple logger implementation for TensorRT
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        // Only log messages with severity >= kWARNING
        if (severity <= Severity::kWARNING)
        {
            std::cout << msg << std::endl;
        }
    }
};

int main()
{
    Logger logger;

    // Empty engine blob: sufficient for a link-time reproducer, but it means
    // deserializeCudaEngine() returns nullptr -- so every step is checked
    // before use instead of dereferencing a null pointer.
    std::vector<char> engineData;

    auto infer = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger));
    if (!infer)
    {
        std::cout << \"createInferRuntime failed\" << std::endl;
        return 1;
    }

    auto engine = std::unique_ptr<nvinfer1::ICudaEngine>(infer->deserializeCudaEngine(engineData.data(), engineData.size()));
    if (!engine)
    {
        std::cout << \"deserializeCudaEngine failed (engine blob is empty)\" << std::endl;
        return 1;
    }

    auto context = std::unique_ptr<nvinfer1::IExecutionContext>(engine->createExecutionContext());
    return context ? 0 : 1;
}
")

# Generate a tiny CUDA translation unit: a __global__ kernel plus a host-side
# launch wrapper.  Its only purpose is to pull nvcc-compiled device code into
# the final link, which is what triggers the reported linker errors.
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_kernel.cu "
#include <math.h>
#include <cuda_runtime.h>

__global__ void copyData(float* layerMemoryPtr, unsigned char* inputDataPtr, unsigned int dataSize) {
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < dataSize) {
        layerMemoryPtr[i] = static_cast<float>(inputDataPtr[i]);
    }
}

void copyData(float* layerMemoryPtr, unsigned char* inputDataPtr, unsigned int dataSize, cudaStream_t stream) {
    int blocks = (dataSize + 255) / 256;
    copyData<<<blocks, 256, 0, stream>>>(layerMemoryPtr, inputDataPtr, dataSize);
}
")

add_executable(test_nv_infer
	${CMAKE_CURRENT_BINARY_DIR}/test_kernel.cu
	${CMAKE_CURRENT_BINARY_DIR}/main.cpp)

# NOTE(review): the undefined __aarch64_{swp,cas}4_sync references are GCC
# aarch64 "outline atomics" helper routines that live in libgcc.a; they go
# unresolved when the CUDA device-link step drives the final link without
# pulling libgcc in.  Disabling outline atomics for the host compiler is the
# usual workaround -- the flag is aarch64-gcc specific, so confirm on the
# target toolchain (AGX Orin) before adopting.
target_compile_options(test_nv_infer PRIVATE
	$<$<COMPILE_LANGUAGE:CXX>:-mno-outline-atomics>
	$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-mno-outline-atomics>)

target_link_libraries(test_nv_infer PRIVATE
	${NVINFER_LIBRARY}
	${NVINFERPLUGIN}
	# The sources use cudaStream_t and kernel launches, i.e. the CUDA
	# runtime -- that is CUDA::cudart.  CUDA::cudla is the cuDLA
	# (deep-learning-accelerator) library, which nothing here calls;
	# it is kept only to preserve the original link line.
	CUDA::cudart
	CUDA::cudla)

The CMakeLists.txt generates the two example source files itself, so no extra setup is needed.
Just configure and build with:

cmake ..
make

Hi,

Do you want to use TensorRT on AGX Orin?
If so, you can find our sample and Makefile in the /usr/src/tensorrt/samples folder.

Thanks.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.