Cuda linker errors when crosscompiling

I’m trying to compile a simple test program I came across in a recent blog post:

My goal being to be able to cross compile code (that may or may not include CUDA) with cmake on my x86 laptop and then deploy it to my NX (or TX2). My dev environment is a Ubuntu 18.04 docker container with all of the sdk_manager packages installed (cuda v10.2). I can build the code fine for the host machine. I got a sample Toolchain file from one of the vpi examples:

#Toolchain_aarch64_l4t.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)

set(target_arch aarch64-linux-gnu)
set(CMAKE_LIBRARY_ARCHITECTURE ${target_arch} CACHE STRING "" FORCE)

# Configure cmake to look for libraries, include directories and
# packages inside the target root prefix.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_FIND_ROOT_PATH "/usr/${target_arch}")

# needed to avoid doing some more strict compiler checks that
# are failing when cross-compiling
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

# specify the toolchain programs
find_program(CMAKE_C_COMPILER ${target_arch}-gcc)
find_program(CMAKE_CXX_COMPILER ${target_arch}-g++)
if(NOT CMAKE_C_COMPILER OR NOT CMAKE_CXX_COMPILER)
    message(FATAL_ERROR "Can't find suitable C/C++ cross compiler for ${target_arch}")
endif()

set(CMAKE_AR ${target_arch}-ar CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB ${target_arch}-ranlib)
set(CMAKE_LINKER ${target_arch}-ld)

# Not all shared libraries dependencies are installed in host machine.
# Make sure linker doesn't complain.
set(CMAKE_EXE_LINKER_FLAGS_INIT -Wl,--allow-shlib-undefined)

# instruct nvcc to use our cross-compiler
set(CMAKE_CUDA_FLAGS "-ccbin ${CMAKE_CXX_COMPILER} -Xcompiler -fPIC" CACHE STRING "" FORCE)

However when I include the toolchain file, i.e.

cmake -GNinja -DCMAKE_CUDA_ARCHITECTURES=72 -DCMAKE_TOOLCHAIN_FILE=../Toolchain_aarch64_l4t.cmake -DCMAKE_BUILD_TYPE=Release ..

I get this error:

[1/1] Linking CXX executable particle_test
FAILED: particle_test 
: && /usr/bin/aarch64-linux-gnu-g++ -O3 -DNDEBUG -Wl,--allow-shlib-undefined CMakeFiles/particle_test.dir/test.cu.o CMakeFiles/particle_test.dir/cmake_device_link.o -o particle_test  libparticles.a  -lcudadevrt  -lcudart_static  -lrt  -lpthread  -ldl && :
/usr/lib/gcc-cross/aarch64-linux-gnu/7/../../../../aarch64-linux-gnu/bin/ld: cannot find -lcudadevrt
/usr/lib/gcc-cross/aarch64-linux-gnu/7/../../../../aarch64-linux-gnu/bin/ld: cannot find -lcudart_static
collect2: error: ld returned 1 exit status
ninja: build stopped: subcommand failed.

it appears in /usr/local/cuda-10.2 there are two symlinks:

include -> targets/x86_64-linux/include/
lib64 -> targets/x86_64-linux/lib/

In the targets folder, in addition to the x86_64 folder, I also have one named aarch64 which appears to have the required files in it. I remapped the symlinks (which seems hokey) so they pointed to the aarch64 libraries but no joy I still get the linker errors.

I’ve also tried adding find_package(cuda 10.2 REQUIRED) to my CMakeLists.txt, but that gives other errors saying that it can’t be found but then reports it’s location. From what I can tell the find_package(CUDA) is deprecated and no longer needed since CMake has CUDA language support.

Any ideas?

Looks like this is potentially a bug in cmake:

The quick, dirty work around works for me but seems not ideal.

Thanks for the update.