Undefined reference when compiling on TX2

Hi,

I am working on a CUDA project and trying to make it run on a TX2. The code runs fine on my PC (with a GTX850 card). However, when I try to compile it on the TX2, I get the following errors:

.
.
.

[100%] Linking CXX executable cudagmmHierEM
libCuda_Lib_.a(Cuda_Lib__intermediate_link.o): In function `__cudaRegisterLinkedBinary_38_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37':
/tmp/tmpxft_00006d4d_00000000-2_Cuda_Lib__intermediate_link.reg.c:4: undefined reference to `__fatbinwrap_38_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37'
/tmp/tmpxft_00006d4d_00000000-2_Cuda_Lib__intermediate_link.reg.c:4: undefined reference to `__fatbinwrap_38_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37'
collect2: error: ld returned 1 exit status
CMakeFiles/cudagmmHierEM.dir/build.make:101: recipe for target 'cudagmmHierEM' failed
make[2]: *** [cudagmmHierEM] Error 1
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/cudagmmHierEM.dir/all' failed
make[1]: *** [CMakeFiles/cudagmmHierEM.dir/all] Error 2
Makefile:83: recipe for target 'all' failed
make: *** [all] Error 2

My CMake file is as follows:

# Minimum CMake release this script is written against.
cmake_minimum_required(VERSION 3.5)

# Project name; ${PROJECT_NAME} is reused further down to name the executable.
project(cudagmm)

# Build the DEBUG configuration (switch the value to Release for an optimized build).
set(CMAKE_BUILD_TYPE DEBUG)

# Compile host C++ sources as C++11 by appending the flag to the existing ones.
string(APPEND CMAKE_CXX_FLAGS " -std=c++11")

# Mandatory dependencies: the CUDA toolkit (its find module supplies the
# cuda_add_library / cuda_add_executable commands used below) and Eigen3.
find_package(CUDA REQUIRED)
find_package(Eigen3 REQUIRED)

# Do not forward the host compiler flags to nvcc; nvcc only receives
# what is listed in CUDA_NVCC_FLAGS.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Turn on separable compilation for the CUDA object files.
set(CUDA_SEPARABLE_COMPILATION ON)

# Flags passed to nvcc, in this order.
# NOTE(review): both -gencode entries use code=compute_XX, whereas the
# configuration reported as working on TX2 uses code=sm_62. This may be related
# to the undefined __fatbinwrap_* reference at link time -- TODO confirm.
set(CUDA_NVCC_FLAGS
    -gencode arch=compute_62,code=compute_62   # TX2
    -gencode arch=compute_50,code=compute_50
    -rdc=true -std=c++11
    -g -G
)

# Header search paths, applied to every target defined after this call.
# NOTE(review): no find_package(Boost) is visible in this script, so
# ${Boost_INCLUDE_DIRS} presumably expands to nothing -- verify.
include_directories(
    include
    ${CUDA_INCLUDE_DIRS}
    ${EIGEN3_INCLUDE_DIRS}
    "/usr/include/eigen3"
    "/usr/include"
    ${Boost_INCLUDE_DIRS}
)

# Library search paths handed to the linker for the targets defined below:
# the CUDA toolkit's lib64 directory plus the project's src and lib folders.
link_directories(
    /usr/local/cuda/lib64
    src
    lib
)

# Gather every .cu file directly under src/ into CU_Files.
file(GLOB CU_Files "src/*.cu")

# Library built from all collected CUDA sources.
cuda_add_library(Cuda_Lib_ ${CU_Files})

# Test executable, named <project>HierEM, built from the host-side test driver.
cuda_add_executable(${PROJECT_NAME}HierEM test/cudatest.cc)

# Link the executable against the project's CUDA library, the CUDA libraries
# exposed by find_package(CUDA) (runtime, cuBLAS, cuSOLVER, device runtime),
# and the C++ standard library.
target_link_libraries(${PROJECT_NAME}HierEM
    Cuda_Lib_
    ${CUDA_LIBRARIES}
    ${CUDA_CUBLAS_LIBRARIES}
    ${CUDA_cusolver_LIBRARY}
    ${CUDA_cudadevrt_LIBRARY}
    stdc++
)

The error seems to be related to CUDA dynamic parallelism (CDP). The following CMake file works on the TX2:

# Minimum CMake release this script is written against.
cmake_minimum_required(VERSION 3.5)

# Project name; ${PROJECT_NAME} is reused further down to name the executable.
project(cuda)

# Build the DEBUG configuration.
set(CMAKE_BUILD_TYPE DEBUG)

# Compile host C++ sources as C++11 by appending the flag to the existing ones.
string(APPEND CMAKE_CXX_FLAGS " -std=c++11")

# The CUDA toolkit is mandatory; its find module supplies cuda_add_library.
find_package(CUDA REQUIRED)

# Do not forward the host compiler flags to nvcc.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Turn on separable compilation for the CUDA object files.
set(CUDA_SEPARABLE_COMPILATION ON)

# Extend whatever nvcc flags are already set with the architecture selection
# (arch=compute_62,code=sm_62), relocatable device code, and C++11.
list(APPEND CUDA_NVCC_FLAGS
    -gencode arch=compute_62,code=sm_62
    -rdc=true -std=c++11
)

# Header search paths: the project's include/ folder and the CUDA toolkit headers.
include_directories(
    include
    /usr/local/cuda/include
)

# Library search path: the CUDA toolkit's lib64 directory.
link_directories(
    /usr/local/cuda/lib64
)

# CUDA library built from the single device source file.
cuda_add_library(culib src/cuda.cu)

# Host-only test executable, named <project>H, built with the plain
# add_executable command.
add_executable(${PROJECT_NAME}H test/cudatest.cc)

# Link against the project's CUDA library and, via raw -l flags, the CUDA
# runtime, cuBLAS, cuSOLVER and the CUDA device runtime.
target_link_libraries(${PROJECT_NAME}H
    culib
    -lcudart
    -lcublas
    -lcusolver
    -lcudadevrt
)

Could you also share the source code (original or simplified one) with us?

Also, are you using the JetPack 3.2 BSP?

czhm13,

Please try to narrow down the code and see which part is causing the linking problem.

I suggest checking whether the issue also reproduces on a different BSP with a different CUDA version (JetPack 3.1, JetPack 3.2).

Also, for now, please write a Makefile by hand, using the NVIDIA CUDA samples as a reference, instead of relying on the one generated by CMake.

Please refer to this link:

https://devtalk.nvidia.com/default/topic/1026656/jetson-tx2/compile-cuda-program-with-dynamic-parallelism/post/5221874/