Separate compilation of templated CUDA code with CMake fails with a link error

15618561709 · October 11, 2020, 3:54am

environment

os : win10
msvc : 141
cmake :3.17.5
cuda: 11.1

We use a template in our CUDA code and build it with CMake,
but a very strange error occurs:

[build] CUDA_SEPARATE_SHABAL_KERNAL.lib(separate_shabal.obj) : error LNK2019: "void __cdecl caculate_poc_hash_with_share_mem<256>(int *,int *,unsigned __int64,unsigned __int64)" (??$caculate_poc_hash_with_share_mem@$0BAA@@@YAXPEAH0_K1@Z)，

Here is how my code is organized:

-  separate_shabal_api.h
-  separate_shabal_api.cpp
-  CMakeLists.txt
--cuda_kernal/separate_shabal.cuh
--cuda_kernal/separate_shabal.cu
--cuda_kernal/separate_shabal_kernal.cu
--cuda_kernal/separate_shabal_kernal.cuh

Here is my CMake file for the library:

# Static library that bundles the CUDA kernels in cuda_kernal/ with the host-side
# C++ API sources that sit next to this file.
#
# The minimum version is deliberately a single version, not a `min...max` range:
# opting in to policies from CMake 3.18 onward enables CMP0104, which makes CMake
# add its own architecture flags on top of the explicit -gencode list below.
cmake_minimum_required(VERSION 3.17.0)
project(CUDA_SEPARATE_SHABAL_KERNAL VERSION 0.0.1 LANGUAGES CXX CUDA)
find_package(CUDAToolkit REQUIRED)

# Kernel sources (every source file of an enabled language found in cuda_kernal/).
aux_source_directory(cuda_kernal CUDA_KERNAL_FILES)

# Host API sources. The patterns carry the dot so that only *.h / *.cpp match
# (the previous `*h` / `*cpp` matched any file name merely ending in "h" or "cpp").
# CONFIGURE_DEPENDS re-runs the glob at build time when files are added or removed.
file(GLOB API_SOURCE_FILES CONFIGURE_DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/*.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
)
message(STATUS "the sub diction sources is ${API_SOURCE_FILES}")

add_library(${PROJECT_NAME} STATIC ${CUDA_KERNAL_FILES} ${API_SOURCE_FILES})

# Consumers need this directory to find the API header; the kernel headers are an
# implementation detail of the library itself.
target_include_directories(${PROJECT_NAME}
    PUBLIC  "${CMAKE_CURRENT_SOURCE_DIR}"
    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/cuda_kernal"
)
target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_11)

if (MSVC)
    # The project enables CXX and CUDA only, so setting CMAKE_C_FLAGS had no effect.
    # Attach the flag to the C++ sources of this target instead.
    # NOTE(review): if the .cu files also need it, the host compiler flag has to be
    # forwarded through nvcc (-Xcompiler=/source-charset:utf-8) - confirm.
    target_compile_options(${PROJECT_NAME} PRIVATE
        "$<$<COMPILE_LANGUAGE:CXX>:/source-charset:utf-8>"
    )
endif()

# CUDA_SEPARABLE_COMPILATION makes CMake pass the relocatable-device-code flag, and
# CUDA_RESOLVE_DEVICE_SYMBOLS performs the device link when the archive is built,
# so -dc, -rdc=true and -lcudadevrt must not be repeated as compile options
# (-lcudadevrt is a link flag and does nothing at compile time anyway).
set_target_properties(${PROJECT_NAME} PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
    CUDA_RESOLVE_DEVICE_SYMBOLS ON
)

# One -gencode entry per target architecture; the last entry embeds compute_75 PTX
# for newer GPUs. A separate -arch=sm_52 is redundant next to this explicit list.
set(cuda_gencode_flags
    -gencode=arch=compute_52,code=sm_52
    -gencode=arch=compute_60,code=sm_60
    -gencode=arch=compute_61,code=sm_61
    -gencode=arch=compute_70,code=sm_70
    -gencode=arch=compute_75,code=sm_75
    -gencode=arch=compute_75,code=compute_75
)
# Quoted so the whole list stays inside a single generator expression.
target_compile_options(${PROJECT_NAME} PRIVATE
    "$<$<COMPILE_LANGUAGE:CUDA>:${cuda_gencode_flags};-lineinfo;-use_fast_math>"
)

# Imported target from FindCUDAToolkit: carries the library location and the CUDA
# include directory, instead of relying on a bare name on the linker search path.
target_link_libraries(${PROJECT_NAME} PRIVATE CUDA::cudart_static)

The template is used in separate_shabal_kernal.cuh and separate_shabal_kernal.cu:

// Kernel template with a compile-time BLOCK_SIZE parameter (declaration only).
// Parameter names mirror the arguments passed at the call site in separate_shabal.cu.
//
// NOTE(review): a template is only instantiated where its definition is visible.
// If the body lives solely in separate_shabal_kernal.cu, every BLOCK_SIZE used from
// another translation unit needs an explicit instantiation in that .cu file, e.g.
//   template __global__ void caculate_poc_hash_with_share_mem<256>(int32_t *, int32_t *, size_t, size_t);
// or the definition has to move into the .cuh header - otherwise the linker reports
// the instantiation as an unresolved external symbol.
template <int32_t BLOCK_SIZE>
__global__ void caculate_poc_hash_with_share_mem(
    int32_t *cuda_mem_final_ptr,
    int32_t *cuda_mem_hash_ptr,
    size_t nonce_nr,
    size_t plotter_id);

The library itself compiles successfully:

[build]   CUDA_SEPARATE_SHABAL_KERNAL.vcxproj -> F:\cuda_shabal256\cuda_separate_shabal\build\cuda_sperate_shabal256\Debug\CUDA_SEPARATE_SHABAL_KERNAL.lib

But after linking it into the CMake test project, the linker throws this error:

[build] CUDA_SEPARATE_SHABAL_KERNAL.lib(separate_shabal.obj) : error LNK2019: "void __cdecl caculate_poc_hash_with_share_mem<256>(int *,int *,unsigned __int64,unsigned __int64)" (??$caculate_poc_hash_with_share_mem@$0BAA@@@YAXPEAH0_K1@Z)，

Here is my test CMake file:

# Top-level build: adds the Shabal CUDA library and turns every test_api/*.cpp file
# into its own executable registered with CTest.
#
# If nvcc is not found automatically, pass -DCMAKE_CUDA_COMPILER=<path to nvcc> on the
# cmake command line (or set the CUDACXX environment variable) rather than
# hard-coding an installation path in this file.
cmake_minimum_required(VERSION 3.17.0)
project(CUDA_SPERATE_SHABAL LANGUAGES CXX CUDA VERSION 0.1.0)

if (MSVC)
    # Add the flag instead of assigning CMAKE_CXX_FLAGS: the assignment discarded
    # CMake's default MSVC flags (such as /EHsc) and anything the user passed in.
    # Directory scope is intentional so the library added below still inherits it,
    # as it previously inherited CMAKE_CXX_FLAGS.
    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/source-charset:utf-8>")
endif()

find_package(CUDAToolkit REQUIRED)

add_subdirectory(cuda_sperate_shabal256)

# Defines the BUILD_TESTING option (ON by default) and calls enable_testing() when
# it is ON, so no separate enable_testing() call is needed.
include(CTest)

if(BUILD_TESTING)
    file(GLOB test_files CONFIGURE_DEPENDS
        "${CMAKE_CURRENT_SOURCE_DIR}/test_api/*.cpp"
    )

    set(test_targets "")
    foreach(test_file IN LISTS test_files)
        message(STATUS "find test file ${test_file}")
        # File name without directory and without its last extension; same result
        # as the previous ".+/(.+)\\..*" regex.
        get_filename_component(test_file_name "${test_file}" NAME_WLE)
        message(STATUS "find test file name ${test_file_name}")

        add_executable(${test_file_name} "${test_file}")
        set_target_properties(${test_file_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
        target_include_directories(${test_file_name}
            PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/cuda_sperate_shabal256"
        )
        target_link_libraries(${test_file_name} PRIVATE CUDA_SEPARATE_SHABAL_KERNAL)

        # NAME/COMMAND signature: CMake replaces the target name with the path of the
        # built executable for the active configuration. The legacy
        # add_test(<name> <exe>) form performs no such substitution.
        add_test(NAME ${test_file_name} COMMAND ${test_file_name})
        list(APPEND test_targets ${test_file_name})
    endforeach()

    # `check` builds all test executables first, then runs ctest for the current
    # configuration (-C is required by multi-config generators such as Visual Studio).
    add_custom_target(check
        COMMAND ${CMAKE_CTEST_COMMAND} -C $<CONFIG> --verbose
        DEPENDS ${test_targets}
        VERBATIM
    )
endif()


set(CPACK_PROJECT_NAME ${PROJECT_NAME})
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
include(CPack)

The kernel is called from inside the library itself, in the separate_shabal.cu file:

caculate_poc_hash_with_share_mem<128> <<<gridDim, blockDim, blockDim.y * sizeof(shabal_context) , *cuda_stream_ptr>>> (cuda_mem_final_ptr,cuda_mem_hash_ptr,nonce_nr,plotter_id);

I don’t understand why adding template <int32_t BLOCK_SIZE> to the kernel function
makes the linker report that it cannot resolve kernal_function<>.

Can anyone help me?