Defining device function in source file and linking to executable

I have the following:

function.cu

#include "function.hpp"

#ifndef __CUDA_ARCH__
#include <cmath>
#endif // __CUDA_ARCH__

#ifndef __CUDA_ARCH__
using std::ceil;
#endif // __CUDA_ARCH__

__host__ __device__ int float2int_ru(float x)
{
#ifdef __CUDA_ARCH__
  return __float2int_ru(x);
#else // __CUDA_ARCH__
  return static_cast<int>(ceil(x));
#endif // __CUDA_ARCH__
}

function.hpp

#pragma once

__host__ __device__ int float2int_ru(float x);

test.cu

#include "function.hpp"

__global__
void float2int_ru_kernel(float* in, int* out)
{
  *out = float2int_ru(*in);
}

int main()
{
  return 0;
}

CMakeLists.txt

enable_language(CUDA)

# Enable separable compilation and set -rdc=true
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -rdc=true")

# Set sources for the library
set(SOURCES function.cu)

option(BUILD_SHARED_LIBS "Build using shared libraries" ON)

# Create the library
add_library(lib ${SOURCES})

# Find the CUDA toolkit
find_package(CUDAToolkit REQUIRED)

# Include CUDA headers
target_include_directories(lib PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

# Link CUDA runtime library
target_link_libraries(lib PUBLIC CUDA::cudart)

# Enable separable compilation for the library
set_target_properties(lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

# Set the source files properties for CUDA
set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CUDA)

# Set sources for the test executable
set(SOURCES test.cu)

# Create the test executable
add_executable(test ${SOURCES})

# Enable separable compilation for the test
set_target_properties(test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

# Set the source files properties for CUDA
set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CUDA)

# Link the test executable with the library
target_link_libraries(test lib)

When I try and compile I get the following and I dont understand why:

nvlink error   : Undefined reference to '_Z12float2int_ruf' in 'CMakeFiles/test.dir/test.o'
make[2]: *** [CMakeFiles/test.dir/build.make:107: CMakeFiles/test.dir/cmake_device_link.o] Error 255
make[1]: *** [CMakeFiles/Makefile2:121: CMakeFiles/test.dir/all] Error 2
make: *** [Makefile:91: all] Error 2

Any help is appreciated!

This is a fairly common issue people run into using CMake. I’m not suggesting its identical to your code/example/case, but here is a comment I made on a recent posting that had a similar error signature.

Of course, you will immediately reply “but I’m already doing RDC!!” :

Which I don’t dispute. I’m not a CMake expert, but one of the things I sometimes suggest is to get a good handle on what CMake is actually doing based on your CMakeLists.txt file. You can do this by asking for verbose output from CMake.

Study the verbose output, and see if the actual sequence of nvcc commands make sense (it almost certainly will not, since it will contain the error you report.) IMO, this will help to understand why your request for RDC is not getting applied in the right place at the right time in the right way, and may help others to spot the issue.

And of course there are the resource I referred to in the forum post I linked.

No, I don’t know how to request verbose output from CMake. But that is not unique or specific to CUDA and is something you should be able to discover with basic web searches.

FWIW I don’t have trouble with what you have shown with “typical” nvcc usage, so I think the problem is a project organization/CMake usage issue:

# cat function.cu
#include "function.hpp"

#ifndef __CUDA_ARCH__
#include <cmath>
#endif // __CUDA_ARCH__

#ifndef __CUDA_ARCH__
using std::ceil;
#endif // __CUDA_ARCH__

__host__ __device__ int float2int_ru(float x)
{
#ifdef __CUDA_ARCH__
  return __float2int_ru(x);
#else // __CUDA_ARCH__
  return static_cast<int>(ceil(x));
#endif // __CUDA_ARCH__
}
# cat function.hpp
#pragma once

__host__ __device__ int float2int_ru(float x);

# cat test.cu
#include "function.hpp"

__global__
void float2int_ru_kernel(float* in, int* out)
{
  *out = float2int_ru(*in);
}

int main()
{
  return 0;
}
# nvcc -rdc=true function.cu test.cu
#