Hi, I am a fairly experienced C++ programmer who is new to CUDA. I am having an issue with getting a kernel to execute when calling it from a class in a compiled library. I have tried for a while to get it to work, but have failed thus far. I have produced a minimally reproducible example below.
Directory structure is as follows:
cudatest
|-lib
| |-inc
| | |- utilities.cuh
| | |- solver.cuh
| |-src
| |- solver.cu
|-test
|- test.cu
I am building with CMake:
cudatest/CMakeLists.txt
FILE(REMOVE ${CMAKE_SOURCE_DIR}/CMakeCache.txt)
IF(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
MESSAGE(FATAL_ERROR "\n\nPlease build in a separate build directory.
Ensure that you remove the CMakeCache.txt and CMakeFiles/ from the top level directory")
ENDIF(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
CMAKE_MINIMUM_REQUIRED(VERSION 3.22)
PROJECT(CUDATEST
LANGUAGES CXX CUDA)
SET(CMAKE_CUDA_STANDARD 20)
SET(CMAKE_CUDA_STANDARD_REQUIRED ON)
SET(CMAKE_CXX_STANDARD 20)
SET(CMAKE_CXX_STANDARD_REQUIRED ON)
SET(EXECUTABLE_OUTPUT_PATH ${CUDATEST_BINARY_DIR}/bin)
add_subdirectory(lib)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/lib/inc)
ADD_EXECUTABLE(cudatest_ex test/test.cu)
TARGET_LINK_LIBRARIES(cudatest_ex cudatest_culib)
SET_TARGET_PROPERTIES(cudatest_ex PROPERTIES OUTPUT_NAME "cudatest.ex")
SET_TARGET_PROPERTIES(cudatest_culib PROPERTIES CUDA_ARCHITECTURES "75")
lib/CMakeLists.txt
SET(cudatest_culib_src src/solver.cu)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/inc)
ADD_LIBRARY(cudatest_culib SHARED ${cudatest_culib_src})
SET_TARGET_PROPERTIES(cudatest_culib PROPERTIES OUTPUT_NAME "cudatest")
SET_TARGET_PROPERTIES(cudatest_culib PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CUDATEST_BINARY_DIR}/lib)
SET_TARGET_PROPERTIES(cudatest_culib PROPERTIES CUDA_ARCHITECTURES "75")
Here is my source :
test/test.cu:
#include "solver.cuh"
int main(int argc, char *argv[])
{
Solver slvr;
bool use_external_lib = true;
if(use_external_lib){
slvr.solve();
}
else{
Utilities::doNothing<<<1,1>>>();
CUDA_CALL(cudaGetLastError());
CUDA_CALL(cudaDeviceSynchronize());
}
}
lib/src/solver.cu
#include "solver.cuh"
void Solver::solve(){
Utilities::doNothing<<<1,1>>>();
CUDA_CALL(cudaGetLastError());
CUDA_CALL(cudaDeviceSynchronize());
}
lib/inc/solver.cuh
#ifndef solver_cuh
#define solver_cuh
#include "utilities.cuh"
class Solver{
public:
Solver(){};
~Solver(){};
void solve();
};
#endif
lib/inc/utilities.cuh
#ifndef utilities_cuh
#define utilities_cuh
#include <iostream>
#define CUDA_CALL(ans) { gAssert((ans), __FILE__, __LINE__); }
inline void gAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"Assert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
namespace Utilities{
__global__ void doNothing(){
int tidx = threadIdx.x + blockDim.x*blockIdx.x;
printf("I, thread %d, am doing nothing!\n", tidx);
}
}
#endif
If, in test.cu, I change use_external_lib
to false
I get the appropriate output: “I, thread 0, am doing nothing!” If I set it to true
I get absolutely nothing. nvprof
shows that the kernel code doesn’t get executed in this case. Also,compute-sanitizer
returns no errors.
I am sure that the issue is either a really dumb one, or has to do with my misunderstanding of how CUDA compiles/links/runs, but I am at a complete loss here as to how to get this to work. I appreciate any input.
ETA: Here is my nvidia-smi
output
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.57.01 Driver Version: 565.57.01 CUDA Version: 12.7 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA GeForce RTX 2080 Ti Off | 00000000:3B:00.0 Off | N/A |
| 33% 41C P8 39W / 250W | 763MiB / 11264MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
| 1 NVIDIA GeForce RTX 2080 Ti Off | 00000000:5E:00.0 Off | N/A |
| 30% 31C P8 8W / 250W | 2MiB / 11264MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| 0 N/A N/A 22732 G /usr/libexec/Xorg 461MiB |
| 0 N/A N/A 24732 G /usr/bin/gnome-shell 235MiB |
| 0 N/A N/A 25695 G /usr/bin/gnome-software 30MiB |
+-----------------------------------------------------------------------------------------+