I’ve created a minimal working example (MWE) that demonstrates how I intend to integrate CUDA kernels into my existing C++ code base. The plan is to compile the CUDA code as a library and link it to the existing code. The code (shown below) compiles, but it does not run correctly: the “Running Kernel” message is never printed.
Please note that I am a CUDA beginner. Thanks.
[main.cpp]
#include <iostream>
extern "C" void runKernel();
int main(int argc, char **argv)
{
    runKernel();
}
[Test.cu]
#include <iostream>

__global__ void testKernel( unsigned *data )
{
    // Each thread writes its own thread index into the output array.
    int tId = threadIdx.x;
    data[tId] = tId;
}
extern "C" void runKernel()
{
std::cout << "Running Kernel" << std::endl;
const unsigned NUM_THREADS = 32;
unsigned *hostData;
unsigned *devPtrData;
cudaMalloc( (void**) devPtrData , NUM_THREADS );
testKernel<<<1,NUM_THREADS>>>();
cudaMemcpy( hostData , devPtrData , NUM_THREADS * sizeof(unsigned) , cudaMemcpyDeviceToHost );
for( unsigned i = 0; i < NUM_THREADS; ++i )
{
std::cout << hostData[i] << std::endl;
}
}
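Since the program fails without any visible error, I have also put together a small error-checking helper that I plan to wrap around the CUDA runtime calls. This is just a sketch of my own (the checkCuda name and message format are mine, not an existing API), so please tell me if there is a more idiomatic way:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Abort with a readable message if a CUDA runtime call fails.
static void checkCuda( cudaError_t result, const char *msg )
{
    if( result != cudaSuccess )
    {
        std::fprintf( stderr, "%s failed: %s\n", msg, cudaGetErrorString( result ) );
        std::exit( EXIT_FAILURE );
    }
}

// Intended usage around the launch in runKernel():
//   checkCuda( cudaMalloc( (void**) &devPtrData, NUM_THREADS * sizeof(unsigned) ), "cudaMalloc" );
//   testKernel<<<1,NUM_THREADS>>>( devPtrData );
//   checkCuda( cudaGetLastError(), "kernel launch" );
//   checkCuda( cudaDeviceSynchronize(), "kernel execution" );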
Compilation steps:
nvcc -ccbin g++ -m64 -gencode arch=compute_30,code=sm_30 -o Test.o -c Test.cu
nvcc -ccbin g++ -m64 -gencode arch=compute_30,code=sm_30 -lib -o Test.a Test.o
nvcc -ccbin g++ -m64 -gencode arch=compute_30,code=sm_30 -o main.o -c main.cpp
nvcc -ccbin g++ -m64 -gencode arch=compute_30,code=sm_30 -o test Test.a main.o
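For context, what I would eventually like to do is keep the existing g++ build and only use nvcc for the device code, linking everything else with g++. My rough, untested idea is below; the CUDA library path and the -lcudart flag are my assumptions for a default Linux install, not something I have verified:

g++ -m64 -o main.o -c main.cpp
nvcc -ccbin g++ -m64 -gencode arch=compute_30,code=sm_30 -o Test.o -c Test.cu
nvcc -ccbin g++ -m64 -gencode arch=compute_30,code=sm_30 -lib -o Test.a Test.o
g++ -m64 -o test main.o Test.a -L/usr/local/cuda/lib64 -lcudart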