Is it possible to compile a kernel into a shared library and run it using dlopen?
So far I have compiled the following code (test.cu) into an object file with:
nvcc -c test.cu
#include <stdio.h>
/* Host entry point that launches the kernel. The parameter list must match the
 * definition exactly; in C++ a differing prototype declares a separate overload
 * that is never defined and fails at link time. */
extern void test(int a, float *b, float *c, int *d);
/* Device kernel: writes g_data[n] squared into g_result[n] for every n < N. */
__global__ void kernel(int N, float *g_data, float *g_result, int *statArray);
/*
 * Squares each input element: g_result[i] = g_data[i] * g_data[i].
 *
 * Each thread handles at most one element, selected by its flat index in a
 * one-dimensional launch. Threads whose index is not below N do nothing.
 *
 * N          number of elements to process
 * g_data     input values
 * g_result   output values, written at the same index as the input
 * statArray  not referenced by this kernel
 */
extern __global__ void kernel(int N, float *g_data, float *g_result, int *statArray)
{
    // Flat index of this thread across all blocks of the 1D grid.
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the tail: the grid may contain more threads than elements.
    if (idx >= N)
        return;

    const float value = g_data[idx];
    g_result[idx] = value * value;
}
/*
 * Host-side wrapper that launches `kernel` over `a` elements.
 *
 * a        number of elements to process; values <= 0 make this a no-op
 * b, c, d  forwarded unchanged to the kernel as g_data, g_result and
 *          statArray (presumably they must be device-accessible pointers --
 *          confirm against the caller)
 *
 * The launch is asynchronous: this function returns without waiting for the
 * kernel to finish. A failed launch is reported on stderr.
 *
 * NOTE(review): this function has C++ linkage, so its exported symbol is
 * name-mangled; a dlsym() lookup by the plain name "test" would not find it
 * unless the function is given C linkage -- confirm how callers resolve it.
 */
extern void test(int a, float *b, float *c, int *d)
{
    // An empty grid is an invalid launch configuration, and there is no work.
    if (a <= 0)
        return;

    const int threadsPerBlock = 256;
    // Ceiling division so every element gets a thread; written this way to
    // avoid overflowing int when `a` is close to INT_MAX.
    const int blocks = (a - 1) / threadsPerBlock + 1;

    kernel<<<blocks, threadsPerBlock>>>(a, b, c, d);

    // A kernel launch returns no status; configuration errors only surface
    // through cudaGetLastError().
    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        fprintf(stderr, "test: kernel launch failed: %s\n", cudaGetErrorString(err));
}
Now I need to link the .o file into a shared library, but if I do it with gcc I get the following errors:
Undefined symbols:
"_cudaSetupArgument", referenced from:
___device_stub__Z6kerneliPfS_Pi in test.o
___device_stub__Z6kerneliPfS_Pi in test.o
___device_stub__Z6kerneliPfS_Pi in test.o
___device_stub__Z6kerneliPfS_Pi in test.o
"___cudaUnregisterFatBinary", referenced from:
___cudaUnregisterBinaryUtil in test.o
"_cudaConfigureCall", referenced from:
test(int, float*, float*, int*)in test.o
"___cudaRegisterFunction", referenced from:
___sti____cudaRegisterAll_39_tmpxft_00003681_00000000_4
_test_cpp1_ii_85ce6c63 in test.o
"___gxx_personality_v0", referenced from:
___gxx_personality_v0$non_lazy_ptr in test.o
"_cudaLaunch", referenced from:
cudaError cudaLaunch<char>(char*)in test.o
"___cudaRegisterFatBinary", referenced from:
___sti____cudaRegisterAll_39_tmpxft_00003681_00000000_4
_test_cpp1_ii_85ce6c63 in test.o
ld: symbol(s) not found
collect2: ld returned 1 exit status
And if I try to use nvcc instead, it does not recognize the -dynamiclib or -dylib options!