I am trying to make a kernel that invokes a device function via a pointer. It works well as long as the function and its caller reside in the same source (.cu) file, but breaks if they are in different files. Here is the full example code.
FuncPointer.h:
#ifndef FuncPointer_h
#define FuncPointer_h
/// Signature shared by the binary float operations that can be stored in a
/// FuncPointer: takes two floats and returns a float.
typedef float (*op_func)(float, float);

/// Thin wrapper around a single op_func pointer.
struct FuncPointer
{
    /// The wrapped function pointer (the struct's only data member).
    op_func fptr;

    /// Default constructor; declared here and defined outside this header.
    FuncPointer();
};
#endif // FuncPointer_h
Main.cu:
#include <cstdio>
#include "FuncPointer.h"
/// start of FuncPointer.cu
/// Device-side binary operation matching the op_func signature.
/// Returns the sum of its two arguments.
__device__ float add_func (float x, float y)
{
    const float sum = x + y;
    return sum;
}
/// Device-resident variable holding the device address of add_func.
/// Host code cannot take the address of a __device__ function directly, so
/// the address is stored here and read back with cudaMemcpyFromSymbol.
__device__ op_func func = add_func;

/// Fetches the device address of add_func into fptr.
///
/// On failure the error is reported on stderr and fptr is left as NULL
/// instead of holding an indeterminate value.
///
/// NOTE(review): the fetched address is only callable from kernels that are
/// part of the same device program as add_func. If this section is moved to
/// its own .cu file, all files most likely need to be compiled and linked
/// with relocatable device code (nvcc -rdc=true, CUDA 5.0+); confirm against
/// the toolkit version in use.
FuncPointer::FuncPointer() : fptr(NULL)
{
    const cudaError_t status = cudaMemcpyFromSymbol(&fptr, func, sizeof(fptr));
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaMemcpyFromSymbol(func) failed: %s\n",
                cudaGetErrorString(status));
        // Do not keep a partially written pointer around.
        fptr = NULL;
    }
}
/// end of FuncPointer.cu
/// Calls the function stored in p->fptr with the fixed operands 100 and 10
/// and prints the returned value with device-side printf.
///
/// p is dereferenced inside the kernel, so it must point to memory the
/// device can read, and p->fptr must be an address callable on the device.
__global__ void kernel (FuncPointer* p)
{
    const float lhs = 100.0f;
    const float rhs = 10.0f;

    // Indirect call through the pointer stored in *p.
    const float result = p->fptr(lhs, rhs);

    printf ("result = %f\n", result);
}
/// Prints a diagnostic for a failed CUDA runtime call.
/// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok (cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/// Builds a FuncPointer on the host, copies it to the device, and launches
/// `kernel` on it with a single thread.
///
/// Returns EXIT_SUCCESS when every CUDA call and the kernel itself succeed,
/// EXIT_FAILURE otherwise.
int main ()
{
    FuncPointer fp;
    FuncPointer* dev_fp = NULL;

    if (!cuda_ok(cudaMalloc(&dev_fp, sizeof(FuncPointer)), "cudaMalloc")) {
        return EXIT_FAILURE;
    }

    bool ok = cuda_ok(cudaMemcpy(dev_fp, &fp, sizeof(FuncPointer),
                                 cudaMemcpyHostToDevice),
                      "cudaMemcpy");
    if (ok) {
        kernel<<<1,1>>>(dev_fp);
        // Launch-configuration errors show up in cudaGetLastError(); faults
        // raised while the kernel runs are only reported by a later
        // synchronizing call, so synchronize explicitly before judging success.
        ok = cuda_ok(cudaGetLastError(), "kernel launch")
          && cuda_ok(cudaDeviceSynchronize(), "kernel execution");
    }

    // Release the device copy on both the success and the failure path.
    cudaFree(dev_fp);
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
This works as expected.
Note the code section between `/// start of FuncPointer.cu` and `/// end of FuncPointer.cu`.
If I move this code from Main.cu into another file FuncPointer.cu and link them together, the execution stops with the “unspecified launch error” message.
What is wrong with calling a device function from another file by pointer?
A similar question was asked in this post, but never answered.