How to pass an NVRTC-compiled device function pointer to an already compiled device function?

qorrns0328 · June 15, 2022, 8:32am

nvrtcCreateProgram buffer

// CUDA source handed to nvrtcCreateProgram. It is written as a run of adjacent
// string literals (one per source line, each ending in "\n") which the compiler
// concatenates into a single string. Every literal must be closed on its own
// line; a trailing backslash inside an open literal would splice the next
// line's opening quote into it and terminate the string early.
//
// The kernel applies the binary operation reached through `f` element-wise:
// c[tid] = (*f)(a[tid], b[tid]) for every tid < size.
// NOTE(review): `#pragma once` is normally only meaningful inside a header;
// it is kept here to leave the program text unchanged - confirm it is wanted.
std::string text =
    "                                          \n"
    "#pragma once\n"
    "#include \"test.h\"\n"
    "extern \"C\" __global__ void nvrtcfunctest(double* a, double* b, double* c, size_t size, func* f)\n"
    "{\n"
    "    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;\n"
    "    if (tid < size) {\n"
    "        c[tid] = (*f)(a[tid], b[tid]);\n"
    "    }\n"
    "}\n"
    "\n";

header

// Holds a device-memory copy of the value stored in a __device__ symbol of
// type T (in this file: a device function pointer), so that the address of
// that copy (`ptr`) can be handed to a kernel as an argument.
//
// Construction reads the symbol's value into a host-side staging variable with
// cudaMemcpyFromSymbol and then uploads it into a freshly cudaMalloc'ed slot.
// If any CUDA call fails, the slot is released and `ptr` is left as nullptr,
// so callers can test `ptr` before using it.
//
// NOTE(review): the type owns `ptr` but is still copyable; copying an instance
// would make two destructors free the same allocation. Avoid copies.
template <typename T>
struct cudaCallableFunctionPointer
{
public:
    // f_ : host-side address of the __device__ variable whose value is copied.
    cudaCallableFunctionPointer(T* f_)
        : ptr(nullptr)
    {
        // Stack staging value: no host heap allocation is needed, which also
        // removes the previous invalid cudaFree() of malloc'ed memory.
        T host_value;

        if (cudaMalloc((void**)&ptr, sizeof(T)) != cudaSuccess) {
            ptr = nullptr;
            return;
        }

        // Symbol -> host, then host -> device slot. Short-circuiting keeps the
        // upload from running with an unread staging value.
        if (cudaMemcpyFromSymbol(&host_value, *f_, sizeof(T)) != cudaSuccess ||
            cudaMemcpy(ptr, &host_value, sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
            cudaFree(ptr);
            ptr = nullptr;
        }
    }

    // Releases the device slot; cudaFree accepts a null pointer as a no-op.
    ~cudaCallableFunctionPointer()
    {
        cudaFree(ptr);
    }

    // Device address of the copied value, or nullptr if construction failed.
    T* ptr;
};



// Pointer-to-function type for a binary operation on doubles; this is the type
// the NVRTC kernel receives (indirectly) and calls through.
using func = double (*)(double, double);

// Device-side addition: yields the sum of the two operands.
__device__ double devpuls(double a, double b)
{
    const double sum = a + b;
    return sum;
}

main

// Device-resident variables that store the addresses of the device functions.
// Their values are only meaningful on the device; the host reads them back via
// cudaMemcpyFromSymbol inside cudaCallableFunctionPointer.
__device__ func devpulsptr = devpuls;
// NOTE(review): devminus is not defined in the header excerpt shown here -
// confirm it is declared before this line.
__device__ func devminusptr = devminus;

size_t n = 10;
size_t bufferSize = n * sizeof(double);
double* hosta = new double[n];
double* hostb = new double[n];
double* hostc = new double[n];

// Host inputs: a[i] = i, b[i] = 2*i.
for (size_t i = 0; i < n; ++i) {
    hosta[i] = static_cast<double>(i);
    hostb[i] = static_cast<double>(i * 2);
}
double* devA;
double* devB;
double* devC;

// Device slot holding a copy of the value stored in devpulsptr.
cudaCallableFunctionPointer<func> pulsptr(&devpulsptr);
cudaMalloc((void**)&devA, bufferSize);
cudaMalloc((void**)&devB, bufferSize);
cudaMalloc((void**)&devC, bufferSize);
// NOTE(review): hosta/hostb are not uploaded to devA/devB in this fragment -
// confirm the elided section performs the host-to-device copies.

// Driver-API views of the runtime-API allocations, used as kernel arguments.
// (The stray trailing comma in the original declaration was a syntax error.)
CUdeviceptr ddX, ddY, ddOut, ddfunc;
ddX         = reinterpret_cast<CUdeviceptr>(devA);
ddY         = reinterpret_cast<CUdeviceptr>(devB);
ddOut       = reinterpret_cast<CUdeviceptr>(devC);
ddfunc      = reinterpret_cast<CUdeviceptr>(pulsptr.ptr);

// The element count is a plain value, not a device address: keep it as size_t
// so the argument matches the kernel's `size_t size` parameter exactly.
size_t dsize = n;

/////////////////////////////

~
~
~
// Argument table for the launch: one entry per kernel parameter, each entry
// being the host address of the variable that holds the argument value.
// Order here is (ddX, ddY, ddOut, dsize, ddfunc).
// NOTE(review): presumably `kernel` is nvrtcfunctest(a, b, c, size, f), in
// which case this order matches its signature - confirm against the module.
// NOTE(review): ddfunc refers to a function address taken from the separately
// compiled host program's device code; verify that address is callable from
// the NVRTC-built module.
void* kernelParams[] = {
    &ddX,
    &ddY,
    &ddOut,
    &dsize,
    &ddfunc
};

// NOTE(review): the CUresult returned by cuLaunchKernel is discarded here.
cuLaunchKernel(kernel,
               gridDimX, gridDimY, gridDimZ,
               blockDimX, blockDimY, blockDimZ,
               sharedMemBytes,
               hStream,
               kernelParams,
               extra);

Topic		Replies	Views
Warp Invalid PC, device function pointer CUDA Programming and Performance	4	1114	May 29, 2019
device function pointers CUDA Programming and Performance	0	571	January 25, 2012
How to copy a host function pointer to device in CUDA CUDA Programming and Performance cuda	6	774	July 10, 2024
Passing a function ptr CUDA Programming and Performance cuda	2	598	April 14, 2021
Function pointers crashing kernel calls CUDA Programming and Performance	1	2920	August 8, 2011
A pointer to a function CUDA Programming and Performance	7	1426	May 13, 2016
How can I use __device__ function pointer in CUDA ? CUDA Programming and Performance	34	60710	June 3, 2020
Is this correct way to code function pointers? CUDA Programming and Performance	4	2531	March 12, 2009
Device function pointers: Is it possible to use them in a useful way? CUDA Programming and Performance	16	9199	May 20, 2020
Array of function pointers assignment CUDA Programming and Performance	7	1082	March 23, 2022

How to pass an NVRTC-compiled device function pointer to an already compiled device function?

nvrtcCreateProgram buffer

header

main

Does the code look plausible? I’d appreciate any feedback.

Related topics