Calling a CUDA kernel passed as an argument to another function (passing a kernel as an argument)


I have been trying to pass a CUDA kernel as an argument to another function, but I keep getting errors. Here is my code:

// Kernel Def

__global__ void tmp(int a,int b)


// Doing Something


// Call Func Def

void Func(void(*kernel),int Nob, int BlkSz,int a, int b,cudaStream_t stream)




// main

cudaStream_t stream;


But with this code I'm getting the following errors:

error: expression must be a pointer to a complete object type

error: argument of type “cudaStream_t” is incompatible with parameter of type “size_t”

So, if anybody has worked on something like this, please help.



Well, actually it seems to work:

#include <cuda.h>

#include <stdio.h>

// Kernel: writes 1 through `a` so the host can confirm the launch ran.
// (Body restored to match the "result is 1" output shown in the transcript.)
__global__ void foo(int* a) {
    *a = 1;
}

// Launches the kernel pointed to by `func` with `Nob` blocks of `BlkSz`
// threads on `stream`, forwarding `a` as the kernel argument.
// The function-pointer type spells out the kernel's full signature, and the
// launch configuration passes 0 as its third parameter so that `stream`
// lands in the fourth slot.
void Call(void (*func)(int*), int Nob, int BlkSz, int* a, cudaStream_t stream) {
    (*func)<<<Nob, BlkSz, 0, stream>>>(a);
}

// Allocates one int on the device, launches `foo` through `Call`, copies the
// value back and prints it. Returns 1 if any CUDA call reports an error.
int main() {
    int *a = NULL, b = 0;

    cudaError_t err = cudaMalloc(&a, sizeof(int));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    Call(foo, 1, 1, a, 0);

    // A kernel launch returns nothing itself; launch errors are read here.
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        cudaFree(a);
        return 1;
    }

    // Blocking copy: also reports errors raised while the kernel executed.
    err = cudaMemcpy(&b, a, sizeof(int), cudaMemcpyDeviceToHost);
    cudaFree(a);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    printf("result is %d\n", b);

    return 0;
}

$ nvcc -o foo foo.cu

$ ./foo 

result is 1

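I guess the important bits are to fully declare your function pointer, argument types included (a bare `void(*kernel)` is just a `void*`, which is what triggers the "pointer to a complete object type" error), and not to forget the shared-memory size in the launch configuration: it is the third parameter (0 here), so the stream must go in the fourth slot. Passing the stream third is what produces the `cudaStream_t` vs. `size_t` error.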