I want to pass a `__global__` function as an argument to another `__global__` function, e.g.:
// Kernel that prints a fixed greeting from device code.
__global__
void a(void)
{
    printf("Hello from a");
}
// Kernel that prints a fixed greeting from device code.
__global__
void b(void)
{
    printf("Hello from b");
}
// Uses dynamic parallelism: launches the kernel passed in `func` from device
// code, as one block containing one thread.
__global__
void kernel_launcher(void(*func)(void))
{
    func<<<1,1>>>();
}
// Chooses kernel `a` or `b` from the single command-line argument and hands
// its address to kernel_launcher. The kernel address is taken here, in host
// code, and then used inside a kernel -- this is the pattern being asked about.
int main(int argc, char **argv)
{
    assert(argc == 2);
    if (strcmp(argv[1], "a") == 0) {
        kernel_launcher<<<1,1>>>(a);
    } else {
        // Any argument other than "a" selects kernel b.
        kernel_launcher<<<1,1>>>(b);
    }
}
Is this possible to do? If not, what’s the alternative?
The problem is that the CUDA Programming Guide has this line:
"The address of a `__global__` function taken in host code cannot be used in device code (e.g. to launch the kernel)."
If you are using C++, you can create a `__device__` lambda that launches the inner kernel, and pass that lambda as an argument to the outer kernel:
// Child kernel: each thread prints its own thread index together with `n`,
// the value handed over by the launching kernel1 thread.
__global__ void kernel2(int n)
{
    const int self = threadIdx.x;
    printf("thread %d in kernel2 called by thread %d from kernel1\n", self, n);
}
// Parent kernel: every thread launches kernel2 from device code as one block
// of four threads, passing its own thread index as the argument.
__global__ void kernel1()
{
    const dim3 childGrid(1);
    const dim3 childBlock(4);
    kernel2<<<childGrid, childBlock>>>(threadIdx.x);
}
// Child kernel for the lambda-based variant: each thread prints its own
// thread index, the launching thread's value `n`, and the extra value `a`.
__global__ void kernel2_mod(int n, int a)
{
    const int self = threadIdx.x;
    printf("thread %d in kernel2_mod called by thread %d from kernel1_mod. a = %d\n", self, n, a);
}
// Generic parent kernel: every thread invokes the callable `f`, received by
// value as a kernel argument, with its own thread index.
template <class Func>
__global__ void kernel1_mod(Func f)
{
    f(threadIdx.x);
}
// Prints a diagnostic for a failed CUDA runtime call; returns true on success.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Runs both variants: first kernel1, which launches kernel2 directly, then
// kernel1_mod, which receives a __device__ lambda that launches kernel2_mod.
// Returns 0 on success and 1 as soon as any CUDA call reports an error.
int main(){
    kernel1<<<1,2>>>();
    // A kernel launch returns nothing; launch errors are read back explicitly.
    if (!cudaSucceeded(cudaGetLastError(), "kernel1 launch")) return 1;
    // Errors raised while the kernel runs are reported by the synchronizing call.
    if (!cudaSucceeded(cudaDeviceSynchronize(), "kernel1 execution")) return 1;

    int a = 42;
    // [=] copies `a` into the lambda object, which is then passed by value
    // to kernel1_mod as an ordinary kernel argument.
    auto lambda = [=] __device__(int n){
        kernel2_mod<<<1,4>>>(n,a);
    };
    kernel1_mod<<<1,2>>>(lambda);
    if (!cudaSucceeded(cudaGetLastError(), "kernel1_mod launch")) return 1;
    if (!cudaSucceeded(cudaDeviceSynchronize(), "kernel1_mod execution")) return 1;

    if (!cudaSucceeded(cudaDeviceReset(), "cudaDeviceReset")) return 1;
    return 0;
}
Compiled with: nvcc -std=c++14 -arch=sm_61 -rdc=true --expt-extended-lambda main.cu -o main