How can I use a __device__ function pointer in CUDA?

[Clarified the wording of the initial paragraph in a later edit – nj]

Function pointers for device functions are supported in CUDA 3.2 on sm_2x platforms, based on the ABI that was introduced with CUDA 3.1. Here is a very simple app showing that device function pointers work exactly the same way as function pointers in host code.

#include <stdio.h>

#include <stdlib.h>

/* Number of elements in each of the operand and result arrays used by the demo kernel. */
#define N 5

/* Device-side binary operation: returns the sum of x and y. */
__device__ float add_func (float x, float y)
{
    const float sum = x + y;
    return sum;
}

/* Device-side binary operation: returns the product of x and y. */
__device__ float mul_func (float x, float y)
{
    const float product = x * y;
    return product;
}

/* Device-side binary operation: returns the quotient x / y (no check for y == 0). */
__device__ float div_func (float x, float y)
{
    const float quotient = x / y;
    return quotient;
}

/* Pointer to a binary float operation taking two floats and returning a float. */
typedef float (*op_func) (float, float);

/*
 * Device-resident dispatch table of the available operations.
 * Index 0 = add, 1 = mul, 2 = div.
 */
__device__ op_func func[3] = { add_func, mul_func, div_func };

/*
 * Printable names of the operations, in the same order as func[].
 * The pointee is const because string literals must not be modified;
 * binding them to plain char* is a deprecated (C++03) / ill-formed (C++11)
 * conversion.
 */
__device__ const char* op_name[3] = { "add", "mul", "div" };

/*
 * Applies one binary operation element-wise to two arrays.
 *
 *   a, b : input arrays, each with at least n readable elements
 *   res  : output array with room for at least n elements;
 *          res[i] = func[op](a[i], b[i])
 *   op   : index into the func[] dispatch table; it is used directly as an
 *          array index with no range check, so the caller must pass a valid one
 *   n    : number of elements to process
 *
 * The loop is bounded by the parameter n (not the file-level macro N), so the
 * function honours whatever length the caller passes.
 */
__device__ void op_array (const float *a, const float *b, float *res, int op, int n)
{
    /* The selected operation does not change during the loop; look it up once. */
    const op_func apply = func[op];

    for (int i = 0; i < n; i++) {
        res[i] = apply(a[i], b[i]);
    }
}

/*
 * Demo kernel: builds two N-element operand arrays, then runs every entry of
 * the operation table over them through op_array and prints the results with
 * device-side printf.
 *
 * No thread or block indices are used, so every launched thread performs the
 * same work and prints the same output.
 */
__global__ void kernel (void)
{
    float lhs[N], rhs[N], out[N];

    /* Fill both operands in one pass: lhs = 10, 11, ...; rhs = 100, 101, ... */
    for (int k = 0; k < N; ++k) {
        lhs[k] = (float)(10 + k);
        rhs[k] = (float)(100 + k);
    }

    /* Walk the three table entries in order, printing a header and N results each. */
    int op = 0;
    while (op < 3) {
        printf ("\nop=%s\n", op_name[op]);

        op_array (lhs, rhs, out, op, N);

        for (int k = 0; k < N; ++k) {
            printf ("res = % 16.9e\n", out[k]);
        }

        ++op;
    }
}

/*
 * Host entry point: launches the demo kernel with a single thread, waits for
 * it to finish, and reports any CUDA error.
 *
 * Returns EXIT_SUCCESS when the launch and execution succeed, EXIT_FAILURE
 * otherwise.
 */
int main (void)
{
    cudaError_t err;

    kernel<<<1,1>>>();

    /* A kernel launch yields no status itself; launch failures (e.g. an
       unsupported architecture) are reported through cudaGetLastError(). */
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf (stderr, "kernel launch failed: %s\n", cudaGetErrorString (err));
        return EXIT_FAILURE;
    }

    /* Block until the kernel has finished; this is also the point at which
       errors raised during execution are returned and device printf output is
       flushed. cudaThreadSynchronize is kept for compatibility with the CUDA
       3.x toolkits this sample targets; later toolkits deprecate it in favour
       of cudaDeviceSynchronize. */
    err = cudaThreadSynchronize();
    if (err != cudaSuccess) {
        fprintf (stderr, "kernel execution failed: %s\n", cudaGetErrorString (err));
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}

The build and run log (Linux64, C2050) looks as follows:

~ $ nvcc -arch=sm_20 -o funcptr funcptr.cu

~ $ funcptr

op=add

res =  1.100000000e+02

res =  1.120000000e+02

res =  1.140000000e+02

res =  1.160000000e+02

res =  1.180000000e+02

op=mul

res =  1.000000000e+03

res =  1.111000000e+03

res =  1.224000000e+03

res =  1.339000000e+03

res =  1.456000000e+03

op=div

res =  1.000000015e-01

res =  1.089108884e-01

res =  1.176470593e-01

res =  1.262135953e-01

res =  1.346153915e-01