I got this code from the forum. It compiles, but when I run it the result is not what we want. Does anybody know what is wrong? Thanks!
#include <stdio.h>
#include <stdlib.h>
// Pointer to a function that takes two floats and returns a float.
typedef float (*op_func_t) (float, float);
// Returns the sum of x and y. Device-only: the __device__ qualifier (double
// underscores on both sides) is required; a bare `device` is not a CUDA keyword.
__device__ float add_func (float x, float y)
{
    return x + y;
}
// Returns the product of x and y. Device-only: the __device__ qualifier (double
// underscores on both sides) is required; a bare `device` is not a CUDA keyword.
__device__ float mul_func (float x, float y)
{
    return x * y;
}
// Static pointers to device functions.
// They must be __device__ variables: host code cannot take the address of a
// __device__ function directly, so the addresses are stored in device memory
// and read back on the host with cudaMemcpyFromSymbol.
__device__ op_func_t p_add_func = add_func;
__device__ op_func_t p_mul_func = mul_func;
// Calls the device function that `op` points to with the arguments (1, 2) and
// prints the result from the device.
//
// op must be the address of a __device__ function as seen by the device (for
// example, a value read from a __device__ function-pointer variable); a host
// function address is not valid here.
// Device-side printf and calls through function pointers need compute
// capability 2.0 or higher.
__global__ void kernel( op_func_t op )
{
    // Plain ASCII quotes are required for the format string, and float
    // literals avoid double-precision constants in device code.
    printf("Result: %f\n", ( *op )( 1.0f, 2.0f ) );
}
// Prints a diagnostic and terminates the program if a CUDA runtime call
// did not return cudaSuccess.
static void check_cuda( cudaError_t status, const char *what )
{
    if ( status != cudaSuccess )
    {
        fprintf( stderr, "%s failed: %s\n", what, cudaGetErrorString( status ) );
        exit( EXIT_FAILURE );
    }
}

// Reads the device-side function pointers back to the host, launches the
// kernel with the multiply function, and waits for it to finish.
// Returns EXIT_SUCCESS when every CUDA call succeeded; otherwise the process
// exits with EXIT_FAILURE after reporting which call failed.
int main()
{
    op_func_t h_add_func = NULL;
    op_func_t h_mul_func = NULL;

    // Copy device function pointer to host side
    check_cuda( cudaMemcpyFromSymbol( &h_mul_func, p_mul_func, sizeof( op_func_t ) ),
                "cudaMemcpyFromSymbol(p_mul_func)" );
    check_cuda( cudaMemcpyFromSymbol( &h_add_func, p_add_func, sizeof( op_func_t ) ),
                "cudaMemcpyFromSymbol(p_add_func)" );

    op_func_t d_myfunc = h_mul_func;
    kernel<<<1,1>>>( d_myfunc );

    // A kernel launch returns nothing itself; launch failures (bad
    // configuration, no kernel image for this GPU) show up here.
    check_cuda( cudaGetLastError(), "kernel launch" );

    // Waits for the kernel, reports faults raised while it ran, and flushes
    // the device-side printf buffer.
    // NOTE: cudaThreadSynchronize is kept for CUDA 3.2; from CUDA 4.0 on it
    // is deprecated in favour of cudaDeviceSynchronize.
    check_cuda( cudaThreadSynchronize(), "cudaThreadSynchronize" );

    return EXIT_SUCCESS;
}
I compiled the code for my GTX 460 with CUDA version 3.2:
$nvcc -arch=sm_21 -o 1 test.cu
$./1
$0
The result is 0, but what we want is 1.0 * 2.0 = 2.0.
I do not know what is wrong. Can anybody help me? Thanks!