CUDA Bug report

I seem to be encountering a CUDA bug:

[codebox]#include <stdio.h>

#include <unistd.h>

// Synchronizes with the device, then reports the most recent CUDA runtime
// error, if any. On error the message is printed to stdout and the process
// exits with status 1; on success the function simply returns.
void checkCUDAError() {
    cudaThreadSynchronize();

    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        return;
    }

    fprintf(stdout, "CUDA error: %s\n", cudaGetErrorString(status));
    exit(1);
}

// Device helper used to reproduce the miscompilation discussed in this thread.
// Reads p[0] and p[1], forms r2 = p[0]^2 + p[1]^2, and returns -rv where
// rv is -1.0f if (2.0f + r2 < 1.0f) and 0.0f otherwise. Because r2 is a sum
// of squares the comparison cannot be true, so the else branch is taken and
// the function returns -0.0f.
// NOTE: the statement sequence is kept exactly as posted; only the
// __device__ qualifier (stripped by the forum markup) has been restored.
__device__ float val(const float p[2]) {
    float dx = p[0];
    float r2 = dx*dx;
    dx = p[1];
    r2 += dx*dx;
    float rv;
    if (2.0f+r2 < 1.0f) {rv = -1.0f;} else {rv = 0.0f;}
    return -rv;
}

// Kernel: evaluates val() on a local two-element array holding {0, 0} and
// stores the result in results[0]. Every launched thread writes that same
// element, so results must point to at least one writable float.
// The __global__ qualifier (stripped by the forum markup) has been restored.
__global__ void doCalc(float *results) {
    float p[2];
    p[0] = 0;
    p[1] = 0;
    results[0] = val(p);
    return;
}

// Host driver: allocates one float on the device, launches doCalc with a
// single thread, copies the result back and prints it.
// Returns 0 on success; checkCUDAError() exits with status 1 on any CUDA error.
int main(int argc, char** argv) {
    float* d_results;
    cudaMalloc((void**)&d_results, sizeof(float));
    checkCUDAError();

    // One block of one thread.
    doCalc<<<1, 1>>>(d_results);
    checkCUDAError();

    float *h_results = new float[1];
    cudaMemcpy(h_results, d_results, sizeof(float), cudaMemcpyDeviceToHost);
    checkCUDAError();
    cudaFree(d_results);

    // Plain ASCII quotes: the typographic quotes in the posted text do not compile.
    fprintf(stdout, "result: %f\n", h_results[0]);

    // Release the host buffer allocated with new[].
    delete[] h_results;
    return 0;
}[/codebox]

The result should be 0 (or -0), but it’s 1. I see this behavior with toolkit versions 2.2 and 2.3 and driver versions 185.18.36 and 190.32 (beta). My system is a quad-core (2.83GHz) running RHEL 5.4. The GPU is a GTX295.

I get a strange result with your code too:

-bash-3.2$ nvcc test.cu
-bash-3.2$ ./a.out
result: 1.000000
-bash-3.2$ nvcc test.cu -G
-bash-3.2$ ./a.out
result: -0.000000

But when you put the device function code directly in the kernel, it works properly.

It’s a bug — reported to the compiler guys.

After another test, it also seems that the inline keyword helps for the device function…
That is strange, because I thought that nvcc automatically inlined device functions.

yes… unless you try this as your calling function:

[codebox]// Variant kernel: the point is stored as row 0 of a 1x2 local array holding
// {2, 0}; that row is passed to val() and the result is written to results[0].
// The __global__ qualifier (stripped by the forum markup) has been restored.
__global__ void doCalc(float *results) {
    float p[1][2];
    p[0][0] = 2;
    p[0][1] = 0;
    results[0] = val(p[0]);
}[/codebox]

that works for me:

[codebox]// Same computation as the original val(), but declared inline.
// Forms r2 = p[0]^2 + p[1]^2 and returns -rv, where rv is -1.0f if
// (2.0f + r2 < 1.0f) and 0.0f otherwise; since r2 is a sum of squares the
// comparison cannot be true, so the function returns -0.0f.
// The __device__ qualifier (stripped by the forum markup) has been restored;
// the statement sequence is otherwise exactly as posted.
__device__ inline float val(const float p[2]) {
    float dx = p[0];
    float r2 = dx*dx;
    dx = p[1];
    r2 += dx*dx;
    float rv;
    if (2.0f+r2 < 1.0f) {rv = -1.0f;} else {rv = 0.0f;}
    return -rv;
}

// Kernel paired with the inline val(): row 0 of a 1x2 local array holding
// {2, 0} is passed to val() and the result is written to results[0].
// The __global__ qualifier (stripped by the forum markup) has been restored.
__global__ void doCalc(float *results) {
    float p[1][2];
    p[0][0] = 2;
    p[0][1] = 0;
    results[0] = val(p[0]);
    return;
}[/codebox]

The execution returns -0.00000