I'm experiencing inconsistent behaviour while running my code with CUDA 8 on a GTX 1080. If I run the following code on my GTX 1080 (Ubuntu 16.04, CUDA 8):
/**
 * Kernel: stores `val` into element `index` of the array `ptr`.
 *
 * The thread/block indices are not consulted, so every launched thread
 * performs the same single store. No bounds check is made; the caller must
 * pass an index that lies inside the allocation behind `ptr`.
 */
__global__
void setValue(int *ptr, int index, int val)
{
    int *slot = ptr + index;
    *slot = val;
}
/**
 * Allocates `size` ints of managed memory, zeroes them on the host, launches
 * setValue<<<1,1>>> to store 5 at index size/2, waits for the device, prints
 * one "index value" line per element and releases the buffer.
 *
 * @param size  Number of int elements. Non-positive values are ignored: a
 *              zero-byte managed allocation is not usable and index size/2
 *              would fall outside the buffer.
 *
 * NOTE(review): CudaSafeCall / CudaCheckError are defined outside this block;
 * presumably they report CUDA API and launch errors - confirm they are not
 * compiled out in this build, otherwise failures pass silently.
 */
void foo(int size)
{
    // Nothing to allocate or print, and size/2 would not be a valid index.
    if (size <= 0)
        return;

    // Compute the byte count once, in size_t, so the arithmetic is unsigned
    // and identical for the allocation and the memset.
    const size_t bytes = static_cast<size_t>(size) * sizeof(int);

    int *data = NULL;
    CudaSafeCall(cudaMallocManaged(&data, bytes));

    // Managed memory is host-accessible, so a plain memset is valid here
    // (it runs before any kernel touches the buffer).
    memset(data, 0, bytes);

    setValue<<<1,1>>>(data, size/2, 5);
    CudaCheckError();

    // The launch is asynchronous: wait for the kernel before the host reads data.
    CudaSafeCall(cudaDeviceSynchronize());

    //useData(data);
    for (int i = 0; i < size; i++)
        printf("%d %d\n", i, data[i]);  // both arguments are int, so %d (not %u)

    CudaSafeCall(cudaFree(data));
}
/**
 * Program entry point: runs foo on a five-element buffer.
 */
int main()
{
    const int elementCount = 5;
    foo(elementCount);
    return 0;
}
I get the following output:
0 0
1 0
2 0
3 0
4 0
whereas if I run it on a machine with a GeForce GT 740 and CUDA 7, I get:
0 0
1 0
2 5
3 0
4 0
I really cannot explain this behaviour. I'm checking for CUDA errors, but everything is reported as OK even in the faulty case. Any hints on how I can debug this behaviour?