First-time CUDA user. I don't understand where the numerical error is

I just started experimenting with CUDA using the following code:

[codebox]

#include "macro.hpp"

#include <iostream>
#include <algorithm>
#include <cstdlib>

//#define double float
//#define double int

// Produces values in [0, 1000).
int RandomNumber() { return static_cast<int>(rand() % 1000); }

__global__ void sum3(double const* a,
                     double const* b,
                     double const* c,
                     double* result,
                     unsigned const* n)
{
    // One element per block, striding by the grid size so that every
    // i < *n is covered and the loop terminates.
    unsigned i = blockIdx.x;

    while (i < (*n))
    {
        result[i] = a[i] + b[i] + c[i];
        i += gridDim.x;  // advance to the next element handled by this block
    }
}

template <typename T>
void print_array(T const* arr, int size)
{
    for (int i = 0; i < size; ++i)
    {
        std::cout << arr[i] << std::endl;
    }
}

int main()
{
    static unsigned size = 1e2;

    srand(0);

    // Host buffers must hold size elements; plain "new double" allocates one.
    double* a = new double[size];
    double* b = new double[size];
    double* c = new double[size];
    double* result = new double[size];

    std::generate(a, a + size, RandomNumber);
    std::generate(b, b + size, RandomNumber);
    std::generate(c, c + size, RandomNumber);

    // Each declarator needs its own asterisk; "double* ad, bd, cd;" would
    // declare one pointer and two plain doubles.
    double *ad, *bd, *cd;
    double* resultd;
    unsigned* sized;

    std::cout << cudaMalloc((void**) &ad, size * sizeof(double)) << std::endl;
    std::cout << cudaMalloc((void**) &bd, size * sizeof(double)) << std::endl;
    std::cout << cudaMalloc((void**) &cd, size * sizeof(double)) << std::endl;
    std::cout << cudaMalloc((void**) &resultd, size * sizeof(double)) << std::endl;
    std::cout << cudaMalloc((void**) &sized, sizeof(unsigned)) << std::endl;

    // Copy into the device buffer itself; the destination is the pointer's
    // value, not its host-side address.
    cudaMemcpy(sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

    // print_array(a, size);

    for (int i = 0; i < 1000; ++i)
    {
        cudaMemcpy(ad, a, size * sizeof(double), cudaMemcpyHostToDevice);
        cudaMemcpy(bd, b, size * sizeof(double), cudaMemcpyHostToDevice);
        cudaMemcpy(cd, c, size * sizeof(double), cudaMemcpyHostToDevice);

        sum3<<<size, 1>>>(ad, bd, cd, resultd, sized);

        cudaMemcpy(result, resultd, size * sizeof(double), cudaMemcpyDeviceToHost);
    }

#ifdef PRINT
    for (int i = 0; i < size; ++i)
    {
        std::cout << a[i] << ", " << b[i] << "," << c[i] << "," << result[i] << std::endl;
    }
#endif

    cudaFree(ad);
    cudaFree(bd);
    cudaFree(cd);
    cudaFree(resultd);
    cudaFree(sized);

    // new[] allocations need delete[], not delete.
    delete[] a;
    delete[] b;
    delete[] c;
    delete[] result;

    return 0;
}

[/codebox]

This compiles on my MacBook Pro without any problem. However, when I try to run it I get:

[codebox]

930, 22,538,899
691, 832,205,23
415, 655,148,120
872, 876,481,985
761, 909,583,619
841, 104,466,917
610, 635,911,52
//more useless numbers

[/codebox]

I have compared my code with the samples in CUDA by Example and I don't see any major difference except the types. Any pointers on this problem are appreciated.
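
One thing the code above never does is check the kernel launch for errors, so a failed launch would silently leave resultd uninitialized and fill result with garbage on the copy back. A minimal sketch of such a check, using the standard cudaGetLastError / cudaGetErrorString / cudaDeviceSynchronize runtime calls (checkCudaError is just an illustrative helper name):

[codebox]
#include <iostream>
#include <cuda_runtime.h>

// Print any pending CUDA error; call right after a kernel launch.
void checkCudaError(const char* where)
{
    cudaError_t err = cudaGetLastError();   // launch-time failures
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();      // execution-time failures
    if (err != cudaSuccess)
        std::cerr << where << ": " << cudaGetErrorString(err) << std::endl;
}

// Usage inside the loop above:
//   sum3<<<size, 1>>>(ad, bd, cd, resultd, sized);
//   checkCudaError("sum3");
[/codebox]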

It is probably because your GPU doesn’t support double precision.

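Double precision requires compute capability 1.3 or higher, and as far as I know older toolkits demote double to float (with a compiler warning) on earlier devices. A minimal sketch of how to check what the MacBook Pro's GPU reports, using cudaGetDeviceProperties:

[codebox]
#include <iostream>
#include <cuda_runtime.h>

int main()
{
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, 0);  // query device 0

    std::cout << prop.name << ": compute capability "
              << prop.major << "." << prop.minor << std::endl;

    // Double precision support starts at compute capability 1.3.
    if (prop.major == 1 && prop.minor < 3)
        std::cout << "This device has no double precision support." << std::endl;

    return 0;
}
[/codebox]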