Can anyone tell me why generating random numbers with cuRAND is much slower (about 100 times) than MATLAB’s randn? Or am I doing something wrong?

Thank you.

My code:

// Fill a device buffer with N2 standard-normal floats, 100 times over, using
// the cuRAND host API.
//
// NOTE(review): N2 is assigned here without a visible declaration; it is
// presumably declared earlier in the file -- confirm.
N2=512*512;

// Device buffer that receives the generated samples.
float *q;
cudaMalloc( (void **) &q,sizeof(float)*N2);

curandGenerator_t gen;
curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT);

// Seed ONCE, outside the loop. Re-seeding resets the generator, and the next
// generate call then has to rebuild the generator state on the device; doing
// that on every iteration dominated the run time. The seed value is the one
// the original loop used on its first pass (j == 0).
curandSetPseudoRandomGeneratorSeed(gen, 0ULL);

for(int j=0;j<100;j++) {
    // Successive calls continue the same pseudorandom sequence, so every
    // iteration still receives fresh values without re-seeding.
    // Mean and stddev are float parameters, hence the 'f' suffixes.
    curandGenerateNormal(gen, q, N2, 0.0f, 1.0f);

    // … (per-iteration work on q, elided in the original post)
    // NOTE(review): if this loop is being timed from the host, make sure the
    // device work has completed (e.g. cudaDeviceSynchronize()) before the
    // timer is stopped -- confirm against the timing code.
}

curandDestroyGenerator(gen);