Hi,
I have the following code, and I would like to understand why I need to copy the numbers from GPU to CPU in order to print d_ox. If I don’t copy it back, the program aborts the execution. What I am trying to do is to generate the random numbers on GPU for future use on my kernel. So I think it is not necessary to use the numbers on CPU.
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <curand.h>
#define CUDA_CALL(x) do { if((x)!=cudaSuccess) {
printf(“Error at %s:%d\n”,FILE,LINE);
return EXIT_FAILURE;}} while(0)
#define CURAND_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) {
printf(“Error at %s:%d\n”,FILE,LINE);
return EXIT_FAILURE;}} while(0)
int main(int argc, char *argv)
{
size_t n = 10000;
size_t i;
curandGenerator_t gen;
double *devData, *hostData, *d_ox;
cudaMallocManaged(&d_ox,n*sizeof(double));
d_ox[0]=1.0;
/* Allocate n floats on host */
hostData = (double *)calloc(n, sizeof(double));
/* Allocate n floats on device */
CUDA_CALL(cudaMalloc((void **)&devData, n*sizeof(double)));
/* Create pseudo-random number generator */
CURAND_CALL(curandCreateGenerator(&gen,
CURAND_RNG_PSEUDO_MT19937));
/* Set seed */
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen,
1234ULL));
/* Generate n floats on device */
CURAND_CALL(curandGenerateUniformDouble(gen, devData, n));
/* Copy device memory to host */
//If I comment/remove the code below the program aborts
CUDA_CALL(cudaMemcpy(hostData, devData, n * sizeof(double),
cudaMemcpyDeviceToHost));
// /* Show result */
// for(i = 0; i < n; i++) {
// printf(“%1.4f “, hostData[i]);
// }
printf(”\n”);
printf("%lf\n",d_ox[0]);
/* Cleanup */
CURAND_CALL(curandDestroyGenerator(gen));
CUDA_CALL(cudaFree(devData));
free(hostData);
cudaFree(d_ox);
return EXIT_SUCCESS;
}
Thanks