when i have 0xffffffff floats in an array, cudaMemcpy fails. Does it have a maximum size that its allowed to copy? cudaMalloc is able to handle that size but not cudaMemcpy?
Thanks in advance
int main(void)
{
float* a_h, b_h; // pointers to host memory
float a_d; // pointer to device memory
int i, N = 0xffffffff;
// allocate arrays on host
a_h = (float*)malloc(sizeof(float)*N);
b_h = (float*)malloc(sizeof(float)*N);
//allocate array on device
CUDA_SAFE_CALL(cudaMalloc((void**)&a_d, sizeof(float)*N));
// initialization of host data
for (i=0; i<N; i++) a_h[i] = (float)i;
//copy data from host to device
CUDA_SAFE_CALL(cudaMemcpy(a_d, a_h, sizeof(float)*N, cudaMemcpyHostToDevice));
// do calculation on host
incrementArrayOnHost(a_h, N);
//check assert to see if we get results expected
for (i=0; i<N; i++) assert(a_h[i] == i+1);
/*
// do calculation on device:
// Part 1 of 2. Compute execution configuration
int blockSize = 4;
int nBlocks = N/blockSize + (N%blockSize == 0?0:1);
// Part 2 of 2. Call incrementArrayOnDevice kernel
incrementArrayOnDevice <<< nBlocks, blockSize >>> (a_d, N);
// Retrieve result from device and store in b_h
cudaMemcpy(b_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// check results
for (i=0; i<N; i++) assert(a_h[i] == b_h[i]);
*/
// cleanup
free(a_h); free(b_h); CUDA_SAFE_CALL(cudaFree(a_d));
system("pause");
}