Hi, I am using this code to test transfer speed and optimizations, but I don't understand why the transfer fails when the data is larger than 1340 MB, even though CUDA doesn't report any error. I verify the transfer by comparing the data sent with the data received.
/*
 * Round-trip transfer test: fills a host buffer, copies it to the device,
 * copies it back into a second host buffer, and compares the two host
 * buffers element by element.
 *
 * Returns 0 when the run completes (a mismatch is reported on stdout/stderr),
 * or EXIT_FAILURE when a host buffer cannot be allocated.
 */
int main()
{
    // Sizes are kept in size_t so the byte count cannot wrap around the
    // 32-bit range when the element count is raised.
    const size_t nElements = (size_t)400 * 1024 * 1024;
    const size_t bytes = nElements * sizeof(float);

    // Host buffers: data sent to the device and data read back from it.
    float *Host_enviado = (float*)malloc(bytes);
    float *Host_recibido = (float*)malloc(bytes);

    // malloc failures are not CUDA errors, so checkCuda never sees them.
    // Two buffers of this size may not fit in the process address space
    // (e.g. a 32-bit build); stop here instead of writing through NULL.
    if (Host_enviado == NULL || Host_recibido == NULL) {
        fprintf(stderr,
                "Host allocation of 2 x %llu bytes failed\n",
                (unsigned long long)bytes);
        free(Host_enviado);   // free(NULL) is a no-op
        free(Host_recibido);
        return EXIT_FAILURE;
    }

    // Device buffer.
    float *device_dato = NULL;
    checkCuda( cudaMalloc((void**)&device_dato, bytes) );

    // Initialize: the send buffer holds its own index, the receive buffer
    // is zeroed. Indices above 2^24 are rounded when converted to float,
    // which is harmless here because the check compares the two buffers
    // against each other, not against the index.
    for (size_t i = 0; i < nElements; ++i) Host_enviado[i] = (float)i;
    memset(Host_recibido, 0, bytes);

    // Output device info and transfer size.
    cudaDeviceProp prop;
    checkCuda( cudaGetDeviceProperties(&prop, 0) );
    printf("\nDevice: %s\n", prop.name);
    printf("Transfer size (MB): %llu\n",
           (unsigned long long)(bytes / (1024 * 1024)));

    // Host -> device -> host round trip.
    checkCuda( cudaMemcpy(device_dato, Host_enviado, bytes, cudaMemcpyHostToDevice) );
    checkCuda( cudaMemcpy(Host_recibido, device_dato, bytes, cudaMemcpyDeviceToHost) );

    // Verify that what came back is what was sent; stop at the first mismatch.
    for (size_t i = 0; i < nElements; ++i) {
        if (Host_enviado[i] != Host_recibido[i]) {
            printf("*** transfers failed ***");
            fprintf(stderr,
                    "first mismatch at element %llu: sent %f, received %f\n",
                    (unsigned long long)i, Host_enviado[i], Host_recibido[i]);
            break;
        }
    }
    printf("\n");

    // Release the allocated memory.
    checkCuda( cudaFree(device_dato) );
    free(Host_enviado);
    free(Host_recibido);
    return 0;
}
Could anyone help me understand this problem? Thanks very much.