Hi all,
I was timing my program, and I found a strange behavior using cudaMemcpy:
I first had something like this:
gettimeofday(&t_ini,NULL);
112 cudaMemcpy (nodosExp, nodosExp_d, N*sizeof(int), cudaMemcpyDeviceToHost);
113 gettimeofday(&t_fin,NULL);
114
115 secs = timeval_diff(&t_fin,&t_ini);
116 printf(“Tiempo de ejecucion memcpy 1: %.6g ms.\n\n”,secs*1000);
117
118
119 gettimeofday(&t_ini,NULL);
120 cudaMemcpy (Sdec, Sdec_d, m*N*sizeof(float), cudaMemcpyDeviceToHost);
121 gettimeofday(&t_fin,NULL);
122
123 secs = timeval_diff(&t_fin,&t_ini);
124 printf(“Tiempo de ejecucion memcpy 2: %.6g ms.\n\n”,secs*1000);
getting:
Tiempo de ejecucion memcpy 1: 28.2121 ms.
Tiempo de ejecucion memcpy 2: 0.0162125 ms.
So I thought it had something to do with copying integers, but after a few tests I’ve seen that whatever I copy from device to host at that point takes that long. For example, if I comment out the first cudaMemcpy:
/* gettimeofday(&t_ini,NULL);
112 cudaMemcpy (nodosExp, nodosExp_d, N*sizeof(int), cudaMemcpyDeviceToHost);
113 gettimeofday(&t_fin,NULL);
114
115 secs = timeval_diff(&t_fin,&t_ini);
116 printf(“Tiempo de ejecucion memcpy 1: %.6g ms.\n\n”,secs*1000);
117 */
118
119 gettimeofday(&t_ini,NULL);
120 cudaMemcpy (Sdec, Sdec_d, m*N*sizeof(float), cudaMemcpyDeviceToHost);
121 gettimeofday(&t_fin,NULL);
122
123 secs = timeval_diff(&t_fin,&t_ini);
124 printf(“Tiempo de ejecucion memcpy 2: %.6g ms.\n\n”,secs*1000);
Now I get:
Tiempo de ejecucion memcpy 2: 28.203 ms.
Any idea why this happens? Thanks.