Regarding the CUDA port of the STREAM benchmark, could someone please explain why the first iteration is skipped when calculating the average, min, and max times?
Here is the relevant section of code from mfatica’s port:
[codebox]/* — MAIN LOOP — repeat test cases NTIMES times — */
/* Scale factor handed to the STREAM_Scale and STREAM_Triad launches below. */
scalar=3.0f;
/* Run the four kernels back to back NTIMES times.  times[j][k] receives the
   elapsed time of kernel j on iteration k, where j follows launch order:
   0 = Copy, 1 = Scale, 2 = Add, 3 = Triad.
   Each measurement uses the same pattern: store the start timestamp in the
   slot, launch the kernel, wait for the device, then overwrite the slot with
   (end timestamp - start timestamp).
   NOTE(review): mysecond() is defined outside this excerpt; presumably it
   returns wall-clock time in seconds -- confirm.
   NOTE(review): cudaThreadSynchronize() is deprecated in later CUDA toolkits
   in favour of cudaDeviceSynchronize(); its return value and any launch
   error are not checked here. */
for (k=0; k<NTIMES; k++)
{
/* Copy: launched with (d_a, d_c, N). */
times[0][k]= mysecond();
STREAM_Copy<<<dimGrid,dimBlock>>>(d_a, d_c, N);
/* Kernel launches return before the kernel has finished, so the host must
   wait here; otherwise only the launch overhead would be timed. */
cudaThreadSynchronize();
times[0][k]= mysecond() - times[0][k];
/* Scale: launched with (d_b, d_c, scalar, N). */
times[1][k]= mysecond();
STREAM_Scale<<<dimGrid,dimBlock>>>(d_b, d_c, scalar, N);
cudaThreadSynchronize();
times[1][k]= mysecond() - times[1][k];
/* Add: launched with (d_a, d_b, d_c, N). */
times[2][k]= mysecond();
STREAM_Add<<<dimGrid,dimBlock>>>(d_a, d_b, d_c, N);
cudaThreadSynchronize();
times[2][k]= mysecond() - times[2][k];
/* Triad: launched with (d_b, d_c, d_a, scalar, N). */
times[3][k]= mysecond();
STREAM_Triad<<<dimGrid,dimBlock>>>(d_b, d_c, d_a, scalar, N);
cudaThreadSynchronize();
times[3][k]= mysecond() - times[3][k];
}
/* --- SUMMARY ---
   Reduce the per-iteration timings collected in times[j][k] to a total,
   minimum and maximum per kernel (j = 0..3), then print one line per kernel.

   The reduction starts at k = 1, so times[j][0] never contributes to any
   statistic.  NOTE(review): the first pass is presumably excluded because it
   can include one-time start-up costs (e.g. first-launch overhead) that would
   distort the average and maximum -- confirm against the benchmark's run
   rules.

   avgtime[], mintime[] and maxtime[] are initialised outside this excerpt;
   this code assumes avgtime[] starts at zero and mintime[]/maxtime[] start at
   values suitable for a running MIN/MAX -- TODO confirm. */
for (k=1; k<NTIMES; k++) /* note -- skip first iteration */
{
for (j=0; j<4; j++)
{
avgtime[j] = avgtime[j] + times[j][k];   /* running sum; divided below */
mintime[j] = MIN(mintime[j], times[j][k]);
maxtime[j] = MAX(maxtime[j], times[j][k]);
}
}
/* Header line.  The string delimiters must be plain ASCII double quotes;
   typographic quotes are not valid C and break compilation.
   NOTE(review): the column spacing inside the header may have been collapsed
   when the code was pasted -- compare with the original source. */
printf("Function Rate (MB/s) Avg time Min time Max time\n");
for (j=0; j<4; j++) {
/* Only NTIMES-1 samples were summed above, hence the divisor. */
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
/* The reported rate is derived from the fastest iteration (mintime), with
   bytes[j] scaled by 1.0E-06. */
printf("%s%11.4f %11.4f %11.4f %11.4f\n", label[j],
1.0E-06 * bytes[j]/mintime[j],
avgtime[j],
mintime[j],
maxtime[j]);
}
[/codebox]