Hi,
I want to know how to time kernel execution inside a loop. For example, I am using the following code, and I want to get the overall running time of all kernel invocations.
// Measure the total time of the GPU work issued below (kernel launches and copies)
// with a pair of CUDA events: record `start`, issue the work, record `stop`,
// wait for `stop`, then read the elapsed time between the two events.
// NOTE(review): none of the CUDA API return values are checked here, and no
// cudaGetLastError() follows the launches — a failed call would go unnoticed.
cudaEventCreate(&start);
cudaEventCreate(&stop);
// start timer: record the start event on stream 0 before any timed work is issued
cudaEventRecord(start,0);
// execute the kernel
kernel<<< grid, threads >>>(.......);
kernel_1<<< blocksPerGrid, threadsPerBlock>>>(.....);
cudaMemcpy(A, B, mem_size_B, cudaMemcpyDeviceToDevice);
// Iterations k = 2 .. v1-1: one kernel launch followed by one device-to-device copy each.
for(k=2;k<v1;k++)
{
// execute the kernel
kernel<<< grid1, threads >>>(.......);
// NOTE(review): this copy uses dA/dB while the copy before the loop uses A/B with
// the same mem_size_B — confirm both sets of names are intended.
cudaMemcpy(dA, dB, mem_size_B, cudaMemcpyDeviceToDevice);
}
kernel<<< grid2, threads >>>(.........);
cudaMemcpy(......);
// stop timer: record the stop event on stream 0 after all timed work has been issued
cudaEventRecord(stop,0);
// Block the host until the stop event has completed, so the elapsed time is valid.
cudaEventSynchronize(stop);
// Writes the time between the two events into `time`
// (milliseconds, per the CUDA runtime documentation).
cudaEventElapsedTime(&time,start,stop);
// Release both events once the measurement has been read.
cudaEventDestroy(start);
cudaEventDestroy(stop);
Regards,
Kashif