I tried this:
// Times one tracemain() run with a pair of CUDA events recorded on stream 0.
// Usage: <program> <n>  -- <n> is parsed with atoi() and handed to tracemain().
// Returns 0 on success and 1 when the command-line argument is missing.
int main(int argc, char *argv[]){
    if (argc < 2) {
        // Without this guard atoi() would be handed a NULL/out-of-range argv[1].
        fprintf(stderr, "usage: %s <n>\n", argc > 0 ? argv[0] : "program");
        return 1;
    }

    float elapsed_ms = 0.0f; // cudaEventElapsedTime reports milliseconds
    cudaEvent_t start, end;
    gpuErrchk(cudaEventCreate(&start));
    gpuErrchk(cudaEventCreate(&end));
    gpuErrchk(cudaEventRecord(start, 0));

    tracemain(atoi(argv[1]));
    //tracemain() contains functions
    //createContext(), createMaterial(), createGeometry() ...
    //and a function that used to be the main function launching optix

    // NOTE(review): tracemain() is not visible here. If it creates, switches or
    // tears down the CUDA context/device (e.g. during OptiX context setup, or via
    // cudaSetDevice/cudaDeviceReset), `start` and `end` would belong to a context
    // that is no longer current, which is presumably why the next call can fail
    // with "invalid resource handle" -- TODO confirm. If so, create the events
    // only after that context exists, or time with a host clock after
    // cudaDeviceSynchronize().
    gpuErrchk(cudaEventRecord(end, 0));
    gpuErrchk(cudaEventSynchronize(end)); // wait until `end` has actually been reached
    gpuErrchk(cudaEventElapsedTime(&elapsed_ms, start, end));

    printf("time elapsed: %g us\n", elapsed_ms*1000);
    // The raw value is in milliseconds, so label it as such.
    printf("time cost %g ms\n", elapsed_ms);

    // Release the event handles instead of leaking them until process exit.
    gpuErrchk(cudaEventDestroy(start));
    gpuErrchk(cudaEventDestroy(end));
    return 0;
}
But I got the error “invalid resource handle” at the line
gpuErrchk(cudaEventRecord(end,0));
Why does this occur?
I also tried:
// Times one tracemain() run with the host-side clock() and prints the result
// in microseconds.
// Usage: <program> <n>  -- <n> is parsed with atoi() and handed to tracemain().
// Returns 0 on success, 1 when the argument is missing or the device reports
// an error while synchronizing.
int main(int argc, char *argv[]){
    if (argc < 2) {
        // Without this guard atoi() would be handed a NULL/out-of-range argv[1].
        fprintf(stderr, "usage: %s <n>\n", argc > 0 ? argv[0] : "program");
        return 1;
    }

    clock_t tstart, tend;
    float cpu_time;

    // NOTE: clock() reports processor time used by the program, not wall-clock
    // time, so time the host spends blocked waiting on the GPU may be
    // under-reported on some platforms.
    tstart = clock();
    tracemain(atoi(argv[1]));

    // Wait for all previously issued device work before taking the second
    // sample; otherwise work still queued on the GPU would not be included.
    // The wait happens once, after the measured work, so it only delimits the
    // measurement. The status is checked so a failed run is not reported as a
    // valid timing.
    cudaError_t sync_status = cudaDeviceSynchronize();
    tend = clock();
    if (sync_status != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n",
                cudaGetErrorString(sync_status));
        return 1;
    }

    cpu_time = (float)(tend-tstart)/CLOCKS_PER_SEC*1000000.f; // seconds -> microseconds
    printf("time cost %g us\n",cpu_time);
    return 0;
}
It works well. But I’m not sure how cudaDeviceSynchronize() affects program performance.
Any ideas?
Thanks!