class cudaStopWatch { private: cudaStream_t stream; /* the stream the clocks are placed in */ cudaEvent_t * eventStack; /* this eventStack holds all timers */ unsigned int * timerStack; unsigned short int maxEvents; /* maximum number of timers */ unsigned short int curEvent; /* points to current watch */ unsigned short int maxTimers; /* maximum number of timers */ unsigned short int curTimer; /* points to current watch */ float last; /* holds the last time */ public: /* ctor - init with maximum number of concurrent timers optional: specify stream (default 0) */ cudaStopWatch(unsigned short int Max, cudaStream_t Stream = 0) { stream = Stream; maxEvents = Max; maxTimers = Max; curEvent = 0; last = -1.0f; eventStack = (cudaEvent_t *) malloc(sizeof(cudaEvent_t) * maxEvents); timerStack = (unsigned int *) malloc(sizeof(unsigned int) * maxTimers); } ~cudaStopWatch() /* dtor - free eventStack */ { free(eventStack); } void tStart() /* start a new timer */ { if(curEvent < maxEvents) { cudaThreadSynchronize(); cutCreateTimer(&timerStack[curTimer]); cutStartTimer(timerStack[curTimer]); curTimer++; } } float tStop() /* stop the last timer */ { if(curTimer > 0 && curTimer <= maxTimers) { curTimer--; cudaThreadSynchronize(); cutStopTimer(timerStack[curTimer]); last = cutGetTimerValue(timerStack[curTimer]); cutDeleteTimer(timerStack[curTimer]); return last; } return -1.0f; } void sStart() /* start a new stream timer */ { if(curEvent < maxEvents) { cudaEventCreate(&eventStack[curEvent]); cudaEventRecord(eventStack[curEvent], stream); curEvent++; } } float sStop() /* stop the last stream timer */ { if(curEvent > 0 && curEvent <= maxEvents) { curEvent--; cudaEvent_t stop; cudaEventCreate(&stop); cudaEventRecord(stop, stream); cudaEventSynchronize(stop); cudaEventElapsedTime(&last, eventStack[curEvent], stop); return last; } return -1.0f; } float getLast() /* get the last time */ { return last; } };