Hi all, I was wondering if someone could help me regarding using CudaEvents in combination with using the cudaStreamAddCallback() function. As far as I understand it, the callback function is used to call a host function once the work in the specified stream has finished its tasks.
So my thinking was, I create a struct which has a cudaEvent for start and stop times, and once the relevant task has been completed by the cudaStream we can calculate the time taken for task execution. This however is producing some strange results… and I was wondering if someone could guide me in the right direction.
The line which prints "cpu side is : " prints a value in milliseconds. but when the callBack function is called, and the elapsed time is calculated, it is in the order of (some number) e^-28.
struct timeStamp {
std::basic_string<char> functionId;
cudaEvent_t start;
cudaEvent_t end;
cudaStream_t stream;
float elapsedTimeMilli;
timeStamp(std::basic_string<char> functionid, cudaEvent_t s, cudaEvent_t e): functionId(functionid), start(s), end(e) {}
timeStamp(std::basic_string<char> functionid, cudaStream_t stream): functionId(functionid), stream(stream) {}
};
void streamCallback(cudaStream_t stream, cudaError_t status, void* data){
timeStamp* hostData = static_cast<timeStamp*>(data);
if(status == cudaSuccess) {
cudaEventElapsedTime(&hostData->elapsedTimeMilli, hostData->start, hostData->event);
}
std::cout << "The total time taken for the stream execution is : " << hostData->elapsedTimeMilli << " milliseconds for task "<< std::endl;
}
void ExecuteItem(size_t p, size_t deviceId, std::shared_ptr<cuda_stream> stream) {
float milliseconds;
timeStamp ts = timeStamp(typeid(*this).name(), stream->GetStream());
cudaEventCreate(&ts.start);
cudaEventCreate(&ts.end);
cudaEventRecord(ts.start, ts.stream);
//execute the following function using the stream and device ID
Execute(p, deviceId, stream);
cudaEventRecord(ts.end, ts.stream);
cudaEventSynchronize(ts.end);
cudaStreamAddCallback(ts.stream, streamCallback, (void*)&(ts), 0);
cudaEventElapsedTime(&milliseconds, ts.start,ts.end);
std::cout << "cpu side is : " << milliseconds << std::endl;
}