I’m having problems with cudaEventQuery not behaving as expected in device emulation: code that works correctly when the GPU is the target does not behave the same way when compiled for device emulation.
[codebox]arf@arf-desktop:~/test$ nvcc -deviceemu test.cu
arf@arf-desktop:~/test$ ./a.out
Query unrecorded event: no error
Query recorded but not occured event: no error
Query recorded and occured event: no error
arf@arf-desktop:~/test$ nvcc test.cu
arf@arf-desktop:~/test$ ./a.out
Query unrecorded event: device not ready
Query recorded but not occured event: device not ready
Query recorded and occured event: no error[/codebox]
Basically, cudaEventQuery in device emulation (the above compiled with -deviceemu) always returns cudaSuccess.
The code to reproduce:
[codebox]#include <cuda_runtime_api.h>
#include <assert.h>
#include <stdio.h>
/*
 * Kernel with an intentionally empty body; it takes no arguments and
 * touches no memory. It exists only so that a launch can be issued.
 */
__global__ void emptyKernel(void) {
}
/*
 * Reproduction driver for the cudaEventQuery behaviour under test.
 *
 * Creates one event on device 0 and prints the status string returned by
 * cudaEventQuery at three points:
 *   1. before the event has been recorded,
 *   2. immediately after recording it on stream 0,
 *   3. after launching emptyKernel and synchronizing stream 0.
 *
 * Every other runtime call is checked with assert(); the call itself is
 * kept outside the assert expression so it still runs when NDEBUG is set.
 *
 * Returns 0 on completion.
 */
int main(void) {
    cudaEvent_t event;
    cudaError_t err;

    err = cudaSetDevice(0);
    assert(cudaSuccess == err);

    err = cudaEventCreate(&event);
    assert(cudaSuccess == err);

    /* Query an event that hasn't been recorded */
    err = cudaEventQuery(event);
    printf("Query unrecorded event: \t\t%s\n", cudaGetErrorString(err));

    /* Record the event */
    err = cudaEventRecord(event, 0);
    assert(cudaSuccess == err);

    /* Query the event again, we now expect cudaErrorNotReady */
    err = cudaEventQuery(event);
    printf("Query recorded but not occured event: \t%s\n", cudaGetErrorString(err));

    /* Do some work in the stream */
    emptyKernel<<< 1, 1>>>();

    /* A launch does not return a status itself; fetch it explicitly. */
    err = cudaGetLastError();
    assert(cudaSuccess == err);

    /* Wait for stream 0 to drain and check that nothing failed meanwhile. */
    err = cudaStreamSynchronize(0);
    assert(cudaSuccess == err);

    /* Query the event again, we now expect cudaSuccess */
    err = cudaEventQuery(event);
    printf("Query recorded and occured event: \t%s\n", cudaGetErrorString(err));

    /* Release the event created above. */
    err = cudaEventDestroy(event);
    assert(cudaSuccess == err);

    return 0;
}
[/codebox]