Hello all,
I am trying to use a flag to check whether the kernel execution has finished or not.
The following is my code.
bool* isFinished ;
__device__ bool* flagTrue ;
cutilSafeCall( cudaMalloc((void**)&flagTrue, sizeof(bool)) ) ;
cutilSafeCall( cudaMemset(flagTrue, 1, sizeof(bool) ) ) ;
cutilSafeCall( cudaHostAlloc((void**)&isFinished, sizeof(bool), cudaHostAllocDefault ) ) ;
isFinished[0] = false ;
cutilSafeCallNoSync(
cudaMemcpyAsync( isFinished, flagTrue, sizeof(bool), cudaMemcpyDeviceToHost, 0 ) ) ;
while ( !isFinished[0] ) {
// Waiting loop
}
printf("!") ;
However, isFinished[0] is never changed to true.
When I use cudaMemcpy instead, the code works well.
What is the problem with my code? (I use Windows 7 and a GTX 285.)
-
I found a way to make the code run.
That is to use cudaEvent.
Before the start of the code, add
float elapsedTime ;
cudaEvent_t start, stop ;
cudaEventCreate(&start) ; cudaEventCreate(&stop) ;cudaEventRecord(start, 0) ;
After the code, add
cudaEventRecord(stop,0) ;
cudaEventSynchronize(stop) ;
cudaEventElapsedTime(&elapsedTime, start, stop) ;
printf("Kernel time: %f ms \n", elapsedTime );
cudaEventDestroy(start) ; cudaEventDestroy(stop) ;
Then the code works.
What is the problem with the original code?