Is there a way to have the kernel pause after executing a function on the gpu and allow it to complete?
Basically, that’s indeed what you must do :) (unless you’re doing async invocations…)
You have 3 options:
- Call cudaMemcpy (or the like), as you’ll probably want to copy the results from the kernel back to the CPU anyway.
  This will call cudaThreadSynchronize() for you.
- Call cudaThreadSynchronize(), which will block until the kernel exits.
- People usually write a macro to do this (check for errors, then synchronize):
/*
 * GPU_CHECK_ERR( iDeviceId, errorMessage )
 *
 * Place directly after a kernel launch. The macro:
 *   1. reads (and clears) the pending error with cudaGetLastError(), which
 *      is where launch-time failures show up;
 *   2. if there was none, blocks in cudaDeviceSynchronize() until the device
 *      has finished, which is where errors raised while the kernel was
 *      running show up;
 *   3. on any failure, formats a message with the caller's text, the source
 *      file/line and the CUDA error string/code, prints it to stdout, passes
 *      it to LogGPUData(), and terminates with exit(EXIT_FAILURE).
 *
 * Parameters:
 *   iDeviceId    - forwarded unchanged as the first argument of LogGPUData().
 *   errorMessage - C string inserted at the first %s of the report; it is
 *                  only evaluated when an error actually occurred.
 *
 * Requirements at the expansion site:
 *   - `pGPULogFile` and `LogGPUData` must be visible; the macro uses both
 *     without receiving them as arguments.
 *     NOTE(review): their declarations are not shown here -- confirm that
 *     LogGPUData accepts (iDeviceId, pGPULogFile, const char *).
 *   - <stdio.h> and <stdlib.h> for snprintf/printf/exit.
 *
 * Notes:
 *   - cudaDeviceSynchronize() is used instead of cudaThreadSynchronize(),
 *     which the CUDA runtime documents as deprecated.
 *   - The message is printed through a "%s" format so that a '%' inside the
 *     caller's text or the file path is never interpreted as a conversion.
 *   - snprintf() bounds the write to the buffer; an over-long message is
 *     truncated instead of overrunning it.
 *   - Locals carry a trailing underscore so they cannot capture identifiers
 *     named `err` or `buff` used in the macro arguments.
 */
#define GPU_CHECK_ERR( iDeviceId, errorMessage ) do {                          \
    cudaError_t gpuCheckErr_ = cudaGetLastError();                              \
    if( cudaSuccess == gpuCheckErr_ ) {                                         \
        gpuCheckErr_ = cudaDeviceSynchronize();                                 \
    }                                                                           \
    if( cudaSuccess != gpuCheckErr_ ) {                                         \
        char gpuCheckBuf_[ 1000 ];                                              \
        snprintf( gpuCheckBuf_, sizeof( gpuCheckBuf_ ),                         \
                  "Cuda error: %s in file '%s' in line %i : %s[%d].\n",         \
                  ( errorMessage ), __FILE__, __LINE__,                         \
                  cudaGetErrorString( gpuCheckErr_ ), (int)gpuCheckErr_ );      \
        printf( "%s", gpuCheckBuf_ );                                           \
        LogGPUData( ( iDeviceId ), pGPULogFile, gpuCheckBuf_ );                 \
        exit( EXIT_FAILURE );                                                   \
    }                                                                           \
} while (0)
eyal