Allow kernel to wait for completion of gpu code

Is there a way to have the kernel pause after executing a function on the gpu and allow it to complete?

Basically, that's exactly what you must do :) (unless you're doing asynchronous invocations…)

You have 3 options:

  1. Call cudaMemcpy (or the like), as you'll probably want to copy the kernel's results back to the CPU anyway.

    A blocking cudaMemcpy waits for the kernel to finish before copying, so it has the same effect as calling cudaThreadSynchronize() yourself.

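  2. Call cudaThreadSynchronize(), which blocks the host until the kernel has finished. (In newer CUDA releases this call is deprecated in favor of cudaDeviceSynchronize().)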

  3. Wrap the error check and the synchronization in a macro, as people usually do:

    /*
     * GPU_CHECK_ERR( iDeviceId, errorMessage )
     *
     * Use right after a kernel launch. The macro
     *   1. reads (and clears) the pending error with cudaGetLastError(), which
     *      is where launch failures such as a bad launch configuration show up;
     *   2. if the launch was accepted, blocks the host in
     *      cudaDeviceSynchronize() until the device has finished, which is
     *      where errors raised while the kernel was running show up.
     * On any error the message is printed to stdout, handed to LogGPUData()
     * and the process exits with EXIT_FAILURE.
     *
     * iDeviceId    - forwarded unchanged to LogGPUData().
     * errorMessage - C string describing the operation being checked.
     *
     * Requires LogGPUData and pGPULogFile to be visible at the point of use.
     * cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
     * Every continuation line must end in a backslash with no blank line in
     * between, otherwise the macro definition is cut short.
     */
    #define GPU_CHECK_ERR( iDeviceId, errorMessage ) do {                        \
        cudaError_t gpuErr_ = cudaGetLastError();                                \
        if( cudaSuccess == gpuErr_ ) {                                           \
            gpuErr_ = cudaDeviceSynchronize();                                   \
        }                                                                        \
        if( cudaSuccess != gpuErr_ ) {                                           \
            char gpuMsg_[ 1000 ];                                                \
            snprintf( gpuMsg_, sizeof( gpuMsg_ ),                                \
                      "Cuda error: %s in file '%s' in line %i : %s[%d].\n",      \
                      ( errorMessage ), __FILE__, __LINE__,                      \
                      cudaGetErrorString( gpuErr_ ), (int)gpuErr_ );             \
            /* the built text is data, never a format string */                  \
            printf( "%s", gpuMsg_ );                                             \
            LogGPUData( ( iDeviceId ), pGPULogFile, gpuMsg_ );                   \
            exit( EXIT_FAILURE );                                                \
        }                                                                        \
    } while (0)
    

eyal