After the code runs for 4 iterations, I get this error:
Fatal error: kernel1 (the launch timed out and was terminated at …/simple.cu:498)
*** FAILED - ABORTING
I am calling the kernels in a do-while loop. Part of the code inside the loop is as follows:
// One pass of the loop body: for each of the three components (u, v, w) the
// host copies are uploaded to the device, the matching kernel is launched,
// the host waits for the device, and the error state is checked.

// --- Stage 1: u component ---------------------------------------------------
// Upload hu -> u and hdu -> du (sizeT bytes each).
CUDA_SAFE_CALL(cudaMemcpy(u, hu, (sizeT), cudaMemcpyHostToDevice),__LINE__);
CUDA_SAFE_CALL(cudaMemcpy(du, hdu, (sizeT), cudaMemcpyHostToDevice),__LINE__);
DisU<<<dimGrid,dimBlock>>>(uo,vo,wo,u,du,p);
// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its
// replacement and is what the other two stages already use.
cudaDeviceSynchronize();
// Errors raised while DisU was executing surface at the sync above and are
// reported here under the label "kernel1".
cudaCheckErrors("kernel1");
// Device-to-host read-back of du/u, currently disabled:
// CUDA_SAFE_CALL(cudaMemcpy( hdu, du,(sizeT), cudaMemcpyDeviceToHost),__LINE__);
// CUDA_SAFE_CALL(cudaMemcpy( hu, u,(sizeT), cudaMemcpyDeviceToHost),__LINE__);

// --- Stage 2: v component ---------------------------------------------------
// Upload hv -> v and hdv -> dv (sizeT bytes each).
CUDA_SAFE_CALL(cudaMemcpy(v, hv, (sizeT), cudaMemcpyHostToDevice),__LINE__);
CUDA_SAFE_CALL(cudaMemcpy(dv, hdv, (sizeT), cudaMemcpyHostToDevice),__LINE__);
DisV<<<dimGrid,dimBlock>>>(uo,vo,wo,v,dv,p);
cudaDeviceSynchronize();
cudaCheckErrors("kernel2");

// --- Stage 3: w component ---------------------------------------------------
// Upload hdw -> dw and hw -> w (sizeT bytes each).
CUDA_SAFE_CALL(cudaMemcpy(dw, hdw, (sizeT), cudaMemcpyHostToDevice),__LINE__);
CUDA_SAFE_CALL(cudaMemcpy(w, hw, (sizeT), cudaMemcpyHostToDevice),__LINE__);
DisW<<<dimGrid,dimBlock>>>(uo,vo,wo,w,dw,p);
cudaDeviceSynchronize();
cudaCheckErrors("kernel3");
Please help.