GPU CUDA problem: CUDA grid launch failed error on windows

mrlim24 · November 10, 2017, 7:55am

Hello all, I have searched many forums for a solution to this error, but I still cannot solve the problem.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdlib.h>
#include <stdio.h>

// Edge length, in threads, of the square thread block built in main().
#define BLOCK_SIZE 16

// Some toolchains' math headers already provide M_PI; only define it when it
// is missing so the two definitions can never conflict.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// When defined, __cudaCheckError() performs its checks; when undefined the
// body of that function is compiled out and the call becomes a no-op.
#define CUDA_ERROR_CHECK
// Invokes __cudaCheckError() with the file and line of the call site so that
// a failure message points at the statement following the kernel launch.
#define CudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )

/*
 * Reports any pending CUDA error and terminates the process.
 *
 * Two checks are made, in order, when CUDA_ERROR_CHECK is defined:
 *   1. cudaGetLastError()       - the error recorded by the runtime so far
 *   2. cudaDeviceSynchronize()  - the error returned once the device is idle
 * On the first failure a message containing `file`, `line` and the CUDA error
 * string is written to stderr and the process exits with status -1.
 * When CUDA_ERROR_CHECK is not defined the function does nothing.
 *
 * file: source file name to print in the message.
 * line: source line number to print in the message.
 */
inline void __cudaCheckError(const char *file, const int line)
{
#ifdef CUDA_ERROR_CHECK
	const cudaError launchStatus = cudaGetLastError();
	if (launchStatus != cudaSuccess)
	{
		fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", file, line, cudaGetErrorString(launchStatus));
		exit(-1);
	}

	// Wait for the device so that errors raised during execution are seen here.
	const cudaError syncStatus = cudaDeviceSynchronize();
	if (syncStatus != cudaSuccess)
	{
		fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", file, line, cudaGetErrorString(syncStatus));
		exit(-1);
	}
#endif
}
/*
 * Per-thread coordinate computation over a 2D index space of
 * 30097 x 28289 elements.
 *
 * Launch layout: 2D grid of 2D blocks; each thread derives one (x, y) pair
 * from threadIdx/blockIdx/blockDim. Threads whose (x, y) falls outside the
 * extent return immediately, so the grid may overshoot the extent.
 *
 * The kernel takes no arguments and writes nothing to memory: the final
 * values (Super, Market) are computed in double precision and then discarded.
 *
 * NOTE(review): the series below looks like the usual UTM -> latitude /
 * longitude inverse projection (k0 = 0.9996, a = 6378137.0, zone 31, band 'T')
 * - confirm against the original, non-anonymised source.
 */
__global__ void Generation_1G_1()
{
	// Extent of the index space handled by the grid.
	const int width = 30097;
	const int height = 28289;

	const int x = threadIdx.x + blockIdx.x * blockDim.x;
	const int y = threadIdx.y + blockIdx.y * blockDim.y;

	// Threads in the partially filled edge blocks have no element to process.
	if (x >= width || y >= height)
		return;

	// Affine mapping from the (x, y) index to the input coordinate pair.
	// NOTE(review): presumably Blue/Green are the origin and Red/Black the
	// per-index step - confirm.
	const double Blue = 661903.203147;
	const double Green = 4837821.758209;
	const double Black = 0.550000;
	const double Red = 0.550000;

	// The original stores these in int, which truncates the fractional part;
	// that truncation is kept as-is.
	// NOTE(review): confirm the truncation is intended.
	const int ix = (int)((double)(x)*Red + Blue);
	const int iy = (int)(Green - (double)(y)*Black);

	const double k0 = 0.9996;
	const double a = 6378137.0;
	const double Yellow = 0.00669438;
	const double rad2deg = 180.0 / M_PI;

	// sqrt(1 - Yellow) was evaluated twice in the original; compute it once.
	const double rootOneMinusYellow = sqrt(1.0 - Yellow);
	const double e1 = (1.0 - rootOneMinusYellow) / (1.0 + rootOneMinusYellow);

	const double xu = ix - 500000.0;
	double yu = iy;

	// 'T' - 'N' is a compile-time constant (6), so the offset below is never
	// applied; the test is kept to show where a different band letter would
	// change the result.
	const bool atOrAboveN = ('T' - 'N') >= 0;
	if (!atOrAboveN)
		yu -= 10000000.0;

	const double LongOrigin = (double)((31 - 1)) * 6.0 - 180.0 + 3.0;
	const double White = (Yellow) / (1.0 - Yellow);
	const double M = yu / k0;
	const double mu = M / (a*(1.0 - Yellow / 4.0 - 3.0 * Yellow*Yellow / 64.0 - 5.0 * Yellow*Yellow*Yellow / 256.0));
	const double phi1Rad = mu + (3.0 * e1 / 2.0 - 27.0 * e1*e1*e1 / 32.0)*sin(2.0 * mu) + (21.0 * e1*e1 / 16.0 - 55.0 * e1*e1*e1*e1 / 32.0)*sin(4.0 * mu) + (151.0 * e1*e1*e1 / 96.0)*sin(6.0 * mu);

	// Each trigonometric value of phi1Rad is needed several times below;
	// evaluate it once instead of on every use.
	const double sinPhi = sin(phi1Rad);
	const double cosPhi = cos(phi1Rad);
	const double tanPhi = tan(phi1Rad);
	const double oneMinusYellowSin2 = 1.0 - Yellow*sinPhi*sinPhi;

	const double N1 = a / sqrt(oneMinusYellowSin2);
	const double T1 = tanPhi*tanPhi;
	const double C1 = White*cosPhi*cosPhi;
	const double R1 = a*(1.0 - Yellow) / pow(oneMinusYellowSin2, 1.5);
	const double D = xu / (N1*k0);

	// NOTE(review): Super/Market are presumably latitude/longitude in degrees - confirm.
	double Super = phi1Rad - (N1*tanPhi / R1)*(D*D / 2.0 - (5.0 + 3.0 * T1 + 10.0 * C1 - 4.0 * C1*C1 - 9.0 * White)*D*D*D*D / 24.0 + (61.0 + 90.0 * T1 + 298.0 * C1 + 45.0 * T1*T1 - 252.0 * White - 3.0 * C1*C1)*D*D*D*D*D*D / 720.0);
	Super = Super * rad2deg;
	double Market = (D - (1.0 + 2.0 * T1 + C1)*D*D*D / 6.0 + (5.0 - 2.0 * C1 + 28.0 * T1 - 3.0 * C1*C1 + 8.0 * White + 24.0 * T1*T1)*D*D*D*D*D / 120.0) / cosPhi;
	Market = LongOrigin + Market * rad2deg;

	// The kernel has no output parameter, so the results are intentionally
	// unused here; the casts silence "set but never used" diagnostics.
	(void)Super;
	(void)Market;
}
int main()
{
	dim3 threadsperblock(BLOCK_SIZE, BLOCK_SIZE);
	dim3 numblocks((30096 + BLOCK_SIZE - 1) / BLOCK_SIZE, (28289 + BLOCK_SIZE - 1) / BLOCK_SIZE);

	Generation_1G_1 << < numblocks, threadsperblock >> > ();
	CudaCheckError();
	cudaDeviceSynchronize();
	return 0;
}

I ran this CUDA kernel, which calculates some variables, using VS2015 on a GTX 1080 under Windows 10.

If I run this CUDA kernel on Ubuntu 16.04, the kernel works fine.

However, if I run this kernel on Windows 10, it behaves abnormally and is finally terminated with this error message:

cudaCheckError() with sync failed at d:/users/*******/documents/visual studio 2015/Projects/testft/testft/kernel.cu:245 : unspecified launch failure

Also, if I run this kernel on Windows 10 using Nsight → Start CUDA Debugging, I see the following error messages:

CUDA context created : 0346b008
CUDA module loaded:   0775e978 kernel.cu
CUDA grid launch failed: CUcontext: 54964232 CUmodule: 125167992 Function: _Z15Generation_1G_1v

Build output (ptxas info messages):

d:\users\*******\documents\visual studio 2015\Projects\testft\testft>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\nvcc.exe" -gencode=arch=compute_50,code=\"sm_50,compute_50\" --use-local-env --cl-version 2015 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin"  -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include" --source-in-ptx -G -lineinfo  --keep-dir Release -maxrregcount=0 --ptxas-options=-v --machine 32 --compile -cudart static     -DWIN32 -DNDEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /FS /Zi  /MD " -o Release\kernel.cu.obj "d:\users\*******\documents\visual studio 2015\Projects\testft\testft\kernel.cu"

1>  ptxas info    : 0 bytes gmem, 272 bytes cmem[3]
1>  ptxas info    : Function properties for cudaFuncGetAttributes
1>      8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1>  ptxas info    : Function properties for cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
1>      24 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1>  ptxas info    : Function properties for cudaGetDevice
1>      8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1>  ptxas info    : Function properties for __internal_trig_reduction_slowpathd
1>      152 bytes stack frame, 12 bytes spill stores, 12 bytes spill loads
1>  ptxas info    : Function properties for cudaOccupancyMaxActiveBlocksPerMultiprocessor
1>      16 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1>  ptxas info    : Function properties for __internal_accurate_pow
1>      296 bytes stack frame, 16 bytes spill stores, 16 bytes spill loads
1>  ptxas info    : Function properties for cudaMalloc
1>      8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1>  ptxas info    : Function properties for cudaDeviceGetAttribute
1>      16 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1>  ptxas info    : Compiling entry function '_Z15Generation_1G_1v' for 'sm_50'
1>  ptxas info    : Function properties for _Z15Generation_1G_1v
1>      64 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1>  ptxas info    : Used 118 registers, 360 bytes cumulative stack size, 320 bytes cmem[0], 104 bytes cmem[2]

How can I fix this error on windows10?

Robert_Crovella · November 10, 2017, 8:58am

are you hitting a wddm timeout?

not sure what that is? google “cuda wddm timeout”

njuffa · November 10, 2017, 8:59am

“Unspecified launch failure” is the GPU equivalent of a “segfault” on Linux or a “General Protection Fault” on Windows: Your code contains an out-of-bounds memory access.

There is a difference between “works by design” and “happens to work”. If your code falls into the latter category, it may work on Linux but not Windows, work on Mondays but not Tuesdays, etc. You get the idea.

I would suggest spending some time on debugging to find out why there is an out-of-bounds memory access.

Topic		Replies	Views
Unspecified launch failure error CUDA Programming and Performance	10	18502	January 6, 2018
CUDA Bug: "CUDA error: unspecified launch failure" CUDA Programming and Performance	7	11687	March 11, 2011
unspecified launch failure kernel fails if a loop is too long CUDA Programming and Performance	8	42843	April 25, 2007
Unspecifiec launch failure on CUDA_SAFE_CALL(cudaThreadSynchronize()) CUDA Programming and Performance	5	2118	January 27, 2011
CUDA error, bandwithTest.exe CUDA Setup and Installation	12	2509	January 21, 2019
Kernel crash when GPU Debug Info is disabled in Visual Studio CUDA Programming and Performance	5	967	March 12, 2018
How to debug kernel throwing an exception? CUDA Programming and Performance	16	7954	June 14, 2013
Error: Failed to suspend device for CUDA device 0 CUDA Programming and Performance	8	4553	January 4, 2023
unspecified launch failure simple volume initialization fails CUDA Programming and Performance	9	6218	August 26, 2007
CUDA 2.1 Beta Problem/Bugs (Linux) CUDA Programming and Performance	5	1647	January 6, 2009

GPU CUDA problem: CUDA grid launch failed error on windows

Related topics