Hello all, I have searched many forums for a solution to this error, but I still cannot solve the problem.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdlib.h>
#include <stdio.h>
// Edge length of the square thread block; main() uses it for both block dimensions.
#define BLOCK_SIZE 16
// <math.h> may already provide M_PI (depending on platform and feature macros),
// so only supply a fallback definition when it is missing.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
// When defined, __cudaCheckError() performs its checks; without it the function body is empty.
#define CUDA_ERROR_CHECK
// Convenience wrapper that records the call site (file and line) for error messages.
#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
/*
 * Reports any pending CUDA error and terminates the process.
 *
 * Two stages, both compiled in only when CUDA_ERROR_CHECK is defined:
 *   1. cudaGetLastError()      - picks up an error already recorded by the runtime
 *                                (e.g. an invalid launch configuration).
 *   2. cudaDeviceSynchronize() - waits for outstanding device work so that a fault
 *                                raised while the kernel was executing is surfaced here.
 *
 * file / line identify the call site and are printed in the message.
 * On failure the message goes to stderr and the process exits with status -1.
 */
inline void __cudaCheckError(const char *file, const int line)
{
#ifdef CUDA_ERROR_CHECK
    // Start with the error state the runtime has already latched.
    const char *messageFormat = "cudaCheckError() failed at %s:%i : %s\n";
    cudaError status = cudaGetLastError();

    // Nothing pending: block until the device is idle and look again.
    if (status == cudaSuccess)
    {
        messageFormat = "cudaCheckError() with sync failed at %s:%i : %s\n";
        status = cudaDeviceSynchronize();
    }

    if (status != cudaSuccess)
    {
        fprintf(stderr, messageFormat, file, line, cudaGetErrorString(status));
        exit(-1);
    }
#endif
}
/*
 * Generation_1G_1 - per-cell coordinate computation over a 30097 x 28289 extent.
 *
 * Launch layout: 2D grid of 2D blocks. Each thread handles the cell (x, y) given by
 * its global thread index; threads outside the extent return immediately.
 *
 * NOTE(review): the constants (k0 = 0.9996, a = 6378137.0, zone 31, letter 'T') look
 * like an inverse UTM -> latitude/longitude projection - confirm against the code
 * this was derived from.
 *
 * The kernel takes no arguments and performs no stores: Super and Market are computed
 * and then dropped, so it has no observable output. An optimizing build is free to
 * remove the arithmetic; a debug (-G) build executes all of it.
 */
__global__ void Generation_1G_1()
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= 30097 || y >= 28289)
        return;

    // Linear mapping from cell index to the two input coordinates.
    // presumably Red/Black are cell sizes and Blue/Green the origin - verify.
    const double Blue = 661903.203147;
    const double Green = 4837821.758209;
    const double Black = 0.550000;
    const double Red = 0.550000;
    // The results are deliberately stored as int (fraction truncated), as before.
    const int ix = (int)((double)(x)*Red + Blue);
    const int iy = (int)(Green - (double)(y)*Black);

    const double k0 = 0.9996;
    const double a = 6378137.0;
    const double Yellow = 0.00669438;
    const double rad2deg = 180.0 / M_PI;

    // sqrt(1 - Yellow) is needed twice; evaluate it once.
    const double rootTerm = sqrt(1.0 - Yellow);
    const double e1 = (1.0 - rootTerm) / (1.0 + rootTerm);
    const double White = (Yellow) / (1.0 - Yellow);
    const double LongOrigin = (double)((31 - 1)) * 6.0 - 180.0 + 3.0;

    const double xu = ix - 500000.0;
    double yu = iy;
    // Letters below 'N' get the 10,000,000 offset removed; with 'T' this never fires.
    const char zoneLetter = 'T';
    if (zoneLetter < 'N')
        yu -= 10000000.0;

    const double M = yu / k0;
    const double mu = M / (a*(1.0 - Yellow / 4.0 - 3.0 * Yellow*Yellow / 64.0 - 5.0 * Yellow*Yellow*Yellow / 256.0));
    const double phi1Rad = mu + (3.0 * e1 / 2.0 - 27.0 * e1*e1*e1 / 32.0)*sin(2.0 * mu) + (21.0 * e1*e1 / 16.0 - 55.0 * e1*e1*e1*e1 / 32.0)*sin(4.0 * mu) + (151.0 * e1*e1*e1 / 96.0)*sin(6.0 * mu);

    // Each trig value of phi1Rad is used several times below; compute each once.
    const double sinPhi = sin(phi1Rad);
    const double cosPhi = cos(phi1Rad);
    const double tanPhi = tan(phi1Rad);
    const double curvatureTerm = 1.0 - Yellow*sinPhi*sinPhi;

    const double N1 = a / sqrt(curvatureTerm);
    const double T1 = tanPhi*tanPhi;
    const double C1 = White*cosPhi*cosPhi;
    const double R1 = a*(1.0 - Yellow) / pow(curvatureTerm, 1.5);
    const double D = xu / (N1*k0);

    double Super = phi1Rad - (N1*tanPhi / R1)*(D*D / 2.0 - (5.0 + 3.0 * T1 + 10.0 * C1 - 4.0 * C1*C1 - 9.0 * White)*D*D*D*D / 24.0 + (61.0 + 90.0 * T1 + 298.0 * C1 + 45.0 * T1*T1 - 252.0 * White - 3.0 * C1*C1)*D*D*D*D*D*D / 720.0);
    Super = Super * rad2deg;

    double Market = (D - (1.0 + 2.0 * T1 + C1)*D*D*D / 6.0 + (5.0 - 2.0 * C1 + 28.0 * T1 - 3.0 * C1*C1 + 8.0 * White + 24.0 * T1*T1)*D*D*D*D*D / 120.0) / cosPhi;
    Market = LongOrigin + Market * rad2deg;

    // No output buffer exists, so both results are intentionally discarded here.
    (void)Super;
    (void)Market;
}
/*
 * Launches Generation_1G_1 once over the full extent and reports any CUDA error.
 * Returns 0 on success, 1 if the final synchronization fails (CudaCheckError()
 * exits the process itself on failure).
 *
 * NOTE(review): on Windows, a GPU that also drives the display is subject to the
 * driver watchdog (TDR, about 2 s by default). A kernel that runs longer is killed
 * and typically reported as "unspecified launch failure". The build log shows -G
 * (device debug, no optimization), which makes this kernel far slower - confirm by
 * building without -G, launching smaller grids, or raising the TDR delay.
 */
int main()
{
    // Extent tested inside Generation_1G_1 (x < 30097 && y < 28289). The grid is
    // sized from the same numbers so the last column gets a thread too.
    // NOTE(review): the original sized the x dimension from 30096 - confirm which is intended.
    const int width = 30097;
    const int height = 28289;

    dim3 threadsperblock(BLOCK_SIZE, BLOCK_SIZE);
    dim3 numblocks((width + BLOCK_SIZE - 1) / BLOCK_SIZE,
                   (height + BLOCK_SIZE - 1) / BLOCK_SIZE);

    Generation_1G_1<<<numblocks, threadsperblock>>>();
    CudaCheckError();

    // Still needed when CUDA_ERROR_CHECK is disabled; do not ignore its result.
    cudaError_t syncStatus = cudaDeviceSynchronize();
    if (syncStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaDeviceSynchronize() failed: %s\n", cudaGetErrorString(syncStatus));
        return 1;
    }
    return 0;
}
I ran a CUDA kernel that calculates some variables, using VS2015 with a GTX 1080 on Windows 10.
If I run this CUDA kernel on Ubuntu 16.04, the kernel works fine.
However, if I run it on Windows 10, the kernel behaves abnormally and is finally terminated with this error message:
cudaCheckError() with sync failed at d:/users/*******/documents/visual studio 2015/Projects/testft/testft/kernel.cu:245 : unspecified launch failure
Also, if I run this kernel on Windows 10 using Nsight → Start CUDA Debugging, I see the following error messages:
CUDA context created : 0346b008
CUDA module loaded: 0775e978 kernel.cu
CUDA grid launch failed: CUcontext: 54964232 CUmodule: 125167992 Function: _Z15Generation_1G_1v
Build output with ptxas info messages:
d:\users\*******\documents\visual studio 2015\Projects\testft\testft>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\nvcc.exe" -gencode=arch=compute_50,code=\"sm_50,compute_50\" --use-local-env --cl-version 2015 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include" --source-in-ptx -G -lineinfo --keep-dir Release -maxrregcount=0 --ptxas-options=-v --machine 32 --compile -cudart static -DWIN32 -DNDEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /FS /Zi /MD " -o Release\kernel.cu.obj "d:\users\*******\documents\visual studio 2015\Projects\testft\testft\kernel.cu"
1> ptxas info : 0 bytes gmem, 272 bytes cmem[3]
1> ptxas info : Function properties for cudaFuncGetAttributes
1> 8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1> ptxas info : Function properties for cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
1> 24 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1> ptxas info : Function properties for cudaGetDevice
1> 8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1> ptxas info : Function properties for __internal_trig_reduction_slowpathd
1> 152 bytes stack frame, 12 bytes spill stores, 12 bytes spill loads
1> ptxas info : Function properties for cudaOccupancyMaxActiveBlocksPerMultiprocessor
1> 16 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1> ptxas info : Function properties for __internal_accurate_pow
1> 296 bytes stack frame, 16 bytes spill stores, 16 bytes spill loads
1> ptxas info : Function properties for cudaMalloc
1> 8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1> ptxas info : Function properties for cudaDeviceGetAttribute
1> 16 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1> ptxas info : Compiling entry function '_Z15Generation_1G_1v' for 'sm_50'
1> ptxas info : Function properties for _Z15Generation_1G_1v
1> 64 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
1> ptxas info : Used 118 registers, 360 bytes cumulative stack size, 320 bytes cmem[0], 104 bytes cmem[2]
How can I fix this error on Windows 10?