CUDA Grid Launch Failed : CUDA grid launch failed: CUcontext: 2038968216304 CUmodule: 2039055847792 ...

Hi Guys,

I have been trying to run the sobel edge detection algorithm using Visual studio Nsight edition. I am unable to debug it using Next-Gen.

Can anyone please tell me what is going wrong with it.

I am running it on

Windows 10
CUDA 10.1
Visual Studio 2017
Nvidia 940 MX
Nvidia Nsight Visual Studio Edition 2019
Graphics Drivers 430.39

Thanks

The error that I am encountering while I try to debug the code using legacy debugging is as stated above. It says the CUDA grid launch failed everytime I am trying to do so. Can anyone please help me with this.

The Next-Gen CUDA Debugger only supports Pascal and later GPUs.
The 940MX (Maxwell) architecture precedes the Pascal architecture and is not currently supported by the Next-Gen CUDA Debugger.

The Legacy CUDA Debugger, does support Maxwell GPUs and should work well for you.
Try running your program with cuda-memcheck to help identify problems.
Could you supply some sample code that reproduces the problem?

Note that we are working to extend the Next-Gen support to Maxwell, and hope to have it in an upcoming release.

Thank you rbischof,

I have been trying to use Legacy but even with it I seem to be getting the same error.

Here is what I am trying to do.

#include
#include <cuda_runtime.h>
#include <cuda.h>
#include “device_launch_parameters.h”
#include “Header.h”
#include <math.h>

//#define CUDA_CALL(x) {cudaError_t cuda_error__ = (x); if (cuda_error__) printf(“CUDA error: " #x " returned “%s”\n”, cudaGetErrorString(cuda_error__));}
#define GRIDVAL 20.0
global void sobelfilter_kernel(int iw, int ih, unsigned char source, unsigned char dest)
{
int x = (blockIdx.x + blockDim.x) + threadIdx.x;
int y = (blockIdx.y + blockDim.y) + threadIdx.y;
int gx, gy;
if (x > 0 && x < iw - 1 && y > 0 && y < ih - 1)
{
gx = -source[iw
(y - 1)
(x - 1)] + source[iw*(y - 1)(x + 1)] +
-2 * source[iw
y*(x - 1)] + 2 * source[iwy(x + 1)] +
-source[iw*(y + 1)(x - 1)] + source[iw(y + 1)(x + 1)];
gy = -source[ih
(y - 1)(x - 1)] - 2 * source[ih(y - 1)x] + -source[ih(y - 1)(x + 1)] +
source[ih
(y + 1)(x - 1)] + source[ih(y + 1)(x)] + source[ih(y + 1)(x + 1)];
dest[iw
yx] = (int)sqrt((float)(gx)(float)(gx)+(float)(gy)*(float)(gy));

}

}

void sobelfilter(int iw, int ih, unsigned char *source, unsigned char *dest)
{
unsigned char *dev_source, *dev_dest;
cudaHostGetDevicePointer(&dev_source, source, 0);
cudaHostGetDevicePointer(&dev_dest, dest, 0);

dim3 threadsPerBlock(GRIDVAL, GRIDVAL, 1);
dim3 numBlocks(ceil(iw / GRIDVAL), ceil(ih / GRIDVAL), 1);

sobelfilter_kernel <<<numBlocks, threadsPerBlock>>> (iw, ih, dev_source, dev_dest);
cudaThreadSynchronize();

}

unsigned char* createImageBuffer(unsigned int bytes)
{
unsigned char *ptr = NULL;
cudaSetDeviceFlags(cudaDeviceMapHost);
cudaHostAlloc(&ptr, bytes, cudaHostAllocMapped);

return ptr;

}

void destroyImageBuffer(unsigned char* bytes)
{
cudaFreeHost(bytes);
}

What changes do I have to make to this code in order to successfully launch CUDA grid