Exception in a CUDA Program

Hello everybody, i have to do a project in C for CUDA and i don’t understand why it doesn’t works.

I want to convert a picture in grayscale. I just did it on CPU (c++) and i have to do it in CUDA too.

I’m Belgian so i will try to translate some part of the code for a better understanding.

Look at this code :

#include <stdio.h>

#include <cv.h>

#include <highgui.h>

#include <math.h>

#include <cutil_inline.h>

#define LONG 490000

float* h_rouge;

float* h_vert;

float* h_bleu;

float* h_gris;

float* d_rouge;

float* d_vert;

float* d_bleu;

float* d_gris;

void Cleanup(void);

// device code

__global__ void NVG(const float* R, const float* V, const float* B,float* G, int N)

{

    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < N)

	{

        G[i]=((0.299*R[i])+(0.587*V[i])+(0.114*B[i]))/3;      // operation for grayscale  -> (RED * 0.299 + GREEN * 0.587 + BLEU * 0.114) / 3 

	}

}

// Code sur l'host

int main(int argc, char** argv)

{

   //Image de base et niveau de gris

	IplImage* imgori = cvLoadImage("1.jpg");                   // i load a picture in the same directory

	IplImage* img = cvCloneImage(imgori);

	IplImage* imgnvg = cvCreateImage(cvGetSize(img), IPL_DEPTH_8U, 1);  // i create a picture to put the gray values in it

	// conversion en niveau de gris

	int N = LONG; 

	int haut     = imgori->height;  // i put the size of the picture

	int larg     = imgori->width;

	int emplacement = 0;

	int bleu = 0;

	int rouge = 0;

	int vert = 0;

	size_t size = N * sizeof(float);

	uchar* dataimg    = (uchar *)imgnvg->imageData;

	for(int numerolign=0;numerolign<(haut);numerolign++) 

	{

		for(int numerocolonne=0;numerocolonne<(larg);numerocolonne++)   // i traverse the picture 

		{

			bleu = ((uchar *)(img->imageData + numerolign*img->widthStep))[numerocolonne*img->nChannels + 0]; // B

			vert = ((uchar *)(img->imageData + numerolign*img->widthStep))[numerocolonne*img->nChannels + 1]; // G

			rouge = ((uchar *)(img->imageData + numerolign*img->widthStep))[numerocolonne*img->nChannels + 2]; // R

			

			h_bleu[emplacement] = bleu;

			h_vert[emplacement] = vert;           // i put the RGB values in 3 tables on the host 

			h_rouge[emplacement] = rouge;

			emplacement++;

			

		}

	}

//allocating host memory

    h_bleu = (float*)malloc(size);

    if (h_bleu == 0) Cleanup();

    h_vert = (float*)malloc(size);   

    if (h_vert == 0) Cleanup();

    h_rouge = (float*)malloc(size);

    if (h_rouge == 0) Cleanup();

    h_gris = (float*)malloc(size);

    if (h_gris == 0) Cleanup();

//allocating device memory

    cutilSafeCall( cudaMalloc((void**)&d_bleu, size) );

    cutilSafeCall( cudaMalloc((void**)&d_vert, size) );

    cutilSafeCall( cudaMalloc((void**)&d_rouge, size) );

    cutilSafeCall( cudaMalloc((void**)&d_gris, size) );

// copy from host to device

    cutilSafeCall( cudaMemcpy(d_bleu, h_bleu, size, cudaMemcpyHostToDevice) );

    cutilSafeCall( cudaMemcpy(d_vert, h_vert, size, cudaMemcpyHostToDevice) );

    cutilSafeCall( cudaMemcpy(d_rouge, h_rouge, size, cudaMemcpyHostToDevice) );

// kernel

    int threadsPerBlock = 256;

    int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

    NVG<<<blocksPerGrid, threadsPerBlock>>>(d_rouge, d_vert, d_bleu, d_gris, N);

// copy gray from device to host

    cutilSafeCall( cudaMemcpy(h_gris, d_gris, size, cudaMemcpyDeviceToHost) );

	int gris = 0;

	emplacement = 0;

	for(int numerolign=0;numerolign<(haut);numerolign++) 

	{

		for(int numerocolonne=0;numerocolonne<(larg);numerocolonne++) // i put the gray values in the picture

		{

			gris = h_gris[emplacement];

			dataimg[(numerolign*haut)+numerocolonne]=gris;

			emplacement++;

		}

	}

	// Affichage

    cvNamedWindow( "originale", CV_WINDOW_AUTOSIZE );  //i open a window and i put original picture in it 

    cvShowImage( "originale", img );

	cvNamedWindow( "nvg", CV_WINDOW_AUTOSIZE );   //i open a window and i put gray picture in it 

    cvShowImage( "nvg", imgnvg );

	 cvWaitKey(0);

	

cvDestroyAllWindows();

//free memory

	cvReleaseImage( &imgori );

    cvReleaseImage( &imgnvg );            

	cvReleaseImage( &img );

Cleanup();

	system("PAUSE");  

	

}

void Cleanup(void)

{

    if (d_rouge)

        cudaFree(d_rouge);

    if (d_vert)

        cudaFree(d_vert);

    if (d_bleu)

        cudaFree(d_bleu);

	if (d_gris)

        cudaFree(d_gris);

if (h_rouge)

        free(h_rouge);

    if (h_vert)

        free(h_vert);

    if (h_bleu)

        free(h_bleu);

	 if (h_gris)

        free(h_gris);

}

i copy the error but it’s in french i try to translate it :

Exception non gérée à 0x0080119a dans testcudaimage.exe : 0xC0000005: Violation d’accès lors de l’écriture à l’emplacement 0x00000000.

→ Exception not managed to 0x0080119a in testcudaimage.exe: 0xC0000005: Access violation at the time of the writing to the site 0x00000000.

You are trying to dereference a null pointer inside main() somewhere. Probably nothing to do with CUDA at all, but it is really impossible to say more than that.

Hello, i made big mistakes, i was tired External Image

But now i have got a strange problem. When i want to have a grayscale picture it works but look at my function when i want to apply a Sobel Filter :

__global__ void Nvg(const int* Rouge, const int* Bleu, int* Vert, int* Gris, int* Sobel, int Nbr_pixel, int height, int width)

{

        int i = blockDim.x * blockIdx.x + threadIdx.x;

        int j = 0;

	int k = 0;

	int Gx1 =0;

	int Gx2 =0;

	int Gx3 =0;

	int Gx4 =0;

	int Gx5 =0;

	int Gx6 =0;

	int Gx7 =0;

	int Gx8 =0;

	int Gx9 =0;

	int Gy1 =0;

	int Gy2 =0;

	int Gy3 =0;

	int Gy4 =0;

	int Gy5 =0;

	int Gy6 =0;

	int Gy7 =0;

	int Gy8 =0;

	int Gy9 =0;

	int Gx=0;

	int Gy=0;

	int final=0;

	double dGx = 0;

	double dGy = 0;

	double G = 0;

if (i < Nbr_pixel)

	{

            Gris[i] = ((Rouge[i] * 0.299) + (Bleu[i] * 0.114) + (Vert[i] * 0.587))/3;  

	}

	for(j=1;j<(height-1);j++) 

	{

		for(k=1;k<(width-1);k++)

		{

			Gx1 = Gy1 = Gris[((j-1)*width)+(k-1)];

			Gx2 = Gy2 = Gris[((j-1)*width)+k];

			Gx3 = Gy3 = Gris[((j-1)*width)+(k+1)];

			Gx4 = Gy4 = Gris[(j*width)+(k-1)];

			Gx5 = Gy5 = Gris[(j*width)+k];           // I am shure about this manipulation, i made the same program in C++ 

			Gx6 = Gy6 = Gris[(j*width)+(k+1)];       // and i have to compare with CUDA

			Gx7 = Gy7 = Gris[((j+1)*width)+(k-1)];

			Gx8 = Gy8 = Gris[((j+1)*width)+k];

			Gx9 = Gy9 = Gris[((j+1)*width)+(k+1)];

			Gx=(Gx1*(1))+(Gx2*(0))+(Gx3*(-1))+(Gx4*(2))+(Gx5*(0))+(Gx6*(-2))+(Gx7*(1))+(Gx8*(0))+(Gx9*(-1));

			Gy=(Gy1*(1))+(Gy2*(2))+(Gy3*(1))+(Gy4*(0))+(Gy5*(0))+(Gy6*(0))+(Gy7*(-1))+(Gy8*(-2))+(Gy9*(-1));

			dGx = Gx;

			dGy = Gy;

			G = sqrt(pow(dGx,2)+pow(dGy,2));

			final = G;

			Sobel[(j*width)+k] = final;

		}

	}

}

When i run the program, windows says that he losts display driver and my screen becomes black during few seconds and than windows recovers display drivers and the screen is normal but my program is closed.

Do operations MUST be inside the “if(i < Nbr_pixel)” ?

No one see what’s wrong with it ?