Unable to Allocate on Memory and Stack Overflow

Hi

I started programming CUDA with Visual C++ 2008 a week ago,

so I tried a simple program:

#include <stdio.h>

#include <cuda.h>

#include <cuda_runtime.h>

#include <cutil.h>

#include <cutil_inline.h>

#include <windows.h>

#pragma comment(lib, "winmm.lib")

int* a_h;

int* d_A;

void CleanupResources(void);

/*
 * checkCUDAError
 *
 * Reports the most recent CUDA runtime error, if there is one.
 *
 * The last error is fetched with cudaGetLastError(). When it is anything
 * other than cudaSuccess, the caller-supplied label `msg` and the error
 * text from cudaGetErrorString() are written to stderr, the console is
 * paused, and the process exits with EXIT_FAILURE. When there is no
 * error the function simply returns.
 *
 * Call this right after a CUDA runtime API call so that the reported
 * error can be attributed to that call.
 */
void checkCUDAError(const char *msg) {
	const cudaError_t lastError = cudaGetLastError();

	// Nothing to report: the previous runtime calls succeeded.
	if (lastError == cudaSuccess) {
		return;
	}

	// Translate the error code into a readable message before bailing out.
	const char *description = cudaGetErrorString(lastError);
	fprintf(stderr, "Cuda error: %s: %s.\n", msg, description);

	// Keep the console window open so the message can be read.
	system("pause");
	exit(EXIT_FAILURE);
}

// GPU code
/*
 * VecAdd: element-wise vector addition, C[i] = A[i] + B[i] for 0 <= i < N.
 *
 * Parameters:
 *   A, B - input arrays, each expected to hold at least N ints
 *   C    - output array, expected to hold at least N ints
 *   N    - number of elements to process
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. The launch
 * must provide at least N threads in total; surplus threads do nothing.
 */
__global__  void VecAdd(const int* A, const int* B, int* C, int N)
{
	// Global element index; equals threadIdx.x when a single block is launched.
	int i = blockIdx.x * blockDim.x + threadIdx.x;

	// Guard the tail: the thread count rarely matches N exactly.
	if (i < N)
	{
		C[i] = A[i] + B[i];
	}
}

// CPU code
/*
 * main: prints the current device and its memory status, fills a host
 * vector of N ints, allocates a matching device buffer, copies the host
 * data to the device, and releases both buffers.
 *
 * Only the CUDA runtime API is used. Each runtime call is followed by
 * checkCUDAError(), which terminates the program on failure.
 *
 * Returns 0 on success, EXIT_FAILURE if the host allocation fails.
 */
int main(int argc, char **argv)
{
	(void)argc;
	(void)argv;

	// Set the device flags first, before any runtime call that initializes
	// the device (older toolkits reject the flags once the device is active).
	cudaSetDeviceFlags(cudaDeviceBlockingSync);
	checkCUDAError("set device flags");

	//cudaSetDevice( cutGetMaxGflopsDeviceId() );

	int device = 0;
	cudaGetDevice(&device);
	checkCUDAError("get device");
	printf("GPU Device: %10d\n", device);

	// cudaMemGetInfo reports sizes as size_t; the locals are named so they
	// do not shadow the C library free().
	size_t freeBytes = 0;
	size_t totalBytes = 0;
	cudaMemGetInfo(&freeBytes, &totalBytes);
	checkCUDAError("mem info");
	printf("GPU Memory status: %10llu %10llu\n",
	       (unsigned long long)freeBytes, (unsigned long long)totalBytes);

	system("pause");

	const int N = 100;
	const size_t size = N * sizeof(int);

	// Allocate the input vector in host memory
	a_h = (int*)malloc(size);
	if (a_h == NULL)
	{
		fprintf(stderr, "Host allocation failed.\n");
		system("pause");
		return EXIT_FAILURE;
	}

	// Initialize the vector
	for (int i = 0; i < N; i++) a_h[i] = i;

	// Allocate the vector on the GPU. The device pointer must go into d_A;
	// writing it into a_h would lose the host buffer and leave d_A unset.
	cudaMalloc((void**)&d_A, size);
	checkCUDAError("malloc");

	// Copy the vector from host memory to GPU memory
	cudaMemcpy(d_A, a_h, size, cudaMemcpyHostToDevice);
	checkCUDAError("memcpy");

	// Release GPU and host memory
	CleanupResources();

	//PlaySound("01-Diver.wav", NULL, SND_FILENAME|SND_SENTRY|SND_ASYNC);

	system("pause");

	return 0;
}

/*
 * CleanupResources: releases the device buffer d_A and the host buffer a_h.
 *
 * A failure reported by cudaFree() is printed to stderr rather than being
 * ignored. Both global pointers are reset to NULL afterwards, so calling
 * this function again does not free the same memory twice.
 */
void CleanupResources(void)
{
	// Free device memory
	cudaError_t status = cudaFree(d_A);
	if (status != cudaSuccess)
	{
		fprintf(stderr, "Cuda error: free: %s.\n", cudaGetErrorString(status));
	}
	d_A = NULL;

	// Free host memory
	free(a_h);
	a_h = NULL;
}

The problem is that when I compile it, it gives me the error "device kernel image is invalid", and Visual C++ reports a CUDA error on memory…

And when I try to use a cutil call such as cutilSafe…, the program crashes or raises an interrupt; in fact, every cutil and cutil_inline call crashes the program.

I’m using a

Pentium D 3.2GHz

Nvidia GeForce 8400GS

Visual Studio 2008

Cuda 4.0

Thanks for your replies

Hi

I started programming CUDA with Visual C++ 2008 a week ago,

so I tried a simple program:

#include <stdio.h>

#include <cuda.h>

#include <cuda_runtime.h>

#include <cutil.h>

#include <cutil_inline.h>

#include <windows.h>

#pragma comment(lib, "winmm.lib")

int* a_h;

int* d_A;

void CleanupResources(void);

/*
 * checkCUDAError
 *
 * Reports the most recent CUDA runtime error, if there is one.
 *
 * The last error is fetched with cudaGetLastError(). When it is anything
 * other than cudaSuccess, the caller-supplied label `msg` and the error
 * text from cudaGetErrorString() are written to stderr, the console is
 * paused, and the process exits with EXIT_FAILURE. When there is no
 * error the function simply returns.
 *
 * Call this right after a CUDA runtime API call so that the reported
 * error can be attributed to that call.
 */
void checkCUDAError(const char *msg) {
	const cudaError_t lastError = cudaGetLastError();

	// Nothing to report: the previous runtime calls succeeded.
	if (lastError == cudaSuccess) {
		return;
	}

	// Translate the error code into a readable message before bailing out.
	const char *description = cudaGetErrorString(lastError);
	fprintf(stderr, "Cuda error: %s: %s.\n", msg, description);

	// Keep the console window open so the message can be read.
	system("pause");
	exit(EXIT_FAILURE);
}

// GPU code
/*
 * VecAdd: element-wise vector addition, C[i] = A[i] + B[i] for 0 <= i < N.
 *
 * Parameters:
 *   A, B - input arrays, each expected to hold at least N ints
 *   C    - output array, expected to hold at least N ints
 *   N    - number of elements to process
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. The launch
 * must provide at least N threads in total; surplus threads do nothing.
 */
__global__  void VecAdd(const int* A, const int* B, int* C, int N)
{
	// Global element index; equals threadIdx.x when a single block is launched.
	int i = blockIdx.x * blockDim.x + threadIdx.x;

	// Guard the tail: the thread count rarely matches N exactly.
	if (i < N)
	{
		C[i] = A[i] + B[i];
	}
}

// CPU code
/*
 * main: prints the current device and its memory status, fills a host
 * vector of N ints, allocates a matching device buffer, copies the host
 * data to the device, and releases both buffers.
 *
 * Only the CUDA runtime API is used. Each runtime call is followed by
 * checkCUDAError(), which terminates the program on failure.
 *
 * Returns 0 on success, EXIT_FAILURE if the host allocation fails.
 */
int main(int argc, char **argv)
{
	(void)argc;
	(void)argv;

	// Set the device flags first, before any runtime call that initializes
	// the device (older toolkits reject the flags once the device is active).
	cudaSetDeviceFlags(cudaDeviceBlockingSync);
	checkCUDAError("set device flags");

	//cudaSetDevice( cutGetMaxGflopsDeviceId() );

	int device = 0;
	cudaGetDevice(&device);
	checkCUDAError("get device");
	printf("GPU Device: %10d\n", device);

	// cudaMemGetInfo reports sizes as size_t; the locals are named so they
	// do not shadow the C library free().
	size_t freeBytes = 0;
	size_t totalBytes = 0;
	cudaMemGetInfo(&freeBytes, &totalBytes);
	checkCUDAError("mem info");
	printf("GPU Memory status: %10llu %10llu\n",
	       (unsigned long long)freeBytes, (unsigned long long)totalBytes);

	system("pause");

	const int N = 100;
	const size_t size = N * sizeof(int);

	// Allocate the input vector in host memory
	a_h = (int*)malloc(size);
	if (a_h == NULL)
	{
		fprintf(stderr, "Host allocation failed.\n");
		system("pause");
		return EXIT_FAILURE;
	}

	// Initialize the vector
	for (int i = 0; i < N; i++) a_h[i] = i;

	// Allocate the vector on the GPU. The device pointer must go into d_A;
	// writing it into a_h would lose the host buffer and leave d_A unset.
	cudaMalloc((void**)&d_A, size);
	checkCUDAError("malloc");

	// Copy the vector from host memory to GPU memory
	cudaMemcpy(d_A, a_h, size, cudaMemcpyHostToDevice);
	checkCUDAError("memcpy");

	// Release GPU and host memory
	CleanupResources();

	//PlaySound("01-Diver.wav", NULL, SND_FILENAME|SND_SENTRY|SND_ASYNC);

	system("pause");

	return 0;
}

/*
 * CleanupResources: releases the device buffer d_A and the host buffer a_h.
 *
 * A failure reported by cudaFree() is printed to stderr rather than being
 * ignored. Both global pointers are reset to NULL afterwards, so calling
 * this function again does not free the same memory twice.
 */
void CleanupResources(void)
{
	// Free device memory
	cudaError_t status = cudaFree(d_A);
	if (status != cudaSuccess)
	{
		fprintf(stderr, "Cuda error: free: %s.\n", cudaGetErrorString(status));
	}
	d_A = NULL;

	// Free host memory
	free(a_h);
	a_h = NULL;
}

The problem is that when I compile it, it gives me the error "device kernel image is invalid", and Visual C++ reports a CUDA error on memory…

And when I try to use a cutil call such as cutilSafe…, the program crashes or raises an interrupt; in fact, every cutil and cutil_inline call crashes the program.

I’m using a

Pentium D 3.2GHz

Nvidia GeForce 8400GS

Visual Studio 2008

Cuda 4.0

Thanks for your replies

Hi,

cudaMalloc((void**)&a_h,size) should be cudaMalloc((void**)&d_A,size)

Hi,

cudaMalloc((void**)&a_h,size) should be cudaMalloc((void**)&d_A,size)

Hello!

-> Besides the cudaMalloc() problem (a_h is a host memory pointer, while d_A is the device memory pointer to pass to the kernel), I think that you can't detach your context; that line is incorrect.

Hello!

-> Besides the cudaMalloc() problem (a_h is a host memory pointer, while d_A is the device memory pointer to pass to the kernel), I think that you can't detach your context; that line is incorrect.

Thanks for your replies, but the problem remains…

It's as if the program cannot initialize the GPU memory.

When i tried with the

cudaSetDevice(cutGetMaxGflopsDeviceId());

the program crashes,

How can I allocate the data in device memory?

The error is the same: device kernel image is invalid

Thanks for your time

Thanks for your replies, but the problem remains…

It's as if the program cannot initialize the GPU memory.

When i tried with the

cudaSetDevice(cutGetMaxGflopsDeviceId());

the program crashes,

How can I allocate the data in device memory?

The error is the same: device kernel image is invalid

Thanks for your time

Problem solved.

I didn't notice that I was using the CUDA v4 PATH in one directory setting while the linker properties used CUDA PATH, so cudart 32 was being loaded, which conflicted with v4.

thanks again

Problem solved.

I didn't notice that I was using the CUDA v4 PATH in one directory setting while the linker properties used CUDA PATH, so cudart 32 was being loaded, which conflicted with v4.

thanks again

Hi, can you give more details on how you solved this problem? I am having the same issue. What is the PATH CUDA v4?

Thanks!

Your CUDA PATH is your install path for your CUDA files. So if you look at your system variables, you should have one called CUDA_PATH, one called CUDA_PATH_V3_2, and one called CUDA_PATH_V4_0, where the first two point to CUDA 3.2-related items, and the 3rd one points to CUDA 4.0 items. For example, on my system my CUDA_PATH_V4_0 variable is C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0. Assuming you’re on windows 7, you can check out your environment variables by: open start menu–>right click on computer–>properties–>Advanced system setting–>environment variables.

In this case, if you’re using CUDA 4.0 functionality but linking to CUDA 3.2 libraries, your code may have issues (if you’re using something that’s 4.0 specific)