I started programming with C++ (Visual C++ 2008) and CUDA a week ago,
so I tried a simple piece of code:
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cutil.h>
#include <cutil_inline.h>
#include <windows.h>
#pragma comment(lib, "winmm.lib")
int* a_h;
int* d_A;
void CleanupResources(void);
/*
Function checkCUDAError used to print any error messages that
result from a CUDA API related call. Uses cudaGetLastError to
find the last error that has occured.
This function will need to be called after every CUDA API function
call to accuratly get error messages.
*/
void checkCUDAError(const char *msg) {
cudaError_t err = cudaGetLastError(); // get the last error that has
// been produced by any of the
// runtime API calls
if( cudaSuccess != err) // cudaSuccess is an enum indicating no errors.
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err));
// cudaGetErrorString gives the message string from an error code.
system("pause");
exit(EXIT_FAILURE);
}
}
//Codigo GPU
__global__ void VecAdd(const int* A, const int* B, int* C, int N)
{
int i = threadIdx.x;
C[i]=A[i]+B[i];
}
//Codigo CPU
int main(int argc, char **argv)
{
CUresult result;
CUdevice device;
CUcontext context;
cuInit(0);
cuDeviceGet(&device, 0);
printf("GPU Device: %10d\n", device);
cuCtxCreate(&context, 0, device);
unsigned int free, total;
result = cuMemGetInfo(&free, &total);
cuCtxDetach(context);
printf("GPU Memory status: %10d %10d\n", free, total);
system("pause");
int N = 100;
size_t size = N * sizeof(int);
//Alojamiento de los Vectores de Entrada en la Memoria
a_h = (int*)malloc(size);
//inicializacion de los Vectores
for (int i=0; i<N; i++) a_h[i] = i;
//cudaSetDevice(cutGetMaxGflopsDeviceId());
cudaSetDeviceFlags(cudaDeviceBlockingSync);
//cudaSetDevice( cutGetMaxGflopsDeviceId() );
//alojamiento de los vectores en el GPU
(cudaMalloc((void**)&a_h,size));
checkCUDAError("malloc");
//Copiar vectores del Host a la Memoria del GPU
(cudaMemcpy(d_A, a_h, size, cudaMemcpyHostToDevice));
checkCUDAError("memcpy");
//liberar la Memoria del GPU
CleanupResources();
//PlaySound("01-Diver.wav", NULL, SND_FILENAME|SND_SENTRY|SND_ASYNC);
system("pause");
}
void CleanupResources(void)
{
// Free device memory
cudaFree(d_A);
// Free host memory
free(a_h);
}
The problem is that when I compiled it, it gave me the error "device kernel image is invalid", and the C++ side gave me a CUDA error on memory…
Also, when I tried to use the cutil functions like cutilSafe…, the program crashed or generated an interrupt — in fact, every cutil and cutil_inline function crashes the program.
I started programming with C++ (Visual C++ 2008) and CUDA a week ago,
so I tried a simple piece of code:
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cutil.h>
#include <cutil_inline.h>
#include <windows.h>
#pragma comment(lib, "winmm.lib")
int* a_h;
int* d_A;
void CleanupResources(void);
/*
Function checkCUDAError used to print any error messages that
result from a CUDA API related call. Uses cudaGetLastError to
find the last error that has occured.
This function will need to be called after every CUDA API function
call to accuratly get error messages.
*/
void checkCUDAError(const char *msg) {
cudaError_t err = cudaGetLastError(); // get the last error that has
// been produced by any of the
// runtime API calls
if( cudaSuccess != err) // cudaSuccess is an enum indicating no errors.
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err));
// cudaGetErrorString gives the message string from an error code.
system("pause");
exit(EXIT_FAILURE);
}
}
//Codigo GPU
__global__ void VecAdd(const int* A, const int* B, int* C, int N)
{
int i = threadIdx.x;
C[i]=A[i]+B[i];
}
//Codigo CPU
int main(int argc, char **argv)
{
CUresult result;
CUdevice device;
CUcontext context;
cuInit(0);
cuDeviceGet(&device, 0);
printf("GPU Device: %10d\n", device);
cuCtxCreate(&context, 0, device);
unsigned int free, total;
result = cuMemGetInfo(&free, &total);
cuCtxDetach(context);
printf("GPU Memory status: %10d %10d\n", free, total);
system("pause");
int N = 100;
size_t size = N * sizeof(int);
//Alojamiento de los Vectores de Entrada en la Memoria
a_h = (int*)malloc(size);
//inicializacion de los Vectores
for (int i=0; i<N; i++) a_h[i] = i;
//cudaSetDevice(cutGetMaxGflopsDeviceId());
cudaSetDeviceFlags(cudaDeviceBlockingSync);
//cudaSetDevice( cutGetMaxGflopsDeviceId() );
//alojamiento de los vectores en el GPU
(cudaMalloc((void**)&a_h,size));
checkCUDAError("malloc");
//Copiar vectores del Host a la Memoria del GPU
(cudaMemcpy(d_A, a_h, size, cudaMemcpyHostToDevice));
checkCUDAError("memcpy");
//liberar la Memoria del GPU
CleanupResources();
//PlaySound("01-Diver.wav", NULL, SND_FILENAME|SND_SENTRY|SND_ASYNC);
system("pause");
}
void CleanupResources(void)
{
// Free device memory
cudaFree(d_A);
// Free host memory
free(a_h);
}
The problem is that when I compiled it, it gave me the error "device kernel image is invalid", and the C++ side gave me a CUDA error on memory…
Also, when I tried to use the cutil functions like cutilSafe…, the program crashed or generated an interrupt — in fact, every cutil and cutil_inline function crashes the program.
→ Apart from the cudaMalloc() problem (a_h is a host memory pointer; d_A is the device memory pointer that should be passed to cudaMalloc and the kernel), I think you can't detach your context there — that line is incorrect.
→ Apart from the cudaMalloc() problem (a_h is a host memory pointer; d_A is the device memory pointer that should be passed to cudaMalloc and the kernel), I think you can't detach your context there — that line is incorrect.
I didn't notice that I was using the CUDA v4 PATH in one directory while in the linker properties I was using the older CUDA PATH, so I was loading the 32-bit cudart, causing a conflict with v4.
I didn't notice that I was using the CUDA v4 PATH in one directory while in the linker properties I was using the older CUDA PATH, so I was loading the 32-bit cudart, causing a conflict with v4.
Your CUDA_PATH is the install path for your CUDA files. So if you look at your system variables, you should have one called CUDA_PATH, one called CUDA_PATH_V3_2, and one called CUDA_PATH_V4_0, where the first two point to CUDA 3.2-related items and the third one points to CUDA 4.0 items. For example, on my system my CUDA_PATH_V4_0 variable is C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0. Assuming you're on Windows 7, you can check your environment variables by: open the Start menu → right-click on Computer → Properties → Advanced system settings → Environment Variables.
In this case, if you're using CUDA 4.0 functionality but linking against CUDA 3.2 libraries, your code may have issues (if you're using something that's 4.0-specific).