Hello,
I am a student who just started learning to use gpu parallel programming for a project on my own. Currently using cuda 8.0. I have encountered an illegal memory access error when running my program for image processing. No problem with compilation.
I am hoping for some advice on cudaMalloc, and if i did it correctly.
In this code, i am attempting to get a pointer to the data stored in the type Eigen ArrayXXf color, then passed over to another function to do mathematical manipulation on gpu in parallel and finally store it back to color ArrayXXf.
I believe the issues lies with my allocation of pointer from host to device and if the pointer at the device could link back. But i cant figure out where i went wrong.
The problem should lie on line 50 whenever I attempt to write to dev_color. Reading from dev_color however does not result in any illegal memory error and i am able to obtain 0.5 as initialised
the two functions, cudaSafeCall and cudaCheckError, are obtained from https://gist.github.com/ashwin/2652488 as a method to check if the kernel function runs properly.
inline void __cudaSafeCall( cudaError err, const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
if ( cudaSuccess != err )
{
fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
inline void __cudaCheckError( const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if ( cudaSuccess != err )
{
fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
err = cudaDeviceSynchronize();
if( cudaSuccess != err )
{
fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
__global__ void kernel(int dev_size, int width, int height, float* dev_color)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i < dev_size) //check that kernal function call is within loop size
{
...//other parts of the code
if((0<=x && x<width) && (0<=y && y<height))
{
dev_color[x*width + y] = 12.3; //crash here
}
}
}
void test_cu(size_t size, int width, int height, float *c_ptr)
{
size_t free, total;
int dev_size = (int) size; //size = 307200
printf("height= %d, width=%d", height, width); //h=360, w=480
float *dev_color;
int mem_size = sizeof(float) *width *height; //691200
CudaSafeCall(cudaMalloc( (void**) &dev_color, mem_size));
cudaMemcpy(dev_color, c_ptr, mem_size, cudaMemcpyHostToDevice);
cudaMemGetInfo(&free,&total);
printf("\n1) %d KB free of total %d KB\n",free/1024,total/1024); //checking memory allocation
int ThreadsPerBlk = 1024;
float blk_div = (float) dev_size/1024;
int NumBlk = (int) ceil(blk_div);
kernel<<<NumBlk, ThreadsPerBlk>>>(dev_size, width, height, dev_color); //run this kernel function on gpu
CudaCheckError(); //cuda sync error
cudaDeviceSynchronize();
cudaMemcpy(c_ptr, dev_color, mem_size, cudaMemcpyDeviceToHost);
cudaFree(dev_color);
}
void main()
{
...//other parts of codes
color = ArrayXXf::Constant(width, height, 0.5); //width=480, height=360
float *c_ptr = color.data(); //obtain pointer to arrayXXf
test_cu(size, width, height, c_ptr);
//remap the float pointer back to the ArrayXXf
Map<ArrayXXf>(c_ptr, width, height) = color;
...//other parts of codes
}
the error is shown below:
cudaCheckError() with sync failed at /path/test_cu.cu:339 : an illegal memory access was encountered
the line ^339 refers to the function CudaCheckError(); on line 78 of the part of the code shown above.
I have done some research/googling that illegal memory access could be due to allocating too much memory or if the array is out of bound? Thus, perhaps my allocation of memory space or pointer is performed wrongly.
Any advice/help is appreciated.
Thank you,
Samuel