cudaMemcpy fails

I’m getting error code 11: invalid argument when trying to copy from the host to the device.

The following is the code:

#include <stdlib.h>

#include <stdio.h>

#include <cuda.h>

// Image libraries

#include <IL/il.h>

#include <IL/ilu.h>

#include <IL/ilut.h>

// Image dimensions; main() fills these in from
// ilGetInteger(IL_IMAGE_WIDTH) and ilGetInteger(IL_IMAGE_HEIGHT).

int imageWidth;

int imageHeight;

//-----------------------------------------------------------

// Pointers intended to refer to GPU buffers. main() passes them to
// InitAllocateGPUResources(), copyData_CPUToGPU() and FreeGPUResources().
// As file-scope variables they start out as null pointers.

unsigned char* devImageData;

unsigned char* devIntegralImage;

//-----------------------------------------------------------

// Status code returned by the GPU helper functions and handed to
// checkErrorMessage(); 0 means the helper succeeded.

int errorCode = 0;

//-----------------------------------------------------------

// Prints a three-line report for a status code returned by the GPU helpers:
// the code itself, the string for cudaGetLastError(), and a fixed message.
//   0: success message, then returns normally
//   1: intensity-image allocation failure, then exit(1)
//   2: integral-image allocation failure, then exit(1)
//   3: host-to-device copy failure, then exit(1)
// Any other value prints nothing and returns.
void checkErrorMessage(int errorCode)
{
	// Final report line for each known code, indexed by the code itself.
	static const char* const outcome[] = {
		"GPU resource allocated successfully.\n",
		"Cannot allocate for intensity image on the gpu.\n",
		"Cannot allocate for integral image on the gpu.\n",
		"Cannot copy data from CPU to GPU.\n",
	};

	if (errorCode < 0 || errorCode > 3)
	{
		return;
	}

	printf("Error Code: %d\n", errorCode);
	printf("CUDA Error Code: %s\n", cudaGetErrorString(cudaGetLastError()));
	printf("%s", outcome[errorCode]);

	// Every non-zero code is fatal.
	if (errorCode != 0)
	{
		exit(1);
	}
}

// Requests two device buffers of width*height bytes each via cudaMalloc.
//
// Return codes:
//   0: both cudaMalloc calls returned cudaSuccess
//   1: failed to allocate for devImageData
//   2: failed to allocate for devIntegralImage
//
// NOTE(review): devimageData and devintegralImage are passed by value, so
// cudaMalloc stores the new addresses in this function's own copies of the
// pointers. The pointer variables owned by the caller are not modified.

int InitAllocateGPUResources(unsigned char* devimageData, unsigned char* devintegralImage, int height, int width)

{

	// &devimageData is the address of the by-value parameter, not of the caller's pointer.
	if (cudaMalloc((void**)&devimageData, sizeof(unsigned char)*width*height) != cudaSuccess)

	{

		return 1;

	}

	

	// Same pattern for the second buffer; the first allocation is not released if this one fails.
	if (cudaMalloc((void**)&devintegralImage, sizeof(unsigned char)*width*height) != cudaSuccess)

	{

		return 2;

	}

	

	return 0;

}

// Releases the two buffers with cudaFree, image buffer first and integral
// buffer second. A null pointer is skipped rather than passed to cudaFree.
void FreeGPUResources(unsigned char* devimageData, unsigned char* devintegralImage)
{
	unsigned char* const buffers[2] = { devimageData, devintegralImage };

	for (int k = 0; k < 2; ++k)
	{
		if (!buffers[k])
		{
			continue;
		}

		cudaFree(buffers[k]);
	}
}

// Copies width*height bytes from the host buffer imagedata to the device
// buffer devimageData with a blocking cudaMemcpy.
//
// Return codes:
//   0: cudaMemcpy returned cudaSuccess
//   3: failed to copy from CPU to GPU
int copyData_CPUToGPU(unsigned char* imagedata, unsigned char* devimageData, int height, int width)
{
	return (cudaMemcpy(devimageData, imagedata, width*height, cudaMemcpyHostToDevice) == cudaSuccess) ? 0 : 3;
}

//-----------------------------------------------------------

// Loads the image named by argv[1] with DevIL, converts it to an 8-bit
// luminance map, allocates the GPU buffers and uploads the pixels.
//
// Exit status:
//   0: everything succeeded
//   1: missing argument, or a load / convert / allocation / copy failure
//   2: the DevIL shared library is older than the headers used to build
int main(int argc, char** argv)
{
	ILuint	ImgId;

	// We use the filename specified in the first argument of the command-line.
	if (argc < 2) {
		fprintf(stderr, "Usage : %s <file> [output]\n", argv[0]);
		return 1;
	}

	// Check if the shared lib's version matches the executable's version.
	if (ilGetInteger(IL_VERSION_NUM) < IL_VERSION) {
		printf("DevIL version is different...exiting!\n");
		return 2;
	}

	// Initialize DevIL.
	ilInit();
#ifdef ILU_ENABLED
	iluInit();
#endif

	// Generate the main image name to use.
	ilGenImages(1, &ImgId);

	// Bind this image name.
	ilBindImage(ImgId);

	// Loads the image specified by File into the image named by ImgId.
	if (!ilLoadImage(argv[1])) {
		printf("Could not open file...exiting.\n");
		exit(1);
	}

	// Convert to intensity map. The copy loop below reads one byte per pixel,
	// so a failed conversion must not be ignored.
	if (!ilConvertImage(IL_LUMINANCE, IL_UNSIGNED_BYTE)) {
		printf("Could not convert image to luminance...exiting.\n");
		ilDeleteImages(1, &ImgId);
		exit(1);
	}

	// each pixel is represented by 1 byte
	ILubyte* imageBytes = ilGetData();
	if (imageBytes == NULL) {
		printf("Could not access image data...exiting.\n");
		ilDeleteImages(1, &ImgId);
		exit(1);
	}

	imageWidth = ilGetInteger(IL_IMAGE_WIDTH);
	imageHeight = ilGetInteger(IL_IMAGE_HEIGHT);

	// set up GPU resources; checkErrorMessage() exits on any non-zero code
	errorCode = InitAllocateGPUResources(devImageData, devIntegralImage, imageHeight, imageWidth);
	checkErrorMessage(errorCode);

	// Host-side staging copy of the pixel data.
	unsigned char* imageData = (unsigned char*)malloc(imageWidth*imageHeight*sizeof(unsigned char));
	if (imageData == NULL) {
		printf("Cannot allocate host memory for the image.\n");
		FreeGPUResources(devImageData, devIntegralImage);
		ilDeleteImages(1, &ImgId);
		exit(1);
	}

	int i;
	for (i = 0; i < imageWidth*imageHeight; i++)
	{
		imageData[i] = (unsigned char)imageBytes[i];
	}

	// Copy from cpu to gpu
	errorCode = copyData_CPUToGPU(imageData, devImageData, imageHeight, imageWidth);
	checkErrorMessage(errorCode);

	//GenIntegralImage();

	// We're done with the image, so let's delete it.
	ilDeleteImages(1, &ImgId);
	free(imageData);
	FreeGPUResources(devImageData, devIntegralImage);

	return 0;
}

The output is:

Error Code: 0

CUDA Error Code: no error

GPU resource allocated successfully.

Error Code: 3

CUDA Error Code: invalid argument

Cannot copy data from CPU to GPU.

I also wrote a small test program (shown below); in it, cudaMemcpy returned cudaSuccess:

#include <cuda.h>

#include <stdio.h>

#include <stdlib.h>

// Device buffer used by the copy test below.
float* device;

// Minimal check that cudaMalloc followed by a host-to-device cudaMemcpy of
// 10 floats succeeds. Prints a message and exits with status 1 on the first
// failing step; returns 0 when both calls report cudaSuccess.
int main(int argc, char* argv[])
{
	const size_t bytes = sizeof(float)*10;

	float* host = (float*)malloc(bytes);
	if (host == NULL)
	{
		printf("Failed to allocate host memory.\n");
		exit(1);
	}

	// &device is the address of the global itself, so cudaMalloc updates it directly.
	if (cudaMalloc((void**)&device, bytes) != cudaSuccess)
	{
		printf("Failed to allocate device memory.\n");
		free(host);
		exit(1);
	}

	if (cudaMemcpy(device, host, bytes, cudaMemcpyHostToDevice)!=cudaSuccess)
	{
		printf("Failed to copy from host to device.\n");
		cudaFree(device);
		free(host);
		exit(1);
	}

	// Release both buffers before leaving.
	cudaFree(device);
	free(host);

	return 0;
}

The system configuration is: Ubuntu 11.04 with CUDA 4.2; and I’m using the devIL library for image operations.

Any idea why the cudaMemcpy was not working in the program?

Thank you!

Print what imageWidth and imageHeight are.

if (cudaMemcpy(devimageData, imagedata, width*height, cudaMemcpyHostToDevice) != cudaSuccess)

this should be

if (cudaMemcpy(devimageData, imagedata, width*height*sizeof(unsigned char), cudaMemcpyHostToDevice) != cudaSuccess)

OK, now I see the problem. You have to declare the pointer parameters as references. Otherwise the allocation function only changes its own copies of the pointers, and your global pointers are still null pointers when you pass them to cudaMemcpy.

// Allocates two device buffers of width*height bytes each with cudaMalloc.
//
// Both pointer parameters are taken by reference, so the addresses written
// by cudaMalloc end up in the caller's pointer variables.
//
// Return codes:
//   0: both allocations succeeded
//   1: failed to allocate for devimageData
//   2: failed to allocate for devintegralImage
int InitAllocateGPUResources(unsigned char*& devimageData, unsigned char*& devintegralImage, int height, int width)
{
	if (cudaMalloc((void**)&devimageData, sizeof(unsigned char)*width*height) != cudaSuccess)
	{
		return 1;
	}

	if (cudaMalloc((void**)&devintegralImage, sizeof(unsigned char)*width*height) != cudaSuccess)
	{
		return 2;
	}

	return 0;
}

It works now, thank you very much!