I have a problem with a cudaMemcpy, it returns cudaErrorInvalidValue but I don’t understand why.
I have a main program which call a DLL where cuda functions are. I feed a gpuMat class with data from a sliding windows.
gpuMat is an header on a device pointer, just used to store some informations (number of cols, rows,… of the image, http://docs.opencv.org/modules/gpu/doc/data_structures.html#gpu-gpumat)
My code :
Main :
int const height = 10008;
int const width = 7092;
size_t size = sizeof(uchar) * height * width + sizeof(uchar)*height * 2;
img_windowsed *host_data1;
uchar *dev_data1;
host_data1 = (img_windowsed*)malloc(sizeof(img_windowsed));
host_data1->start = (uchar*)malloc(size);
host_data1->cursor = host_data1->start;
host_data1->end = host_data1->start + size;
for (int j = 0; j < size; j++)
host_data1->start[j] = j % 256;
if (plockmem(host_data1->start, size) == 0 )
{
dev_data1 = (uchar*)cuda_image(host_data1->cursor, size);
//it works perfectly, returns a new pointer
if (dev_data1 != NULL )
{ //create a GpuMat with 1 row, and width cols from a device_pointer, OK
GpuMat gtest1 = create_gray_gpumat_from_data(1, width, dev_data1, 2);
#define NUMBER_OF_LINES 1400
for (int i = 0; i < height; i += NUMBER_OF_LINES)
{ //error if gtest1.rows + NUMBER_OF_LINES goes over about 3000
if (update_gpumat_stream(host_data1, gtest1, NUMBER_OF_LINES) != 0)
break;
}
Functions :
int update_gpumat_stream(img_windowsed * data_to_add, GpuMat &img, int rows_added){
uchar * ptr;
if (data_to_add->cursor + rows_added*img.step > data_to_add->end)
return -2;
ptr = img.data + (img.rows - 1)*img.step + (img.cols - 1)*img.step;
if (upload_rows(ptr, data_to_add->cursor, (size_t)rows_added*img.step) == 0)
{
img.rows += rows_added;
data_to_add->cursor += rows_added*img.step;
if (data_to_add->cursor >= data_to_add->end)
data_to_add->cursor = data_to_add->start;
return 0;
}
else
return -1;
}
int upload_rows(uchar * dst, uchar * data, size_t size){
return cudaMemcpy((void*)dst, (void*)data, size, cudaMemcpyHostToDevice);
}
void * cuda_image(uchar * data, size_t size) {
uchar *ptr;
cudaMalloc(&ptr, size); //Allocate GPU space
if (upload_rows(ptr, data, size) == cudaSuccess)
return ptr;
else
return NULL;
}
int plockmem(void * ptr, size_t size){
int err=cudaHostRegister(ptr, size, cudaHostRegisterDefault);
if (err == cudaSuccess)
return 0;
else
return 1;
}
I don’t understand because, in my function cuda_image I use without errors cudaMemcpy and if I define NUMBER_OF_LINES to height I have cudaErrorInvalidValue at the call from line 27 in the main.
Thanks
EDIT : My error seems to come from update_gpumat_stream, on line : ptr = img.data + (img.rows - 1)*img.step + (img.cols - 1)*img.step;
If I change it to ptr = img.data I can send 10000 rows without errors.