Problem with kernel call

Hi,

I want to work with images in a kernel without changing their format. Currently I am facing the following problem:

main.cpp

//forward declaration of functions in kernels.cu
void do_test(const cv::gpu::PtrStepSz<float>& src, cv::gpu::PtrStepSz<float>& dst);

main(){
// img_in and img_out are both with CV_32FC1 format
do_test(img_in, img_out);
}

kernels.cu

__global__ void do_test_kernel(cv::gpu::PtrStepSz<float> src, cv::gpu::PtrStepSz<float> dst)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    if(x < dst.cols && y < dst.rows) {
       dst.ptr(y)[x] = src.ptr(y)[x];
    }
}

void do_test(const cv::gpu::PtrStepSz<float>& src, cv::gpu::PtrStepSz<float>& dst)
{
    dim3 block(32,8);
    dim3 grid(dst.cols/block.x, dst.rows/block.y);

    do_test_kernel<<<grid,block>>>(src,dst);
}

When compiling, the error at the forward declaration is:
in passing argument 2 of ‘void do_test(const cv::gpu::PtrStepSz&, cv::gpu::PtrStepSz&)’
and at the function call in the main() function the error is:
invalid initialization of reference of type ‘cv::gpu::PtrStepSz&’ from expression of type ‘cv::gpu::GpuMat’

What am I doing wrong? Thanks for your answers.

Greetings
ManuKlause

Hi ManuKlause ,

Could you try

do_test((cv::gpu::PtrStepSz<float>)img_in,(cv::gpu::PtrStepSz<float>) img_out);

I tried this typecast in my main() function, but it failed with

error: in passing argument 2 of ‘void do_test(const cv::gpu::PtrStepSz&, cv::gpu::PtrStepSz&)’
void do_test(const cv::gpu::PtrStepSz& img_in, cv::gpu::PtrStepSz& img_out);
at line 4

and with
error: invalid initialization of non-const reference of type ‘cv::gpu::PtrStepSz&’ from an rvalue of type ‘cv::gpu::PtrStepSz’
do_test((cv::gpu::PtrStepSz)image_f_left_in,(cv::gpu::PtrStepSz) image_f_left_out);
at line 8

Removing all reference symbols didn’t help either. It failes with an undefined reference at line 4.

Why not assign the img_out to a PtrStepSz first and then pass that PtrStepSz?
It should work.

I assigned img_in and img_out to PtrStepSz, which helped first.

PtrStepSz<float> input_image;
PtrStepSz<float> output_image;

input_image = img_in; // img_in has type CV_32FC1

do_test(input_image, output_image);

img_out = cv::gpu:.GpuMat(output_image.rows, output_image.cols, CV_32FC1, output_image.data, output_image.step)

However, input_image has the rows, cols and step of the GpuMat img_in, but out_image has 0 rows, 0 cols and 0 step, so does img_out. I can’t debug into

do_test(input_image, output_image);

, the debugger of Qt skips this line.
In my kernel, I just copy the input to the output.

Please try following function declaration and passed GpuMat to it.

void do_test(const cv::gpu::PtrStepSz<float>& src, cv::gpu::PtrStepSz<float> dst)

It works. Thank you a lot!

The program is now as follows

main.cpp

//forward declaration of functions in kernels.cu
void do_test(const cv::gpu::PtrStepSz<float>& src, cv::gpu::PtrStepSz<float> dst);

cv::gpu::GpuMat src(newHeight, newWidth, CV_32FC1);
cv::gpu::GpuMat dst(newHeight, newWidth, CV_32FC1);

do_test(src, dst);


kernels.cu

__global__ void do_test_kernel(cv::gpu::PtrStepSz<float> src, cv::gpu::PtrStepSz<float> dst)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    if(x < src.cols && y < src.rows) {
       dst.ptr(y)[x] = src.ptr(y)[x];
    }
}

void do_test(const cv::gpu::PtrStepSz<float>& src, cv::gpu::PtrStepSz<float> dst)
{
    dim3 block(32,8);
    dim3 grid(dst.cols/block.x, dst.rows/block.y);

    do_test_kernel<<<grid,block>>>(src,dst);

    cudaDeviceSynchronize();
}