Real-time CLAHE processing of video, framerate issue. Gstreamer + nvivafilter + OpenCV

I think I got it… It seems the rows are padded to a 256-byte stride. This should work better (not tested much so far):

static std::vector<cv::cuda::GpuMat> uv(2);
static void cv_process_NV12(void** pPitch, int32_t width, int32_t height) {

    //printf ("cv_process_NV12  %d x %d\n", width, height);

    const int stride = 256;
    int num_strides = ((int)width)/stride;
    int use_width = num_strides*stride;
    if (use_width < (int)width)
	   use_width += stride;
    int use_height = height;

 
    cv::cuda::GpuMat d_Mat_Y(use_height, use_width, CV_8UC1, pPitch[0]);

    // U and V are interleaved
    cv::cuda::GpuMat d_Mat_UV(use_height/2, use_width/2, CV_8UC2, pPitch[1]);
    cv::cuda::split(d_Mat_UV, uv);
    cv::cuda::GpuMat d_Mat_Cb = uv[0];
    cv::cuda::GpuMat d_Mat_Cr = uv[1];

    // Some process... here just setting a kind of blue
    cv::Rect Yroi(0, 0, 100, 100);
    cv::Rect UVroi(0, 0, 50, 50);  

    d_Mat_Y(Yroi).setTo(100);
    d_Mat_Cr(UVroi).setTo(0);
    d_Mat_Cb(UVroi).setTo(255);

    // reinterleave U&V
    cv::cuda::merge(uv, d_Mat_UV);

 
    // Final check
    if (d_Mat_Y.data != (uchar*) pPitch[0])
	   std::cerr << "Error: reallocated buffer for d_Mat_Y" << std::endl;
    if (d_Mat_UV.data != (uchar*) pPitch[1])
	   std::cerr << "Error: reallocated buffer for d_Mat_UV" << std::endl;
}

Be aware that there may be a (black?) border on the right side of the image because of the stride padding.
If this confuses your filter, you can get a view of the original (unpadded) Y mat with:

// cv::Rect takes (x, y, width, height), not the bottom-right corner, so the
// full visible image is width x height; subtracting 1 would drop the last
// column and row.
cv::Rect Yroi(0, 0, width, height);
// ROI view onto the padded luma plane: shares the same memory, no copy.
d_Mat_Original_Y=d_Mat_Y(Yroi);
// Apply your filter on original now