I think I got it… It seems the rows are padded to a 256-byte stride. This should work better (not tested much so far):
static std::vector<cv::cuda::GpuMat> uv(2);
static void cv_process_NV12(void** pPitch, int32_t width, int32_t height) {
//printf ("cv_process_NV12 %d x %d\n", width, height);
const int stride = 256;
int num_strides = ((int)width)/stride;
int use_width = num_strides*stride;
if (use_width < (int)width)
use_width += stride;
int use_height = height;
cv::cuda::GpuMat d_Mat_Y(use_height, use_width, CV_8UC1, pPitch[0]);
// U and V are interleaved
cv::cuda::GpuMat d_Mat_UV(use_height/2, use_width/2, CV_8UC2, pPitch[1]);
cv::cuda::split(d_Mat_UV, uv);
cv::cuda::GpuMat d_Mat_Cb = uv[0];
cv::cuda::GpuMat d_Mat_Cr = uv[1];
// Some process... here just setting a kind of blue
cv::Rect Yroi(0, 0, 100, 100);
cv::Rect UVroi(0, 0, 50, 50);
d_Mat_Y(Yroi).setTo(100);
d_Mat_Cr(UVroi).setTo(0);
d_Mat_Cb(UVroi).setTo(255);
// reinterleave U&V
cv::cuda::merge(uv, d_Mat_UV);
// Final check
if (d_Mat_Y.data != (uchar*) pPitch[0])
std::cerr << "Error: reallocated buffer for d_Mat_Y" << std::endl;
if (d_Mat_UV.data != (uchar*) pPitch[1])
std::cerr << "Error: reallocated buffer for d_Mat_UV" << std::endl;
}
Be aware that there may be a (black?) border on the right side of the image because of the stride padding.
If this fools your filter, you can get the original (unpadded) Y mat with:
// cv::Rect takes (x, y, width, height), not corner coordinates, so the
// visible image is exactly width x height starting at the origin.
cv::Rect Yroi(0, 0, width, height);
d_Mat_Original_Y = d_Mat_Y(Yroi);
// Apply your filter on original now