Proper way to convert an EGLFrame to cv::Mat?

Hi, I am trying to convert an EGLFrame (with the CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR color format) to an RGB cv::Mat inside my custom shared library for the nvivafilter. Here is the code:

/**
  * Performs CUDA Operations on egl image.
  *
  * @param image : EGL image
  */
static void gpu_process (EGLImageKHR image, void ** usrptr){
    CUresult status;
    CUeglFrame eglFrame;
    CUgraphicsResource pResource = NULL;

    cudaFree(0);
    status = cuGraphicsEGLRegisterImage(&pResource, image, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (status != CUDA_SUCCESS) {
        printf("cuGraphicsEGLRegisterImage failed : %d \n", status);
        return;
    }

    status = cuGraphicsResourceGetMappedEglFrame( &eglFrame, pResource, 0, 0);
    if (status != CUDA_SUCCESS) {
        printf ("cuGraphicsSubResourceGetMappedArray failed\n");
    }

    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS) {
        printf ("cuCtxSynchronize failed \n");
    }

    cv::Mat yuvImg(eglFrame.height*3/2, eglFrame.width, CV_32SC1);
    // copy the luma plane into the memory chunk with size of eglFrame.width * eglFrame.height 
    cudaMemcpy2D(yuvImg.data, eglFrame.width, eglFrame.frame.pPitch[0], eglFrame.pitch, eglFrame.width, eglFrame.height,                 
         cudaMemcpyDeviceToHost);
    // copy the interleaved chroma plane into the memory chunk with size of eglFrame.width * eglFrame.height / 2 and the offset of eglFrame.height * eglFrame.width
    cudaMemcpy2D(yuvImg.data + eglFrame.height * eglFrame.width, eglFrame.width, eglFrame.frame.pPitch[1], eglFrame.pitch,     
         eglFrame.width, eglFrame.height/2, cudaMemcpyDeviceToHost);
    cv::Mat rgbImg;
    yuvImg.convertTo(yuvImg, CV_8UC1);
    cv::cvtColor(yuvImg, rgbImg, cv::COLOR_YUV2BGR_NV21);

    cv::imwrite("./rgb_test_out.jpg", rgbImg);
}

However, after the conversion I get a wrong RGB image: the output is duplicated 8 times horizontally, with the chroma and luma data still in their original positions (please see the attached screenshot). Could you please tell me the proper way to do this conversion? Is there anything I am missing?

Update: I resolved it by using the CV_8UC1 type instead of CV_32SC1 for yuvImg. Does anyone know whether the two pPitch planes (luma and chroma) are guaranteed to be contiguous in memory? I am asking to see whether I can improve performance by eliminating one cudaMemcpy2D call.

Hi,
Please run

... ! nvivafilter ! 'video/x-raw(memory:NVMM),format=RGBA' ! ...

and add cv::Mat in either pre_process() or post_process().
cv::Mat is a CPU buffer, so you should map the frame to a CPU pointer. Also, for NV12 the data may not be mapped correctly if the pitch is not equal to the width. We suggest you use RGBA.