Jetson MultiMedia API to cv::cuda::GpuMat


I have a Xavier NX with four IMX335C cameras set up with master-slave synchronization. I have adapted the syncSensor example to allow for software synchronization of all four cameras, but I am stuck trying to convert the images into an OpenCV cv::cuda::GpuMat format.

I have added a function in ScopedCudaEGLStreamFrameAcquire where I plan to do the conversion;

/**
 * Wrap the acquired CUeglFrame in GpuMats and produce a BGR image, staying on
 * the GPU (no host copy).
 *
 * Input format: cudaEglColorFormatYUV420SemiPlanar_ER (= 38)
 *   Extended-range Y plane plus one interleaved VU plane (VU byte ordering),
 *   U/V width = 1/2 Y width, U/V height = 1/2 Y height.
 *
 * @param BGRMat output 8-bit 3-channel BGR image, (height x width).
 *
 * BUG FIX (review): in the original, the block comment opened above was never
 * closed before the code, so the GpuMat lines were swallowed by the comment
 * and never compiled. Also, the Y plane of a semi-planar 420 frame is
 * single-channel (CV_8UC1) at full resolution -- not CV_8UC2 -- and the
 * interleaved VU plane is a *second* surface (pPitch[1]) at half height.
 */
void ScopedCudaEGLStreamFrameAcquire::getGpUMat(cv::cuda::GpuMat &BGRMat)
{
    // Zero-copy views of the two surfaces; pass the surface pitch so padded
    // rows are not misread (ignoring pitch produces sheared/"warped" images).
    cv::cuda::GpuMat yPlane(m_frame.height, m_frame.width, CV_8UC1,
                            m_frame.frame.pPitch[0], m_frame.pitch);
    cv::cuda::GpuMat vuPlane(m_frame.height / 2, m_frame.width / 2, CV_8UC2,
                             m_frame.frame.pPitch[1], m_frame.pitch);

    BGRMat.create(m_frame.height, m_frame.width, CV_8UC3);

    // cv::cuda::cvtColor() has no YUV420 semi-planar -> BGR code, so the
    // colour conversion itself must go through NPP, e.g. nppiNV12ToBGR_8u_P2C3R
    // (note: VU ordering here is NV21-style -- verify channel order against the
    // NPP docs), or a small custom kernel over yPlane/vuPlane.
    // NOTE(review): plug the NPP call in here; the plane wrapping above is the
    // part OpenCV needs.
}

My problem here is that the CUeglFrame is YUV420 and there is no OpenCV function to map this format into BGR (the standard cv::Mat format) using CUDA. As there is a lot of data (4x 2616x1964 px @ ~20 FPS), I want to avoid a lot of copying, and I am looking for a way of keeping the remapping on the GPU. I have previously loaded the images via GStreamer using cv::VideoReader and uploaded them to the GPU manually, but this doesn't allow software synchronization, is slow, and takes up a lot of the CPU. Is there a way to convert the CUeglFrame into another format where it can be mapped to a cv::cuda::GpuMat?

I have also tested the patch in LibArgus EGLStream to nvivafilter - #12 by DaneLLL using NvBuffer (ABGR32) → EGLImageKHR → CUeglFrame → cv::cuda::GpuMat (BGR), but my output is warped. I have attached an image showing the output and the relevant code:

bool ConsumerThread::threadExecute()
    IEGLOutputStream *iEglOutputStream = interface_cast<IEGLOutputStream>(m_stream);
    IFrameConsumer *iFrameConsumer = interface_cast<IFrameConsumer>(m_consumer);

    CONSUMER_PRINT("Waiting until producer is connected...\n");
    if (iEglOutputStream->waitUntilConnected() != STATUS_OK)
        ORIGINATE_ERROR("Stream failed to connect.");
    CONSUMER_PRINT("Producer has connected; continuing.\n");

    while (true)
        UniqueObj<Frame> frame(iFrameConsumer->acquireFrame());
        IFrame *iFrame = interface_cast<IFrame>(frame);

        if (!iFrame)

        /* Get the IImageNativeBuffer extension interface */
        NV::IImageNativeBuffer *iNativeBuffer = interface_cast<NV::IImageNativeBuffer>(iFrame->getImage());

        if (!iNativeBuffer)
            ORIGINATE_ERROR("IImageNativeBuffer not supported by Image.");

        if (m_dmabuf == -1)
            m_dmabuf = iNativeBuffer->createNvBuffer(iEglOutputStream->getResolution(),
            if (m_dmabuf == -1)
                CONSUMER_PRINT("\tFailed to create NvBuffer\n");
        else if (iNativeBuffer->copyToNvBuffer(m_dmabuf) != STATUS_OK)
            ORIGINATE_ERROR("Failed to copy frame to NvBuffer.");

        CONSUMER_PRINT("Frame nr. %4i with stamp: %4.4f\n", static_cast<int>(iFrame->getNumber()), iFrame->getTime() / 1e9);

        EGLImageKHR egl_image = NULL;
        egl_image = NvEGLImageFromFd(NULL, m_dmabuf);
        cv::cuda::GpuMat bgrCudaMat;
        HandleEGLImage(&egl_image, bgrCudaMat);
        NvDestroyEGLImage(NULL, egl_image);

        if (static_cast<int>(iFrame->getNumber() == 20))
            cv::Mat cpuMat;
            cv::imwrite("~/Pictures/argus_output.jpg", cpuMat);
static void cv_process(void *pdata, int32_t width, int32_t height, cv::cuda::GpuMat &bgrMat)
    CONSUMER_PRINT("CV_PROCESS Height: %4i, Width: %4i\n", height, width);
    cv::cuda::GpuMat abgrMat(height, width, CV_8UC4, pdata);
    cv::cuda::cvtColor(abgrMat, bgrMat, cv::COLOR_BGRA2BGR);

/**
 * Register an EGLImage with CUDA, map its frame, convert the pitch-linear
 * BGRA surface to BGR on the GPU, then unregister the resource.
 *
 * @param image  valid EGLImageKHR created from the NvBuffer dmabuf
 * @param bgrMat receives the BGR conversion (left untouched on failure or if
 *               the frame is not CU_EGL_FRAME_TYPE_PITCH)
 *
 * BUG FIXES (review):
 *  - the original fell through after cuGraphicsEGLRegisterImage /
 *    cuGraphicsResourceGetMappedEglFrame failures and kept using the invalid
 *    resource/frame; we now return early (unregistering where needed);
 *  - the mapped-frame error message named the wrong API;
 *  - the GpuMat now honours eglFrame.pitch -- wrapping a pitch-aligned surface
 *    with the packed default step produces the sheared/"warped" image.
 */
static void Handle_EGLImage(EGLImageKHR image, cv::cuda::GpuMat &bgrMat)
{
    CUeglFrame eglFrame;
    CUgraphicsResource pResource = NULL;

    CUresult status = cuGraphicsEGLRegisterImage(&pResource, image,
                                                 CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (status != CUDA_SUCCESS)
    {
        printf("cuGraphicsEGLRegisterImage failed: %d, cuda process stop\n", status);
        return;
    }

    status = cuGraphicsResourceGetMappedEglFrame(&eglFrame, pResource, 0, 0);
    if (status != CUDA_SUCCESS)
    {
        printf("cuGraphicsResourceGetMappedEglFrame failed: %d\n", status);
        cuGraphicsUnregisterResource(pResource);
        return;
    }

    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS)
        printf("cuCtxSynchronize failed\n");

    if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH)
    {
        printf("CV_PROCESS Height: %4i, Width: %4i\n",
               static_cast<int>(eglFrame.height), static_cast<int>(eglFrame.width));
        // Zero-copy wrap of the mapped surface; eglFrame.pitch is the real
        // row stride in bytes (>= width * 4 because of alignment padding).
        cv::cuda::GpuMat abgrMat(eglFrame.height, eglFrame.width, CV_8UC4,
                                 eglFrame.frame.pPitch[0], eglFrame.pitch);
        cv::cuda::cvtColor(abgrMat, bgrMat, cv::COLOR_BGRA2BGR);
    }

    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS)
        printf("cuCtxSynchronize failed after memcpy\n");

    status = cuGraphicsUnregisterResource(pResource);
    if (status != CUDA_SUCCESS)
        printf("cuGraphicsEGLUnRegisterResource failed: %d\n", status);
}

void HandleEGLImage(void *pEGLImage, cv::cuda::GpuMat &bgrMat)
    EGLImageKHR *pImage = (EGLImageKHR *)pEGLImage;
    Handle_EGLImage(*pImage, bgrMat);

I am new with jetson and cuda, if someone could point me in the correct direction or even tell me if either is doable without having to deep dive into cuda programming it would be much appreciated!

Thank you!