Hi, I am trying to convert an EGLFrame (with the CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR color format) to an RGB cv::Mat inside my custom shared library for the nvivafilter. Here is the code:
/**
* Performs CUDA Operations on egl image.
*
* @param image : EGL image
*/
static void gpu_process (EGLImageKHR image, void ** usrptr){
CUresult status;
CUeglFrame eglFrame;
CUgraphicsResource pResource = NULL;
cudaFree(0);
status = cuGraphicsEGLRegisterImage(&pResource, image, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
if (status != CUDA_SUCCESS) {
printf("cuGraphicsEGLRegisterImage failed : %d \n", status);
return;
}
status = cuGraphicsResourceGetMappedEglFrame( &eglFrame, pResource, 0, 0);
if (status != CUDA_SUCCESS) {
printf ("cuGraphicsSubResourceGetMappedArray failed\n");
}
status = cuCtxSynchronize();
if (status != CUDA_SUCCESS) {
printf ("cuCtxSynchronize failed \n");
}
cv::Mat yuvImg(eglFrame.height*3/2, eglFrame.width, CV_32SC1);
// copy the luma plane into the memory chunk with size of eglFrame.width * eglFrame.height
cudaMemcpy2D(yuvImg.data, eglFrame.width, eglFrame.frame.pPitch[0], eglFrame.pitch, eglFrame.width, eglFrame.height,
cudaMemcpyDeviceToHost);
// copy the interleaved chroma plane into the memory chunk with size of eglFrame.width * eglFrame.height / 2 and the offset of eglFrame.height * eglFrame.width
cudaMemcpy2D(yuvImg.data + eglFrame.height * eglFrame.width, eglFrame.width, eglFrame.frame.pPitch[1], eglFrame.pitch,
eglFrame.width, eglFrame.height/2, cudaMemcpyDeviceToHost);
cv::Mat rgbImg;
yuvImg.convertTo(yuvImg, CV_8UC1);
cv::cvtColor(yuvImg, rgbImg, cv::COLOR_YUV2BGR_NV21);
cv::imwrite("./rgb_test_out.jpg", rgbImg);
}
However, after the conversion I get a wrong RGB image: the output is duplicated 8 times horizontally, with the chroma and luma data still in their original positions (please see the attached screenshot). Could you please tell me the proper way to do this? Is there anything I am missing?
Update: I resolved it by using the CV_8UC1 type instead of CV_32SC1 for yuvImg. Does anyone know whether the two pPitch planes (luma and chroma) are guaranteed to be contiguous in memory? I am asking to see whether I can improve performance by eliminating one of the cudaMemcpy2D calls.