How to create opencv gpumat from nvstream?

There is an example in the dsexample plugin, but it only covers the CPU version. I followed that example and managed to create a GpuMat, but I get an error as soon as I try to perform any operation on it.

gpu_mat = cv::cuda::GpuMat(height, width, CV_8UC4, mapped_ptr, buf_params.pitch[0]);
cv::cuda::cvtColor(gpu_mat, gpu_bgr, CV_RGBA2BGR);

error: (-217:Gpu API call) unspecified launch failure in function ‘call’

2 Likes

Hi,

Error -217 is from OpenCV library.
You may be using an invalid image format or an unsupported function call.

For example:
"using GPU module with own code" - OpenCV Q&A Forum

Thanks.

Thanks, AastaLLL.
But the code below works fine on the CPU, and it uses the same CV_8UC4 format. Do CPU and GPU memory use different formats?

if (NvBufferMemSyncForCpu (dmabuf_fd, 0, &mapped_ptr) != 0) 
{
    flow_ret = GST_FLOW_ERROR;
    goto done;
}
cpu_mat = cv::Mat (height, width, CV_8UC4, mapped_ptr, buf_params.pitch[0]);
cv::cvtColor (cpu_mat,  bgr_cpu_mat, CV_RGBA2BGR);
gpu_mat.upload(bgr_cpu_mat);

The OpenCV documentation is not very clear about this, but not all conversions are available in the CUDA module. This won't cause an error at compile time (the conversion codes are defined for the CPU version), but it will fail at runtime.
More conversions become available with each OpenCV release, so you have a better chance with the latest versions. Be aware that since OpenCV 4 the CUDA modules live in the extra opencv-contrib package.
If you have several OpenCV versions on your system, make sure your application links against the right version of the libraries and loads the right dynamic ones at run time.

Can you try another conversion available from your cudaimgproc/src/color.cpp ?

I’ve tried all of these options:

CV_RGBA2BGR565
CV_RGBA2GRAY
CV_RGBA2BGR555
CV_RGBA2mRGBA

but they gave the same error.

Does :

ldd your-application

show it loading the right version of the OpenCV dynamic libs?
Which OpenCV version are you running?

I’m running OpenCV v3.4.5. Here is the output from ldd, but I don’t see libopencv_cudaimgproc in the output.

libopencv_videoio.so.3.4 => /usr/local/lib/libopencv_videoio.so.3.4 (0x0000007fb5a2a000)
libopencv_imgcodecs.so.3.4 => /usr/local/lib/libopencv_imgcodecs.so.3.4 (0x0000007fb5746000)
libopencv_cudawarping.so.3.4 => /usr/local/lib/libopencv_cudawarping.so.3.4 (0x0000007fb4e2b000)
libopencv_imgproc.so.3.4 => /usr/local/lib/libopencv_imgproc.so.3.4 (0x0000007fb4a30000)
libopencv_core.so.3.4 => /usr/local/lib/libopencv_core.so.3.4 (0x0000007fb44d8000)

That looks surprising… Did you include opencv_cudaimgproc and specify it when compiling/linking your application?

Sorry I did ldd on the main app not the plugin. Here is the result from the plugin.

libopencv_cudabgsegm.so.3.4 => /usr/local/lib/libopencv_cudabgsegm.so.3.4 (0x0000007f885d7000)
libopencv_cudawarping.so.3.4 => /usr/local/lib/libopencv_cudawarping.so.3.4 (0x0000007f87cbc000)
libopencv_cudaimgproc.so.3.4 => /usr/local/lib/libopencv_cudaimgproc.so.3.4 (0x0000007f87606000)
libopencv_cudafilters.so.3.4 => /usr/local/lib/libopencv_cudafilters.so.3.4 (0x0000007f836ce000)
libopencv_imgproc.so.3.4 => /usr/local/lib/libopencv_imgproc.so.3.4 (0x0000007f832d3000)

Re-reading this, main problem may be using cvtColor between host (Mat) and device (GpuMat).

If you have a RGBA frame in GPU memory and want to have it converted to BGR in cpu memory, you would try something like:

cv::cuda::GpuMat d_Mat_RGBA(height, width, CV_8UC4, mapped_ptr, buf_params.pitch[0]); 

/* A GpuMat for RGB format frame */
cv::cuda::GpuMat d_Mat_RGB (height, width, CV_8UC3);

/* Final RGB frame host Mat */
cv::Mat h_Mat_RGB(height, width, CV_8UC3);


/* Make RGBA to RGB conversion by GPU between two GpuMats */
cv::cuda::cvtColor(d_Mat_RGBA, d_Mat_RGB, CV_RGBA2RGB);

/* Copy to cpu Mat */
d_Mat_RGB.download(h_Mat_RGB);

I’ve tried it but it produced the same error while calling cvtColor function. I checked if d_Mat_RGBA was empty and it was not empty. And I checked if d_Mat_RGB was created by printing out the number of channels, width, and height. The output was correct.

I have checked on a TX2 with R28.2.0 with CUDA 9.0 (.252) and an opencv 3.4.0 build configured with this, not using nvstream but nvivafilter plugin.

This works fine:

...
/**
 * Demonstration of OpenCV CUDA processing on an RGBA frame that already
 * lives in device-accessible memory.
 *
 * The frame is wrapped (not copied) in a GpuMat, converted to RGB on the GPU,
 * round-tripped through a host cv::Mat (where CPU-side edits could be made),
 * and finally converted back to RGBA into the original buffer, which is the
 * output.
 *
 * @param pdata   Pointer to the first RGBA pixel; must be usable by CUDA
 *                (e.g. CUeglFrame::frame.pPitch[0]).
 * @param width   Frame width in pixels.
 * @param height  Frame height in pixels.
 * @param pitch   Row stride of pdata in bytes. Defaults to
 *                cv::Mat::AUTO_STEP (rows tightly packed, width * 4 bytes),
 *                which is what the previous three-argument form assumed.
 *                Pass the real pitch (e.g. CUeglFrame::pitch) when rows are
 *                padded, otherwise every row after the first is addressed
 *                at the wrong offset.
 */
static void cv_process_RGBA(void *pdata, int32_t width, int32_t height,
                            size_t pitch = cv::Mat::AUTO_STEP)
{
    /* Nothing to process: avoid wrapping a null or empty buffer */
    if (pdata == NULL || width <= 0 || height <= 0)
        return;

    /* Wrap the existing buffer in a GpuMat header, honouring its row stride */
    cv::cuda::GpuMat d_Mat_RGBA(height, width, CV_8UC4, pdata, pitch);

    /* Convert into a RGB GpuMat */
    cv::cuda::GpuMat d_Mat_RGB(height, width, CV_8UC3);
    cv::cuda::cvtColor(d_Mat_RGBA, d_Mat_RGB, CV_RGBA2RGB);

    /* Copy to cpu Mat */
    cv::Mat h_Mat_RGB(height, width, CV_8UC3);
    d_Mat_RGB.download(h_Mat_RGB);

    /* Uncomment this to set the image to purple from CPU */
    //h_Mat_RGB = cv::Scalar(255, 0, 255);

    /* Copy back from CPU Mat to device RGB GpuMat */
    d_Mat_RGB.upload(h_Mat_RGB);

    /* Convert back to RGBA in device, this will be the output.
     * NOTE(review): this relies on cvtColor reusing a destination that
     * already has the requested size and type, so the result lands in pdata
     * rather than in a freshly allocated buffer. */
    cv::cuda::cvtColor(d_Mat_RGB, d_Mat_RGBA, CV_RGB2RGBA);
}

...

/**
 * Register an EGLImage with CUDA, hand its RGBA pixel data to
 * cv_process_RGBA(), and unregister the image again.
 *
 * Only pitch-linear frames in CU_EGL_COLOR_FORMAT_RGBA are processed; other
 * pitch-linear colour formats are reported and skipped, and non-pitch frames
 * are skipped silently.
 *
 * @param image   EGLImage to process in place.
 * @param usrptr  Not used by this implementation.
 */
static void
gpu_process (EGLImageKHR image, void ** usrptr)
{
  CUresult status;
  CUeglFrame eglFrame;
  CUgraphicsResource pResource = NULL;

  (void) usrptr;

  /* NOTE(review): presumably a no-op runtime call used to make sure a CUDA
   * context exists before the driver API calls below -- confirm. */
  cudaFree(0);
  status = cuGraphicsEGLRegisterImage(&pResource, image, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
  if (status != CUDA_SUCCESS) {
    printf("cuGraphicsEGLRegisterImage failed : %d \n", status);
    return;
  }

  status = cuGraphicsResourceGetMappedEglFrame( &eglFrame, pResource, 0, 0);
  if (status != CUDA_SUCCESS) {
    printf ("cuGraphicsResourceGetMappedEglFrame failed : %d \n", status);
    /* eglFrame was not filled in, so it must not be read: release the
     * registration and give up on this frame. */
    status = cuGraphicsUnregisterResource(pResource);
    if (status != CUDA_SUCCESS) {
      printf("cuGraphicsEGLUnRegisterResource failed: %d \n", status);
    }
    return;
  }

  status = cuCtxSynchronize();
  if (status != CUDA_SUCCESS) {
    printf ("cuCtxSynchronize failed \n");
  }

  if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
    if (eglFrame.eglColorFormat == CU_EGL_COLOR_FORMAT_RGBA) {
      /* Perform now your custom opencv processing */
      //printf ("Valid eglcolorformat %d\n", eglFrame.eglColorFormat);
      cv_process_RGBA(eglFrame.frame.pPitch[0], eglFrame.width, eglFrame.height);
    } else {
      /* Report the colour format that was actually rejected */
      printf ("Invalid eglcolorformat %d for opencv\n", eglFrame.eglColorFormat);
    }
  }

  /* Wait for the processing queued above before releasing the image */
  status = cuCtxSynchronize();
  if (status != CUDA_SUCCESS) {
    printf ("cuCtxSynchronize failed after memcpy \n");
  }

  status = cuGraphicsUnregisterResource(pResource);
  if (status != CUDA_SUCCESS) {
    printf("cuGraphicsEGLUnRegisterResource failed: %d \n", status);
  }
}
...

being used as such:

gst-launch-1.0 nvcamerasrc ! 'video/x-raw(memory:NVMM)' ! nvivafilter customer-lib-name=./lib-gst-custom-opencv_cudaprocess.so cuda-process=true ! 'video/x-raw(memory:NVMM),format=(string)RGBA' ! nvegltransform ! nveglglessink

Sorry, I have no experience with nvstream, and I have no idea what mapped_ptr and buf_params.pitch[0] are in your case. Someone else may be able to advise better.

Sorry, I should have mentioned that I’m using DeepStream 3 and CUDA 10. I copied mapped_ptr and buf_params.pitch[0] from the dsexample plugin in DS3, which has a different structure from DS2. My OpenCV code works fine with DS2.

Thanks for your time and help.

Please also refer to
https://devtalk.nvidia.com/default/topic/1047563/jetson-tx2/libargus-eglstream-to-nvivafilter/post/5319890/#5319890

For using gpuMat, it should use eglFrame.frame.pPitch[0] obtained from:

cuGraphicsEGLRegisterImage(&pResource, image,
                CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
cuGraphicsResourceGetMappedEglFrame(&eglFrame, pResource, 0, 0);

I have deepstream 4.0 and I can’t find any example implementations with cv::cuda (not in the gst-dsexample).

I want to run some cv::cuda functions on the image frame in GPU memory. Is that possible? How can I access the image from within the plugin?

Thanks

Hi rog07o4z,
You may upgrade to r32.2.1 + DS4.0.1 via sdkmanager first.

An OpenCV 3.3.1 package is installed by sdkmanager. However, it does not contain the cv::cuda functions. You may use this script to update OpenCV:
https://github.com/AastaNV/JEP/blob/master/script/install_opencv4.1.1_Jetson.sh

Thanks. I will do that. Is there an example of how to access and modify the image stream in memory with OpenCV's cv::cuda module?
Thanks

Hi,
Please apply below code to get_converted_mat() in gstdsexample.cpp

#include <cudaEGL.h>
#include <opencv2/cudafilters.hpp>
#ifdef __aarch64__
  // To use the converted buffer in CUDA, create an EGLImage and then use
  // CUDA-EGL interop APIs
  if (USE_EGLIMAGE) {
    if (NvBufSurfaceMapEglImage (dsexample->inter_buf, 0) !=0 ) {
      goto error;
    }

    // dsexample->inter_buf->surfaceList[0].mappedAddr.eglImage
    // Use interop APIs cuGraphicsEGLRegisterImage and
    // cuGraphicsResourceGetMappedEglFrame to access the buffer in CUDA
#if 1
    // Created on first use and kept for later calls.
    static cv::Ptr< cv::cuda::Filter > filter;
    CUresult status;
    CUeglFrame eglFrame;
    CUgraphicsResource pResource = NULL;
    // Becomes true only when every CUDA step below succeeded.
    bool cuda_ok = false;

    // NOTE(review): presumably a no-op runtime call that makes sure a CUDA
    // context exists before the driver API calls below -- confirm.
    cudaFree(0);
    status = cuGraphicsEGLRegisterImage(&pResource,
                dsexample->inter_buf->surfaceList[0].mappedAddr.eglImage,
                CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (status == CUDA_SUCCESS) {
        status = cuGraphicsResourceGetMappedEglFrame(&eglFrame, pResource, 0, 0);
        if (status == CUDA_SUCCESS) {
            status = cuCtxSynchronize();
        }
        // eglFrame is only read once it has been filled in successfully, and
        // pPitch[0] is only meaningful for pitch-linear frames.
        if (status == CUDA_SUCCESS &&
            eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
            if (filter.empty()) {
                filter = cv::cuda::createSobelFilter(CV_8UC4, CV_8UC4, 1, 0, 3, 1, cv::BORDER_DEFAULT);
                //filter = cv::cuda::createGaussianFilter(CV_8UC4, CV_8UC4, cv::Size(31,31), 0, 0, cv::BORDER_DEFAULT);
            }
            // Wrap the mapped frame without copying, passing the frame's real
            // row stride so padded rows are addressed correctly.
            cv::cuda::GpuMat d_mat(dsexample->processing_height, dsexample->processing_width, CV_8UC4, eglFrame.frame.pPitch[0], eglFrame.pitch);
            // In-place filtering, as in the original snippet.
            // NOTE(review): not every cv::cuda operation tolerates src == dst;
            // use a separate destination GpuMat if the output looks corrupted.
            filter->apply (d_mat, d_mat);
            cuda_ok = (cuCtxSynchronize() == CUDA_SUCCESS);
        }
        // Always release the registration once it has been acquired.
        cuGraphicsUnregisterResource(pResource);
    }
    if (!cuda_ok) {
        // Undo the EGLImage mapping before taking the error path.
        NvBufSurfaceUnMapEglImage (dsexample->inter_buf, 0);
        goto error;
    }

    // apply back to the original buffer
    transform_params.src_rect = &dst_rect;
    transform_params.dst_rect = &src_rect;
    NvBufSurfTransform (dsexample->inter_buf, &ip_surf, &transform_params);
#endif
    // Destroy the EGLImage
    NvBufSurfaceUnMapEglImage (dsexample->inter_buf, 0);
  }
#endif

Makefile

# Remove opencv from PKGS (it used to be the last entry), so pkg-config no
# longer pulls in the system OpenCV package
PKGS:= gstreamer-1.0 gstreamer-base-1.0 gstreamer-video-1.0
# Add opencv4 to CFLAGS and LIBS
CFLAGS+= -I /usr/local/include/opencv4
LIBS+=-L/usr/local/lib -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_videoio -lopencv_cudafilters

Enable dsexample in config file:

[ds-example]
enable=1
processing-width=640
processing-height=480
full-frame=1
unique-id=15
gpu-id=0
2 Likes

Thanks a lot. It worked!

Hello DaneLLL,
I modified your example to use the remap()-function of openCV.

cv::cuda::remap(d_mat, d_mat, dsexample->mat1, dsexample->mat2, cv::INTER_CUBIC, cv::BORDER_CONSTANT, cv::Scalar(0.f));

The code runs without errors, but the output has some weird artifacts in the top half of the image, while the bottom half seems correct.

On CPU the remap()-function works fine.

Was this function ever applied successfully with CUDA?