Dane, thank you for your advice. Per your recommendation above I reduce the gst_dsexample_transform_ip to bare minimum just for experimenting GaussianBlur filter in Cuda as below:
static GstFlowReturn
gst_dsexample_transform_ip (GstBaseTransform * btrans, GstBuffer * inbuf)
{
GstDsExample *dsexample = GST_DSEXAMPLE (btrans);
GstMapInfo in_map_info;
GstFlowReturn flow_ret = GST_FLOW_ERROR;
NvBufSurface *surface = NULL;
dsexample->frame_num++;
CHECK_CUDA_STATUS (cudaSetDevice (dsexample->gpu_id),
"Unable to set cuda device");
memset (&in_map_info, 0, sizeof (in_map_info));
if (!gst_buffer_map (inbuf, &in_map_info, GST_MAP_READ)) {
g_print ("Error: Failed to map gst buffer\n");
goto error;
}
surface = (NvBufSurface *) in_map_info.data;
if (CHECK_NVDS_MEMORY_AND_GPUID (dsexample, surface))
goto error;
//////////////////////cuda filter experiment//////////////////////
#ifdef __aarch64__
/* To use the converted buffer in CUDA, create an EGLImage and then use
* CUDA-EGL interop APIs */
if (USE_EGLIMAGE) {
if (NvBufSurfaceMapEglImage (surface, 0) !=0 ) {
goto error;
}
/* dsexample->inter_buf->surfaceList[0].mappedAddr.eglImage
* Use interop APIs cuGraphicsEGLRegisterImage and
* cuGraphicsResourceGetMappedEglFrame to access the buffer in CUDA */
#if 1
//static bool create_filter = true;
//static cv::Ptr< cv::cuda::Filter > filter;
CUresult status;
CUeglFrame eglFrame;
CUgraphicsResource pResource = NULL;
cudaFree(0);
status = cuGraphicsEGLRegisterImage(&pResource,
surface->surfaceList[0].mappedAddr.eglImage,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
status = cuGraphicsResourceGetMappedEglFrame(&eglFrame, pResource, 0, 0);
status = cuCtxSynchronize();
cv::cuda::GpuMat d_mat(dsexample->processing_height, dsexample->processing_width, CV_8UC4, eglFrame.frame.pPitch[0]);
filter->apply (d_mat, d_mat);
status = cuCtxSynchronize();
status = cuGraphicsUnregisterResource(pResource);
#endif
/* Destroy the EGLImage */
NvBufSurfaceUnMapEglImage (dsexample->inter_buf, 0);
}
#endif
/////////////////////end of experiment////////////////////////////
flow_ret = GST_FLOW_OK;
error:
gst_buffer_unmap (inbuf, &in_map_info);
return flow_ret;
}
I was able to “make” and “sudo make install” successfully, when I run the pipeline, couple things I observed:
- now is very fast. When run even in “30W ALL” mode, never drop a frame any more => that’s very good.
- however the behaviour of filter act funny: it only filter (blur) the top 1/4 of the frame and bottom 3/4 frame are not filtered (not blur).
Question: am I manipulating the “surface” (eglFrame) correctly? if not, how would this in-place transformation (inbuf → filter → inbuf without copying) be done?
Thank you very much for your help again.
P.S. house keeping changes:
//create filter in gst_dsexample_start static gboolean gst_dsexample_start (GstBaseTransform * btrans) { .... filter = cv::cuda::createGaussianFilter(CV_8UC4, CV_8UC4, cv::Size(31,31), 0, 0, cv::BORDER_DEFAULT); .... }
and declare filter variable in gstdsexample.h
cv::Ptr<cv::cuda::Filter> filter;