Hello zhouzhi9,
We have implemented the code following your suggestion, both in the custom plugin and in the probe function, and it works well :-)
In the custom plugin, the frame is copied from GPU to CPU using cudaMemcpy, modified with OpenCV, and then copied back to the GPU using cudaMemcpy again. The frame is modified in place, so the other plugins in the pipeline can grab the modified frame from their input buffer.
Here is the code we used; in this case the static function below is called from src_osd_buffer_probe:
/**
 * Draws on one frame of a batched GPU buffer using OpenCV on the host.
 *
 * The frame at `batchIndex` is copied from the surface into a temporary
 * pinned host buffer, wrapped in a cv::Mat, modified, copied back over the
 * same surface memory, and finally `npp_stream` is synchronized.
 *
 * @param inGPUbuf    GstBuffer whose mapped data is interpreted as an
 *                    NvBufSurface.
 * @param imageW      Frame width in pixels; must be > 0.
 * @param imageH      Frame height in pixels; must be > 0.
 * @param batchIndex  Index into `surface->buf_data`; must be >= 0.
 * @param npp_stream  CUDA stream synchronized before returning.
 *
 * On any failure the function logs and returns without modifying the frame
 * further; the GstBuffer mapping and the pinned host buffer are always
 * released on every exit path.
 *
 * NOTE(review): both copies treat the frame as tightly packed RGBA
 * (row stride == imageW * RGBA_BYTES_PER_PIXEL). If the surface is pitched,
 * cudaMemcpy2D with the real pitch is required — confirm against the
 * surface layout.
 * NOTE(review): the behavior of CHECK_CUDA_STATUS on failure is defined
 * outside this block; cleanup below is written to be correct whether it
 * returns, throws, or only logs.
 */
static void modify_frame(GstBuffer *inGPUbuf,
                         int imageW,
                         int imageH,
                         int batchIndex,
                         cudaStream_t npp_stream)
{
  // Releases the GstBuffer mapping when the scope is left, on any path.
  struct MapGuard {
    GstBuffer *buf;
    GstMapInfo *info;
    ~MapGuard () { gst_buffer_unmap (buf, info); }
  };
  // Frees the pinned host allocation when the scope is left, on any path.
  struct PinnedGuard {
    void *ptr;
    ~PinnedGuard () { if (ptr != NULL) cudaFreeHost (ptr); }
  };

  if (imageW <= 0 || imageH <= 0 || batchIndex < 0)
  {
    ERROR << "MainPipelineThread::modify_frame Invalid frame size or batch index";
    return;
  }

  // Compute sizes in size_t so large frames cannot overflow int arithmetic.
  const size_t row_bytes = static_cast<size_t> (imageW) * RGBA_BYTES_PER_PIXEL;
  const size_t frame_bytes = row_bytes * static_cast<size_t> (imageH);

  GstMapInfo in_map_info;
  memset (&in_map_info, 0, sizeof (in_map_info));
  if (!gst_buffer_map (inGPUbuf, &in_map_info, GST_MAP_READ))
  {
    ERROR << "MainPipelineThread::modify_frame Failed to map gst buffer";
    return;
  }
  MapGuard map_guard = { inGPUbuf, &in_map_info };

  NvBufSurface *surface = (NvBufSurface *) in_map_info.data;
  if (surface == NULL)
  {
    ERROR << "MainPipelineThread::modify_frame Mapped gst buffer has no surface";
    return;
  }
  void *buf_surface = surface->buf_data[batchIndex];
  if (buf_surface == NULL)
  {
    ERROR << "MainPipelineThread::modify_frame Surface has no data for batch index";
    return;
  }

  // Start from NULL so a failed allocation is detectable even if the check
  // macro does not leave the function.
  void *host_rgb_buf = NULL;
  CHECK_CUDA_STATUS (cudaMallocHost (&host_rgb_buf, frame_bytes),
      "Could not allocate cuda host buffer");
  if (host_rgb_buf == NULL)
  {
    return;
  }
  PinnedGuard pinned_guard = { host_rgb_buf };

  CHECK_CUDA_STATUS (cudaMemcpy (host_rgb_buf, buf_surface, frame_bytes,
          cudaMemcpyDeviceToHost),
      "Error copy Device to Host");

  // cv::Mat only wraps host_rgb_buf here (no copy); it must not outlive it.
  cv::Mat cvmat (cv::Size (imageW, imageH), CV_8UC4, host_rgb_buf, row_bytes);

  //DO YOUR OPENCV ELABORATION ON FRAME HERE
  cv::rectangle (cvmat, cv::Rect (10, 10, 20, 20), cv::Scalar (0, 0, 255), 3);
  ///

  CHECK_CUDA_STATUS (cudaMemcpy (buf_surface, cvmat.data, frame_bytes,
          cudaMemcpyHostToDevice),
      "Error copy Host to Device");
  CHECK_CUDA_STATUS (cudaStreamSynchronize (npp_stream),
      "Failed to synchronize cuda stream");
}