I followed the code from the "Deepstream sample code snippet" thread (Intelligent Video Analytics / DeepStream SDK, NVIDIA Developer Forums) and converted the output of nvinfer into a 768x1024 cv::Mat. I then tried to place this image in the lower-left corner of the NvBufSurface, but ran into some data-conversion issues. My code is below.
// Buffer probe callback (by its name, intended for the tiler src pad).
// For each frame in the batch whose first frame-level user meta is nvinfer
// tensor output, the visible code:
//   1. maps the batch NvBufSurface and wraps one surface in a single-channel cv::Mat,
//   2. copies the tensor output to host memory and converts it to 8-bit planes,
//   3. merges the planes into `fusion_mat` and writes it to a PNG file,
//   4. allocates a scratch RGBA NvBufSurface and runs NvBufSurfTransform from
//      it into the original surface.
// NOTE(review): the snippet is not compilable as pasted — the braces and
// several continuation lines are missing, so scoping below is inferred from
// statement order only.
static GstPadProbeReturn
tiler_src_pad_buffer_probe (GstPad * pad, GstPadProbeInfo * info,
gpointer u_data)
GstBuffer *buf = (GstBuffer *) info->data;
// NOTE(review): num_rects, obj_meta, vehicle_count, person_count, l_obj and
// display_meta are declared but never used in the visible code.
guint num_rects = 0;
NvDsObjectMeta *obj_meta = NULL;
guint vehicle_count = 0;
guint person_count = 0;
NvDsMetaList * l_frame = NULL;
NvDsMetaList * l_obj = NULL;
NvDsDisplayMeta *display_meta = NULL;
NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta (buf);
// Get original raw data
GstMapInfo in_map_info;
// NOTE(review): with the braces lost it is unclear whether the unmap below
// belongs to the failure branch only (presumably followed by an early return
// in the original) — confirm; in_map_info.data is still read afterwards.
if (!gst_buffer_map (buf, &in_map_info, GST_MAP_READ)) {
g_print ("Error: Failed to map gst buffer\n");
gst_buffer_unmap (buf, &in_map_info);
// The mapped buffer payload is reinterpreted as the batched NvBufSurface.
NvBufSurface *surface = (NvBufSurface *)in_map_info.data;
// Walk every frame meta in the batch.
for (l_frame = batch_meta->frame_meta_list; l_frame != NULL;
l_frame = l_frame->next) {
NvDsFrameMeta *frame_meta = (NvDsFrameMeta *) (l_frame->data);
NvDsUserMetaList *usrMetaList = frame_meta->frame_user_meta_list;
// Only the first entry of the user meta list is inspected; later entries
// are not iterated.
if (usrMetaList != NULL) {
NvDsUserMeta *usrMetaData = (NvDsUserMeta *) usrMetaList->data;
if(usrMetaData->base_meta.meta_type == NVDSINFER_TENSOR_OUTPUT_META){
// NvDsFrameMeta *frame_meta = (NvDsFrameMeta *) (l_frame->data);
//TODO for cuda device memory we need to use cudamemcpy
// Map with index -1 / plane -1 (presumably "all surfaces, all planes" —
// confirm against the NvBufSurfaceMap documentation).
NvBufSurfaceMap (surface, -1, -1, NVBUF_MAP_READ);
/* Cache the mapped data for CPU access */
// NOTE(review): the sync uses the literal index 0 / plane 0, while the
// surface accessed below is selected by frame_meta->batch_id.
NvBufSurfaceSyncForCpu (surface, 0, 0); //will do nothing for unified memory type on dGPU
guint surface_height = surface->surfaceList[frame_meta->batch_id].height;
guint surface_width = surface->surfaceList[frame_meta->batch_id].width;
//Create Mat from NvMM memory, refer opencv API for how to create a Mat
// Single-channel 8-bit view with height*3/2 rows over plane 0's mapped
// address (presumably an NV12 layout, going by the variable name).
// NOTE(review): the constructor call is truncated in the paste — its last
// argument (likely the row pitch) and the closing ");" are missing.
cv::Mat nv12_mat = cv::Mat(surface_height*3/2, surface_width, CV_8UC1, surface->surfaceList[frame_meta->batch_id].mappedAddr.addr[0],
NvDsInferTensorMeta *meta = (NvDsInferTensorMeta *) usrMetaData->user_meta_data;
// Point every output layer's buffer at its host pointer and, when a device
// pointer exists, copy the device data into that host buffer.
// NOTE(review): the byte count is numElements * 4 — presumably
// sizeof(float); confirm the layer data type.
for (unsigned int i = 0; i < meta->num_output_layers; i++) {
NvDsInferLayerInfo *info = &meta->output_layers_info[i];
info->buffer = meta->out_buf_ptrs_host[i];
if (meta->out_buf_ptrs_dev[i]) {
cudaMemcpy (meta->out_buf_ptrs_host[i], meta->out_buf_ptrs_dev[i],
info->inferDims.numElements * 4, cudaMemcpyDeviceToHost);
// Dimensions are read from the first output layer only; assumes a
// channel/height/width ordering of d[0..2] — TODO confirm.
// NOTE(review): `ch` is not used again in the visible code.
int ch = meta->output_layers_info->inferDims.d[0];
int fusion_height = meta->output_layers_info->inferDims.d[1];
int fusion_width = meta->output_layers_info->inferDims.d[2];
int o_count = meta->output_layers_info->inferDims.numElements;
int onechannel_size = fusion_height * fusion_width;
float *outputCoverageBuffer =(float *) meta->output_layers_info[0].buffer;
cv::Mat fusion_mat;
using image_type = uint8_t;
int image_format = CV_8UC1;
// NOTE(review): four malloc'd buffers per frame; no matching free() is
// visible anywhere in this snippet, and the malloc results are not checked.
image_type* uint8Buffer = (image_type *)malloc(o_count * sizeof(image_type));
image_type* uint8Buffer_C1 = (image_type *)malloc(onechannel_size * sizeof(image_type));
image_type* uint8Buffer_C2 = (image_type *)malloc(onechannel_size * sizeof(image_type));
image_type* uint8Buffer_C3 = (image_type *)malloc(onechannel_size * sizeof(image_type));
// Scale each float by 255 and clamp to [0, 255] before narrowing to uint8_t.
for(int o_index=0; o_index < o_count; o_index++){
uint8Buffer[o_index] = static_cast<uint8_t>(std::min(std::max(outputCoverageBuffer[o_index] * 255.0f, 0.0f), 255.0f));
// Split the planar buffer into three equally sized planes. This reads up to
// index 3 * onechannel_size - 1, so it assumes at least three channels.
for(int o_index=0; o_index < onechannel_size; o_index++){
uint8Buffer_C1[o_index] = uint8Buffer[o_index];
uint8Buffer_C2[o_index] = uint8Buffer[o_index + onechannel_size];
uint8Buffer_C3[o_index] = uint8Buffer[o_index + 2 * onechannel_size];
// Wrap the planes as single-channel Mats, visiting them as C3, C2, C1
// (presumably to reverse the channel order, e.g. RGB -> BGR — confirm).
// NOTE(review): no channels.push_back(dumpimg) is visible, so as pasted
// `channels` is still empty when cv::merge runs; presumably lost in the paste.
std::vector<cv::Mat> channels;
for(int idx=2;idx>=0;idx--){
cv::Mat dumpimg;
if (idx == 0) dumpimg = cv::Mat(fusion_height, fusion_width, image_format, uint8Buffer_C1);
else if (idx == 1) dumpimg = cv::Mat(fusion_height, fusion_width, image_format, uint8Buffer_C2);
else dumpimg = cv::Mat(fusion_height, fusion_width, image_format, uint8Buffer_C3);
cv::merge(channels, fusion_mat);
// Parameters for a scratch RGBA, pitch-linear surface on the same GPU as the
// input surface.
// NOTE(review): width is set from surface_height*3/2 and height from
// surface_width, which is the transpose of the trans_mat shape used further
// down (rows = surface_height*3/2, cols = surface_width) — looks swapped; confirm.
// NOTE(review): create_params is not zero-initialised; only the fields
// assigned here are set.
NvBufSurface *inter_buf = nullptr;
NvBufSurfaceCreateParams create_params;
create_params.gpuId = surface->gpuId;
create_params.width = surface_height*3/2;
create_params.height = surface_width;
create_params.size = 0;
create_params.colorFormat = NVBUF_COLOR_FORMAT_RGBA;
create_params.layout = NVBUF_LAYOUT_PITCH;
// NOTE(review): the #else / #endif lines are missing from the paste; as
// shown, both assignments sit under the #ifdef and the second overrides the first.
#ifdef __aarch64__
create_params.memType = NVBUF_MEM_DEFAULT;
create_params.memType = NVBUF_MEM_CUDA_UNIFIED;
//Create another scratch RGBA NvBufSurface
// The scratch surface is created with a batch size of 1.
if (NvBufSurfaceCreate (&inter_buf, 1,
&create_params) != 0) {
GST_ERROR ("Error: Could not allocate internal buffer ");
if(NvBufSurfaceMap (inter_buf, 0, -1, NVBUF_MAP_READ_WRITE) != 0)
std::cout << "map error" << std::endl;
// NOTE(review): inter_buf->numFilled is not assigned until after this sync
// and after the NvBufSurfaceSyncForDevice call below. The reported
// "Wrong buffer index (0)" error is presumably the index being validated
// against numFilled == 0 — confirm, and try setting numFilled = 1 right
// after NvBufSurfaceCreate.
NvBufSurfaceSyncForCpu (inter_buf, 0, 0);
// NOTE(review): inter_buf holds a single surface, yet surfaceList is indexed
// with frame_meta->batch_id — any batch_id > 0 is out of range.
// NOTE(review): this constructor call is also truncated (last argument and
// closing ");" missing).
cv::Mat trans_mat = cv::Mat(surface_height*3/2, surface_width, CV_8UC4, inter_buf->surfaceList[frame_meta->batch_id].mappedAddr.addr[0],
// ROI of fusion_mat's size with its top-left corner at (x = 0, y = fusion_height).
// NOTE(review): for a lower-left placement the y offset would presumably be
// trans_mat.rows - fusion_mat.rows — confirm the intent.
cv::Mat dstROI = trans_mat(cv::Rect(0, fusion_height, fusion_mat.cols, fusion_mat.rows));
// NOTE(review): fusion_mat is merged from single-channel planes (three at
// most), while COLOR_BGRA2RGBA expects a 4-channel source; a 3 -> 4 channel
// code such as COLOR_BGR2RGBA is presumably what is needed — confirm against
// the cvtColor documentation. The conversion is also done in place on
// fusion_mat, so nothing is written into dstROI here.
cv::cvtColor(fusion_mat, fusion_mat, cv::COLOR_BGRA2RGBA);
// Copy the source matrix into the ROI region of the destination matrix
// NOTE(review): no copy call (e.g. fusion_mat.copyTo(dstROI)) follows in the
// visible code, so dstROI is never written.
char file_name[128];
// NOTE(review): frame_number is not declared anywhere in this snippet.
sprintf(file_name, "fusion_stream%2d_%03d_2.png", frame_meta->source_id, frame_number);
cv::imwrite(file_name, fusion_mat);
NvBufSurfaceSyncForDevice(inter_buf, 0, 0);
inter_buf->numFilled = 1;
NvBufSurfTransformConfigParams transform_config_params;
NvBufSurfTransformParams transform_params;
NvBufSurfTransformRect src_rect;
NvBufSurfTransformRect dst_rect;
// NOTE(review): a new CUDA stream is created on every pass through this
// code; no cudaStreamDestroy is visible in the snippet.
cudaStream_t cuda_stream;
CHECK_CUDA_STATUS (cudaStreamCreate (&cuda_stream),
"Could not create cuda stream");
transform_config_params.compute_mode = NvBufSurfTransformCompute_Default;
transform_config_params.gpu_id = surface->gpuId;
transform_config_params.cuda_stream = cuda_stream;
/* Set the transform session parameters for the conversions executed in this
* thread. */
NvBufSurfTransform_Error err = NvBufSurfTransformSetSessionParams (&transform_config_params);
if (err != NvBufSurfTransformError_Success) {
std::cout <<"NvBufSurfTransformSetSessionParams failed with error "<< err << std::endl;
/* Set the transform ROIs for source and destination, only do the color format conversion*/
// NOTE(review): both rects use surface_height*3/2 and surface_width as their
// last two values; check them against the field order of
// NvBufSurfTransformRect (presumably top, left, width, height) and against
// the real frame size — the *3/2 factor comes from the CPU-side Mat view.
src_rect = {0, 0, surface_height*3/2, surface_width};
dst_rect = {0, 0, surface_height*3/2, surface_width};
/* Set the transform parameters */
transform_params.src_rect = &src_rect;
transform_params.dst_rect = &dst_rect;
// NOTE(review): the right-hand side of the transform_flag assignment is
// missing from the paste; as shown it chains into the transform_filter
// assignment on the next line.
transform_params.transform_flag =
transform_params.transform_filter = NvBufSurfTransformInter_Default;
/* Transformation format conversion, Transform rotated RGBA mat to NV12 memory in original input surface*/
// Source is the scratch RGBA surface; destination is the original input surface.
err = NvBufSurfTransform (inter_buf, surface, &transform_params);
if (err != NvBufSurfTransformError_Success) {
// NOTE(review): "%d" is printed literally by std::cout; the error value is
// appended after the message text.
std::cout<< "NvBufSurfTransform failed with error %d while converting buffer" << err <<std::endl;
// nvds_copy_obj_meta();
// NOTE(review): both unmaps use index 0 / plane 0, whereas the maps above
// used (0, -1) for inter_buf and (-1, -1) for surface. No
// NvBufSurfaceDestroy(inter_buf) is visible in the snippet.
NvBufSurfaceUnMap(inter_buf, 0, 0);
NvBufSurfaceUnMap(surface, 0, 0);
NvBufSurfaceSyncForDevice(inter_buf, 0, 0) and NvBufSurfaceSyncForCpu(inter_buf, 0, 0) always return -1 and report the error "nvbufsurface: Wrong buffer index (0)".
My model can output either the Y channel of a YUV image or an RGB image; in this code I am using a model that outputs an RGB image, and the model's output is correct. When the NV12 data of the NvBufSurface is mapped to a cv::Mat, it contains only one channel, whereas the model output is saved as a three-channel RGB image. Is it feasible to do the format conversion directly with OpenCV, or should I use a model that outputs the Y channel instead?