dsexample causing memory leak with cv::Mat

I am trying to copy the cropped cv::Mat created from get_converted_mat() and attach it as a object level metadata, as inspired by deepstream_user_metadata_app.c example. Here is what I changed in dsexample.cpp

typedef struct _NvDecoderrMeta
{
   
   cv::Mat ass;

} NvDecoderrMeta;


void *set_metadata_ptr(cv::Mat& newFrame);
static gpointer copy_user_meta(gpointer data, gpointer user_data);
static void release_user_meta(gpointer data, gpointer user_data);

void *set_metadata_ptr(cv::Mat& newFrame)
{
  int i = 0;
    NvDecoderrMeta *user_metadata = (NvDecoderrMeta *)g_malloc0(sizeof(NvDecoderrMeta));
 
  newFrame.copyTo(user_metadata->ass);

  newFrame.release();

  return (void *)user_metadata;
}

/* copy function set by user. "data" holds a pointer to NvDsUserMeta*/
static gpointer copy_user_meta(gpointer data, gpointer user_data)
{
  NvDsUserMeta *user_meta = (NvDsUserMeta *)data;
  NvDecoderrMeta *src_user_metadata = (NvDecoderrMeta *)user_meta->user_meta_data;
  NvDecoderrMeta *dst_user_metadata = (NvDecoderrMeta *)g_malloc0(sizeof(NvDecoderrMeta));
  memcpy(dst_user_metadata, src_user_metadata, sizeof(NvDecoderrMeta));
  return (gpointer)dst_user_metadata;
}

/* release function set by user. "data" holds a pointer to NvDsUserMeta*/
static void release_user_meta(gpointer data, gpointer user_data)
{
  NvDsUserMeta *user_meta = (NvDsUserMeta *)data;

  if(user_meta->user_meta_data) {
    g_free(user_meta->user_meta_data);
    user_meta->user_meta_data = NULL;
  }
}

static cv::Mat
    get_converted_mat (GstDsExample * dsexample, NvBufSurface *input_buf, gint idx,
        NvOSD_RectParams * crop_rect_params, gdouble & ratio, gint input_width,
        gint input_height)
    {
      NvBufSurfTransform_Error err;
      NvBufSurfTransformConfigParams transform_config_params;
      NvBufSurfTransformParams transform_params;
      NvBufSurfTransformRect src_rect;
      NvBufSurfTransformRect dst_rect;
      NvBufSurface ip_surf;
      cv::Mat in_mat, out_mat;
      ip_surf = *input_buf;

      ip_surf.numFilled = ip_surf.batchSize = 1;
      ip_surf.surfaceList = &(input_buf->surfaceList[idx]);

      gint src_left = GST_ROUND_UP_2(crop_rect_params->left);
      gint src_top = GST_ROUND_UP_2(crop_rect_params->top);
      gint src_width = GST_ROUND_DOWN_2(crop_rect_params->width);
      gint src_height = GST_ROUND_DOWN_2(crop_rect_params->height);
      //g_print("ltwh = %d %d %d %d \n", src_left, src_top, src_width, src_height);

      guint dest_width, dest_height;
      dest_width = src_width;
      dest_height = src_height;

      NvBufSurface *nvbuf;
      NvBufSurfaceCreateParams create_params;
      create_params.gpuId  = dsexample->gpu_id;
      create_params.width  = dest_width;
      create_params.height = dest_height;
      create_params.size = 0;
      create_params.colorFormat = NVBUF_COLOR_FORMAT_RGBA;
      create_params.layout = NVBUF_LAYOUT_PITCH;
    #ifdef __aarch64__
      create_params.memType = NVBUF_MEM_DEFAULT;
    #else
      create_params.memType = NVBUF_MEM_CUDA_UNIFIED;
    #endif
      NvBufSurfaceCreate (&nvbuf, 1, &create_params);

      // Configure transform session parameters for the transformation
      transform_config_params.compute_mode = NvBufSurfTransformCompute_Default;
      transform_config_params.gpu_id = dsexample->gpu_id;
      transform_config_params.cuda_stream = dsexample->cuda_stream;

      // Set the transform session parameters for the conversions executed in this
      // thread.
      err = NvBufSurfTransformSetSessionParams (&transform_config_params);
      if (err != NvBufSurfTransformError_Success) {
        GST_ELEMENT_ERROR (dsexample, STREAM, FAILED,
            ("NvBufSurfTransformSetSessionParams failed with error %d", err), (NULL));
       // goto error;
      }

      // Calculate scaling ratio while maintaining aspect ratio
      ratio = MIN (1.0 * dest_width/ src_width, 1.0 * dest_height / src_height);

      if ((crop_rect_params->width == 0) || (crop_rect_params->height == 0)) {
        GST_ELEMENT_ERROR (dsexample, STREAM, FAILED,
            ("%s:crop_rect_params dimensions are zero",__func__), (NULL));
        //goto error;
      }

    #ifdef __aarch64__
      if (ratio <= 1.0 / 16 || ratio >= 16.0) {
        // Currently cannot scale by ratio > 16 or < 1/16 for Jetson
       // goto error;
      }
    #endif
      // Set the transform ROIs for source and destination
      src_rect = {(guint)src_top, (guint)src_left, (guint)src_width, (guint)src_height};
      dst_rect = {0, 0, (guint)dest_width, (guint)dest_height};

      // Set the transform parameters
      transform_params.src_rect = &src_rect;
      transform_params.dst_rect = &dst_rect;
      transform_params.transform_flag =
        NVBUFSURF_TRANSFORM_FILTER | NVBUFSURF_TRANSFORM_CROP_SRC |
          NVBUFSURF_TRANSFORM_CROP_DST;
      transform_params.transform_filter = NvBufSurfTransformInter_Default;

      //Memset the memory
      NvBufSurfaceMemSet (nvbuf, 0, 0, 0);

      GST_DEBUG_OBJECT (dsexample, "Scaling and converting input buffer\n");

      // Transformation scaling+format conversion if any.
      err = NvBufSurfTransform (&ip_surf, nvbuf, &transform_params);
      if (err != NvBufSurfTransformError_Success) {
        GST_ELEMENT_ERROR (dsexample, STREAM, FAILED,
            ("NvBufSurfTransform failed with error %d while converting buffer", err),
            (NULL));
        //goto error;
      }
      // Map the buffer so that it can be accessed by CPU
      if (NvBufSurfaceMap (nvbuf, 0, 0, NVBUF_MAP_READ) != 0){
      //  goto error;
      }

      // Cache the mapped data for CPU access
      NvBufSurfaceSyncForCpu (nvbuf, 0, 0);

      // Use openCV to remove padding and convert RGBA to BGR. Can be skipped if
      // algorithm can handle padded RGBA data.
      in_mat =
          cv::Mat (dest_height, dest_width,
          CV_8UC4, nvbuf->surfaceList[0].mappedAddr.addr[0],
          nvbuf->surfaceList[0].pitch);
      out_mat =
          cv::Mat (cv::Size(dest_width, dest_height), CV_8UC3);

      cv::cvtColor (in_mat, out_mat, CV_RGBA2BGR);

      in_mat.release();
	

      if (NvBufSurfaceUnMap (nvbuf, 0, 0)){
        //goto error;
      }
      NvBufSurfaceDestroy(nvbuf);

    #ifdef __aarch64__
      // To use the converted buffer in CUDA, create an EGLImage and then use
      // CUDA-EGL interop APIs
      if (USE_EGLIMAGE) {
        if (NvBufSurfaceMapEglImage (dsexample->inter_buf, 0) !=0 ) {
         // goto error;
        }

        // dsexample->inter_buf->surfaceList[0].mappedAddr.eglImage
        // Use interop APIs cuGraphicsEGLRegisterImage and
        // cuGraphicsResourceGetMappedEglFrame to access the buffer in CUDA

        // Destroy the EGLImage
        NvBufSurfaceUnMapEglImage (dsexample->inter_buf, 0);
      }
    #endif

      /* We will first convert only the Region of Interest (the entire frame or the
       * object bounding box) to RGB and then scale the converted RGB frame to
       * processing resolution. */
     // return GST_FLOW_OK;
	return out_mat;

  //  error:
    //  return GST_FLOW_ERROR;
    }
/**
 * Called when element recieves an input buffer from upstream element.
 */
static GstFlowReturn
gst_dsexample_transform_ip (GstBaseTransform * btrans, GstBuffer * inbuf)
{
  GstDsExample *dsexample = GST_DSEXAMPLE (btrans);
  GstMapInfo in_map_info;
  GstFlowReturn flow_ret = GST_FLOW_ERROR;
  gdouble scale_ratio = 1.0;
  DsExampleOutput *output;

  NvBufSurface *surface = NULL;
  NvDsBatchMeta *batch_meta = NULL;
  NvDsFrameMeta *frame_meta = NULL;
  NvDsMetaList * l_frame = NULL;
  guint i = 0;

  dsexample->frame_num++;
  CHECK_CUDA_STATUS (cudaSetDevice (dsexample->gpu_id),
      "Unable to set cuda device");

  memset (&in_map_info, 0, sizeof (in_map_info));
  if (!gst_buffer_map (inbuf, &in_map_info, GST_MAP_READ)) {
    g_print ("Error: Failed to map gst buffer\n");
    goto error;
  }

  surface = (NvBufSurface *) in_map_info.data;
  GST_DEBUG_OBJECT (dsexample,
      "Processing Frame %" G_GUINT64_FORMAT " Surface %p\n",
      dsexample->frame_num, surface);

  if (CHECK_NVDS_MEMORY_AND_GPUID (dsexample, surface))
    goto error;

  batch_meta = gst_buffer_get_nvds_batch_meta (inbuf);
  if (batch_meta == nullptr) {
    GST_ELEMENT_ERROR (dsexample, STREAM, FAILED,
        ("NvDsBatchMeta not found for input buffer."), (NULL));
    return GST_FLOW_ERROR;
  }

  if (dsexample->process_full_frame) {
    for (l_frame = batch_meta->frame_meta_list; l_frame != NULL;
      l_frame = l_frame->next)
    {
      frame_meta = (NvDsFrameMeta *) (l_frame->data);
      NvOSD_RectParams rect_params;

      // Scale the entire frame to processing resolution
      rect_params.left = 0;
      rect_params.top = 0;
      rect_params.width = dsexample->video_info.width;
      rect_params.height = dsexample->video_info.height;

      // Scale and convert the frame
      // if (get_converted_mat (dsexample, surface, i, &rect_params,
      //       scale_ratio, dsexample->video_info.width,
      //       dsexample->video_info.height) != GST_FLOW_OK) {
      //   goto error;
      // }

      // Process to get the output
      output =
          DsExampleProcess (dsexample->dsexamplelib_ctx,
          dsexample->cvmat->data);
      // Attach the metadata for the full frame
      attach_metadata_full_frame (dsexample, frame_meta, scale_ratio, output, i);
      i++;
      free (output);
    }

  } else {
    // Using object crops as input to the algorithm. The objects are detected by
    // the primary detector
    NvDsMetaList * l_obj = NULL;
    NvDsObjectMeta *obj_meta = NULL;

    for (l_frame = batch_meta->frame_meta_list; l_frame != NULL;
      l_frame = l_frame->next)
    {
      frame_meta = (NvDsFrameMeta *) (l_frame->data);
      for (l_obj = frame_meta->obj_meta_list; l_obj != NULL;
          l_obj = l_obj->next)
      {
        obj_meta = (NvDsObjectMeta *) (l_obj->data);


        /* Should not process on objects smaller than MIN_INPUT_OBJECT_WIDTH x MIN_INPUT_OBJECT_HEIGHT
         * since it will cause hardware scaling issues. */
        if (obj_meta->rect_params.width < MIN_INPUT_OBJECT_WIDTH ||
            obj_meta->rect_params.height < MIN_INPUT_OBJECT_HEIGHT)
          continue;

        // Crop and scale the object
        //if (
        	cv::Mat frameBuf = get_converted_mat (dsexample,surface, frame_meta->batch_id, &obj_meta->rect_params,scale_ratio, dsexample->video_info.width,dsexample->video_info.height) ;
        	//!= GST_FLOW_OK) {
          // Error in conversion, skip processing on object. */
        //  continue;
       // }

        // Process the object crop to obtain label
        output = DsExampleProcess (dsexample->dsexamplelib_ctx,
            dsexample->cvmat->data);

        // Attach labels for the object
        attach_metadata_object (dsexample, obj_meta, output, frameBuf);
        frameBuf.release();

      

        free (output);
      }
    }
  }

  flow_ret = GST_FLOW_OK;

error:
  gst_buffer_unmap (inbuf, &in_map_info);
  return flow_ret;
}
static void
attach_metadata_object (GstDsExample * dsexample, NvDsObjectMeta * obj_meta,
    DsExampleOutput * output, cv::Mat& frameBuf)
{
  if (output->numObjects == 0)
    return;
  NvDsBatchMeta *batch_meta = obj_meta->base_meta.batch_meta;
  
NvDsUserMeta *user_meta = NULL;
  NvDsMetaType user_meta_type = NVDS_USER_FRAME_META_EXAMPLE;
  
  NvDsClassifierMeta *classifier_meta =
    nvds_acquire_classifier_meta_from_pool (batch_meta);

  classifier_meta->unique_component_id = dsexample->unique_id;

  NvDsLabelInfo *label_info =
    nvds_acquire_label_info_meta_from_pool (batch_meta);
  g_strlcpy (label_info->result_label, output->object[0].label, MAX_LABEL_SIZE);
  nvds_add_label_info_meta_to_classifier(classifier_meta, label_info);
  nvds_add_classifier_meta_to_object (obj_meta, classifier_meta);

			
user_meta = nvds_acquire_user_meta_from_pool(batch_meta);

        /* Set NvDsUserMeta below */
        user_meta->user_meta_data = (void *)set_metadata_ptr(frameBuf);
        user_meta->base_meta.meta_type = user_meta_type;
        user_meta->base_meta.copy_func = (NvDsMetaCopyFunc)copy_user_meta;
        user_meta->base_meta.release_func = (NvDsMetaReleaseFunc)release_user_meta;

        /* We want to add NvDsUserMeta to frame level */
        nvds_add_user_meta_to_obj(obj_meta, user_meta);


  nvds_acquire_meta_lock (batch_meta);
  NvOSD_TextParams & text_params = obj_meta->text_params;
  NvOSD_RectParams & rect_params = obj_meta->rect_params;


  nvds_release_meta_lock (batch_meta);
}

I have only changed these functions in dsexample.cpp to get the cropped cv::Mat data on obj_meta parameter to be later used on. But when I run

gst-launch-1.0 filesrc location= /home/nvidia/Downloads/deepstream_sdk_v4.0.1_x86_64/samples/streams/sample_720p.mp4 ! qtdemux ! h264parse ! nvv4l2decoder ! m.sink_0 nvstreammux name=m batch-size=1 width=1280 height=720 ! nvinfer config-file-path= /home/nvidia/Downloads/deepstream_sdk_v4.0.1_x86_64/samples/configs/deepstream-app/config_infer_primary.txt ! nvvideoconvert ! dsexample full-frame=0 ! nvdsosd ! nveglglessink

The program runs successfully, but the memory rises gradually. I wonder if the cv::Mat causing the memory leak. Any help is highly appreciated.