Custom Yolo Postprocessing Probe gets slower after 1-2 days of execution

We are processing 60 720p camera streams using DeepStream 6.2. We are using a Tesla T4, the official DeepStream container and the Python bindings.
We noticed that our pipeline was slowing down after a day or two. We therefore decided to log the execution time of each of our Python probes.
Our analysis shows that one Python probe keeps getting slower and slower. That probe executes the YOLO postprocessing in Python:

Yolo is the only model for which we have a custom postprocessing: we output the tensor meta and we process it in Python.

Is there anything that we should do to free up memory or release objects after processing the tensor meta in python?

The reason we originally wrote a custom postprocessing step for YOLO is that the ones available in C++ ran on a single core and were much slower when running our pipeline on 60 cameras (throughput was capped at about 60 FPS, whereas we can reach about 400 FPS with our custom postprocessing).

This is the main part of the probe code:

import traceback

import pyds

import gi
gi.require_version("Gst", "1.0")
from gi.repository import GObject, Gst

import ctypes
import io
import logging

import numpy as np

from pipeline.models.common.utils import layer_finder, make_nodi
from .yolov5_postprocessing import YoloV5Postprocessing
import settings


# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Object id given to freshly detected objects that have no tracking id yet
# (all 64 bits set).
UNTRACKED_OBJECT_ID = 0xffffffffffffffff

# Single shared postprocessor instance, created once at import time and reused
# for every call to post_process().
yolov5_postprocessing = YoloV5Postprocessing()


def get_label_names_from_file(filepath):
    """ Read a label file and convert it to string list.

        Keyword arguments:
        - filepath : path of a text file holding one label per line (str)

        Return:
        - One entry per line of the file, in file order, without the line
          terminator (str list). Blank lines are kept as empty strings so that
          list positions keep matching line numbers.
    """
    # The context manager closes the file even if reading fails.
    with open(filepath, "r") as label_file:
        # Strip only the newline: blindly dropping the last character would
        # truncate the final label when the file has no trailing newline.
        return [line.rstrip("\n") for line in label_file]


def yolov5_parse_custom_meta(output_layer_info, frame_height, frame_width):
    """ Get data from output_layer_info and return an object_list with several NvDsInferObjectDetectionInfo.

        Keyword arguments:
        - output_layer_info : represents the neural network's output.
            (NvDsInferLayerInfo list)
        - frame_height : frame height used to scale bounding boxes (int)
        - frame_width : frame width used to scale bounding boxes (int)

        Return:
        - Object list with bounding boxes (NvDsInferObjectDetectionInfo list).
          The list is empty when the "prob" output layer cannot be found.
    """
    # Yolo V5
    # Input data [batch_size, 3, 640, 640] <class 'numpy.float32'>
    # Output prob [batch_size, 6001, 1, 1] <class 'numpy.float32'>
    output_layer = layer_finder(output_layer_info, "prob")

    if not output_layer:
        # Without the layer there is nothing to parse; bail out instead of
        # dereferencing a missing layer below.
        logger.error("Error in retrieving output layer")
        return []

    # Expose the raw output buffer as a flat float array of 6001 values
    # (the per-frame size of the "prob" output noted above).
    # NOTE(review): np.ctypeslib.as_array wraps the pointer rather than copying,
    # so the array presumably must not be kept after the buffer is released -
    # confirm that post_process() does not retain it.
    ptr = ctypes.cast(pyds.get_ptr(output_layer.buffer), ctypes.POINTER(ctypes.c_float))
    output_numpy = np.ctypeslib.as_array(ptr, shape=(6001,))
    output_numpy = np.squeeze(output_numpy)

    result_boxes, result_scores, result_classid = yolov5_postprocessing.post_process(
        output_numpy, frame_height, frame_width
    )

    # make_nodi may return a falsy value for a detection; those are dropped.
    candidates = (
        make_nodi(classid, score, box, frame_height, frame_width)
        for box, score, classid in zip(result_boxes, result_scores, result_classid)
    )
    return [obj for obj in candidates if obj]


def add_obj_meta_to_frame(frame_object, batch_meta, frame_meta, label_names):
    """ Inserts an object into the metadata.

        Keyword arguments:
        - frame_object : detection to insert; its classId, detectionConfidence,
            left, top, width and height attributes are read, and classId is
            overwritten when a label remapping applies
        - batch_meta : batch metadata from whose pool the object meta is acquired
        - frame_meta : frame metadata the new object is attached to
        - label_names : label strings indexed by class id (str list)

        Return:
        - The object meta that was added to the frame
    """

    # Remap label id if specified in settings.
    # The range check comes first so that an out-of-range class id falls back
    # to label 0 further down instead of raising IndexError here.
    if frame_object.classId < len(label_names):
        label = label_names[frame_object.classId]
        if label in settings.AI_YOLO_MAP_LABELS:
            try:
                label = settings.AI_YOLO_MAP_LABELS[label]
                frame_object.classId = label_names.index(label)
            except Exception:
                # Best effort: keep the original class id if the mapped label
                # is not present in label_names.
                logger.error(traceback.format_exc())

    obj_meta = pyds.nvds_acquire_obj_meta_from_pool(batch_meta)

    # Set bbox properties. These are in input resolution.
    rect_params = obj_meta.rect_params
    rect_params.left = frame_object.left
    rect_params.top = frame_object.top
    rect_params.width = frame_object.width
    rect_params.height = frame_object.height

    # A semi-transparent yellow background colour is configured, but
    # has_bg_color is 0, so the background fill is switched off.
    rect_params.has_bg_color = 0
    rect_params.bg_color.set(1, 1, 0, 0.4)

    # Red border of width 3
    rect_params.border_width = 3
    rect_params.border_color.set(1, 0, 0, 1)

    # Set object info including class, detection confidence, etc.
    obj_meta.confidence = frame_object.detectionConfidence
    obj_meta.class_id = frame_object.classId

    # There is no tracking ID upon detection. The tracker will
    # assign an ID.
    obj_meta.object_id = UNTRACKED_OBJECT_ID

    # This value must match the secondary models `operate-on-gie-id` value
    obj_meta.unique_component_id = 1

    # Fall back to the first label when the class id has no entry.
    lbl_id = frame_object.classId
    if lbl_id >= len(label_names):
        lbl_id = 0

    # Set the object classification label.
    obj_meta.obj_label = label_names[lbl_id]

    # Set display text for the object, freeing any text already attached to
    # the object meta taken from the pool.
    txt_params = obj_meta.text_params
    if txt_params.display_text:
        pyds.free_buffer(txt_params.display_text)

    # Place the text just above the box, clamped to the top of the frame.
    txt_params.x_offset = int(rect_params.left)
    txt_params.y_offset = max(0, int(rect_params.top) - 10)
    txt_params.display_text = (
        f"{label_names[lbl_id]} {frame_object.detectionConfidence:.2f} "
    )
    # Font , font-color and font-size
    txt_params.font_params.font_name = "Serif"
    txt_params.font_params.font_size = 10
    # set(red, green, blue, alpha); set to White
    txt_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)

    # Text background color
    txt_params.set_bg_clr = 1
    # set(red, green, blue, alpha); set to Black
    txt_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)

    # Insert the object into current frame meta
    # This object has no parent
    pyds.nvds_add_obj_meta_to_frame(frame_meta, obj_meta, None)

    return obj_meta


# Label lists already read from disk, keyed by file path, so that the probe
# does not re-read the label file for every buffer.
_LABEL_NAMES_CACHE = {}


def _get_cached_label_names(filepath):
    """ Return the label list of filepath, reading the file only on first use """
    try:
        return _LABEL_NAMES_CACHE[filepath]
    except KeyError:
        labels = get_label_names_from_file(filepath)
        _LABEL_NAMES_CACHE[filepath] = labels
        return labels


def pgie_src_pad_buffer_probe(self, pad, info, u_data):
    """ Buffer probe that parses the YOLO tensor output and attaches the
        detections to the frame metadata.

        For every frame of the batch, each user meta of type
        NVDSINFER_TENSOR_OUTPUT_META is parsed with yolov5_parse_custom_meta,
        the resulting objects are added to the frame with
        add_obj_meta_to_frame, and the frame is flagged as inferred.

        Keyword arguments:
        - self : unused
        - pad : unused
        - info : probe info from which the buffer is obtained
        - u_data : unused

        Return:
        - Gst.PadProbeReturn.OK in all cases, including when no buffer is
          available
    """
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        logger.error("Unable to get GstBuffer ")
        # Always hand an explicit probe return value back to the caller.
        return Gst.PadProbeReturn.OK

    # Retrieve batch metadata from the gst_buffer
    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
    # C address of gst_buffer as input, which is obtained with hash(gst_buffer)
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))

    # Labels are loaded before the lock is taken and are read from disk only
    # once; changes to the file after the first buffer are not picked up.
    label_names = _get_cached_label_names("labels.txt")

    pyds.nvds_acquire_meta_lock(batch_meta)
    try:
        l_frame = batch_meta.frame_meta_list
        while l_frame is not None:
            try:
                # Note that l_frame.data needs a cast to pyds.NvDsFrameMeta
                # The casting also keeps ownership of the underlying memory
                # in the C code, so the Python garbage collector will leave
                # it alone.
                frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
            except StopIteration:
                break

            l_user = frame_meta.frame_user_meta_list
            while l_user is not None:
                try:
                    # Note that l_user.data needs a cast to pyds.NvDsUserMeta
                    # The casting also keeps ownership of the underlying memory
                    # in the C code, so the Python garbage collector will leave
                    # it alone.
                    user_meta = pyds.NvDsUserMeta.cast(l_user.data)
                except StopIteration:
                    break

                # Only tensor output meta is processed. Other entries are
                # skipped, but the list is still advanced below, otherwise the
                # loop would spin forever on the same node.
                if (
                        user_meta.base_meta.meta_type
                        == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META
                ):
                    tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)

                    layers_info = [
                        pyds.get_nvds_LayerInfo(tensor_meta, i)
                        for i in range(tensor_meta.num_output_layers)
                    ]

                    frame_object_list = yolov5_parse_custom_meta(
                        # Note: it is important to use the resolution of nvstreammux otherwise the secondary
                        # models will receive wrong bounding boxes
                        # Therefore, you cannot use the original frames bounding boxes size.
                        layers_info, settings.DEEPSTREAM_NVSTREAMMUX_HEIGHT, settings.DEEPSTREAM_NVSTREAMMUX_WIDTH
                    )

                    for frame_object in frame_object_list:
                        add_obj_meta_to_frame(frame_object, batch_meta, frame_meta, label_names)

                    # Mark frame as processed otherwise tracker won't work on it
                    # (it wouldn't output any object)
                    frame_meta.bInferDone = 1

                try:
                    l_user = l_user.next
                except StopIteration:
                    break

            try:
                l_frame = l_frame.next
            except StopIteration:
                break
    finally:
        # Release the lock even if parsing raised, so it is never left held.
        pyds.nvds_release_meta_lock(batch_meta)

    return Gst.PadProbeReturn.OK

This is the yolo configuration:

[property]
gpu-id=0
#model-file=/opt/nvidia/deepstream/deepstream-6.0/samples/models/Primary_Detector/resnet10.caffemodel
#proto-file=/opt/nvidia/deepstream/deepstream-6.0/samples/models/Primary_Detector/resnet10.prototxt
labelfile-path=/src/src/pipeline/models/yolov5_tensorrt/pipeline/labels.txt
#int8-calib-file=/opt/nvidia/deepstream/deepstream-6.0/samples/models/Primary_Detector/cal_trt.bin
force-implicit-batch-dim=1
batch-size=16
network-mode=2
num-detected-classes=80
interval=0
gie-unique-id=1
output-tensor-meta=1
is-classifier=1
#output-blob-names=prob
#tensor-meta-pool-size=6001
#custom-lib-path=/opt/nvidia/deepstream/deepstream-6.0/sources/libs/nvdsinfer_customparser/libnvds_infercustomparser.so
#parse-bbox-func-name=NvDsInferParseYolo
#output-blob-names=conv2d_bbox;conv2d_cov/Sigmoid
#scaling-filter=0
#scaling-compute-hw=0
# Set model color format to RGB
model-color-format=0
# Set primary model
process-mode=1
# Preprocessing
symmetric-padding=0
maintain-aspect-ratio=1
net-scale-factor=0.0039215697906911373
offsets=0;0;0

There has been no update from you for a while, so we assume this is no longer an issue and are closing this topic. If you need further support, please open a new one. Thanks.

https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_Python_Sample_Apps.html#memory-management

It is not recommended to implement the postprocessing in Python if the algorithm involves many array operations and transformations.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.