Regarding doubts about DeepStream custom parser for ONNX with DeepStream batch

Please provide complete information as applicable to your setup.

• Hardware Platform (Jetson / GPU) - Jetson Orin

• DeepStream Version - 6.4

• JetPack Version (valid for Jetson only) - 6.0+b106

• TensorRT Version - 8.6.4

• NVIDIA GPU Driver Version (valid for GPU only) - 12.2

• Issue Type (questions, new requirements, bugs) - Question. We are implementing multi-camera object tracking using NVIDIA Metropolis services. First, we detect persons using the resnet50_peoplenet_transformer_op17 model, and then we extract embeddings using the resnet50_market1501_aicity156 model. We are able to detect persons and extract embeddings with an output size of 256. However, we need to ensure that the embedding extraction in our Python probe function is correct. Would you suggest we use C for detection and embedding extraction, or is Python code OK for implementing multi-camera object tracking?
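For reference, the probe below can only see the SGIE's raw output if tensor meta is attached. A minimal sketch of the relevant nvinfer config keys (illustrative values; match them against your mtmc_sgie_config.txt):

[property]
process-mode=2          # secondary mode: operate on objects from the PGIE
operate-on-gie-id=1     # run on detections from the PGIE (gie-unique-id=1)
gie-unique-id=2
output-tensor-meta=1    # attach raw tensor output so the probe can read it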

def tiler_sink_pad_buffer_probe(pad, info, u_data):
    global attendance_data, prediction, frame_no, obj_id_confidences, previous_obj_id, obj_id, count

    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer")
        return Gst.PadProbeReturn.OK
    # Retrieve batch metadata from the buffer
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list

    while l_frame is not None:
        try:
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        obj_source_id = frame_meta.source_id
        l_obj = frame_meta.obj_meta_list
        frame_no += 1

        # Get frame surface and convert to numpy array
        n_frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
        frame = np.array(n_frame, copy=True, order='C')
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)

        while l_obj is not None:
            try:
                obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break

            # Process objects from the primary model
            if obj_meta.unique_component_id == 1:
                obj_id, confidence = obj_meta.object_id, obj_meta.confidence

                x, y = obj_meta.rect_params.left, obj_meta.rect_params.top
                bbox_width, bbox_height = obj_meta.rect_params.width, obj_meta.rect_params.height

                x1, y1 = max(int(x - padding), 0), max(int(y - padding), 0)
                x2, y2 = int(x + bbox_width + padding), int(y + bbox_height + padding)

                if confidence >= 0.4:
                    # Crop and convert image for saving
                    cropped_image = bgr_frame[y1:y2, x1:x2]
                    rgb_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)

                    # Access the secondary model output (the ReID embedding)
                    l_user_meta = obj_meta.obj_user_meta_list
                    while l_user_meta is not None:
                        try:
                            user_meta = pyds.NvDsUserMeta.cast(l_user_meta.data)
                        except StopIteration:
                            break
                        if user_meta and user_meta.base_meta.meta_type == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META:
                            try:
                                tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
                            except StopIteration:
                                break

                            # Extract embedding from tensor output
                            for i in range(tensor_meta.num_output_layers):
                                layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
                                if layer is not None and layer.buffer:
                                    layer_name = layer.layerName
                                    print("layer_name:", layer_name)
                                    # Get the buffer containing the embeddings
                                    # (assumes FP32 output; check layer.dataType
                                    # if the engine runs in another precision)
                                    ptr = ctypes.cast(pyds.get_ptr(layer.buffer), ctypes.POINTER(ctypes.c_float))
                                    # Copy the buffer into a NumPy array so the
                                    # embedding outlives this buffer's metadata
                                    embedding = np.ctypeslib.as_array(ptr, shape=(256,)).copy()  # Output dimension is 256
                                    # print("Embedding shape:", embedding.shape)
                                    print("Embedding:", embedding)
                        try:
                            l_user_meta = l_user_meta.next
                        except StopIteration:
                            break

            try:
                l_obj = l_obj.next
            except StopIteration:
                break

        try:
            l_frame = l_frame.next
        except StopIteration:
            break

    return Gst.PadProbeReturn.OK
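
A quick sanity check for the extracted vectors (a sketch; embedding_a and embedding_b are hypothetical 256-d arrays captured from the probe above): embeddings of the same person in nearby frames should have a clearly higher cosine similarity than embeddings of two different persons.

import numpy as np

def cosine_similarity(a, b):
    # Normalize both vectors, then take the dot product: 1.0 = same direction
    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)
    return float(np.dot(a, b))

# Same-person pairs should score clearly higher (e.g. > 0.7) than
# different-person pairs; if they do not, the extraction is likely wrong.
# print(cosine_similarity(embedding_a, embedding_b))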

#nvidia_inception and #nvidia.ai

import sys

root_dir = '/home/jetson/Desktop/face_recogn_code'

sys.path.append(root_dir)

import gi
import configparser
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst
import math
import queue
from common.is_aarch_64 import is_aarch64
from common.bus_call import bus_call
from common.FPS import PERF_DATA
import numpy as np
import pyds
import cv2
import os
import csv
from datetime import datetime
import time
import threading
import json
import ctypes
from uuid import uuid4
zero_time = time.time()
cwd = os.getcwd()
frame_count = {}
saved_count = {}
perf_data = None
frame_no = 0
padding = 5
processed_obj_ids = set()

def tiler_sink_pad_buffer_probe(pad, info, u_data):
    global attendance_data, prediction, frame_no, obj_id_confidences, previous_obj_id, obj_id, count

    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer")
        return Gst.PadProbeReturn.OK
    # Retrieve batch metadata from the buffer
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list

    while l_frame is not None:
        try:
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        obj_source_id = frame_meta.source_id
        l_obj = frame_meta.obj_meta_list
        frame_no += 1

        # Get frame surface and convert to numpy array
        n_frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
        frame = np.array(n_frame, copy=True, order='C')
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)

        while l_obj is not None:
            try:
                obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break

            # Process objects from the primary model
            if obj_meta.unique_component_id == 1:
                obj_id, confidence = obj_meta.object_id, obj_meta.confidence

                x, y = obj_meta.rect_params.left, obj_meta.rect_params.top
                bbox_width, bbox_height = obj_meta.rect_params.width, obj_meta.rect_params.height

                x1, y1 = max(int(x - padding), 0), max(int(y - padding), 0)
                x2, y2 = int(x + bbox_width + padding), int(y + bbox_height + padding)

                if confidence >= 0.4:
                    # Crop and convert image for saving
                    cropped_image = bgr_frame[y1:y2, x1:x2]
                    rgb_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)

                    # Access the secondary model output (the ReID embedding)
                    l_user_meta = obj_meta.obj_user_meta_list
                    while l_user_meta is not None:
                        try:
                            user_meta = pyds.NvDsUserMeta.cast(l_user_meta.data)
                        except StopIteration:
                            break
                        if user_meta and user_meta.base_meta.meta_type == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META:
                            try:
                                tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
                            except StopIteration:
                                break

                            # Extract embedding from tensor output
                            for i in range(tensor_meta.num_output_layers):
                                layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
                                if layer is not None and layer.buffer:
                                    layer_name = layer.layerName
                                    print("layer_name:", layer_name)
                                    # Get the buffer containing the embeddings
                                    # (assumes FP32 output; check layer.dataType
                                    # if the engine runs in another precision)
                                    ptr = ctypes.cast(pyds.get_ptr(layer.buffer), ctypes.POINTER(ctypes.c_float))
                                    # Copy the buffer into a NumPy array so the
                                    # embedding outlives this buffer's metadata
                                    embedding = np.ctypeslib.as_array(ptr, shape=(256,)).copy()  # Output dimension is 256
                                    print("Embedding shape:", embedding.shape)
                                    # print("Embedding:", embedding)
                        try:
                            l_user_meta = l_user_meta.next
                        except StopIteration:
                            break

            try:
                l_obj = l_obj.next
            except StopIteration:
                break

        try:
            l_frame = l_frame.next
        except StopIteration:
            break

    return Gst.PadProbeReturn.OK

def cb_newpad(decodebin, decoder_src_pad, data):
    print("In cb_newpad\n")
    caps = decoder_src_pad.get_current_caps()
    gststruct = caps.get_structure(0)
    gstname = gststruct.get_name()
    source_bin = data
    features = caps.get_features(0)

    if gstname.find("video") != -1:
        if features.contains("memory:NVMM"):
            bin_ghost_pad = source_bin.get_static_pad("src")
            if not bin_ghost_pad.set_target(decoder_src_pad):
                sys.stderr.write("Failed to link decoder src pad to source bin ghost pad\n")
        else:
            sys.stderr.write("Error: Decodebin did not pick nvidia decoder plugin.\n")

def decodebin_child_added(child_proxy, Object, name, user_data):
    print("Decodebin child added:", name, "\n")
    if name.find("decodebin") != -1:
        Object.connect("child-added", decodebin_child_added, user_data)

    if not is_aarch64() and name.find("nvv4l2decoder") != -1:
        Object.set_property("cudadec-memtype", 2)

    if "source" in name:
        source_element = child_proxy.get_by_name("source")
        if source_element.find_property('drop-on-latency') is not None:
            Object.set_property("drop-on-latency", True)

def create_source_bin(index, uri):
    # print("Creating source bin")
    bin_name = "source-bin-%02d" % index
    # print(bin_name)
    nbin = Gst.Bin.new(bin_name)
    if not nbin:
        sys.stderr.write("Unable to create source bin\n")

    uri_decode_bin = Gst.ElementFactory.make("uridecodebin", "uri-decode-bin")
    if not uri_decode_bin:
        sys.stderr.write("Unable to create uri decode bin\n")

    uri_decode_bin.set_property("uri", uri)

    uri_decode_bin.connect("pad-added", cb_newpad, nbin)
    uri_decode_bin.connect("child-added", decodebin_child_added, nbin)

    Gst.Bin.add(nbin, uri_decode_bin)
    bin_pad = nbin.add_pad(Gst.GhostPad.new_no_target("src", Gst.PadDirection.SRC))
    if not bin_pad:
        sys.stderr.write("Failed to add ghost pad in source bin\n")
        return None
    return nbin

def main():
    # args = ['rtsp://admin:Sieora123@10.147.20.64:554/Streaming/Channels/101?transportmode=unicast&profile=Profile_1']
    args = ["file:///nvme0n1/mmtc_detection/perception/deepstream-fewshot-learning-app/friends.mp4"]
    global perf_data
    perf_data = PERF_DATA(len(args))
    number_sources = len(args)
    Gst.init(None)
    print("Creating Pipeline\n")
    pipeline = Gst.Pipeline()
    if not pipeline:
        sys.stderr.write("Unable to create Pipeline\n")

    is_live = False
    INPUT_WIDTH, INPUT_HEIGHT = 1920, 1080

    streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
    if not streammux:
        sys.stderr.write("Unable to create NvStreamMux\n")

    streammux.set_property('width', INPUT_WIDTH)
    streammux.set_property('height', INPUT_HEIGHT)
    streammux.set_property('batch-size', number_sources)
    streammux.set_property('batched-push-timeout', 400000)
    pipeline.add(streammux)

    for i in range(number_sources):
        print(f"Creating source_bin {i}\n")
        uri_name = args[i]
        if uri_name.startswith("rtsp://"):
            is_live = True

        source_bin = create_source_bin(i, uri_name)
        if not source_bin:
            sys.stderr.write("Unable to create source bin\n")

        pipeline.add(source_bin)
        sink_pad = streammux.get_request_pad(f"sink_{i}")
        src_pad = source_bin.get_static_pad("src")
        src_pad.link(sink_pad)

    pgie = Gst.ElementFactory.make("nvinfer", "primary-inference")
    if not pgie:
        sys.stderr.write("Unable to create pgie\n")

    pgie.set_property('config-file-path', "model/mtmc_pgie_config.txt")
    pipeline.add(pgie)
    streammux.link(pgie)

    sgie = Gst.ElementFactory.make("nvinfer", "secondary-inference")
    if not sgie:
        sys.stderr.write("Unable to create sgie\n")

    sgie.set_property('config-file-path', "model/mtmc_sgie_config.txt")
    pipeline.add(sgie)
    pgie.link(sgie)

    nvvidconv = Gst.ElementFactory.make("nvvideoconvert", "video-converter")
    if not nvvidconv:
        sys.stderr.write("Unable to create video converter\n")
    pipeline.add(nvvidconv)
    sgie.link(nvvidconv)

    caps = Gst.ElementFactory.make("capsfilter", "filter")
    caps.set_property('caps', Gst.Caps.from_string("video/x-raw(memory:NVMM), format=RGBA"))
    pipeline.add(caps)
    nvvidconv.link(caps)

    tiler = Gst.ElementFactory.make("nvmultistreamtiler", "tiler")
    if not tiler:
        sys.stderr.write("Unable to create tiler\n")

    tiler_rows = int(math.sqrt(number_sources))
    tiler_columns = int(math.ceil((1.0 * number_sources) / tiler_rows))
    tiler.set_property("rows", tiler_rows)
    tiler.set_property("columns", tiler_columns)
    tiler.set_property("width", 1920)
    tiler.set_property("height", 1080)
    pipeline.add(tiler)
    caps.link(tiler)

    nvosd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")
    if not nvosd:
        sys.stderr.write("Unable to create nvosd\n")

    pipeline.add(nvosd)
    tiler.link(nvosd)

    # Creating and adding the tracker
    print("Creating nvtracker\n")
    tracker = Gst.ElementFactory.make("nvtracker", "tracker")
    if not tracker:
        sys.stderr.write("Unable to create tracker\n")

    # Read and set tracker properties from the config file
    config = configparser.ConfigParser()
    config.read('model/dsnvanalytics_tracker_config.txt')
    # Group keys by the getter they need instead of one if-block per key
    int_keys = {'tracker-width', 'tracker-height', 'gpu-id',
                'tracking-surface-type', 'compute-hw', 'tracking-id-reset-mode',
                'tensor-meta-gie-id', 'user-meta-pool-size', 'sub-batches'}
    str_keys = {'ll-lib-file', 'll-config-file'}
    for key in config['tracker']:
        if key in int_keys:
            tracker.set_property(key, config.getint('tracker', key))
        elif key in str_keys:
            tracker.set_property(key, config.get('tracker', key))
        elif key == 'input-tensor-meta':
            tracker.set_property(key, config.getboolean('tracker', key))
        elif key == 'display-tracking-id':
            # Force tracking IDs on so nvdsosd can draw them
            tracker.set_property('display-tracking-id', True)
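    # For reference, a typical [tracker] section this loop consumes
    # (illustrative values; adjust paths to your DeepStream install):
    #   [tracker]
    #   tracker-width=960
    #   tracker-height=544
    #   gpu-id=0
    #   ll-lib-file=/opt/nvidia/deepstream/deepstream/lib/libnvds_nvmultiobjecttracker.so
    #   ll-config-file=config_tracker_NvDCF_perf.yml
    #   display-tracking-id=1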

    pipeline.add(tracker)
    nvosd.link(tracker)  # Ensure the link between nvosd and tracker

    sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
    if not sink:
        sys.stderr.write("Unable to create sink\n")

    sink.set_property("sync", 0)
    sink.set_property("qos", 0)
    pipeline.add(sink)
    tracker.link(sink)  # Ensure the link between tracker and sink

    if is_live:
        print("At least one of the sources is live")
        streammux.set_property('live-source', 1)

    loop = GLib.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", bus_call, loop)

    tiler_sink_pad = tiler.get_static_pad("sink")
    if tiler_sink_pad:
        tiler_sink_pad.add_probe(Gst.PadProbeType.BUFFER, tiler_sink_pad_buffer_probe, 0)

    print("Starting pipeline\n")
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except Exception as e:
        print(f"Exception: {e}")
    pipeline.set_state(Gst.State.NULL)

if __name__ == '__main__':
    sys.exit(main())

It could not display the tracker ID on the bounding boxes. How can I resolve this?

#nvidia_inception and #nvidia.ai

It seems you want MTMC. Can you refer to the MTMC workflow here: NVIDIA Multi-Camera Tracking AI Workflow?

Yes, I am following this, but could you please suggest whether we should implement the embedding extraction in Python or C? The tracker ID issue is resolved in Python, so we no longer need help with that.

MTMC (NVIDIA Multi-Camera Tracking AI Workflow) already implements these features. You just need to deploy it with Docker Compose or K8s. Why do you need to implement it again?

You have already implemented it with Python. Did you meet any issue?

                    # Extract embedding from tensor output
                    for i in range(tensor_meta.num_output_layers):
                        layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
                        if layer is not None and layer.buffer:
                            layer_name = layer.layerName
                            print("layer_name:",layer_name)
                            # Get the buffer containing the embeddings
                            ptr = ctypes.cast(pyds.get_ptr(layer.buffer), ctypes.POINTER(ctypes.c_float))
                            # Convert the buffer into a NumPy array (embedding vector)
                            embedding = np.ctypeslib.as_array(ptr, shape=(256,))  # Output dimension is 256
                            print("Embedding shape:", embedding.shape)
                            # print("Embedding:", embedding)

We did not get any error in Python, but I wanted to know whether the embedding extraction in the probe function for the SGIE model is right or wrong.

Yes, the application can get the tensor output in the probe function.
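
If you want to verify the values themselves, one option is to run the same person crop through the ONNX model offline and compare. A sketch using onnxruntime; the input size (128x256) and the normalization below are assumptions and must mirror your mtmc_sgie_config.txt (net-scale-factor, offsets, input dims):

import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("resnet50_market1501_aicity156.onnx")
inp = sess.get_inputs()[0]

def offline_embedding(bgr_crop):
    # Preprocess the crop the same way nvinfer does for the SGIE
    rgb = cv2.cvtColor(bgr_crop, cv2.COLOR_BGR2RGB)
    rgb = cv2.resize(rgb, (128, 256)).astype(np.float32)
    rgb = rgb * 0.01735207  # example net-scale-factor only; use your config's value
    chw = np.transpose(rgb, (2, 0, 1))[None]  # NCHW, batch of 1
    return sess.run(None, {inp.name: chw})[0].reshape(-1)

# A cosine similarity near 1.0 between this vector and the 256-d vector
# printed by the probe means the in-pipeline extraction is correct.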

Yes, we are able to get the embeddings.