• Hardware Platform (GPU) : GTX1660
• DeepStream Version 7.0
Hi sir
sir iam have a python deepstream pipeline for the object detection using yolov9 and thats working i need to integerate any depth estimation models like midas model as a secondary gie and i need to calculate the depth to each object detected from the yolov9 model how i can do that
iam attaching my deepstream python file
import sys
import threading
sys.path.append(‘…/’)
import os
import gi
import configparser
gi.require_version(‘Gst’, ‘1.0’)
from gi.repository import GLib, Gst
from common.platform_info import PlatformInfo
from common.bus_call import bus_call
import pyds
import numpy as np
import cv2
from os import path
PGIE_CLASS_ID_PERSON = 1
PGIE_CLASS_ID_BICYCLE = 2
PGIE_CLASS_ID_CAR = 3
MUXER_BATCH_TIMEOUT_USEC = 33000
def osd_src_pad_buffer_probe(pad,info,u_data):
frame_number=0
num_rects=0
gst_buffer = info.get_buffer()
if not gst_buffer:
print("Unable to get GstBuffer ")
return
# Retrieve batch metadata from the gst_buffer
# Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
# C address of gst_buffer as input, which is obtained with hash(gst_buffer)
batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
l_frame = batch_meta.frame_meta_list
while l_frame is not None:
try:
# Note that l_frame.data needs a cast to pyds.NvDsFrameMeta
# The casting is done by pyds.NvDsFrameMeta.cast()
# The casting also keeps ownership of the underlying memory
# in the C code, so the Python garbage collector will leave
# it alone.
frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
except StopIteration:
break
#Intiallizing object counter with 0.
obj_counter = {
PGIE_CLASS_ID_PERSON:0,
PGIE_CLASS_ID_BICYCLE:0,
PGIE_CLASS_ID_CAR:0,
}
frame_number=frame_meta.frame_num
num_rects = frame_meta.num_obj_meta
l_obj=frame_meta.obj_meta_list
save_image = False
while l_obj is not None:
try:
# Casting l_obj.data to pyds.NvDsObjectMeta
obj_meta=pyds.NvDsObjectMeta.cast(l_obj.data)
except StopIteration:
break
if obj_meta.class_id in obj_counter:
obj_counter[obj_meta.class_id] += 1
left = obj_meta.detector_bbox_info.org_bbox_coords.left
top = obj_meta.detector_bbox_info.org_bbox_coords.top
width = obj_meta.detector_bbox_info.org_bbox_coords.width
height = obj_meta.detector_bbox_info.org_bbox_coords.height
print(f"bbox left: {left}, top: {top}, width: {width}, height: {height}")
if frame_number % 300 == 0:
# Getting Image data using nvbufsurface
# the input should be address of buffer and batch_id
n_frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
frame_copy = np.array(n_frame, copy=True, order='C')
# convert the array into cv2 default color format
frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_RGBA2BGRA)
if platform_info.is_integrated_gpu():
# If Jetson, since the buffer is mapped to CPU for retrieval, it must also be unmapped
pyds.unmap_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id) # The unmap call should be made after operations with the original array are complete.
# The original array cannot be accessed after this call.
save_image = True
obj_meta.rect_params.border_color.set(0.0, 0.0, 1.0, 0.8) #0.8 is alpha (opacity)
try:
l_obj=l_obj.next
except StopIteration:
break
# Acquiring a display meta object. The memory ownership remains in
# the C code so downstream plugins can still access it. Otherwise
# the garbage collector will claim it when this probe function exits.
display_meta=pyds.nvds_acquire_display_meta_from_pool(batch_meta)
display_meta.num_labels = 1
py_nvosd_text_params = display_meta.text_params[0]
# Setting display text to be shown on screen
# Note that the pyds module allocates a buffer for the string, and the
# memory will not be claimed by the garbage collector.
# Reading the display_text field here will return the C address of the
# allocated string. Use pyds.get_string() to get the string content.
py_nvosd_text_params.display_text = "Frame Number={} Number of Objects={} Vehicle_count={} Person_count={}".format(frame_number, num_rects, obj_counter[PGIE_CLASS_ID_CAR], obj_counter[PGIE_CLASS_ID_PERSON])
# Now set the offsets where the string should appear
py_nvosd_text_params.x_offset = 10
py_nvosd_text_params.y_offset = 12
# Font , font-color and font-size
py_nvosd_text_params.font_params.font_name = "Serif"
py_nvosd_text_params.font_params.font_size = 10
# set(red, green, blue, alpha); set to White
py_nvosd_text_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)
# Text background color
py_nvosd_text_params.set_bg_clr = 1
# set(red, green, blue, alpha); set to Black
py_nvosd_text_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)
# Using pyds.get_string() to get display_text as string
print(pyds.get_string(py_nvosd_text_params.display_text))
pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)
if save_image:
img_path = f"{folder_name}/frame_{frame_number}.jpg"
print(f"Saving frame {frame_number} to {img_path}")
cv2.imwrite(img_path, frame_copy)
try:
l_frame=l_frame.next
except StopIteration:
break
return Gst.PadProbeReturn.OK
def main(args):
# Check input arguments
if len(args) < 2:
sys.stderr.write(“usage: %s \n” % args[0])
sys.exit(1)
global folder_name
folder_name = args[-1]
if path.exists(folder_name):
sys.stderr.write("The output folder %s already exists. Please remove it first.\n" % folder_name)
sys.exit(1)
os.mkdir(folder_name)
print("Frames will be saved in ", folder_name)
global platform_info
platform_info = PlatformInfo()
# Standard GStreamer initialization
Gst.init(None)
# Create gstreamer elements
# Create Pipeline element that will form a connection of other elements
print("Creating Pipeline \n ")
pipeline = Gst.Pipeline()
if not pipeline:
sys.stderr.write(" Unable to create Pipeline \n")
# Source element for reading from the file
print("Creating Source \n ")
source = Gst.ElementFactory.make("filesrc", "file-source")
if not source:
sys.stderr.write(" Unable to create Source \n")
# Since the data format in the input file is elementary h264 stream,
# we need a h264parser
print("Creating H264Parser \n")
h264parser = Gst.ElementFactory.make("h264parse", "h264-parser")
if not h264parser:
sys.stderr.write(" Unable to create h264 parser \n")
# Use nvdec_h264 for hardware accelerated decode on GPU
print("Creating Decoder \n")
decoder = Gst.ElementFactory.make("nvv4l2decoder", "nvv4l2-decoder")
if not decoder:
sys.stderr.write(" Unable to create Nvv4l2 Decoder \n")
# Create nvstreammux instance to form batches from one or more sources.
streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
if not streammux:
sys.stderr.write(" Unable to create NvStreamMux \n")
# Use nvinfer to run inferencing on decoder's output,
# behaviour of inferencing is set through config file
pgie = Gst.ElementFactory.make("nvinfer", "primary-inference")
if not pgie:
sys.stderr.write(" Unable to create pgie \n")
tracker = Gst.ElementFactory.make("nvtracker", "tracker")
if not tracker:
sys.stderr.write(" Unable to create tracker \n")
sgie1 = Gst.ElementFactory.make("nvinfer", "secondary1-nvinference-engine")
if not sgie1:
sys.stderr.write(" Unable to make sgie1 \n")
# Use convertor to convert from NV12 to RGBA as required by nvosd
nvvidconv = Gst.ElementFactory.make("nvvideoconvert", "convertor")
if not nvvidconv:
sys.stderr.write(" Unable to create nvvidconv \n")
print("Creating filter1 \n ")
caps1 = Gst.Caps.from_string("video/x-raw(memory:NVMM), format=RGBA")
filter1 = Gst.ElementFactory.make("capsfilter", "filter1")
if not filter1:
sys.stderr.write(" Unable to get the caps filter1 \n")
filter1.set_property("caps", caps1)
# Create OSD to draw on the converted RGBA buffer
nvosd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")
if not nvosd:
sys.stderr.write(" Unable to create nvosd \n")
# Finally render the osd output
if platform_info.is_integrated_gpu():
print("Creating nv3dsink \n")
sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
if not sink:
sys.stderr.write(" Unable to create nv3dsink \n")
else:
if platform_info.is_platform_aarch64():
print("Creating nv3dsink \n")
sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
else:
print("Creating EGLSink \n")
sink = Gst.ElementFactory.make("nveglglessink", "nvvideo-renderer")
sink.set_property('sync', True)
if not sink:
sys.stderr.write(" Unable to create egl sink \n")
print("Playing file %s " %args[1])
source.set_property('location', args[1])
if os.environ.get('USE_NEW_NVSTREAMMUX') != 'yes': # Only set these properties if not using new gst-nvstreammux
streammux.set_property('width', 1920)
streammux.set_property('height', 1080)
streammux.set_property('batched-push-timeout', MUXER_BATCH_TIMEOUT_USEC)
streammux.set_property('batch-size', 1)
pgie.set_property('config-file-path', "config_pgie_yolov9_det.txt")
sgie1.set_property('config-file-path', "dstest2_sgie1_config.txt")
#Set properties of tracker
config = configparser.ConfigParser()
config.read('dstest2_tracker_config.txt')
config.sections()
for key in config['tracker']:
if key == 'tracker-width' :
tracker_width = config.getint('tracker', key)
tracker.set_property('tracker-width', tracker_width)
if key == 'tracker-height' :
tracker_height = config.getint('tracker', key)
tracker.set_property('tracker-height', tracker_height)
if key == 'gpu-id' :
tracker_gpu_id = config.getint('tracker', key)
tracker.set_property('gpu_id', tracker_gpu_id)
if key == 'll-lib-file' :
tracker_ll_lib_file = config.get('tracker', key)
tracker.set_property('ll-lib-file', tracker_ll_lib_file)
if key == 'll-config-file' :
tracker_ll_config_file = config.get('tracker', key)
tracker.set_property('ll-config-file', tracker_ll_config_file)
if not platform_info.is_integrated_gpu():
# Use CUDA unified memory in the pipeline so frames
# can be easily accessed on CPU in Python.
mem_type = int(pyds.NVBUF_MEM_CUDA_UNIFIED)
streammux.set_property("nvbuf-memory-type", mem_type)
nvvidconv.set_property("nvbuf-memory-type", mem_type)
print("Adding elements to Pipeline \n")
pipeline.add(source)
pipeline.add(h264parser)
pipeline.add(decoder)
pipeline.add(streammux)
pipeline.add(pgie)
pipeline.add(tracker)
pipeline.add(sgie1)
pipeline.add(nvvidconv)
pipeline.add(filter1)
pipeline.add(nvosd)
pipeline.add(sink)
# we link the elements together
# file-source -> h264-parser -> nvh264-decoder ->
# nvinfer -> nvvidconv -> nvosd -> video-renderer
print("Linking elements in the Pipeline \n")
source.link(h264parser)
h264parser.link(decoder)
sinkpad = streammux.request_pad_simple("sink_0")
if not sinkpad:
sys.stderr.write(" Unable to get the sink pad of streammux \n")
srcpad = decoder.get_static_pad("src")
if not srcpad:
sys.stderr.write(" Unable to get source pad of decoder \n")
srcpad.link(sinkpad)
streammux.link(pgie)
pgie.link(tracker)
tracker.link(sgie1)
sgie1.link(nvvidconv)
nvvidconv.link(filter1)
filter1.link(nvosd)
nvosd.link(sink)
# create an event loop and feed gstreamer bus mesages to it
loop = GLib.MainLoop()
bus = pipeline.get_bus()
bus.add_signal_watch()
bus.connect ("message", bus_call, loop)
# Lets add probe to get informed of the meta data generated, we add probe to
# the sink pad of the osd element, since by that time, the buffer would have
# had got all the metadata.
osdsrcpad = nvosd.get_static_pad("src")
if not osdsrcpad:
sys.stderr.write(" Unable to get src pad of nvosd \n")
osdsrcpad.add_probe(Gst.PadProbeType.BUFFER, osd_src_pad_buffer_probe, 0)
# start play back and listen to events
print("Starting pipeline \n")
pipeline.set_state(Gst.State.PLAYING)
try:
loop.run()
except:
pass
# cleanup
pipeline.set_state(Gst.State.NULL)
if name == ‘main’:
sys.exit(main(sys.argv))
and the config file of pgie and sgie are
config_pgie_yolov9_det.txt (661 Bytes)
dstest2_sgie1_config.txt (449 Bytes)
in sgie the mentioned midas model can be downloaded from
wget https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-small.onnx
the pgie is working i want to pass main frame to sgie and using the bounding box coordinates from the pgie i want to take the mid point of that bounding box and i want to pass to sgie depth model and want to get the depth value then i want to print that in the top of bounding of pgie output frame
and i like to get the pgie and sgie output as 2 tiled display
please help me sir