DeepStream external Python file integration

• Hardware Platform: GPU
• DeepStream Version: 7.0

Hi,
I am trying to integrate YOLOv9 on DeepStream with MiDaS for depth estimation.

This is the GitHub repo I was referring to.
I edited the nvdsinfer_yolo.cpp file in the nvdsinfer_yolo folder to pass data to my MiDaS Python file. I am able to get the x1, y1, x2, y2 bounding-box coordinates, but I also need to pass the video frame to the Python file, and I have not been able to get that frame. Please help.

I am pasting my edited nvdsinfer_yolo.cpp file below.

#include <cstring>
#include <iostream>
#include "nvdsinfer_custom_impl.h"
#include <cassert>
#include <cmath>
#include <cstdlib>

#include <Python.h>
#include <string>

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define CLIP(a,min,max) (MAX(MIN(a, max), min))

/* C-linkage to prevent name-mangling */
extern "C"
bool NvDsInferYoloNMS (std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
                       NvDsInferNetworkInfo const &networkInfo,
                       NvDsInferParseDetectionParams const &detectionParams,
                       std::vector<NvDsInferObjectDetectionInfo> &objectList);

extern "C" bool NvDsInferYoloMask(
    std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
    NvDsInferNetworkInfo const &networkInfo,
    NvDsInferParseDetectionParams const &detectionParams,
    std::vector<NvDsInferInstanceMaskInfo> &objectList);

extern "C"
bool NvDsInferYoloNMS (std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
                       NvDsInferNetworkInfo const &networkInfo,
                       NvDsInferParseDetectionParams const &detectionParams,
                       std::vector<NvDsInferObjectDetectionInfo> &objectList) {
if (outputLayersInfo.size() != 5)
{
    std::cerr << "Mismatch in the number of output buffers."
              << " Expected 5 output buffers, detected in the network: "
              << outputLayersInfo.size() << std::endl;
    return false;
}

auto layerFinder = [&outputLayersInfo](const std::string &name)
    -> const NvDsInferLayerInfo *{
    for (auto &layer : outputLayersInfo) {
        if (layer.layerName && name == layer.layerName) {
            return &layer;
        }
    }
    return nullptr;
};

const NvDsInferLayerInfo *num_detsLayer = layerFinder("num_dets");
const NvDsInferLayerInfo *boxesLayer = layerFinder("det_boxes");
const NvDsInferLayerInfo *scoresLayer = layerFinder("det_scores");
const NvDsInferLayerInfo *classesLayer = layerFinder("det_classes");
const NvDsInferLayerInfo *indicesLayer = layerFinder("det_indices");

if (!num_detsLayer || !boxesLayer || !scoresLayer || !classesLayer || !indicesLayer) {
    if (!num_detsLayer) {
        std::cerr << "  - num_detsLayer: Missing or unsupported data type." << std::endl;
    }

    if (!boxesLayer) {
        std::cerr << "  - boxesLayer: Missing or unsupported data type." << std::endl;
    }

    if (!scoresLayer) {
        std::cerr << "  - scoresLayer: Missing or unsupported data type." << std::endl;
    }

    if (!classesLayer) {
        std::cerr << "  - classesLayer: Missing or unsupported data type." << std::endl;
    }

    if (!indicesLayer) {
        std::cerr << "  - indicesLayer: Missing or unsupported data type." << std::endl;
    }
    return false;
}

if(num_detsLayer->inferDims.numDims != 1U) {
    std::cerr << "Network num_dets dims is : " <<
        num_detsLayer->inferDims.numDims << " expect is 1"<< std::endl;
    return false;
}
if(boxesLayer->inferDims.numDims != 2U) {
    std::cerr << "Network det_boxes dims is : " <<
        boxesLayer->inferDims.numDims << " expect is 2"<< std::endl;
    return false;
}
if(scoresLayer->inferDims.numDims != 1U) {
    std::cerr << "Network det_scores dims is : " <<
        scoresLayer->inferDims.numDims << " expect is 1"<< std::endl;
    return false;
}
if(classesLayer->inferDims.numDims != 1U) {
    std::cerr << "Network det_classes dims is : " <<
        classesLayer->inferDims.numDims << " expect is 1"<< std::endl;
    return false;
}
if(indicesLayer->inferDims.numDims != 1U) {
    std::cerr << "Network det_indices dims is : " <<
        indicesLayer->inferDims.numDims << " expect is 1"<< std::endl;
    return false;
}

const char* log_enable = std::getenv("ENABLE_DEBUG");

int* p_keep_count = (int *) num_detsLayer->buffer;
int* p_indices = (int *) indicesLayer->buffer;
float* p_bboxes = (float *) boxesLayer->buffer;

NvDsInferDims inferDims_p_bboxes = boxesLayer->inferDims;
int numElements_p_bboxes=inferDims_p_bboxes.numElements;

float* p_scores = (float *) scoresLayer->buffer;
unsigned int* p_classes = (unsigned int *) classesLayer->buffer;
const float threshold = detectionParams.perClassThreshold[0];

float max_bbox=0;
for (int i=0; i < numElements_p_bboxes; i++)
{
    if ( max_bbox < p_bboxes[i] )
        max_bbox=p_bboxes[i];
}

if (p_keep_count[0] > 0)
{
    assert (!(max_bbox < 2.0));
    for (int i = 0; i < p_keep_count[0]; i++) {

        if ( p_scores[i] < threshold) continue;
        if ((unsigned int) p_classes[i] >= detectionParams.numClassesConfigured) {
            printf("Error: The number of classes configured in the GIE config-file (postprocess > num_detected_classes) is incorrect.\n");
            printf("Detected class index: %u\n", (unsigned int) p_classes[i]);
        }
        assert((unsigned int) p_classes[i] < detectionParams.numClassesConfigured);
    
    //std::cout << "class: " << p_classes[i] << std::endl;
    
    
        NvDsInferObjectDetectionInfo object;
        object.classId = (int) p_classes[i];
        object.detectionConfidence = p_scores[i];

        std::cout << object.detectionConfidence << std::endl;


        object.left=p_bboxes[4*i];
        object.top=p_bboxes[4*i+1];
        object.width=(p_bboxes[4*i+2] - object.left);
        object.height= (p_bboxes[4*i+3] - object.top);
        //std::cout << "x1: " << object.left << ", y1: " << object.top << ", x2: " << p_bboxes[4*i+2] << ", y2: " << p_bboxes[4*i+3] << std::endl;
        
        // Call midas.midas_1(x1, y1, x2, y2) through the embedded Python interpreter.
        // (Initializing and finalizing the interpreter for every detection is expensive;
        // doing it once outside the loop would be preferable.)
        Py_Initialize();
        PyObject *name, *load_module, *func, *callfunc, *args;
        name = PyUnicode_FromString("midas");
        load_module = PyImport_Import(name);

        func = PyObject_GetAttrString(load_module, "midas_1");
        args = PyTuple_Pack(4, PyFloat_FromDouble(object.left), PyFloat_FromDouble(object.top),
                            PyFloat_FromDouble(p_bboxes[4*i+2]), PyFloat_FromDouble(p_bboxes[4*i+3]));
        callfunc = PyObject_CallObject(func, args);
        double midas_out = PyFloat_AsDouble(callfunc);

        Py_Finalize();
        std::cout << midas_out << std::endl;
        
        
        
        if(log_enable != NULL && std::stoi(log_enable)) {
            std::cout << "idx/label/conf/ x/y w/h -- "
            << p_indices[i] << " "
            << p_classes[i] << " "
            << p_scores[i] << " "
            << object.left << " " << object.top << " " << object.width << " "<< object.height << " "
            << std::endl;
        }

        object.left=CLIP(object.left, 0, networkInfo.width - 1);
        object.top=CLIP(object.top, 0, networkInfo.height - 1);
        object.width=CLIP(object.width, 0, networkInfo.width - 1);
        object.height=CLIP(object.height, 0, networkInfo.height - 1);

        objectList.push_back(object);
    }
}
return true;

}

extern "C" bool NvDsInferYoloMask(
    std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
    NvDsInferNetworkInfo const &networkInfo,
    NvDsInferParseDetectionParams const &detectionParams,
    std::vector<NvDsInferInstanceMaskInfo> &objectList)
{
if (outputLayersInfo.size() != 5) {
    std::cerr << "Mismatch in the number of output buffers."
              << " Expected 5 output buffers, detected in the network: "
              << outputLayersInfo.size() << std::endl;
    return false;
}

auto layerFinder = [&outputLayersInfo](const std::string &name)
    -> const NvDsInferLayerInfo *{
    for (auto &layer : outputLayersInfo) {
        if (layer.layerName && name == layer.layerName) {
            return &layer;
        }
    }
    return nullptr;
};

const NvDsInferLayerInfo *num_detsLayer = layerFinder("num_dets");
const NvDsInferLayerInfo *boxesLayer = layerFinder("det_boxes");
const NvDsInferLayerInfo *scoresLayer = layerFinder("det_scores");
const NvDsInferLayerInfo *classesLayer = layerFinder("det_classes");
const NvDsInferLayerInfo *masksLayer = layerFinder("det_masks");

if (!num_detsLayer || !boxesLayer || !scoresLayer || !classesLayer || !masksLayer) {
    if (!num_detsLayer) {
        std::cerr << "  - num_detsLayer: Missing or unsupported data type." << std::endl;
    }

    if (!boxesLayer) {
        std::cerr << "  - boxesLayer: Missing or unsupported data type." << std::endl;
    }

    if (!scoresLayer) {
        std::cerr << "  - scoresLayer: Missing or unsupported data type." << std::endl;
    }

    if (!classesLayer) {
        std::cerr << "  - classesLayer: Missing or unsupported data type." << std::endl;
    }

    if (!masksLayer) {
        std::cerr << "  - masksLayer: Missing or unsupported data type." << std::endl;
    }
    return false;
}

if(num_detsLayer->inferDims.numDims != 1U) {
    std::cerr << "Network num_dets dims is : " <<
        num_detsLayer->inferDims.numDims << " expect is 1"<< std::endl;
    return false;
}
if(boxesLayer->inferDims.numDims != 2U) {
    std::cerr << "Network det_boxes dims is : " <<
        boxesLayer->inferDims.numDims << " expect is 2"<< std::endl;
    return false;
}
if(scoresLayer->inferDims.numDims != 1U) {
    std::cerr << "Network det_scores dims is : " <<
        scoresLayer->inferDims.numDims << " expect is 1"<< std::endl;
    return false;
}
if(classesLayer->inferDims.numDims != 1U) {
    std::cerr << "Network det_classes dims is : " <<
        classesLayer->inferDims.numDims << " expect is 1"<< std::endl;
    return false;
}
if(masksLayer->inferDims.numDims != 2U) {
    std::cerr << "Network det_masks dims is : " <<
        masksLayer->inferDims.numDims << " expect is 2"<< std::endl;
    return false;
}

const char* log_enable = std::getenv("ENABLE_DEBUG");


int* p_keep_count = (int *) num_detsLayer->buffer;
float* p_bboxes = (float *) boxesLayer->buffer;
float* p_scores = (float *) scoresLayer->buffer;
unsigned int* p_classes = (unsigned int *) classesLayer->buffer;
float *p_mask = (float *)  masksLayer->buffer;

const float threshold = detectionParams.perClassThreshold[0];

NvDsInferDims inferDims_p_bboxes = boxesLayer->inferDims;
int numElements_p_bboxes=inferDims_p_bboxes.numElements;

const int mask_resolution = sqrt(masksLayer->inferDims.d[1]);

if(log_enable != NULL && std::stoi(log_enable)) {
    std::cout << "keep cout: " << p_keep_count[0] << std::endl;
}

float max_bbox=0;
for (int i=0; i < numElements_p_bboxes; i++){
    if ( max_bbox < p_bboxes[i] )
        max_bbox=p_bboxes[i];
}

if (p_keep_count[0] > 0){
    assert (!(max_bbox < 2.0));

   for (int i = 0; i < p_keep_count[0]; i++) {
       
        if ( p_scores[i] < threshold) continue;

        if ((unsigned int) p_classes[i] >= detectionParams.numClassesConfigured) {
            printf("Error: The number of classes configured in the GIE config-file (postprocess > num_detected_classes) is incorrect.\n");
            printf("Detected class index: %u\n", (unsigned int) p_classes[i]);
            continue;
        }
        //assert((unsigned int) p_classes[i] < detectionParams.numClassesConfigured);
        
        NvDsInferInstanceMaskInfo object;
        object.classId = (int) p_classes[i];
        object.detectionConfidence = p_scores[i];

        object.left=p_bboxes[4*i];
        object.top=p_bboxes[4*i+1];
        object.width=(p_bboxes[4*i+2] - object.left);
        object.height= (p_bboxes[4*i+3] - object.top);

        if (log_enable != NULL && std::stoi(log_enable)) {
            std::cout << "label/conf/ x/y w/h -- "
            << p_classes[i] << " "
            << p_scores[i] << " "
            << object.left << " " << object.top << " " << object.width << " "<< object.height << " "
            << std::endl;
        }

        object.left=CLIP(object.left, 0, networkInfo.width - 1);
        object.top=CLIP(object.top, 0, networkInfo.height - 1);
        object.width=CLIP(object.width, 0, networkInfo.width - 1);
        object.height=CLIP(object.height, 0, networkInfo.height - 1);


        object.mask_size = sizeof(float) * mask_resolution * mask_resolution;
        object.mask = new float[mask_resolution * mask_resolution];
        object.mask_width = mask_resolution;
        object.mask_height = mask_resolution;

        const float* rawMask = reinterpret_cast<const float*>(p_mask + i * mask_resolution * mask_resolution);

        //float *rawMask = reinterpret_cast<float *>(p_mask + mask_resolution * mask_resolution * i);
        
        memcpy(object.mask, rawMask, sizeof(float) * mask_resolution * mask_resolution);

        objectList.push_back(object);
   }
}
return true;

}

CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferYoloNMS);
CHECK_CUSTOM_INSTANCE_MASK_PARSE_FUNC_PROTOTYPE(NvDsInferYoloMask);

How can I pass the video frame from the NvDsInferYoloNMS function to the Python file midas.py?

Sorry, I don’t understand your intention.

You have rewritten the pipeline using yolov9 in python, and then you want to get the bbox and video frame, right?

You don’t have to modify any code in nvdsinfer_yolo; just add a probe function to the src pad of the pgie.

This sample is similar to your requirement.
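
For reference, a minimal sketch of such a probe (my own illustration, not the linked sample; it assumes the standard pyds bindings, and pgie is the nvinfer element created in your pipeline):

import pyds
from gi.repository import Gst

def pgie_src_pad_buffer_probe(pad, info, u_data):
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        return Gst.PadProbeReturn.OK
    # Batch metadata attached upstream by nvstreammux / nvinfer
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        l_obj = frame_meta.obj_meta_list
        while l_obj is not None:
            obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            rect = obj_meta.rect_params
            # x1, y1, x2, y2 of this detection
            print(rect.left, rect.top, rect.left + rect.width, rect.top + rect.height)
            try:
                l_obj = l_obj.next
            except StopIteration:
                break
        try:
            l_frame = l_frame.next
        except StopIteration:
            break
    return Gst.PadProbeReturn.OK

# Attach it to the src pad of the pgie (nvinfer) element:
# pgie.get_static_pad("src").add_probe(Gst.PadProbeType.BUFFER, pgie_src_pad_buffer_probe, 0)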

Where should I add the probe function, in the same file or in another file?
What I actually want is to get the bounding-box annotations (x1, y1, x2, y2) and the video frame, pass them to the midas.py file to compute the depth value, and then display that depth on top of the bounding box in the output video.

Also, no other files are available in this GitHub repo, but when I run deepstream-app -c deepstream_yolov9_det.txt, a working video output does come up.

  1. First, use Python to build your own pipeline.

  2. Add a probe function to the GIE’s src pad; refer to the example above.

I guess midas.py is your custom module. If you want to access the bbox/video frame in that module, you also need the process above.
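
If midas.py is meant to take a crop of the frame plus the box, a rough sketch of a helper you could call from inside the object loop of such a probe is below (sketch only: midas.estimate_depth() is a hypothetical function name, and it assumes the stream has been converted to RGBA and, on dGPU, to CUDA unified memory so that pyds.get_nvds_buf_surface() can map the frame, as in the sample linked above):

import numpy as np
import cv2
import pyds
import midas  # your custom module; estimate_depth() is a hypothetical API

def depth_for_object(gst_buffer, frame_meta, obj_meta):
    """Crop the detected object out of the mapped frame and hand it to midas.py."""
    n_frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
    frame = np.array(n_frame, copy=True, order='C')
    frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)

    rect = obj_meta.rect_params
    x1, y1 = int(rect.left), int(rect.top)
    x2, y2 = int(rect.left + rect.width), int(rect.top + rect.height)
    crop = frame[y1:y2, x1:x2]

    # Assumed to return one depth value for the cropped object
    return midas.estimate_depth(crop, (x1, y1, x2, y2))

In practice you would map the frame once per frame_meta and only crop per object, since get_nvds_buf_surface() is relatively expensive; on Jetson you also need to call pyds.unmap_nvds_buf_surface() when you are done with the array.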


Can you please help me build my own pipeline, or provide any links on how to do that?

I also have a doubt: when I run deepstream-app -c deepstream_yolov9_det.txt, why is it automatically using the executable from the DeepStream 7.0 bin folder? If there were an executable in its own directory, would it use that one directly? And how can I edit the C file of that executable in the bin folder?


You need to build the pipeline by yourself.

deepstream-app is open source; the code is in /opt/nvidia/deepstream/deepstream/sources.

I suggest you read the DeepStream-related documents first:

https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_ref_app_deepstream.html


Thank you so much.
Is it possible to build my own pipeline in Python? Is there any reference for that?

Also, MiDaS is a depth-estimation model; will I be able to integrate the MiDaS model into this pipeline?

1. Save as deepstream_test_yolov9.py

#!/usr/bin/env python3

################################################################################
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import sys
import threading
sys.path.append('../')
import os
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst
from common.platform_info import PlatformInfo
from common.bus_call import bus_call

import pyds
import numpy as np
import cv2
from os import path

PGIE_CLASS_ID_PERSON = 1
PGIE_CLASS_ID_BICYCLE = 2
PGIE_CLASS_ID_CAR = 3
MUXER_BATCH_TIMEOUT_USEC = 33000

def osd_src_pad_buffer_probe(pad,info,u_data):
    frame_number=0
    num_rects=0

    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer ")
        return

    # Retrieve batch metadata from the gst_buffer
    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
    # C address of gst_buffer as input, which is obtained with hash(gst_buffer)
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            # Note that l_frame.data needs a cast to pyds.NvDsFrameMeta
            # The casting is done by pyds.NvDsFrameMeta.cast()
            # The casting also keeps ownership of the underlying memory
            # in the C code, so the Python garbage collector will leave
            # it alone.
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        #Intiallizing object counter with 0.
        obj_counter = {
            PGIE_CLASS_ID_PERSON:0,
            PGIE_CLASS_ID_BICYCLE:0,
            PGIE_CLASS_ID_CAR:0,
        }
        frame_number=frame_meta.frame_num
        num_rects = frame_meta.num_obj_meta
        l_obj=frame_meta.obj_meta_list
        save_image = False
        while l_obj is not None:
            try:
                # Casting l_obj.data to pyds.NvDsObjectMeta
                obj_meta=pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break
            if obj_meta.class_id in obj_counter:
                obj_counter[obj_meta.class_id] += 1
            left = obj_meta.detector_bbox_info.org_bbox_coords.left
            top = obj_meta.detector_bbox_info.org_bbox_coords.top
            width = obj_meta.detector_bbox_info.org_bbox_coords.width
            height = obj_meta.detector_bbox_info.org_bbox_coords.height
            print(f"bbox left: {left}, top: {top}, width: {width}, height: {height}")

            if frame_number % 300 == 0:
                # Getting Image data using nvbufsurface
                # the input should be address of buffer and batch_id
                n_frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
                frame_copy = np.array(n_frame, copy=True, order='C')
                # convert the array into cv2 default color format
                frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_RGBA2BGRA)
                if platform_info.is_integrated_gpu():
                    # If Jetson, since the buffer is mapped to CPU for retrieval, it must also be unmapped 
                    pyds.unmap_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id) # The unmap call should be made after operations with the original array are complete.
                                                                                            #  The original array cannot be accessed after this call.
                save_image = True

            obj_meta.rect_params.border_color.set(0.0, 0.0, 1.0, 0.8) #0.8 is alpha (opacity)
            try: 
                l_obj=l_obj.next
            except StopIteration:
                break

        # Acquiring a display meta object. The memory ownership remains in
        # the C code so downstream plugins can still access it. Otherwise
        # the garbage collector will claim it when this probe function exits.
        display_meta=pyds.nvds_acquire_display_meta_from_pool(batch_meta)
        display_meta.num_labels = 1
        py_nvosd_text_params = display_meta.text_params[0]
        # Setting display text to be shown on screen
        # Note that the pyds module allocates a buffer for the string, and the
        # memory will not be claimed by the garbage collector.
        # Reading the display_text field here will return the C address of the
        # allocated string. Use pyds.get_string() to get the string content.
        py_nvosd_text_params.display_text = "Frame Number={} Number of Objects={} Vehicle_count={} Person_count={}".format(frame_number, num_rects, obj_counter[PGIE_CLASS_ID_CAR], obj_counter[PGIE_CLASS_ID_PERSON])

        # Now set the offsets where the string should appear
        py_nvosd_text_params.x_offset = 10
        py_nvosd_text_params.y_offset = 12

        # Font , font-color and font-size
        py_nvosd_text_params.font_params.font_name = "Serif"
        py_nvosd_text_params.font_params.font_size = 10
        # set(red, green, blue, alpha); set to White
        py_nvosd_text_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)

        # Text background color
        py_nvosd_text_params.set_bg_clr = 1
        # set(red, green, blue, alpha); set to Black
        py_nvosd_text_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)
        # Using pyds.get_string() to get display_text as string
        print(pyds.get_string(py_nvosd_text_params.display_text))
        pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)
        
        if save_image:
            img_path = f"{folder_name}/frame_{frame_number}.jpg"
            print(f"Saving frame {frame_number} to {img_path}")
            cv2.imwrite(img_path, frame_copy)
        try:
            l_frame=l_frame.next
        except StopIteration:
            break
    return Gst.PadProbeReturn.OK

def main(args):
    # Check input arguments
    if len(args) < 2:
        sys.stderr.write("usage: %s <media file or uri> <output folder>\n" % args[0])
        sys.exit(1)

    global folder_name
    folder_name = args[-1]
    if path.exists(folder_name):
        sys.stderr.write("The output folder %s already exists. Please remove it first.\n" % folder_name)
        sys.exit(1)

    os.mkdir(folder_name)
    print("Frames will be saved in ", folder_name)

    global platform_info
    platform_info = PlatformInfo()
    # Standard GStreamer initialization
    Gst.init(None)

    # Create gstreamer elements
    # Create Pipeline element that will form a connection of other elements
    print("Creating Pipeline \n ")
    pipeline = Gst.Pipeline()

    if not pipeline:
        sys.stderr.write(" Unable to create Pipeline \n")

    # Source element for reading from the file
    print("Creating Source \n ")
    source = Gst.ElementFactory.make("filesrc", "file-source")
    if not source:
        sys.stderr.write(" Unable to create Source \n")

    # Since the data format in the input file is elementary h264 stream,
    # we need a h264parser
    print("Creating H264Parser \n")
    h264parser = Gst.ElementFactory.make("h264parse", "h264-parser")
    if not h264parser:
        sys.stderr.write(" Unable to create h264 parser \n")

    # Use nvdec_h264 for hardware accelerated decode on GPU
    print("Creating Decoder \n")
    decoder = Gst.ElementFactory.make("nvv4l2decoder", "nvv4l2-decoder")
    if not decoder:
        sys.stderr.write(" Unable to create Nvv4l2 Decoder \n")

    # Create nvstreammux instance to form batches from one or more sources.
    streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
    if not streammux:
        sys.stderr.write(" Unable to create NvStreamMux \n")

    # Use nvinfer to run inferencing on decoder's output,
    # behaviour of inferencing is set through config file
    pgie = Gst.ElementFactory.make("nvinfer", "primary-inference")
    if not pgie:
        sys.stderr.write(" Unable to create pgie \n")

    # Use convertor to convert from NV12 to RGBA as required by nvosd
    nvvidconv = Gst.ElementFactory.make("nvvideoconvert", "convertor")
    if not nvvidconv:
        sys.stderr.write(" Unable to create nvvidconv \n")

    print("Creating filter1 \n ")
    caps1 = Gst.Caps.from_string("video/x-raw(memory:NVMM), format=RGBA")
    filter1 = Gst.ElementFactory.make("capsfilter", "filter1")
    if not filter1:
        sys.stderr.write(" Unable to get the caps filter1 \n")
    filter1.set_property("caps", caps1)

    # Create OSD to draw on the converted RGBA buffer
    nvosd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")
    if not nvosd:
        sys.stderr.write(" Unable to create nvosd \n")

    # Finally render the osd output
    if platform_info.is_integrated_gpu():
        print("Creating nv3dsink \n")
        sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
        if not sink:
            sys.stderr.write(" Unable to create nv3dsink \n")
    else:
        if platform_info.is_platform_aarch64():
            print("Creating nv3dsink \n")
            sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
        else:
            print("Creating EGLSink \n")
            sink = Gst.ElementFactory.make("fakesink", "nvvideo-renderer")
            sink.set_property('sync', True)
        if not sink:
            sys.stderr.write(" Unable to create egl sink \n")

    print("Playing file %s " %args[1])
    source.set_property('location', args[1])
    if os.environ.get('USE_NEW_NVSTREAMMUX') != 'yes': # Only set these properties if not using new gst-nvstreammux
        streammux.set_property('width', 1920)
        streammux.set_property('height', 1080)
        streammux.set_property('batched-push-timeout', MUXER_BATCH_TIMEOUT_USEC)
    
    streammux.set_property('batch-size', 1)
    pgie.set_property('config-file-path', "config_pgie_yolov9_det.txt")

    if not platform_info.is_integrated_gpu():
        # Use CUDA unified memory in the pipeline so frames
        # can be easily accessed on CPU in Python.
        mem_type = int(pyds.NVBUF_MEM_CUDA_UNIFIED)
        streammux.set_property("nvbuf-memory-type", mem_type)
        nvvidconv.set_property("nvbuf-memory-type", mem_type)

    print("Adding elements to Pipeline \n")
    pipeline.add(source)
    pipeline.add(h264parser)
    pipeline.add(decoder)
    pipeline.add(streammux)
    pipeline.add(pgie)
    pipeline.add(nvvidconv)
    pipeline.add(filter1)
    pipeline.add(nvosd)
    pipeline.add(sink)

    # we link the elements together
    # file-source -> h264-parser -> nvh264-decoder ->
    # nvinfer -> nvvidconv -> nvosd -> video-renderer
    print("Linking elements in the Pipeline \n")
    source.link(h264parser)
    h264parser.link(decoder)

    sinkpad = streammux.request_pad_simple("sink_0")
    if not sinkpad:
        sys.stderr.write(" Unable to get the sink pad of streammux \n")
    srcpad = decoder.get_static_pad("src")
    if not srcpad:
        sys.stderr.write(" Unable to get source pad of decoder \n")
    srcpad.link(sinkpad)
    streammux.link(pgie)
    pgie.link(nvvidconv)
    nvvidconv.link(filter1)
    filter1.link(nvosd)
    nvosd.link(sink)

    # create an event loop and feed gstreamer bus mesages to it
    loop = GLib.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect ("message", bus_call, loop)

    # Lets add probe to get informed of the meta data generated, we add probe to
    # the sink pad of the osd element, since by that time, the buffer would have
    # had got all the metadata.
    osdsrcpad = nvosd.get_static_pad("src")
    if not osdsrcpad:
        sys.stderr.write(" Unable to get src pad of nvosd \n")

    osdsrcpad.add_probe(Gst.PadProbeType.BUFFER, osd_src_pad_buffer_probe, 0)

    # start play back and listen to events
    print("Starting pipeline \n")
    pipeline.set_state(Gst.State.PLAYING)

    try:
        loop.run()
    except:
        pass
    # cleanup
    pipeline.set_state(Gst.State.NULL)

if __name__ == '__main__':
    sys.exit(main(sys.argv))
2. Save as config_pgie_yolov9_det.txt
[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
model-color-format=0
onnx-file="your path to"/deepstream-yolov9/models/yolov9-c-converted-trt.onnx
model-engine-file="your path to"/deepstream-yolov9/models/yolov9-c-converted-trt.onnx_b1_gpu0_fp16.engine
labelfile-path="your path to"/deepstream-yolov9/labels.txt
batch-size=1
infer-dims=3;640;640
force-implicit-batch-dim=0
# 0: FP32 1: INT8 2: FP16
network-mode=2
num-detected-classes=80
interval=0
gie-unique-id=1
process-mode=1
# 0: Detector 1: Classifier 2: Segmentation 3: Instance Segmentation
network-type=0
cluster-mode=2
maintain-aspect-ratio=1
parse-bbox-func-name=NvDsInferYoloNMS
custom-lib-path="your path to"/deepstream-yolov9/nvdsinfer_yolo/libnvds_infer_yolo.so

[class-attrs-all]
pre-cluster-threshold=0.25
topk=100
3. Before running, please make sure that DeepStream and the DeepStream Python bindings are correctly installed, as I mentioned above.

Run the following command line.

python3 deepstream_test_yolov9.py /opt/nvidia/deepstream/deepstream/samples/streams/sample_720p.h264 out
4. The complete pipeline is as follows:
filesrc --> h264parse --> nvv4l2decoder --> nvstreammux --> nvinfer (with yolov9) --> nvvideoconvert --> nvdsosd --> sink

In fact, deepstream can have multiple GIEs, and you can put your model into the pipeline as a GIE, but you must first understand how deepstream works.
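
Just as a rough sketch of that idea (not tested: it assumes you have exported MiDaS to ONNX yourself, the file names are placeholders, and since MiDaS outputs a raw depth map rather than detections you would normally ask nvinfer to attach the raw output tensor to the metadata and read it in a probe), a secondary-GIE config could look roughly like this:

[property]
gpu-id=0
# placeholder file names for a hypothetical MiDaS ONNX export
onnx-file=midas.onnx
model-engine-file=midas.onnx_b1_gpu0_fp16.engine
batch-size=1
network-mode=2
# run as secondary GIE on the objects produced by the pgie
process-mode=2
gie-unique-id=2
operate-on-gie-id=1
# attach raw output tensors to the metadata instead of parsing detections
output-tensor-meta=1
# 100 = "other" network, no built-in post-processing
network-type=100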

python3 deepstream_object_yolov9.py /opt/nvidia/deepstream/deepstream/samples/streams/sample_720p.h264 output
Frames will be saved in output
Creating Pipeline

Creating Source

Creating H264Parser

Creating Decoder

Creating filter1

Is it Integrated GPU? : 0
Creating EGLSink

Playing file /opt/nvidia/deepstream/deepstream/samples/streams/sample_720p.h264
Adding elements to Pipeline

Linking elements in the Pipeline

Starting pipeline

0:00:05.590349752 35221 0x5ecdf4362940 INFO nvinfer gstnvinfer.cpp:682:gst_nvinfer_logger: NvDsInferContext[UID 1]: Info from NvDsInferContextImpl::deserializeEngineAndBackend() <nvdsinfer_context_impl.cpp:2095> [UID = 1]: deserialized trt engine from :/opt/nvidia/deepstream/deepstream-7.0/sources/deepstream_python_apps/apps/deepstream-object-detection-depth/models/yolov9-c-converted-trt.onnx_b1_gpu0_fp16.engine
WARNING: [TRT]: The getMaxBatchSize() function should not be used with an engine built from a network created with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. This function will always return 1.
INFO: …/nvdsinfer/nvdsinfer_model_builder.cpp:612 [Implicit Engine Info]: layers num: 6
0 INPUT kFLOAT images 3x640x640
1 OUTPUT kINT32 num_dets 1
2 OUTPUT kFLOAT det_boxes 100x4
3 OUTPUT kFLOAT det_scores 100
4 OUTPUT kINT32 det_classes 100
5 OUTPUT kINT32 det_indices 100

0:00:05.675645796 35221 0x5ecdf4362940 INFO nvinfer gstnvinfer.cpp:682:gst_nvinfer_logger: NvDsInferContext[UID 1]: Info from NvDsInferContextImpl::generateBackendContext() <nvdsinfer_context_impl.cpp:2198> [UID = 1]: Use deserialized engine model: /opt/nvidia/deepstream/deepstream-7.0/sources/deepstream_python_apps/apps/deepstream-object-detection-depth/models/yolov9-c-converted-trt.onnx_b1_gpu0_fp16.engine
0:00:05.678276278 35221 0x5ecdf4362940 INFO nvinfer gstnvinfer_impl.cpp:343:notifyLoadModelStatus: [UID 1]: Load new model:config_pgie_yolov9_det.txt sucessfully
Segmentation fault (core dumped)

I am getting a segmentation fault (core dumped) error.

Please make sure that deepstream_test_1.py can run normally.

The above code has been tested by me and can run normally. Please check your environment.

Can you share how this model works together with YOLOv9?

We would like to know how to make DeepStream do more.

These are both links to the MiDaS model; it is on PyTorch Hub.

In my project, after getting the bounding-box coordinates from the PGIE (YOLOv9), I thought of adding the MiDaS model as an SGIE so that a depth value is predicted for each object, and when the final output is shown in the window, the depth of each object is displayed on top of its bounding box. Please help.

Using that depth, the distance to the object should be estimated.

It does not have to be the MiDaS model specifically; I just want depth estimation to be integrated.
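
For the display part, one possible sketch (not a tested solution; depth is whatever value your MiDaS code produces, and this reuses only the pyds display-meta calls already shown in the probe above) is to attach a text label per object at the top of its bounding box:

import pyds

def draw_depth_label(batch_meta, frame_meta, obj_meta, depth):
    """Attach a text label just above the object's bounding box (one display meta per object)."""
    display_meta = pyds.nvds_acquire_display_meta_from_pool(batch_meta)
    display_meta.num_labels = 1
    txt = display_meta.text_params[0]
    txt.display_text = "depth: {:.2f}".format(depth)
    # Anchor the label at the top-left corner of the bbox
    txt.x_offset = int(obj_meta.rect_params.left)
    txt.y_offset = max(0, int(obj_meta.rect_params.top) - 15)
    txt.font_params.font_name = "Serif"
    txt.font_params.font_size = 10
    txt.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)
    txt.set_bg_clr = 1
    txt.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)
    pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)

Called from a probe upstream of nvdsosd (for example the osd src/sink pad probe shown earlier), nvdsosd will then render the label in the same frame as the bounding box.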

Hi,
Will I be able to integrate the MiDaS model as an SGIE in this program? I am not getting any output; what mistake have I made, or is it not possible?

#!/usr/bin/env python3

################################################################################
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import sys
import threading
sys.path.append('../')
import os
import gi
import configparser
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst
from common.platform_info import PlatformInfo
from common.bus_call import bus_call

import pyds
import numpy as np
import cv2
from os import path

PGIE_CLASS_ID_PERSON = 1
PGIE_CLASS_ID_BICYCLE = 2
PGIE_CLASS_ID_CAR = 3
MUXER_BATCH_TIMEOUT_USEC = 33000

def osd_src_pad_buffer_probe(pad,info,u_data):
    frame_number=0
    num_rects=0

    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer ")
        return

    # Retrieve batch metadata from the gst_buffer
    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
    # C address of gst_buffer as input, which is obtained with hash(gst_buffer)
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            # Note that l_frame.data needs a cast to pyds.NvDsFrameMeta
            # The casting is done by pyds.NvDsFrameMeta.cast()
            # The casting also keeps ownership of the underlying memory
            # in the C code, so the Python garbage collector will leave
            # it alone.
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        #Intiallizing object counter with 0.
        obj_counter = {
            PGIE_CLASS_ID_PERSON:0,
            PGIE_CLASS_ID_BICYCLE:0,
            PGIE_CLASS_ID_CAR:0,
        }
        frame_number=frame_meta.frame_num
        num_rects = frame_meta.num_obj_meta
        l_obj=frame_meta.obj_meta_list
        save_image = False
        while l_obj is not None:
            try:
                # Casting l_obj.data to pyds.NvDsObjectMeta
                obj_meta=pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break
            if obj_meta.class_id in obj_counter:
                obj_counter[obj_meta.class_id] += 1
            left = obj_meta.detector_bbox_info.org_bbox_coords.left
            top = obj_meta.detector_bbox_info.org_bbox_coords.top
            width = obj_meta.detector_bbox_info.org_bbox_coords.width
            height = obj_meta.detector_bbox_info.org_bbox_coords.height
            print(f"bbox left: {left}, top: {top}, width: {width}, height: {height}")

            if frame_number % 300 == 0:
                # Getting Image data using nvbufsurface
                # the input should be address of buffer and batch_id
                n_frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
                frame_copy = np.array(n_frame, copy=True, order='C')
                # convert the array into cv2 default color format
                frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_RGBA2BGRA)
                if platform_info.is_integrated_gpu():
                    # If Jetson, since the buffer is mapped to CPU for retrieval, it must also be unmapped
                    pyds.unmap_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id) # The unmap call should be made after operations with the original array are complete.
                                                                                       # The original array cannot be accessed after this call.
                save_image = True

            obj_meta.rect_params.border_color.set(0.0, 0.0, 1.0, 0.8) #0.8 is alpha (opacity)
            try:
                l_obj=l_obj.next
            except StopIteration:
                break

        # Acquiring a display meta object. The memory ownership remains in
        # the C code so downstream plugins can still access it. Otherwise
        # the garbage collector will claim it when this probe function exits.
        display_meta=pyds.nvds_acquire_display_meta_from_pool(batch_meta)
        display_meta.num_labels = 1
        py_nvosd_text_params = display_meta.text_params[0]
        # Setting display text to be shown on screen
        # Note that the pyds module allocates a buffer for the string, and the
        # memory will not be claimed by the garbage collector.
        # Reading the display_text field here will return the C address of the
        # allocated string. Use pyds.get_string() to get the string content.
        py_nvosd_text_params.display_text = "Frame Number={} Number of Objects={} Vehicle_count={} Person_count={}".format(frame_number, num_rects, obj_counter[PGIE_CLASS_ID_CAR], obj_counter[PGIE_CLASS_ID_PERSON])

        # Now set the offsets where the string should appear
        py_nvosd_text_params.x_offset = 10
        py_nvosd_text_params.y_offset = 12

        # Font , font-color and font-size
        py_nvosd_text_params.font_params.font_name = "Serif"
        py_nvosd_text_params.font_params.font_size = 10
        # set(red, green, blue, alpha); set to White
        py_nvosd_text_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)

        # Text background color
        py_nvosd_text_params.set_bg_clr = 1
        # set(red, green, blue, alpha); set to Black
        py_nvosd_text_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)
        # Using pyds.get_string() to get display_text as string
        print(pyds.get_string(py_nvosd_text_params.display_text))
        pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)

        if save_image:
            img_path = f"{folder_name}/frame_{frame_number}.jpg"
            print(f"Saving frame {frame_number} to {img_path}")
            cv2.imwrite(img_path, frame_copy)
        try:
            l_frame=l_frame.next
        except StopIteration:
            break
    return Gst.PadProbeReturn.OK

def main(args):
    # Check input arguments
    if len(args) < 2:
        sys.stderr.write("usage: %s <media file or uri> <output folder>\n" % args[0])
        sys.exit(1)

    global folder_name
    folder_name = args[-1]
    if path.exists(folder_name):
        sys.stderr.write("The output folder %s already exists. Please remove it first.\n" % folder_name)
        sys.exit(1)

    os.mkdir(folder_name)
    print("Frames will be saved in ", folder_name)

    global platform_info
    platform_info = PlatformInfo()
    # Standard GStreamer initialization
    Gst.init(None)

    # Create gstreamer elements
    # Create Pipeline element that will form a connection of other elements
    print("Creating Pipeline \n ")
    pipeline = Gst.Pipeline()

    if not pipeline:
        sys.stderr.write(" Unable to create Pipeline \n")

    # Source element for reading from the file
    print("Creating Source \n ")
    source = Gst.ElementFactory.make("filesrc", "file-source")
    if not source:
        sys.stderr.write(" Unable to create Source \n")

    # Since the data format in the input file is elementary h264 stream,
    # we need a h264parser
    print("Creating H264Parser \n")
    h264parser = Gst.ElementFactory.make("h264parse", "h264-parser")
    if not h264parser:
        sys.stderr.write(" Unable to create h264 parser \n")

    # Use nvdec_h264 for hardware accelerated decode on GPU
    print("Creating Decoder \n")
    decoder = Gst.ElementFactory.make("nvv4l2decoder", "nvv4l2-decoder")
    if not decoder:
        sys.stderr.write(" Unable to create Nvv4l2 Decoder \n")

    # Create nvstreammux instance to form batches from one or more sources.
    streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
    if not streammux:
        sys.stderr.write(" Unable to create NvStreamMux \n")

    # Use nvinfer to run inferencing on decoder's output,
    # behaviour of inferencing is set through config file
    pgie = Gst.ElementFactory.make("nvinfer", "primary-inference")
    if not pgie:
        sys.stderr.write(" Unable to create pgie \n")

    tracker = Gst.ElementFactory.make("nvtracker", "tracker")
    if not tracker:
        sys.stderr.write(" Unable to create tracker \n")

    sgie1 = Gst.ElementFactory.make("nvinfer", "secondary1-nvinference-engine")
    if not sgie1:
        sys.stderr.write(" Unable to make sgie1 \n")

    # Use convertor to convert from NV12 to RGBA as required by nvosd
    nvvidconv = Gst.ElementFactory.make("nvvideoconvert", "convertor")
    if not nvvidconv:
        sys.stderr.write(" Unable to create nvvidconv \n")

    print("Creating filter1 \n ")
    caps1 = Gst.Caps.from_string("video/x-raw(memory:NVMM), format=RGBA")
    filter1 = Gst.ElementFactory.make("capsfilter", "filter1")
    if not filter1:
        sys.stderr.write(" Unable to get the caps filter1 \n")
    filter1.set_property("caps", caps1)

    # Create OSD to draw on the converted RGBA buffer
    nvosd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")
    if not nvosd:
        sys.stderr.write(" Unable to create nvosd \n")

    # Finally render the osd output
    if platform_info.is_integrated_gpu():
        print("Creating nv3dsink \n")
        sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
        if not sink:
            sys.stderr.write(" Unable to create nv3dsink \n")
    else:
        if platform_info.is_platform_aarch64():
            print("Creating nv3dsink \n")
            sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
        else:
            print("Creating EGLSink \n")
            sink = Gst.ElementFactory.make("nveglglessink", "nvvideo-renderer")
            sink.set_property('sync', True)
        if not sink:
            sys.stderr.write(" Unable to create egl sink \n")

    print("Playing file %s " %args[1])
    source.set_property('location', args[1])
    if os.environ.get('USE_NEW_NVSTREAMMUX') != 'yes': # Only set these properties if not using new gst-nvstreammux
        streammux.set_property('width', 1920)
        streammux.set_property('height', 1080)
        streammux.set_property('batched-push-timeout', MUXER_BATCH_TIMEOUT_USEC)

    streammux.set_property('batch-size', 1)
    pgie.set_property('config-file-path', "config_pgie_yolov9_det.txt")
    sgie1.set_property('config-file-path', "dstest2_sgie1_config.txt")

    # Set properties of tracker
    config = configparser.ConfigParser()
    config.read('dstest2_tracker_config.txt')
    config.sections()

    for key in config['tracker']:
        if key == 'tracker-width' :
            tracker_width = config.getint('tracker', key)
            tracker.set_property('tracker-width', tracker_width)
        if key == 'tracker-height' :
            tracker_height = config.getint('tracker', key)
            tracker.set_property('tracker-height', tracker_height)
        if key == 'gpu-id' :
            tracker_gpu_id = config.getint('tracker', key)
            tracker.set_property('gpu_id', tracker_gpu_id)
        if key == 'll-lib-file' :
            tracker_ll_lib_file = config.get('tracker', key)
            tracker.set_property('ll-lib-file', tracker_ll_lib_file)
        if key == 'll-config-file' :
            tracker_ll_config_file = config.get('tracker', key)
            tracker.set_property('ll-config-file', tracker_ll_config_file)

    if not platform_info.is_integrated_gpu():
        # Use CUDA unified memory in the pipeline so frames
        # can be easily accessed on CPU in Python.
        mem_type = int(pyds.NVBUF_MEM_CUDA_UNIFIED)
        streammux.set_property("nvbuf-memory-type", mem_type)
        nvvidconv.set_property("nvbuf-memory-type", mem_type)

    print("Adding elements to Pipeline \n")
    pipeline.add(source)
    pipeline.add(h264parser)
    pipeline.add(decoder)
    pipeline.add(streammux)
    pipeline.add(pgie)
    pipeline.add(tracker)
    pipeline.add(sgie1)
    pipeline.add(nvvidconv)
    pipeline.add(filter1)
    pipeline.add(nvosd)
    pipeline.add(sink)

    # we link the elements together
    # file-source -> h264-parser -> nvh264-decoder ->
    # nvinfer -> nvvidconv -> nvosd -> video-renderer
    print("Linking elements in the Pipeline \n")
    source.link(h264parser)
    h264parser.link(decoder)

    sinkpad = streammux.request_pad_simple("sink_0")
    if not sinkpad:
        sys.stderr.write(" Unable to get the sink pad of streammux \n")
    srcpad = decoder.get_static_pad("src")
    if not srcpad:
        sys.stderr.write(" Unable to get source pad of decoder \n")
    srcpad.link(sinkpad)
    streammux.link(pgie)
    pgie.link(tracker)
    tracker.link(sgie1)
    sgie1.link(nvvidconv)
    nvvidconv.link(filter1)
    filter1.link(nvosd)
    nvosd.link(sink)

    # create an event loop and feed gstreamer bus mesages to it
    loop = GLib.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect ("message", bus_call, loop)

    # Lets add probe to get informed of the meta data generated, we add probe to
    # the sink pad of the osd element, since by that time, the buffer would have
    # had got all the metadata.
    osdsrcpad = nvosd.get_static_pad("src")
    if not osdsrcpad:
        sys.stderr.write(" Unable to get src pad of nvosd \n")

    osdsrcpad.add_probe(Gst.PadProbeType.BUFFER, osd_src_pad_buffer_probe, 0)

    # start play back and listen to events
    print("Starting pipeline \n")
    pipeline.set_state(Gst.State.PLAYING)

    try:
        loop.run()
    except:
        pass
    # cleanup
    pipeline.set_state(Gst.State.NULL)

if __name__ == '__main__':
    sys.exit(main(sys.argv))

config_pgie_yolov9_det.txt (661 Bytes)
dstest2_sgie1_config.txt (449 Bytes)
