PeopleNet Performance in DeepStream Pipeline

Please provide complete information as applicable to your setup.

• Hardware Platform: Jetson Xavier NX 8GB
• DeepStream Version: 6.2
• JetPack Version: 5.1.1
• TensorRT Version: 8.5.2.2
• Issue Type: Performance

Hi, I’m using the resnet34_peoplenet_int8 model in a simple Python pipeline with two cameras. I noticed (using jtop) that my GPU is pretty much maxed out with only two cameras — what could be causing this? Below are my pipeline and config…

Thanks in advance :)

Config…

[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
tlt-model-key=tlt_encode
tlt-encoded-model=Models/PeopleNet/resnet34_peoplenet_int8.etlt
labelfile-path=Models/PeopleNet/labels_peoplenet.txt
model-engine-file=Models/PeopleNet/resnet34_peoplenet_int8.etlt_b2_gpu0_int8.engine
int8-calib-file=Models/PeopleNet/resnet34_peoplenet_int8.txt
input-dims=3;544;960;0
uff-input-blob-name=input_1
batch-size=2
process-mode=1
model-color-format=0
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
num-detected-classes=3
cluster-mode=1
interval=0
gie-unique-id=1
output-blob-names=output_bbox/BiasAdd;output_cov/Sigmoid

[class-attrs-all]
pre-cluster-threshold=0.4
## Set eps=0.7 and minBoxes for cluster-mode=1(DBSCAN)
eps=0.7
minBoxes=1

# disable bag detection
[class-attrs-1]
pre-cluster-threshold=1.0

# disable face detection
[class-attrs-2]
pre-cluster-threshold=1.0

[class-attrs-all]
pre-cluster-threshold=0.5
topk=20
nms-iou-threshold=0.5

Python code…

import queue
import sys
import math
import gi 
gi.require_version('Gst', '1.0')

from gi.repository import GObject, Gst
from gi.repository import GLib
from nvidia_source_functions import create_source_bin
from ctypes import *

from bindings.build import pyds 

# Tiled display output resolution.
OUTPUT_WIDTH = 1280
OUTPUT_HEIGHT = 720

# RTSP URIs for the two cameras (credentials/addresses are placeholders).
STREAM_0 = 'rtsp://username:password@192.168.88.207:554/cam/realmonitor?channel=1&subtype=0'
STREAM_1 = 'rtsp://username:password@192.168.88.208:554/cam/realmonitor?channel=1&subtype=0'


def createNewPipeline():
    """Create and return an empty GStreamer pipeline.

    Returns:
        Gst.Pipeline: the newly created pipeline.

    Raises:
        RuntimeError: if the pipeline element could not be created.
    """
    print("Creating Pipeline \n ")
    pipeline = Gst.Pipeline()

    if not pipeline:
        # Fail fast: the original fell through and returned None, which
        # only surfaced later as an opaque AttributeError on pipeline.add().
        sys.stderr.write(" Unable to create Pipeline \n")
        raise RuntimeError("Unable to create Gst.Pipeline")

    return pipeline

def createStreamMux(number_sources):
    """Create an nvstreammux configured for `number_sources` live RTSP inputs.

    Args:
        number_sources: number of input streams; also used as the mux batch size.

    Returns:
        the configured nvstreammux element.
    """
    streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
    streammux.set_property('live-source', 1)  # RTSP sources are live
    # Reuse the module-level output-resolution constants instead of
    # duplicating the hard-coded 1280x720 values (same effective values).
    streammux.set_property('width', OUTPUT_WIDTH)
    streammux.set_property('height', OUTPUT_HEIGHT)
    streammux.set_property('batch-size', number_sources)
    streammux.set_property('batched-push-timeout', 4000000)  # 4 s, in microseconds

    return streammux

def addSourceToPipelineAndLinkToSteamMux(pipeline, streammux, STREAM, sinkPadName, index):
    """Add a source bin for `STREAM` to `pipeline` and link it to the mux.

    Args:
        pipeline: the Gst.Pipeline to add the source bin to.
        streammux: the nvstreammux whose request pad is linked.
        STREAM: RTSP URI of the camera.
        sinkPadName: name of the mux request pad (e.g. 'sink_0').
        index: zero-based source index passed to create_source_bin.
    """
    source_bin = create_source_bin(index, STREAM)
    if not source_bin:
        sys.stderr.write("Unable to create source bin for stream %d\n" % index)
        return
    pipeline.add(source_bin)

    sinkpad = streammux.get_request_pad(sinkPadName)
    srcpad = source_bin.get_static_pad("src")
    # pad.link() returns Gst.PadLinkReturn; anything other than OK means
    # this camera would silently never feed the muxer.
    if srcpad.link(sinkpad) != Gst.PadLinkReturn.OK:
        sys.stderr.write("Failed to link source %d to %s\n" % (index, sinkPadName))

def createNVOSD():
    """Build the on-screen-display element that draws detection overlays."""
    osd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")
    # process-mode 0 selects CPU drawing; display-text 1 renders labels.
    for prop, value in (('process-mode', 0), ('display-text', 1)):
        osd.set_property(prop, value)
    return osd

def createPGIE():
    """Create the primary nvinfer element, configured from the on-disk
    'config_infer_primary.txt' (the PeopleNet config)."""
    primary_gie = Gst.ElementFactory.make("nvinfer", "primary-inference")
    primary_gie.set_property('config-file-path', 'config_infer_primary.txt')
    return primary_gie

def createTiler(number_sources=2):
    """Create an nvmultistreamtiler laying out `number_sources` streams.

    The grid is computed from the stream count; the default of 2 yields the
    same 1x2 layout that was previously hard-coded, so existing callers
    (createTiler() with no arguments) are unaffected.

    Args:
        number_sources: number of streams to tile (default 2).

    Returns:
        the configured nvmultistreamtiler element.
    """
    tiler = Gst.ElementFactory.make("nvmultistreamtiler", "nvtiler")
    tiler_rows = int(math.sqrt(number_sources))
    tiler_columns = int(math.ceil((1.0 * number_sources) / tiler_rows))
    tiler.set_property("rows", tiler_rows)
    tiler.set_property("columns", tiler_columns)
    tiler.set_property("width", OUTPUT_WIDTH)
    tiler.set_property("height", OUTPUT_HEIGHT)

    return tiler

def main(args):
    """Build and run the two-camera PeopleNet pipeline until stopped.

    Pipeline: sources -> streammux -> pgie -> tiler -> nvvideoconvert
              -> nvdsosd -> nvegltransform -> nveglglessink,
    with a decoupling queue between every pair of stages.

    Args:
        args: command-line arguments (currently unused).
    """
    number_sources = 2

    # GObject.threads_init() has been deprecated/a no-op since PyGObject 3.11;
    # Gst.init() alone is sufficient.
    Gst.init(None)

    pipeline = createNewPipeline()
    streammux = createStreamMux(number_sources)
    pipeline.add(streammux)

    addSourceToPipelineAndLinkToSteamMux(pipeline, streammux, STREAM_0, 'sink_0', 0)
    addSourceToPipelineAndLinkToSteamMux(pipeline, streammux, STREAM_1, 'sink_1', 1)

    # Decoupling queues, one between each pair of processing stages.
    queues = []
    for i in range(1, 6):
        q = Gst.ElementFactory.make("queue", "queue%d" % i)
        pipeline.add(q)
        queues.append(q)

    pgie = createPGIE()
    tiler = createTiler()
    nvvidconv = Gst.ElementFactory.make("nvvideoconvert", "convertor")
    nvosd = createNVOSD()
    transform = Gst.ElementFactory.make("nvegltransform", "nvegl-transform")
    sink = Gst.ElementFactory.make("nveglglessink", "nvvideo-renderer")

    sink.set_property("qos", 0)

    for element in (pgie, tiler, nvvidconv, nvosd, transform, sink):
        pipeline.add(element)

    # Link the full chain in order, interleaving the queues.
    chain = [streammux, queues[0], pgie, queues[1], tiler, queues[2],
             nvvidconv, queues[3], nvosd, queues[4], transform, sink]
    for upstream, downstream in zip(chain, chain[1:]):
        upstream.link(downstream)

    loop = GLib.MainLoop()

    # Watch the bus so an error or EOS stops the loop instead of the
    # pipeline hanging forever with no indication of what went wrong.
    bus = pipeline.get_bus()
    bus.add_signal_watch()

    def on_message(bus, message):
        # Quit on fatal conditions only; all other messages are ignored.
        if message.type == Gst.MessageType.ERROR:
            err, dbg = message.parse_error()
            sys.stderr.write("Error: %s\n%s\n" % (err, dbg or ""))
            loop.quit()
        elif message.type == Gst.MessageType.EOS:
            loop.quit()

    bus.connect("message", on_message)

    pipeline.set_state(Gst.State.PLAYING)

    try:
        loop.run()
    except KeyboardInterrupt:
        # Only swallow Ctrl-C; the original bare `except: pass` hid every
        # exception, making failures impossible to diagnose.
        pass

    # cleanup
    print("Exiting app\n")
    pipeline.set_state(Gst.State.NULL)


if __name__ == '__main__':
    # Script entry point; main() returns None, so the process exits with 0.
    sys.exit(main(sys.argv))

What you should focus on is whether real-time performance is actually affected — try adding another camera; I expect the real-time behavior will not change. I think this is just an issue with how jtop reports utilization.
With the same configuration running under Windows I can see the GPU occupancy clearly, but with jtop or nvtop under Linux, whenever inference is running the reported GPU occupancy is 100% — yet it does not affect my detection.

@autodrive2022 — thanks for the insight. However, I don’t have the same issue when running ResNet10. Also, from pure observation I can see that the Jetson is working hard (hot-surface warning, and the fan continually spinning), which does not happen with ResNet10…

Sorry for the late reply. Is this still a DeepStream issue to support? Thanks.
As you know, PeopleNet is based on ResNet34 (see the reference), which is more complex than ResNet10. After testing, I also get a similar result on Xavier. Here are some suggestions:

  1. please refer to this doc to get high performance.
  2. modify nvpmodel to get high performance. please refer to optimization
  3. modify Interval of nvinfer plugin to reduce inference times. please find interval in nvinfer.
1 Like

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.