Hardware decoder deallocation?

Hello,
I have a use case where I need to run a pipeline over several hundred videos.
My pipeline looks something like:

uridecodebin -> nvstreammux -> nvinfer -> nvstreamdemux -> nvdsosd -> fakesink

The problem I am facing is that after about 50-100 video files, the process either segfaults or simply freezes.
When I attach a debugger, I see that (most likely) the decoder allocates a new thread every time and never deallocates it, so the number of threads keeps increasing until the process finally segfaults.
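
For reference, the thread count can be watched from inside the process itself; a minimal sketch, assuming Linux (it parses /proc/self/status):

def thread_count():
    # Read the 'Threads:' field from /proc/self/status (Linux only).
    with open('/proc/self/status') as f:
        for line in f:
            if line.startswith('Threads:'):
                return int(line.split()[1])

# Printing this after every file shows the count growing steadily.
print('threads:', thread_count())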

I have tried this on a Jetson Xavier (JetPack 4.6) as well as on a PC with an RTX 2080 Ti.
The behaviour on the two devices is quite different: on Jetson the pipeline keeps running, but the FPS drops with every file until it finally freezes, whereas on dGPU the FPS remains consistently high but the process segfaults after 50-100 files.

As a workaround, I am currently launching a fresh Docker container for each file. This works well, since Docker removes the container before I run the pipeline for the next file, but it is not an ideal solution.
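
A lighter-weight version of the same isolation idea is to run each file in a short-lived child process instead of a container. A minimal sketch, assuming a hypothetical process_one.py that runs the pipeline for a single file and exits:

import subprocess
import sys

# Each video gets a fresh interpreter, so the OS reclaims all
# threads and GPU allocations when the child exits.
for path in sys.argv[1:]:
    subprocess.run([sys.executable, 'process_one.py', path], check=True)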

• Hardware Platform (Jetson / GPU): GPU
• DeepStream Version: 6.0
• NVIDIA GPU Driver Version (valid for GPU only): 470
• Issue Type (questions, new requirements, bugs): Bugs?

Any help would be appreciated.
Thanks

I don’t understand how your pipeline runs for 50-100 videos.

I do not run one single pipeline for 50-100 videos.
I run one pipeline with one video, then I close the pipeline, build a new one, and run it for the next video, and so on.
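
Roughly, the per-video loop looks like this (a simplified sketch; build_pipeline stands in for my actual construction code):

for path in video_paths:
    pipeline, loop = build_pipeline(path)
    pipeline.set_state(Gst.State.PLAYING)
    loop.run()                          # quits on EOS or ERROR from the bus callback
    pipeline.set_state(Gst.State.NULL)  # tear down before the next video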

How did you do that? How do you know you have closed the pipeline correctly? How do you know the problem is caused by the DeepStream plugins?
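
One way to verify the shutdown is to wait until the state change has actually completed, for example:

ret = pipeline.set_state(Gst.State.NULL)
if ret == Gst.StateChangeReturn.FAILURE:
    sys.stderr.write("Failed to set pipeline to NULL\n")

# get_state() blocks until the pending state change finishes.
ret, state, pending = pipeline.get_state(Gst.CLOCK_TIME_NONE)
print("pipeline state after shutdown:", state)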

Hi
Apologies for the late reply.
I ran some tests and noticed that this happens only when there is a JPEG stream or image.

This is sample code to reproduce the result. It is a very basic pipeline (uridecodebin -> fakesink).
NOTE: when I run this code without the sleep(0.5), there is an immediate segfault. With the sleep it runs fine, but the GPU memory allocation keeps increasing and never goes down until I kill the program.
You can run this code with any JPEG image to reproduce the result.

import os
import sys
from time import sleep

sys.path.append('/opt/nvidia/deepstream/deepstream/lib')

import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst


def bus_call(bus, message, loop):
    t = message.type
    if t == Gst.MessageType.EOS:
        sys.stdout.write("End-of-stream\n")
        loop.quit()
    elif t == Gst.MessageType.WARNING:
        err, debug = message.parse_warning()
        sys.stderr.write("Warning: %s: %s\n" % (err, debug))
    elif t == Gst.MessageType.ERROR:
        err, debug = message.parse_error()
        sys.stderr.write("Error: %s: %s\n" % (err, debug))
        loop.quit()
    return True


def decodebin_child_added(child_proxy, Object, name, user_data):
    # Recurse into nested decodebins so their children are tracked too.
    if name.find("decodebin") != -1:
        Object.connect("child-added", decodebin_child_added, user_data)


def cb_newpad(decodebin, pad, sink):
    # Called when uridecodebin exposes a new pad; link video pads to the sink.
    caps = pad.get_current_caps()
    gststruct = caps.get_structure(0)
    gstname = gststruct.get_name()

    if gstname.find("video") != -1:
        sinkpad = sink.get_static_pad("sink")
        if pad.link(sinkpad) == Gst.PadLinkReturn.OK:
            print("Decodebin linked to pipeline")
        else:
            sys.stderr.write("Failed to link decodebin to pipeline\n")

def main(args):
    # Check input arguments
    if len(args) != 2:
        sys.stderr.write("usage: %s <media file or uri>\n" % args[0])
        sys.exit(1)

    GObject.threads_init()
    Gst.init(None)

    print("Creating Pipeline \n ")
    pipeline = Gst.Pipeline()

    source = Gst.ElementFactory.make("uridecodebin", 'src')
    source.set_property('uri', 'file://' + os.path.abspath(args[1]))

    sink = Gst.ElementFactory.make("fakesink", "sink")
    source.connect("pad-added", cb_newpad, sink)
    source.connect("child-added", decodebin_child_added, sink)


    pipeline.add(source)
    pipeline.add(sink)


    loop = GObject.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", bus_call, loop)

    print("Starting pipeline \n")
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except:
        pass
    pipeline.set_state(Gst.State.NULL)


if __name__ == '__main__':
    for i in range(1000):
        main(sys.argv)
        # No segfault when this sleep is present, but GPU memory still grows.
        sleep(0.5)
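
On dGPU, the memory growth described in the NOTE can be watched with a small helper alongside the loop; a sketch that simply polls nvidia-smi (assumed to be on PATH):

import subprocess
import time

# Print total used GPU memory once a second; the number keeps
# climbing while the loop above is running.
while True:
    used = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=memory.used',
         '--format=csv,noheader,nounits']).decode().strip()
    print('GPU memory used: %s MiB' % used)
    time.sleep(1)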

Thanks.

Hello,
I ran some more tests without using decoders. Apart from the result mentioned above, the following code shows the same behaviour.
NOTE: this code has no decoders, but it has a pgie, a tracker and an sgie.
I am not sure whether this is related to the same problem as above, but the GPU memory utilisation increases by about 10-15 MB for every video file.
For convenience, instead of using a file, I use a test video source and terminate the pipeline after 100 buffers. I repeat this 1000 times and can see the memory increasing over time.

I suspect I am missing something with respect to the deallocation of the GStreamer elements, because when I run the same pipeline 1000 times with the gst-launch-1.0 command, there is no leak. (After the code below I have sketched the kind of explicit cleanup I mean.)

import sys
from time import sleep

import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst


def bus_call(bus, message, loop):
    t = message.type
    if t == Gst.MessageType.EOS:
        loop.quit()
    elif t == Gst.MessageType.WARNING:
        err, debug = message.parse_warning()
        sys.stderr.write("Warning: %s: %s\n" % (err, debug))
    elif t == Gst.MessageType.ERROR:
        err, debug = message.parse_error()
        sys.stderr.write("Error: %s: %s\n" % (err, debug))
        loop.quit()
    return True


def main():

    pipeline = Gst.Pipeline()

    # videotestsrc ! ...
    source = Gst.ElementFactory.make("videotestsrc", "source")
    source.set_property('num-buffers', 100)

    # ... ! nvvideoconvert ! ...
    nv12_vid_conv = Gst.ElementFactory.make("nvvideoconvert", "video-convert-nv12")
    
    # ... ! video/x-raw(memory:NVMM), format=NV12 ! ...
    nv12caps = Gst.ElementFactory.make('capsfilter', 'nv12-caps')
    nv12caps.set_property('caps', Gst.caps_from_string('video/x-raw(memory:NVMM), format=NV12'))

    # ... ! nvstreammux ! ...
    streammux = Gst.ElementFactory.make("nvstreammux", "streammux")
    streammux.set_property('width', 2160)
    streammux.set_property('height', 2160)
    streammux.set_property('batch-size', 1)
    streammux.set_property('batched-push-timeout', 4000000)


    # ... ! nvinfer ! nvtracker ! nvinfer ! ...
    pgie = Gst.ElementFactory.make("nvinfer", "primary-inference")
    pgie.set_property('config-file-path', "models/pgie/pgie.txt")


    tracker = Gst.ElementFactory.make("nvtracker", "tracker")
    tracker.set_property('tracker-width', 512)
    tracker.set_property('tracker-height', 288)
    tracker.set_property('gpu_id', 0)
    tracker.set_property('ll-lib-file', "/opt/nvidia/deepstream/deepstream/lib/libnvds_nvmultiobjecttracker.so")
    tracker.set_property('ll-config-file', "dcf.yml")
    tracker.set_property('enable_batch_process', True)
    tracker.set_property('enable_past_frame', True)
    
    sgie = Gst.ElementFactory.make("nvinfer", "secondary1-nvinference-engine")
    sgie.set_property('config-file-path', "models/sgie/sgie.txt")

    # ... ! nvvideoconvert ! ...
    rgba_vid_conv = Gst.ElementFactory.make("nvvideoconvert", "video-convert-rgba")

    # ... ! video/x-raw(memory:NVMM), format=NV12 ! ...
    rgbacaps = Gst.ElementFactory.make('capsfilter', 'rgba-caps')
    rgbacaps.set_property('caps', Gst.caps_from_string('video/x-raw(memory:NVMM), format=RGBA'))

    # ... ! nvdsosd ! ...
    nvosd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")

    # ... ! fakesink
    sink = Gst.ElementFactory.make("fakesink", "sink")


    pipeline.add(source)
    pipeline.add(nv12_vid_conv)
    pipeline.add(nv12caps)
    pipeline.add(streammux)
    pipeline.add(pgie)
    pipeline.add(tracker)
    pipeline.add(sgie)
    pipeline.add(rgba_vid_conv)
    pipeline.add(rgbacaps)
    pipeline.add(nvosd)
    pipeline.add(sink)

    source.link(nv12_vid_conv)
    nv12_vid_conv.link(nv12caps)

    sinkpad = streammux.get_request_pad("sink_0")
    srcpad = nv12caps.get_static_pad("src")

    srcpad.link(sinkpad)
    streammux.link(pgie)
    pgie.link(tracker)
    tracker.link(sgie)
    sgie.link(rgba_vid_conv)
    rgba_vid_conv.link(rgbacaps)
    rgbacaps.link(nvosd)
    nvosd.link(sink)

    loop = GObject.MainLoop()

    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", bus_call, loop)

    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except:
        pass

    pipeline.set_state(Gst.State.NULL)

if __name__ == '__main__':
    Gst.init(None)

    for _ in range(1000):
        main()
        sleep(1)
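
By deallocation I mean explicit cleanup like the following at the end of main(). This is only a sketch of what I suspect might be missing, not something I have confirmed to fix the leak:

import gc

pipeline.set_state(Gst.State.NULL)
bus.remove_signal_watch()  # drop the bus watch added earlier
del bus
del pipeline
gc.collect()               # force Python to release the wrapper objects now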

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.