Nvmultistreamtiler - Segmentation fault

We’ve built a DeepStream 5 pipeline on DGPU and facing issues when the nvmultistreamtiler is added behind a nvstreamdemux and nvstreammux combination.

Fig. 1 represents the pipeline we want to build. To be as flexible as possible we would like to have multiple OSDs and further plugins for each source. Therefore, we plan to use a demuxer and muxer to feed the muxed stream into the nvmultistreamtiler to get a tiled display.

The issue is, putting the nvmultistreamtiler behind the demuxer - muxer combination leads to a segmentation fault.

Fig. 2 shows the simple approach to reproduce the issue by just having 1 source / 1 batch and the demuxer directly connected to the muxer via request pads. As soon as the nvmultistreamtiler is added to the pipeline we’re getting the segmentation fault. Without the tiler everything works fine!

A deeper analysis seems to show that the nvstreamdemux messes around with the meta data.

Nevertheless, below the full backtrace of the core dump:

(gdb) bt full
#0  0x0000000200000000 in  ()
#1  0x00007fe875daf670 in copy_user_meta () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#2  0x00007fe875daf594 in copy_user_meta_list () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#3  0x00007fe875daf4e0 in nvds_copy_frame_user_meta_list () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#4  0x00007fe875daea33 in nvds_copy_frame_meta () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#5  0x00007fe860dd6879 in NvTiler::CacheCanvasCopyFrameMetaWithoutSourceIdDuplication(_GList*) ()
    at /usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_multistreamtiler.so
#6  0x00007fe860dd610e in NvTiler::SyncBufferMeta(_NvDsBatchMeta*) () at /usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_multistreamtiler.so
#7  0x00007fe860dd56f3 in NvTiler::Composite(NvBufSurface*, _NvDsBatchMeta*, NvBufSurface*, NvBufSurface*, unsigned int) ()
    at /usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_multistreamtiler.so
#8  0x00007fe860ddbe21 in gst_nvmultistreamtiler_transform(_GstBaseTransform*, _GstBuffer*, _GstBuffer*) ()
    at /usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_multistreamtiler.so
#9  0x00007fe87431a401 in  () at /usr/lib/x86_64-linux-gnu/libgstbase-1.0.so.0
#10 0x00007fe874319b84 in  () at /usr/lib/x86_64-linux-gnu/libgstbase-1.0.so.0
#11 0x00007fe87679888b in  () at /usr/lib/x86_64-linux-gnu/libgstreamer-1.0.so.0
#12 0x00007fe8767a0bb3 in gst_pad_push () at /usr/lib/x86_64-linux-gnu/libgstreamer-1.0.so.0
#13 0x00007fe87409b837 in gst_nvstreammux_src_push_loop () at /usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_multistream.so
#14 0x00007fe8767cd269 in  () at /usr/lib/x86_64-linux-gnu/libgstreamer-1.0.so.0
#15 0x00007fe87622bb40 in  () at /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#16 0x00007fe87622b175 in  () at /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#17 0x00007fe874bee6db in start_thread (arg=0x7fe7b4d1d700) at pthread_create.c:463
        pd = 0x7fe7b4d1d700
        now = <optimized out>
        unwind_buf = 
              {cancel_jmp_buf = {{jmp_buf = {140633147823872, 7321058878155544848, 140633147822016, 0, 140633268306240, 140633494758416, -7325445774422157040, -7329246383210604272}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = <optimized out>
#18 0x00007fe875537a3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) Quit

To make it easier for the NVIDIA developer team I’ve just added a couple of lines to the deepstream_test1_app.c by inserting the directly connected nvstreamdemux and nvstreammux right in front of nvvidconv. The lines 285 - 297 of the original sample app are replaced.
Issue:
Just 8 video frames pass through (X86 DGPU) before the application creates a core dump.

I would really appreciate an answer from the DeepStream 5 developer team.

Please find the gdb backtrace full below:

(gdb) bt full
#0  0x0000000600000000 in  ()
#1  0x00007f135e8dd670 in copy_user_meta () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#2  0x00007f135e8dd594 in copy_user_meta_list () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#3  0x00007f135e8dd4e0 in nvds_copy_frame_user_meta_list () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#4  0x00007f135e8dca33 in nvds_copy_frame_meta () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#5  0x00007f135e8dc818 in nvds_copy_frame_meta_list () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#6  0x00007f135e8dc79b in batch_meta_copy () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#7  0x00007f135e8dc36a in nvds_batch_meta_copy_func () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_meta.so
#8  0x00007f135eae28bd in gst_nvds_meta_transform () at /opt/nvidia/deepstream/deepstream-5.0/lib/libnvdsgst_meta.so
#9  0x00007f135f28e807 in gst_buffer_copy_into () at /usr/lib/x86_64-linux-gnu/libgstreamer-1.0.so.0
#10 0x00007f134c1616c3 in gst_nvvideoconvert_transform () at /usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libgstnvvideoconvert.so
#11 0x00007f135d00e401 in  () at /usr/lib/x86_64-linux-gnu/libgstbase-1.0.so.0
#12 0x00007f135d00db84 in  () at /usr/lib/x86_64-linux-gnu/libgstbase-1.0.so.0
#13 0x00007f135f2c688b in  () at /usr/lib/x86_64-linux-gnu/libgstreamer-1.0.so.0
#14 0x00007f135f2cebb3 in gst_pad_push () at /usr/lib/x86_64-linux-gnu/libgstreamer-1.0.so.0
#15 0x00007f135a4f9837 in gst_nvstreammux_src_push_loop () at /usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_multistream.so
#16 0x00007f135f2fb269 in  () at /usr/lib/x86_64-linux-gnu/libgstreamer-1.0.so.0
#17 0x00007f135ed59b40 in  () at /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#18 0x00007f135ed59175 in  () at /usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#19 0x00007f135d9226db in start_thread (arg=0x7f12ad5fa700) at pthread_create.c:463
        pd = 0x7f12ad5fa700
        now = <optimized out>
        unwind_buf = 
              {cancel_jmp_buf = {{jmp_buf = {139718194865920, 1134042439184500908, 139718194864064, 0, 139718440265504, 139718591231184, -1037596855802836820, -1037421436172810068}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = <optimized out>
#20 0x00007f135e609a3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

The following code has been inserted by replacing the lines 285 - 297:

// *************************************************
  // TEST: Add Demuxing + Muxing
  // ./deepstream-test1-app '/opt/nvidia/deepstream/deepstream-5.0/samples/streams/sample_720p.h264'
  GstElement *streamdemuxer_t = NULL, *streammuxer_t = NULL;
  GstPad *srcpad_0=NULL, *sinkpad_0=NULL;
  streamdemuxer_t = gst_element_factory_make ("nvstreamdemux", "stream_demuxer_t");
  streammuxer_t = gst_element_factory_make ("nvstreammux", "stream_muxer_t");
  g_object_set (G_OBJECT (streammuxer_t), "batch-size", 1, "width", MUXER_OUTPUT_WIDTH, "height", MUXER_OUTPUT_HEIGHT, "batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);

  gst_bin_add_many (GST_BIN (pipeline), streamdemuxer_t, streammuxer_t, NULL);

  if (!gst_element_link_many (streammux, pgie, streamdemuxer_t, NULL)) {
    g_printerr ("Elements could not be linked: 2. Exiting.\n");
    return -1;
  }
  
  // Link the Demuxer and Muxer via request pads
  srcpad_0 = gst_element_get_request_pad(streamdemuxer_t, "src_0");
  sinkpad_0 = gst_element_get_request_pad(streammuxer_t, "sink_0");

  if (gst_pad_link(srcpad_0, sinkpad_0) != GST_PAD_LINK_OK) {
        g_printerr("Failed to link: srcpad_0 to sinkpad_0!\n");
	    return -1;
  }
  
  gst_object_unref (srcpad_0);
  gst_object_unref (sinkpad_0);
 
  if (!gst_element_link_many (streammuxer_t, nvvidconv, nvosd, sink, NULL)) {
    g_printerr ("Elements could not be linked: 2. Exiting.\n");
    return -1;
  }
  // ***************************************************

There is a known issue in the tiler that has been fixed in the GA release. Can you try with DS 5.0 GA?

The problem boils down to the combination of nvstreamdemux and nvstreammux as described in the 3rd post. Therefore, the current headline is somewhat misleading.

I repeated the test by using the DeepStream 5 GA version, but the issue persists.

The test has been done by taking the NVIDIA deepstream_test1_app.c and replacing the lines 285 - 297 with the code snippet from post 3.
By doing so, the demuxer is directly connected to the muxer and placed in front of the nvvidconv.
After 8 frames the application crashes with a core dump (pls. see gdb backtrace full in Post 3).

The whole pipeline runs with a batch size of 1 and therefore, the demuxer should just push the data of a single source towards the muxer.

Thanks.

Ok, does it work if the tiler is removed?

Would you mind sharing a diff file for the change?

The tiler is not necessary and is not causing the issue (pls. see post 5). In fact, it is not even present in your version of deepstream_test1_app.c, because we have only one batch of data going through the pipeline.

Please find below the diff between the adapted version (Demuxer + Muxer) and the original NVIDIA version.
Just the combination of the demuxer and muxer is causing the segmentation fault after 8 frames.

I’ve added the tiler in another version - just to be sure - but the issue is still there and most likely caused by the demuxer + muxer combination.

285,287c285,296
< #ifdef PLATFORM_TEGRA
<   if (!gst_element_link_many (streammux, pgie,
<       nvvidconv, nvosd, transform, sink, NULL)) {
---
> // *************************************************
>   // TEST: Add Demuxing + Muxing
>   // ./deepstream-test1-app '/opt/nvidia/deepstream/deepstream-5.0/samples/streams/sample_720p.h264'
>   GstElement *streamdemuxer_t = NULL, *streammuxer_t = NULL;
>   GstPad *srcpad_0=NULL, *sinkpad_0=NULL;
>   streamdemuxer_t = gst_element_factory_make ("nvstreamdemux", "stream_demuxer_t");
>   streammuxer_t = gst_element_factory_make ("nvstreammux", "stream_muxer_t");
>   g_object_set (G_OBJECT (streammuxer_t), "batch-size", 1, "width", MUXER_OUTPUT_WIDTH, "height", MUXER_OUTPUT_HEIGHT, "batched-push-time
out", MUXER_BATCH_TIMEOUT_USEC, NULL);
> 
>   gst_bin_add_many (GST_BIN (pipeline), streamdemuxer_t, streammuxer_t, NULL);
> 
>   if (!gst_element_link_many (streammux, pgie, streamdemuxer_t, NULL)) {
291,293c300,313
< #else
<   if (!gst_element_link_many (streammux, pgie,
<       nvvidconv, nvosd, sink, NULL)) {
---
>   
>   // Link the Demuxer and Muxer via request pads
>   srcpad_0 = gst_element_get_request_pad(streamdemuxer_t, "src_0");
>   sinkpad_0 = gst_element_get_request_pad(streammuxer_t, "sink_0");
> 
>   if (gst_pad_link(srcpad_0, sinkpad_0) != GST_PAD_LINK_OK) {
>         g_printerr("Failed to link: srcpad_0 to sinkpad_0!\n");
> 	    return -1;
>   }
>   
>   gst_object_unref (srcpad_0);
>   gst_object_unref (sinkpad_0);
>  
>   if (!gst_element_link_many (streammuxer_t, nvvidconv, nvosd, sink, NULL)) {
297c317
< #endif
---
>   // ***************************************************

Dear NVIDIA Developer Team,

any news on this?

To go a step further I also modified the NVIDIA deepstream-test3 app (pls. see diff file below).
This is a crude version just working for a fixed batch size of 2!

21a22,24
>  
>  // ./deepstream-test3-app file:///'/opt/nvidia/deepstream/deepstream-5.0/sources/apps/sample_apps/deepstream-test3/test.mp4'  file:///'/opt/nvidia/deepstream/deepstream-5.0/sources/apps/sample_apps/deepstream-test3/test.mp4'
>  
386a390
>   g_object_set (G_OBJECT (sink), "sync", 0, NULL);
448,453c452,486
< gst_bin_add_many (GST_BIN (pipeline), queue1, pgie, queue2, tiler, queue3,
<     nvvidconv, queue4, nvosd, queue5, sink, NULL);
<   /* we link the elements together
<    * nvstreammux -> nvinfer -> nvtiler -> nvvidconv -> nvosd -> video-renderer */
<   if (!gst_element_link_many (streammux, queue1, pgie, queue2, tiler, queue3,
<         nvvidconv, queue4, nvosd, queue5, sink, NULL)) {
---
> 
>   // ****************************************************************
>   // Add Demuxer + Muxer for 2 parallel Streams
>   GstElement *streamdemuxer_t = NULL, *streammuxer_t = NULL, *tiler_t = NULL;
>   GstPad *srcpad_0=NULL, *sinkpad_0=NULL, *srcpad_1=NULL, *sinkpad_1=NULL;
>   streamdemuxer_t = gst_element_factory_make ("nvstreamdemux", "stream_demuxer_t");
>   streammuxer_t = gst_element_factory_make ("nvstreammux", "stream_muxer_t");
>   g_object_set (G_OBJECT (streammuxer_t), "batch-size", 2, "width", MUXER_OUTPUT_WIDTH, "height", MUXER_OUTPUT_HEIGHT, "batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);
> 
>   // Link the Demuxer and Muxer via request pads
>   srcpad_0 = gst_element_get_request_pad(streamdemuxer_t, "src_0");
>   sinkpad_0 = gst_element_get_request_pad(streammuxer_t, "sink_0");
>   srcpad_1 = gst_element_get_request_pad(streamdemuxer_t, "src_1");
>   sinkpad_1 = gst_element_get_request_pad(streammuxer_t, "sink_1");
> 
>   if (gst_pad_link(srcpad_0, sinkpad_0) != GST_PAD_LINK_OK) {
>         g_printerr("Failed to link: srcpad_0 to sinkpad_0!\n");
> 	    return -1;
>   }
> 
>   if (gst_pad_link(srcpad_1, sinkpad_1) != GST_PAD_LINK_OK) {
>         g_printerr("Failed to link: srcpad_1 to sinkpad_1!\n");
> 	    return -1;
>   }
>   
>   gst_object_unref (srcpad_0);
>   gst_object_unref (sinkpad_0);
>   gst_object_unref (srcpad_1);
>   gst_object_unref (sinkpad_1);
>   
>   gst_bin_add_many (GST_BIN (pipeline), queue1, pgie, streamdemuxer_t, streammuxer_t, queue2, tiler, queue3, nvvidconv, queue4, nvosd, queue5, sink, NULL);
>  
>   // Direct Pipeline
>   /*
>   if (!gst_element_link_many (streammux, queue1, pgie, tiler, nvvidconv, nvosd, sink, NULL)) {
456a490,505
>   */
>   
>   // Modified Pipeline, including Demuxer + Muxer
>   if (!gst_element_link_many (streammux, pgie, streamdemuxer_t, NULL)) {
>     g_printerr ("Elements could not be linked. Exiting.\n");
>     return -1;
>   }
>   
>   if (!gst_element_link_many (streammuxer_t, tiler, nvvidconv, nvosd, sink, NULL)) {
>     g_printerr ("Elements could not be linked. Exiting.\n");
>     return -1;
>   }
> 
>  
>   // *************************************************************************
>   

Here we have again the problem, that the metadata probe on the sink of the demuxer delivers data whereas behind the muxer (src pad) no metadata can be extracted.
Furthermore, the sink does not get any data.

Dear NVIDIA-Dev Team,

What is the status of your investigation?
I’ve just re-installed TensorRT 7.0.0.11, CUDA 10.2 together with the latest graphics driver 450.66 as well as the latest DeepStream 5.0 GA version, but the problem persists.

As an additional information I can say that the demuxer - without the subsequent muxer - does not crash the pipeline.
That obviously only works with a batch size of 1.

I’ve adapted your DeepStream 5.0 test application deepstream-test1 application accordingly.

Please find the diff file below:

diff deepstream_test1_app.c_original deepstream_test1_app.c
292,293c292,328
<   if (!gst_element_link_many (streammux, pgie,
<       nvvidconv, nvosd, sink, NULL)) {
---
> 
>   // *************************************************
>   // TEST: Add Demuxing + Muxing
>   // ./deepstream-test1-app '/opt/nvidia/deepstream/deepstream-5.0/samples/streams/sample_720p.h264'
>   GstElement *streamdemuxer_t = NULL, *streammuxer_t = NULL;
>   GstPad *srcpad_0=NULL, *sinkpad_0=NULL;
>   streamdemuxer_t = gst_element_factory_make ("nvstreamdemux", "stream_demuxer_t");
>   streammuxer_t = gst_element_factory_make ("nvstreammux", "stream_muxer_t");
>   g_object_set (G_OBJECT (streammuxer_t), "batch-size", 1, "width", MUXER_OUTPUT_WIDTH, "height", MUXER_OUTPUT_HEIGHT, "batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);
> 
>   gst_bin_add_many (GST_BIN (pipeline), streamdemuxer_t, streammuxer_t, NULL);
>   
>   if (!gst_element_link_many (streammux, pgie, streamdemuxer_t, NULL)) {
>         g_printerr("Failed to link: streammux, pgie, streamdemuxer_t!\n");
> 	    return -1;
>   }
> 
>   // Link the Demuxer and Muxer via request pads
>   srcpad_0 = gst_element_get_request_pad(streamdemuxer_t, "src_0");
>   sinkpad_0 = gst_element_get_request_pad(streammuxer_t, "sink_0");  // Including the Muxer
>   // sinkpad_0 = gst_element_get_static_pad(nvvidconv, "sink");  // Without Muxer
> 
>   if (gst_pad_link(srcpad_0, sinkpad_0) != GST_PAD_LINK_OK) {
>         g_printerr("Failed to link: srcpad_0 to sinkpad_0!\n");
> 	    return -1;
>   }
>   
>   gst_object_unref (srcpad_0);
>   gst_object_unref (sinkpad_0);
>  
>   if (!gst_element_link_many (streammuxer_t, nvvidconv, nvosd, sink, NULL)) { // Including the Muxer
>   //if (!gst_element_link_many (nvvidconv, nvosd, sink, NULL)) {  // Without Muxer
>         g_printerr ("Failed to link: streammuxer_t, nvvidconv, nvosd, sink.\n");
>         return -1;
>   }
> 
>  /*
>   if (!gst_element_link_many (streammux, pgie, nvvidconv, nvosd, sink, NULL)) {
296a332
>   */