[REOPEN] Nvstreamdemux does not copy obj_meta parent structure to src pad

Hardware Platform Both

DeepStream Version 6.3

JetPack Version R35.4.1

TensorRT Version 8.6.1.6-1+cuda12.0

NVIDIA GPU Driver Version 535.154.05

Issue Type Bug

How to reproduce the issue ?

filesrc location=test.mp4 ! qtdemux ! nvv4l2decoder ! nvvideoconvert ! 
mux.sink_1 nvstreammux batch-size=1 name=mux ! 
nvinfer name=primary config-file-path=primary.txt ! 
nvtracker name=tracker tracker-width=1920 tracker-height=1088 ll-config-file=tracker.yaml ll-lib-file=/opt/nvidia/deepstream/deepstream/lib/libnvds_nvmultiobjecttracker.so !
nvinfer name=secondary config-file-path=secondary.txt !
nvstreamdemux name=demux demux.src_1 !
nvvideoconvert ! 
appsink

Requirement details
Frame metadata after nvstreamdemux has had parent/child relationships between NvDsObjectMeta unintentionally stripped away. It should still be possible to access this information after demuxing.

Since

and

and

and

were overzealously closed without a useful resolution, I thought I’d open another ticket to point out the root cause of the bug, and request it to actually be reviewed/fixed.

In DS5, nvstreamdemux preserved the relationships between child and parent NvDsObjectMeta*.

In DS6, nvstreamdemux calls nvds_copy_frame_meta for each frame in a batch, which calls nvds_copy_obj_meta_list, which has an implementation bug.

From disassembling and reverse engineering the closed source libnvds_meta.so, the faulty implementation is (roughly):

/*
 * Reconstructed single-pass implementation, shown to illustrate the bug.
 *
 * For every entry of src_obj_meta_list it acquires an object meta from the
 * pool of dst_frame_meta's batch, copies the source object into it, adds the
 * copy to dst_frame_meta with a NULL parent argument, and records the
 * source -> copy mapping in a hash table keyed by the source pointer.
 * The parent link of the copy is resolved inside the same loop iteration.
 */
void nvds_copy_obj_meta_list( NvDsObjectMetaList* src_obj_meta_list, NvDsFrameMeta* dst_frame_meta )
{
    NvDsBatchMeta* batch_meta;
    NvDsObjectMeta* src_object_meta;
    GHashTable* hash_table;
    NvDsObjectMeta* dst_object_meta;
    NvDsObjectMeta* parent;
    GList* l;

    batch_meta = ( dst_frame_meta->base_meta ).batch_meta;
    /* NULL hash/equal functions: keys are compared as raw pointers. */
    hash_table = g_hash_table_new( NULL, NULL );
    for ( l = src_obj_meta_list; l != NULL; l = l->next )
    {
        src_object_meta = ( NvDsObjectMeta* ) l->data;
        dst_object_meta = nvds_acquire_obj_meta_from_pool( batch_meta );
        nvds_copy_obj_meta( src_object_meta, dst_object_meta );
        nvds_add_obj_meta_to_frame( dst_frame_meta, dst_object_meta, NULL );
        g_hash_table_insert( hash_table, src_object_meta, dst_object_meta );
        if ( src_object_meta->parent != NULL )
        {
            /*
             * BUG: the table only contains objects visited on earlier
             * iterations. If the parent appears later in the list than this
             * child, the lookup returns NULL and the copy's parent link is
             * overwritten with NULL, losing the relationship.
             */
            parent = ( _NvDsObjectMeta* ) g_hash_table_lookup( hash_table, src_object_meta->parent );
            dst_object_meta->parent = parent;
        }
    }
    g_hash_table_unref( hash_table );
}
0000000000004a30 <nvds_copy_obj_meta_list@@Base>:
    4a30:	f3 0f 1e fa          	endbr64 
    4a34:	41 57                	push   %r15
    4a36:	41 56                	push   %r14
    4a38:	49 89 f6             	mov    %rsi,%r14
    4a3b:	41 55                	push   %r13
    4a3d:	41 54                	push   %r12
    4a3f:	55                   	push   %rbp
    4a40:	48 89 fd             	mov    %rdi,%rbp
    4a43:	31 ff                	xor    %edi,%edi
    4a45:	53                   	push   %rbx
    4a46:	48 83 ec 08          	sub    $0x8,%rsp
    4a4a:	4c 8b 3e             	mov    (%rsi),%r15
    4a4d:	31 f6                	xor    %esi,%esi
    4a4f:	e8 dc ed ff ff       	call   3830 <g_hash_table_new@plt>
    4a54:	49 89 c5             	mov    %rax,%r13
    4a57:	48 85 ed             	test   %rbp,%rbp
    4a5a:	74 58                	je     4ab4 <nvds_copy_obj_meta_list@@Base+0x84>
    4a5c:	0f 1f 40 00          	nopl   0x0(%rax)
    4a60:	4c 8b 65 00          	mov    0x0(%rbp),%r12
    4a64:	4c 89 ff             	mov    %r15,%rdi
    4a67:	e8 54 ee ff ff       	call   38c0 <nvds_acquire_obj_meta_from_pool@plt>
    4a6c:	48 89 c3             	mov    %rax,%rbx
    4a6f:	48 89 c6             	mov    %rax,%rsi
    4a72:	4c 89 e7             	mov    %r12,%rdi
    4a75:	e8 a6 ee ff ff       	call   3920 <nvds_copy_obj_meta@plt>
    4a7a:	31 d2                	xor    %edx,%edx
    4a7c:	48 89 de             	mov    %rbx,%rsi
    4a7f:	4c 89 f7             	mov    %r14,%rdi
    4a82:	e8 79 ed ff ff       	call   3800 <nvds_add_obj_meta_to_frame@plt>
    4a87:	4c 89 e6             	mov    %r12,%rsi
    4a8a:	48 89 da             	mov    %rbx,%rdx
    4a8d:	4c 89 ef             	mov    %r13,%rdi
    4a90:	e8 5b ef ff ff       	call   39f0 <g_hash_table_insert@plt>
    4a95:	49 8b 74 24 28       	mov    0x28(%r12),%rsi
    4a9a:	48 85 f6             	test   %rsi,%rsi
    4a9d:	74 31                	je     4ad0 <nvds_copy_obj_meta_list@@Base+0xa0>
    4a9f:	4c 89 ef             	mov    %r13,%rdi
    4aa2:	e8 99 ec ff ff       	call   3740 <g_hash_table_lookup@plt>
    4aa7:	48 8b 6d 08          	mov    0x8(%rbp),%rbp
    4aab:	48 89 43 28          	mov    %rax,0x28(%rbx)
    4aaf:	48 85 ed             	test   %rbp,%rbp
    4ab2:	75 ac                	jne    4a60 <nvds_copy_obj_meta_list@@Base+0x30>
    4ab4:	48 83 c4 08          	add    $0x8,%rsp
    4ab8:	4c 89 ef             	mov    %r13,%rdi
    4abb:	5b                   	pop    %rbx
    4abc:	5d                   	pop    %rbp
    4abd:	41 5c                	pop    %r12
    4abf:	41 5d                	pop    %r13
    4ac1:	41 5e                	pop    %r14
    4ac3:	41 5f                	pop    %r15
    4ac5:	e9 86 ef ff ff       	jmp    3a50 <g_hash_table_unref@plt>
    4aca:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
    4ad0:	48 8b 6d 08          	mov    0x8(%rbp),%rbp
    4ad4:	48 85 ed             	test   %rbp,%rbp
    4ad7:	75 87                	jne    4a60 <nvds_copy_obj_meta_list@@Base+0x30>
    4ad9:	eb d9                	jmp    4ab4 <nvds_copy_obj_meta_list@@Base+0x84>
    4adb:	0f 1f 44 00 00       	nopl   0x0(%rax,%rax,1)

This implementation can store a NULL parent if the corresponding dst_object_meta hasn’t been added to the hash table yet due to parents being after children in the linked list. To be resilient against this, there need to be 2 passes, one to build the hash table, and a second to attach parents.

This previously wasn’t a problem in DS 5 since primary GIE objects were always added before secondary GIE objects, and so the single-pass hashmap implementation worked.

However in DS 6, the nvds_add_obj_meta_to_frame (which calls nvds_add_meta_to_parent) now calls g_list_prepend if the 3rd parameter is NULL (i.e. no parent) as an optimisation to avoid always traversing the linked list (and also attempting to ensure parents are at the beginning of the linked list).

Somewhat ironically, the nvds_copy_obj_meta_list implementation always calls nvds_add_obj_meta_to_frame with a NULL parent, therefore always prepending (regardless of whether something has a parent) and therefore breaking the original ordering of the list. This happens wherever nvds_batch_meta_copy_func is called, which can be in lots of places. This means that repeated metadata copies will break the original list order, and therefore break the single-pass parent hashmap lookups.

For example, the below implementation works correctly (but still reorders the list due to the newly added prepend optimisation):

/*
 * Two-pass copy of an object-meta list into dst_frame_meta.
 *
 * Pass 1 clones every source object into an object meta acquired from the
 * pool of dst_frame_meta's batch, adds the clone to dst_frame_meta (with a
 * NULL parent argument) and records the source -> clone mapping.
 * Pass 2 rewires the parent of every clone whose source had a parent, using
 * that mapping. Because the map is complete before any parent is resolved,
 * the result does not depend on whether parents precede their children in
 * src_obj_meta_list.
 */
void nvds_copy_obj_meta_list( NvDsObjectMetaList* src_obj_meta_list, NvDsFrameMeta* dst_frame_meta )
{
    NvDsBatchMeta* batch_meta = dst_frame_meta->base_meta.batch_meta;
    /* Pointer-keyed map: source object -> its clone in the destination frame. */
    GHashTable* src_to_dst = g_hash_table_new( NULL, NULL );
    NvDsObjectMetaList* node;

    /* Pass 1: clone every object and remember where it went. */
    node = src_obj_meta_list;
    while ( node != NULL )
    {
        NvDsObjectMeta* original = ( NvDsObjectMeta* ) node->data;
        NvDsObjectMeta* clone = nvds_acquire_obj_meta_from_pool( batch_meta );

        nvds_copy_obj_meta( original, clone );
        nvds_add_obj_meta_to_frame( dst_frame_meta, clone, NULL );
        g_hash_table_insert( src_to_dst, original, clone );

        node = node->next;
    }

    /* Pass 2: translate each source parent pointer into the clone's parent. */
    node = src_obj_meta_list;
    while ( node != NULL )
    {
        NvDsObjectMeta* original = ( NvDsObjectMeta* ) node->data;

        node = node->next;
        if ( original->parent == NULL )
        {
            continue;
        }

        NvDsObjectMeta* clone = ( NvDsObjectMeta* ) g_hash_table_lookup( src_to_dst, original );
        clone->parent = ( NvDsObjectMeta* ) g_hash_table_lookup( src_to_dst, original->parent );
    }

    g_hash_table_unref( src_to_dst );
}

As a workaround, users can just define the working implementation of nvds_copy_obj_meta_list in their main binary and take advantage of ELF symbol interposition (this forces the libnvds_meta.so PLT relocations to point at the new version instead of the buggy one).

Another alternative is to build a small .so file with just the fixed implementation and use LD_PRELOAD to force the working version of nvds_copy_obj_meta_list to be loaded first.

One for the product team - releasing buggy utility libraries as closed source binaries is harmful. Bugs like this can be fixed sooner the more open source components there are.

2 Likes

Please provide complete information as applicable to your setup.

• Hardware Platform (Jetson / GPU)

• DeepStream Version

• JetPack Version (valid for Jetson only)

• TensorRT Version

• NVIDIA GPU Driver Version (valid for GPU only)

• Issue Type( questions, new requirements, bugs)

• How to reproduce the issue ? (This is for bugs. Including which sample app is using, the configuration files content, the command line used and other details for reproducing)

• Requirement details( This is for new requirement. Including the module name-for which plugin or for which sample application, the function description)

Edited

Thanks for sharing! Is the sgie a detection model? Can you reproduce this issue on DS6.4? You can use back-to-back-detectors to test. The sgie’s parent pointer is not NULL.

Hi,

Yes, sgie is a detection model, and this issue still exists in DS6.4 (since those functions I described above are unchanged between 6.3 and 6.4).

As I’m sure you noticed when reading the previous forum posts, they were all probing after nvstreamdemux, which doesn’t exist in the back-to-back-detectors example; that is why you can’t reproduce it with that sample. If you want, you can add an nvstreamdemux into that sample and reproduce the error by adding the probe after it (and the nvvideoconvert).

The bug occurs whenever there is more than 1 frame metadata copy - the first copy reverses the order of the object meta list due to the newly added prepend optimisation, and the second copy strips the parent information due to the hashmap not yet containing the parents due to the incorrect list order. Subsequent metadata copies reverse the list order again, but by this point it’s too late as the parent information has already been removed.

To make this even easier to demonstrate, I’ve included a stripped down example of back_to_back_detectors.c below.

If you comment out the line with "nvstreamdemux name=demux demux.src_0 ! " , the bug is hidden.

If you uncomment the correct implementation of nvds_copy_obj_meta_list, the bug is fixed.

Alternatively, you can instead uncomment the non-prepending implementation of nvds_add_meta_to_parent and the bug is also fixed (for a different reason).

#include <glib.h>
#include <gst/gst.h>
#include <stdio.h>
#include "gstnvdsmeta.h"

/*
#####################################################################
     Uncomment the correct implementation below to fix the bug
#####################################################################
*/

// void nvds_copy_obj_meta_list( NvDsObjectMetaList* src_obj_meta_list, NvDsFrameMeta* dst_frame_meta )
//{
//     NvDsObjectMeta* src_object_meta;
//     NvDsObjectMeta* dst_object_meta;
//     GHashTable* hash_table;
//
//     NvDsBatchMeta* batch_meta = ( dst_frame_meta->base_meta ).batch_meta;
//     hash_table = g_hash_table_new( NULL, NULL );
//     NvDsObjectMetaList* l;
//     for ( l = src_obj_meta_list; l != NULL; l = l->next )
//     {
//         src_object_meta = ( NvDsObjectMeta* ) l->data;
//         dst_object_meta = nvds_acquire_obj_meta_from_pool( batch_meta );
//         nvds_copy_obj_meta( src_object_meta, dst_object_meta );
//         nvds_add_obj_meta_to_frame( dst_frame_meta, dst_object_meta, NULL );
//         g_hash_table_insert( hash_table, src_object_meta, dst_object_meta );
//     }
//     for ( l = src_obj_meta_list; l != NULL; l = l->next )
//     {
//         src_object_meta = ( NvDsObjectMeta* ) l->data;
//         if ( src_object_meta->parent != NULL )
//         {
//             dst_object_meta = ( NvDsObjectMeta* ) g_hash_table_lookup( hash_table, src_object_meta );
//             dst_object_meta->parent = ( NvDsObjectMeta* ) g_hash_table_lookup( hash_table, src_object_meta->parent );
//         }
//     }
//     g_hash_table_unref( hash_table );
// }


/*
#####################################################################
      Alternatively, uncomment this non-prepending implementation 
      of nvds_add_meta_to_parent to fix the re-ordering bug
#####################################################################
*/

//GList* nvds_add_meta_to_parent( GList* list, NvDsBatchMeta** batch_meta, int prepend )
//{
//    nvds_acquire_meta_lock( *batch_meta );
//    list = g_list_append( list, batch_meta );
//    nvds_release_meta_lock( *batch_meta );
//    return list;
//}


/*
 * Buffer probe that reports, for every class-0 object produced by the
 * component with unique id 2 (the secondary nvinfer in the pipeline built by
 * main), whether its parent pointer survived.
 *
 * pad     - pad the probe is attached to (unused).
 * info    - probe info; info->data is treated as the GstBuffer.
 * u_data  - user data supplied when the probe was added (unused).
 *
 * Always returns GST_PAD_PROBE_OK so the buffer keeps flowing. Buffers that
 * carry no NvDsBatchMeta are passed through without inspection.
 */
static GstPadProbeReturn nvvidconv_sink_pad_buffer_probe( GstPad* pad, GstPadProbeInfo* info, gpointer u_data )
{
    GstBuffer* buf = ( GstBuffer* ) info->data;
    NvDsBatchMeta* batch_meta = gst_buffer_get_nvds_batch_meta( buf );
    NvDsMetaList* l_frame = NULL;
    NvDsMetaList* l_obj = NULL;

    ( void ) pad;
    ( void ) u_data;

    /* No DeepStream metadata attached to this buffer: nothing to check. */
    if ( batch_meta == NULL )
    {
        return GST_PAD_PROBE_OK;
    }

    for ( l_frame = batch_meta->frame_meta_list; l_frame != NULL; l_frame = l_frame->next )
    {
        NvDsFrameMeta* frame_meta = ( NvDsFrameMeta* ) ( l_frame->data );

        for ( l_obj = frame_meta->obj_meta_list; l_obj != NULL; l_obj = l_obj->next )
        {
            NvDsObjectMeta* obj_meta = ( NvDsObjectMeta* ) ( l_obj->data );

            /* Only class-0 objects emitted by component id 2 are of interest. */
            if ( obj_meta->unique_component_id != 2 || obj_meta->class_id != 0 )
            {
                continue;
            }

            if ( obj_meta->parent == NULL )
            {
                g_print( "WTF??? Got a face with a null parent - should be impossible\n" );
            }
            else
            {
                g_print( "Face found for parent object %p (class_id=%d) \n", obj_meta->parent,
                    obj_meta->parent->class_id );
            }
        }
    }

    return GST_PAD_PROBE_OK;
}

/*
 * Bus watch callback.
 *
 * Quits the GMainLoop passed in data on end-of-stream or on an error
 * message; for errors it first prints the originating element name, the
 * error message and, when present, the debug details. All other message
 * types are ignored. Always returns TRUE so the watch stays installed.
 */
static gboolean bus_call( GstBus* bus, GstMessage* msg, gpointer data )
{
    GMainLoop* main_loop = ( GMainLoop* ) data;
    const GstMessageType msg_type = GST_MESSAGE_TYPE( msg );

    if ( msg_type == GST_MESSAGE_EOS )
    {
        g_print( "End of stream\n" );
        g_main_loop_quit( main_loop );
    }
    else if ( msg_type == GST_MESSAGE_ERROR )
    {
        GError* err;
        gchar* details;

        gst_message_parse_error( msg, &err, &details );
        g_printerr( "ERROR from element %s: %s\n", GST_OBJECT_NAME( msg->src ), err->message );
        if ( details )
            g_printerr( "Error details: %s\n", details );

        /* Both out-values of gst_message_parse_error are released here. */
        g_free( details );
        g_error_free( err );
        g_main_loop_quit( main_loop );
    }

    return TRUE;
}

/* Prints the command-line synopsis via g_printerr; bin is the program name shown in it. */
static void usage( const char* bin )
{
    g_printerr( "Usage: %s <h264_elementary_stream>\n", bin );
}

int main( int argc, char* argv[] )
{
    GMainLoop* loop = NULL;
    GstElement *pipeline = NULL, *nvvidconv = NULL;
    GstBus* bus = NULL;
    guint bus_watch_id;
    GstPad* nvvidconv_sink_pad = NULL;
    gboolean is_nvinfer_server = FALSE;

    if ( argc < 2 )
    {
        usage( argv[0] );
        return -1;
    }

    gst_init( &argc, &argv );
    loop = g_main_loop_new( NULL, FALSE );
    const char* pipeline_fmt_str =
        "filesrc location=%s ! "
        "h264parse ! "
        "nvv4l2decoder ! "
        "mux.sink_0  nvstreammux name=mux width=1280 height=720 batch-size=1 batched-push-timeout=40000 ! "
        "nvinfer unique-id=1 process-mode=1 config-file-path=primary_detector_config.txt ! "
        "nvinfer unique-id=2 process-mode=2 config-file-path=secondary_detector_config.txt ! "
        "nvstreamdemux name=demux demux.src_0 ! " // Comment out this line to hide the bug
        "nvvideoconvert name=nvvidconv ! "
        "nvdsosd ! "
        "nveglglessink";
    size_t needed = snprintf( NULL, 0, pipeline_fmt_str, argv[1] ) + 1;
    char* buffer = g_malloc( needed );
    sprintf( buffer, pipeline_fmt_str, argv[1] );

    GError* error = NULL;
    pipeline = gst_parse_launch( buffer, &error );
    if ( error != NULL )
    {
        g_printerr( "Pipeline could not be created. Exiting.\n" );
        printf( "%s", error->message );
        return -1;
    }

    nvvidconv = gst_bin_get_by_name( GST_BIN( pipeline ), "nvvidconv" );

    if ( nvvidconv == NULL )
    {
        g_printerr( "Could not get nvvidconv. Exiting.\n" );
        return -1;
    }

    bus = gst_pipeline_get_bus( GST_PIPELINE( pipeline ) );
    bus_watch_id = gst_bus_add_watch( bus, bus_call, loop );
    gst_object_unref( bus );

    nvvidconv_sink_pad = gst_element_get_static_pad( nvvidconv, "sink" );
    if ( !nvvidconv_sink_pad )
        g_print( "Unable to get sink pad\n" );
    else
        gst_pad_add_probe( nvvidconv_sink_pad, GST_PAD_PROBE_TYPE_BUFFER, nvvidconv_sink_pad_buffer_probe, NULL, NULL );

    g_print( "Now playing: %s\n", argv[1] );
    gst_element_set_state( pipeline, GST_STATE_PLAYING );
    g_main_loop_run( loop );
    gst_element_set_state( pipeline, GST_STATE_NULL );
    gst_object_unref( GST_OBJECT( pipeline ) );
    g_source_remove( bus_watch_id );
    g_main_loop_unref( loop );
    return 0;
}

Also probably this

And

And

And

And

And

And

And

And

And

And

And

And

And

And