Secondary GIE (ONNX gaze model) never attaches tensor-meta (buffer always NULL)

• Hardware Platform (Jetson)
• DeepStream Version 7.1
• JetPack Version (6.2)
• Issue Type(Question)

Hi everyone,

I’m building a DeepStream Python pipeline based on the multi-input multi-output python sample with an integrated tracker. My goal is to run a ResNet34 gaze-estimation ONNX model (from the model repo) as a secondary inference on each face detected by a PeopleNet primary GIE, outputting pitch and yaw. The engine builds at runtime, but my pad probe never sees any tensor buffers (always buffer address = 0).

This is the structure of the pipeline

[ uridecodebin ]

[ nvstreammux ] ──► queue1 ──► pgie (PeopleNet, unique-id=1) ──► nvtracker ──► sgie (ResNet34 gaze, unique-id=2) ──► nvstreamdemux ──► [ display bins… ]

SGIE File

[property]
gpu-id=0
batch-size=1
network-type=1
network-mode=0
onnx-file=/opt/nvidia/deepstream/deepstream-7.1/models/gaze-estimation/resnet34_gaze.onnx

process-mode=2
gie-unique-id=2
operate-on-gie-id=1
operate-on-class-ids=2

output-tensor-meta=1

Here is my probe

def sgie_src_pad_buffer_probe(pad, info, u_data):
    """Pad probe on the SGIE src pad: decode gaze tensor meta into angles.

    Walks batch -> frame -> object metadata and, for every face object,
    reads the NVDSINFER_TENSOR_OUTPUT_META attached by the SGIE
    (config has output-tensor-meta=1).  Each output layer is a 90-bin
    distribution; the expected bin index times _BINWIDTH minus
    _ANGLE_OFFSET gives the angle in degrees (layer 0 = pitch,
    layer 1 = yaw), which is printed per object.

    Args:
        pad: the GstPad the probe is attached to (unused).
        info: GstPadProbeInfo carrying the buffer.
        u_data: opaque user data (unused).

    Returns:
        Gst.PadProbeReturn.OK so the buffer always continues downstream.
    """
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        return Gst.PadProbeReturn.OK

    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame:
        frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)

        l_obj = frame_meta.obj_meta_list
        while l_obj:
            obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)

            # Only faces carry gaze tensor meta (operate-on-class-ids=2).
            if obj_meta.class_id == PGIE_CLASS_ID_FACE:
                l_user = obj_meta.obj_user_meta_list
                while l_user:
                    user_meta = pyds.NvDsUserMeta.cast(l_user.data)
                    if user_meta.base_meta.meta_type == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META:
                        infer_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)

                        for layer_idx in range(infer_meta.num_output_layers):
                            # Use pyds.get_nvds_LayerInfo() instead of
                            # infer_meta.output_layers_info(idx): the latter
                            # returns layer info whose .buffer pointer is not
                            # populated (always NULL), which is exactly the
                            # "buffer address = 0" symptom.
                            layer_info = pyds.get_nvds_LayerInfo(infer_meta, layer_idx)

                            # Older bindings expose .dims, newer .inferDims.
                            dims_obj = getattr(layer_info, "inferDims", None) or layer_info.dims
                            shape = tuple(int(dims_obj.d[i]) for i in range(dims_obj.numDims))

                            addr = pyds.get_ptr(layer_info.buffer)
                            if addr == 0:
                                # Engine didn’t attach this tensor; skip layer.
                                continue

                            nelems = 1
                            for s in shape:
                                nelems *= s

                            # Zero-copy view over the buffer owned by nvinfer;
                            # consumed immediately, so no copy is needed.
                            float_array = (c_float * nelems).from_address(addr)
                            arr = np.ctypeslib.as_array(float_array).reshape(shape)

                            # Decode bins -> angle.  dims are (90,), so
                            # flatten rather than index arr[0] (which would
                            # yield a scalar for a 1-D tensor).
                            probs = arr.reshape(-1)
                            idxs = np.arange(len(probs), dtype=np.float32)
                            angle_degs = float(np.sum(probs * idxs) * _BINWIDTH - _ANGLE_OFFSET)
                            angle_rad = math.radians(angle_degs)

                            label = "pitch" if layer_idx == 0 else "yaw"
                            print(f"[SGIE] obj_id={obj_meta.object_id} → {label} = {angle_degs:.2f}° / {angle_rad:.3f} rad")

                    l_user = l_user.next

            l_obj = l_obj.next
        l_frame = l_frame.next

    return Gst.PadProbeReturn.OK

SGIE starts but my debug prints show:

  • num_output_layers=2
  • dims=(90,)
  • layer_info.buffer address = 0 for every face object

Here is some of the output

<pyds.NvDsInferLayerInfo object at 0xffff8ddc1130>
[DBG] dims_obj.numDims=1, dims=(90,)
[DBG] layer_info.buffer address = 0
[DBG]   obj_id=6 class=2 has 0 user_meta entries
[DBG]   obj_id=0 class=2 has 0 user_meta entries
[DBG]   obj_id=1 class=2 has 0 user_meta entries

I am running this through the deepstream:7.1-triton-multiarch docker container and ran two sh files that are required for python dev.

I don’t know if I am probing erroneously or if I have to modify the configuration file to include something else.

It looks like PGIE doesn’t detect the object correctly. The face object count is 0, so the SGIE cannot output tensor meta. If you add nvdsosd to the pipeline, is the bbox drawn correctly?

I have an nvdsosd component and it detects the faces, which makes it strange as why it is not performing the sgie.

I checked the gaze-estimation code. When a face object is converted to a tensor, it is inconsistent with the default behavior of nvinfer. You need to add nvdspreprocess before the SGIE, and then add custom pre-processing. Please refer to the implementation of /opt/nvidia/deepstream/deepstream/sources/gst-plugins/gst-nvdspreprocess/nvdspreprocess_lib

Please use get_nvds_LayerInfo to get the layer info; otherwise the layer info is invalid.

# Fetch the tensor meta attached to the object, then read each output
# layer through pyds.get_nvds_LayerInfo() so the buffer pointer is valid.
# (Indentation in the original paste was inconsistent and would raise
# IndentationError; reformatted to 4-space indents.)
infer_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
print(f"[DBG] num_output_layers={infer_meta.num_output_layers}")
for layer_idx in range(infer_meta.num_output_layers):
    layer = pyds.get_nvds_LayerInfo(infer_meta, layer_idx)
    array = layer_tensor_to_ndarray(layer)
    print(f"[DBG] {layer.layerName} layer_info.buffer address = {type(array)}")

This function can convert NvDsInferLayerInfo into ndarray for easy data processing

def layer_tensor_to_ndarray(layer: pyds.NvDsInferLayerInfo) -> np.ndarray:
    """Wrap the raw output buffer of *layer* in a 1-D float32 ndarray.

    Only FP32 layers are supported; returns ``None`` for any other data
    type or when the buffer pointer is NULL.  The returned array is a
    zero-copy view over memory owned by nvinfer, so consume (or copy) it
    inside the probe before the buffer is recycled.

    Args:
        layer: a ``pyds.NvDsInferLayerInfo`` obtained via
            ``pyds.get_nvds_LayerInfo`` (its ``.buffer`` must be populated).

    Returns:
        A flat ``np.float32`` view of the layer data, or ``None``.
    """
    import ctypes

    if layer.dataType != pyds.NvDsInferDataType.FLOAT:
        return None

    addr = pyds.get_ptr(layer.buffer)
    if addr == 0:
        print("Buffer address is 0")
        return None

    data_ptr = ctypes.cast(addr, ctypes.POINTER(ctypes.c_float))

    # Element count comes from the layer's dimensions.
    dims = layer.inferDims
    shape = [dims.d[i] for i in range(dims.numDims)]
    view = np.ctypeslib.as_array(data_ptr, shape=shape)

    # The original contract returns a flat array (np.frombuffer discarded
    # the multi-dim shape); reshape(-1) preserves that without the
    # redundant frombuffer round-trip.
    return view.reshape(-1)

This FAQ has detailed descriptions

After solving the above two problems, since your pipeline uses both input tensor and output tensor, you also need to refer to this topic

It worked perfectly with this config file after altering the gstnvinfer_meta_utils.cpp and probe.

[property]
enable=1
target-unique-ids=2
operate-on-gie-id=1
network-input-order=0
process-on-frame=0
unique-id=6
gpu-id=0
maintain-aspect-ratio=1
symmetric-padding=1
processing-width=448
processing-height=448
scaling-buf-pool-size=6
tensor-buf-pool-size=6
network-input-shape=32;3;448;448
network-color-format=0
tensor-data-type=0
tensor-name=input
scaling-pool-memory-type=0
scaling-pool-compute-hw=1
scaling-filter=1

# point at the NVIDIA-provided preprocess library
custom-lib-path=/opt/nvidia/deepstream/deepstream/lib/gst-plugins/libcustom2d_preprocess.so

# the symbol that builds the raw tensor from your resized ROIs
custom-tensor-preparation-function=CustomTensorPreparation

[user-configs]
# (x/255)/std  →  three channels
pixel-normalization-factor=0.017159;0.017582;0.017647

# subtract mean/std  →  three channels
offsets=2.1179;2.0357;1.8044

[group-0]
src-ids=0
operate-on-class-ids=2
process-on-all-objects=1
custom-input-transformation-function=CustomAsyncTransformation
input-object-min-width=50
input-object-min-height=50
input-object-max-width=1000
input-object-max-height=1000

Thanks :)

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.