SGIE output tensor stored in frame_user_meta_list instead of the object's obj_user_meta_list

Please provide complete information as applicable to your setup.

• Hardware Platform (Jetson / GPU): GPU RTX 4060Ti
• DeepStream Version: 7.0
• TensorRT Version: 8.6.1
• NVIDIA GPU Driver Version: 555.58.02
• Issue Type: Question

This is the config for the face detector (PGIE):

[property]
gpu-id=0
net-scale-factor=0.00392156862745098
onnx-file=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/yolov8n-face.onnx
model-engine-file=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/yolov8n-face.engine
labelfile-path=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/labels.txt
int8-calib-file=…/…/…/…/samples/models/Primary_Detector/cal_trt.bin
batch-size=10
process-mode=1
model-color-format=0

# 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
num-detected-classes=4
interval=0
gie-unique-id=1
cluster-mode=2
network-type=100
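
Note that with network-type=100 nvinfer applies no built-in post-processing, so the probe further below parses the raw output itself. For that probe to actually receive NVDSINFER_TENSOR_OUTPUT_META from the PGIE, the raw tensors also have to be exported; if it is not already set somewhere else, the PGIE config would additionally need:

output-tensor-meta=1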

This is the config for my face embedding model (SGIE):

[property]
gpu-id=0
net-scale-factor=0.00392156862745098
onnx-file=/home/aiserver/Desktop/Experiement/DeepStream/models/Face_Embedding/embedding_BCHW.onnx
model-engine-file=/home/aiserver/Desktop/Experiement/DeepStream/models/Face_Embedding/embedding_BCHW.engine
labelfile-path=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/labels.txt
int8-calib-file=…/…/…/…/samples/models/Primary_Detector/cal_trt.bin
batch-size=32
model-color-format=0

# 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=4
interval=0
gie-unique-id=2
cluster-mode=2
network-type=100
operate-on-gie-id=1
process-mode=2
output-tensor-meta=1

I need to detect faces and then use the cropped faces for embedding. For my custom model, I followed the SSD parser custom-model code from the DeepStream Python examples. This is my code for custom bounding-box parsing:

import pyds

# As defined in the DeepStream SSD parser sample.
UNTRACKED_OBJECT_ID = 0xffffffffffffffff


def add_obj_meta_to_frame(frame_object, batch_meta, frame_meta, label_names, unique_id):
    """Inserts an object into the metadata."""
    # This is a good place to insert objects into the metadata.
    # Here's an example of inserting a single object.
    obj_meta = pyds.nvds_acquire_obj_meta_from_pool(batch_meta)

    # Set bbox properties. These are in input resolution.
    rect_params = obj_meta.rect_params
    rect_params.left = int(frame_object["facial_area"][0])
    rect_params.top = int(frame_object["facial_area"][1])
    rect_params.width = int(frame_object["facial_area"][2] - frame_object["facial_area"][0])
    rect_params.height = int(frame_object["facial_area"][3] - frame_object["facial_area"][1])

    # Semi-transparent yellow background.
    rect_params.has_bg_color = 0
    rect_params.bg_color.set(1, 1, 0, 0.4)

    # Red border of width 3.
    rect_params.border_width = 3
    rect_params.border_color.set(1, 0, 0, 1)

    # Set object info including class, detection confidence, etc.
    obj_meta.confidence = frame_object["score"]
    obj_meta.class_id = 0

    # There is no tracking ID upon detection. The tracker will assign an ID.
    obj_meta.object_id = UNTRACKED_OBJECT_ID

    lbl_id = 0
    if lbl_id >= len(label_names):
        lbl_id = 0

    # Set the object classification label.
    obj_meta.obj_label = label_names[lbl_id]

    # Set display text for the object.
    txt_params = obj_meta.text_params
    if txt_params.display_text:
        pyds.free_buffer(txt_params.display_text)

    txt_params.x_offset = int(rect_params.left)
    txt_params.y_offset = max(0, int(rect_params.top) - 10)
    txt_params.display_text = label_names[lbl_id]

    # Font, font color and font size.
    txt_params.font_params.font_name = "Serif"
    txt_params.font_params.font_size = 10
    # set(red, green, blue, alpha); set to white.
    txt_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)

    # Text background color.
    txt_params.set_bg_clr = 1
    # set(red, green, blue, alpha); set to black.
    txt_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)

    obj_meta.unique_component_id = unique_id

    # Insert the object into the current frame meta.
    # This object has no parent.
    pyds.nvds_add_obj_meta_to_frame(frame_meta, obj_meta, None)

pgie_src_pad_buffer_probe extracts the metadata received on the PGIE source pad and updates the params for drawing rectangles, object information, etc.:

import ctypes

import numpy as np
from gi.repository import Gst
import pyds

# yolo_face is the custom parser module for the YOLOv8-face output.


def pgie_src_pad_buffer_probe(pad, info, u_data):
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer")
        return Gst.PadProbeReturn.OK

    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    label_names = ["Face"]
    while l_frame is not None:
        try:
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break
        l_user = frame_meta.frame_user_meta_list

        while l_user is not None:
            try:
                # Note that l_user.data needs a cast to pyds.NvDsUserMeta.
                # The casting also keeps ownership of the underlying memory
                # in the C code, so the Python garbage collector will leave
                # it alone.
                user_meta = pyds.NvDsUserMeta.cast(l_user.data)
            except StopIteration:
                break

            if (
                    user_meta.base_meta.meta_type
                    == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META
            ):
                tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
                print(tensor_meta.unique_id)

                # Boxes in the tensor meta should be in network resolution,
                # which is found in tensor_meta.network_info. Use this info
                # to scale boxes to the input frame resolution.
                layers_info = []
                for i in range(tensor_meta.num_output_layers):
                    layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
                    layers_info.append(layer)

                # Wrap the raw output buffer as a numpy array without copying.
                ptr = ctypes.cast(
                    pyds.get_ptr(layers_info[0].buffer),
                    ctypes.POINTER(ctypes.c_float),
                )
                dims = pyds.NvDsInferDims.cast(layers_info[0].dims)
                v = np.ctypeslib.as_array(ptr, shape=(1, dims.d[0], dims.d[1]))

                frame_object_list = yolo_face.nvds_yolo_face_parse(v)
                for frame_object in frame_object_list:
                    add_obj_meta_to_frame(
                        frame_object, batch_meta, frame_meta, label_names,
                        tensor_meta.unique_id,
                    )

            # Always advance before the next iteration; skipping this for a
            # non-tensor meta type would loop forever.
            try:
                l_user = l_user.next
            except StopIteration:
                break

        try:
            # Indicate that inference was performed on the frame.
            frame_meta.bInferDone = True
            l_frame = l_frame.next
        except StopIteration:
            break
    return Gst.PadProbeReturn.OK
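
For reference, this is how such a probe is typically attached in the DeepStream Python apps (the element name pgie is an assumption about the rest of the pipeline code; the pad-probe API itself is standard GStreamer):

# "pgie" is assumed to be the nvinfer element created during pipeline setup.
pgie_src_pad = pgie.get_static_pad("src")
pgie_src_pad.add_probe(Gst.PadProbeType.BUFFER, pgie_src_pad_buffer_probe, 0)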

When I tried to access the face object's embedding from an SGIE src pad probe with l_user = obj_meta.obj_user_meta_list, it was None. But if I tried to get the embedding from l_user = frame_meta.frame_user_meta_list, I could get an embedding vector. So is my model running inference on the whole frame, or on the object that I detected? Also, can I use the keypoints from the face detector model to align the face within DeepStream before it is fed into the embedding model?
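
For reference, once the SGIE runs with process-mode=2 and output-tensor-meta=1, the embedding should be readable per object along these lines. A minimal sketch, assuming a flat float output; the helper name and the 512-dimensional default are illustrative, not part of the original code:

import ctypes

import numpy as np
import pyds


def read_face_embedding(obj_meta, embedding_size=512):
    """Return the SGIE embedding attached to an object, or None.

    embedding_size is an assumption about the model's output layer;
    adjust it to the actual dimensions reported in layer.dims.
    """
    l_user = obj_meta.obj_user_meta_list
    while l_user is not None:
        user_meta = pyds.NvDsUserMeta.cast(l_user.data)
        if user_meta.base_meta.meta_type == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META:
            tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
            layer = pyds.get_nvds_LayerInfo(tensor_meta, 0)
            ptr = ctypes.cast(pyds.get_ptr(layer.buffer), ctypes.POINTER(ctypes.c_float))
            # Copy out of the DeepStream-owned buffer before it is recycled.
            return np.ctypeslib.as_array(ptr, shape=(embedding_size,)).copy()
        try:
            l_user = l_user.next
        except StopIteration:
            break
    return None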

Oops, I made a mistake by setting the SGIE process-mode to 1. I have solved the problem now, but I still want to ask for a good way to align my face images before the SGIE model.
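
On alignment: nvinfer itself does not align crops, so the usual approach is to compute a similarity transform from the detector's five facial landmarks to a fixed reference template and warp the face with it. A minimal sketch of the geometry with OpenCV, assuming the widely used 112x112 ArcFace template and the usual landmark order (both assumptions about the embedding model):

import cv2
import numpy as np

# Widely used ArcFace 112x112 reference landmarks:
# left eye, right eye, nose tip, left mouth corner, right mouth corner.
ARC_FACE_TEMPLATE = np.array([
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041],
], dtype=np.float32)


def align_face(frame_bgr, landmarks_xy):
    """Warp a face to 112x112 with a similarity transform.

    landmarks_xy: (5, 2) array of keypoints from the face detector,
    in frame coordinates and in the template's landmark order.
    """
    src = np.asarray(landmarks_xy, dtype=np.float32)
    matrix, _ = cv2.estimateAffinePartial2D(src, ARC_FACE_TEMPLATE)
    return cv2.warpAffine(frame_bgr, matrix, (112, 112))

Integrating this into the pipeline itself (for example via a custom gst-nvdspreprocess library that warps each object before the SGIE) is a separate step; the sketch only shows the transform.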
