Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU): GPU RTX 4060Ti
• DeepStream Version: 7.0
• TensorRT Version: 8.6.1
• NVIDIA GPU Driver Version: 555.58.02
• Issue Type: Question
This is config for face detector
[property]
gpu-id=0
net-scale-factor=0.00392156862745098
onnx-file=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/yolov8n-face.onnx
model-engine-file=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/yolov8n-face.engine
labelfile-path=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/labels.txt
int8-calib-file=../../../../samples/models/Primary_Detector/cal_trt.bin
batch-size=10
process-mode=1
model-color-format=0
# 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
num-detected-classes=4
interval=0
gie-unique-id=1
cluster-mode=2
network-type = 100
This is my face embedding model
[property]
gpu-id=0
net-scale-factor=0.00392156862745098
onnx-file=/home/aiserver/Desktop/Experiement/DeepStream/models/Face_Embedding/embedding_BCHW.onnx
model-engine-file=/home/aiserver/Desktop/Experiement/DeepStream/models/Face_Embedding/embedding_BCHW.engine
labelfile-path=/home/aiserver/Desktop/Experiement/DeepStream/models/YOLO/labels.txt
int8-calib-file=../../../../samples/models/Primary_Detector/cal_trt.bin
batch-size=32
model-color-format=0
# 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=4
interval=0
gie-unique-id=2
cluster-mode=2
network-type = 100
# gie-unique-id=2 is already set above; this duplicate line can be removed
gie-unique-id=2
operate-on-gie-id=1
process-mode=2
output-tensor-meta=1
I need to detect faces and use the cropped face for embedding. For my custom model, I followed the code from the SSD-parser custom-model example in deepstream_python_apps. This is my code for custom bounding-box parsing.
def add_obj_meta_to_frame(frame_object, batch_meta, frame_meta, label_names, unique_id):
    """Insert one detected face into the frame's object metadata.

    Args:
        frame_object: dict with a "facial_area" [x1, y1, x2, y2] box
            (network input resolution) and a "score" confidence value.
        batch_meta: NvDsBatchMeta pool the object meta is acquired from.
        frame_meta: NvDsFrameMeta the new object is attached to.
        label_names: list of class label strings; index 0 is used.
        unique_id: component id of the GIE that produced the detection.
    """
    obj_meta = pyds.nvds_acquire_obj_meta_from_pool(batch_meta)

    # Box arrives as [x1, y1, x2, y2]; rect_params wants left/top/width/height.
    rect_params = obj_meta.rect_params
    rect_params.left = int(frame_object["facial_area"][0])
    rect_params.top = int(frame_object["facial_area"][1])
    rect_params.width = int(frame_object["facial_area"][2] - frame_object["facial_area"][0])
    rect_params.height = int(frame_object["facial_area"][3] - frame_object["facial_area"][1])

    # Semi-transparent yellow background (currently disabled: has_bg_color=0).
    rect_params.has_bg_color = 0
    rect_params.bg_color.set(1, 1, 0, 0.4)
    # Red border of width 3.
    rect_params.border_width = 3
    rect_params.border_color.set(1, 0, 0, 1)

    # Object info: detection confidence and class.
    obj_meta.confidence = frame_object["score"]
    obj_meta.class_id = 0
    # There is no tracking ID upon detection; the tracker will assign one.
    obj_meta.object_id = UNTRACKED_OBJECT_ID

    # Guard against an empty/short label list (falls back to index 0).
    lbl_id = 0
    if lbl_id >= len(label_names):
        lbl_id = 0
    # Set the object classification label.
    obj_meta.obj_label = label_names[lbl_id]

    # Display text for the on-screen display.
    txt_params = obj_meta.text_params
    if txt_params.display_text:
        pyds.free_buffer(txt_params.display_text)
    txt_params.x_offset = int(rect_params.left)
    txt_params.y_offset = max(0, int(rect_params.top) - 10)
    txt_params.display_text = label_names[lbl_id]
    # Font, font color and font size.
    txt_params.font_params.font_name = "Serif"
    txt_params.font_params.font_size = 10
    # set(red, green, blue, alpha); set to white.
    txt_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)
    # Text background color: set(red, green, blue, alpha); set to black.
    txt_params.set_bg_clr = 1
    txt_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)

    obj_meta.unique_component_id = unique_id
    # Insert the object into the current frame meta; this object has no parent.
    pyds.nvds_add_obj_meta_to_frame(frame_meta, obj_meta, None)
def pgie_src_pad_buffer_probe(pad, info, u_data):
    """Extract metadata received on the PGIE src pad and update params for
    drawing rectangles, object information etc.

    Walks every frame in the batch, finds NVDSINFER_TENSOR_OUTPUT_META user
    meta, parses the raw YOLO-face output tensor into detections, and attaches
    each detection as NvDsObjectMeta so downstream elements (SGIE, OSD) see it.
    """
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer ")
        # BUG FIX: a pad probe must return a Gst.PadProbeReturn value, not None.
        return Gst.PadProbeReturn.OK

    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    label_names = ["Face"]
    while l_frame is not None:
        try:
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        l_user = frame_meta.frame_user_meta_list
        while l_user is not None:
            try:
                # l_user.data needs a cast to pyds.NvDsUserMeta. The cast
                # keeps ownership of the underlying memory in the C code, so
                # the Python garbage collector leaves it alone.
                user_meta = pyds.NvDsUserMeta.cast(l_user.data)
            except StopIteration:
                break

            if (
                user_meta.base_meta.meta_type
                != pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META
            ):
                # BUG FIX: the original did a bare `continue` here without
                # advancing l_user, which loops forever as soon as any other
                # user meta is attached to the frame. Advance first.
                try:
                    l_user = l_user.next
                except StopIteration:
                    break
                continue

            tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
            print(tensor_meta.unique_id)

            # Boxes in the tensor meta are in network resolution (see
            # tensor_meta.network_info); the parser scales them to frame size.
            layers_info = []
            for i in range(tensor_meta.num_output_layers):
                layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
                layers_info.append(layer)

            # View the first output layer's device buffer as a numpy array
            # of shape (1, d0, d1) without copying.
            ptr = ctypes.cast(
                pyds.get_ptr(layers_info[0].buffer),
                ctypes.POINTER(ctypes.c_float),
            )
            dims = pyds.NvDsInferDims.cast(layers_info[0].dims)
            v = np.ctypeslib.as_array(ptr, shape=(1, dims.d[0], dims.d[1]))

            frame_object_list = yolo_face.nvds_yolo_face_parse(v)

            try:
                l_user = l_user.next
            except StopIteration:
                break

            for frame_object in frame_object_list:
                add_obj_meta_to_frame(
                    frame_object,
                    batch_meta,
                    frame_meta,
                    label_names,
                    tensor_meta.unique_id,
                )

        try:
            # Indicate that inference was performed on this frame.
            frame_meta.bInferDone = True
            l_frame = l_frame.next
        except StopIteration:
            break

    return Gst.PadProbeReturn.OK
When I try to access the face object embedding from the SGIE src pad probe with l_user = obj_meta.obj_user_meta_list, it is None. But if I try to get the embedding from l_user = frame_meta.frame_user_meta_list, I can get an embedding vector. So is my model running inference on the whole frame or on the object that I detected? Also, can I use the keypoints from the face detector model to align the face in DeepStream before feeding it into the embedding model?