• Hardware Platform (GPU)
• DeepStream Version 5.0
• TensorRT Version 7.1.3
• NVIDIA GPU Driver Version 450.119.04
• Issue Type: questions
I have a problem running an app for face detection. I was able to convert the net *.wts → *.onnx → *.engine with this repo: GitHub - nghiapq77/face-recognition-cpp-tensorrt: Face Recognition with RetinaFace and ArcFace. When I run the app I don't see the display and I don't get the callback info (the per-frame print from the OSD sink pad probe) in the terminal. My code, config, and custom parser are below:
// main.cpp
#include "gstnvdsmeta.h"
#include <glib.h>
#include <gst/gst.h>
#include <stdio.h>
#include <string>
#include <iostream>
#define COUT_RESET "\033[0m"
#define COUT_BLACK "\033[30m" /* Black */
#define COUT_RED "\033[31m" /* Red */
#define COUT_GREEN "\033[32m" /* Green */
#define COUT_YELLOW "\033[33m" /* Yellow */
#define COUT_BLUE "\033[34m" /* Blue */
#define COUT_MAGENTA "\033[35m" /* Magenta */
#define COUT_CYAN "\033[36m" /* Cyan */
#define COUT_WHITE "\033[37m" /* White */
#define HVG_DEBUG(msg) std::cout << COUT_BLUE << "[DEBUG] " << COUT_RESET << msg << std::endl;
#define HVG_ERROR(msg) std::cerr << COUT_RED << "[ERROR] " << COUT_RESET << msg << std::endl;
#define HVG_INFO(msg) std::cout << COUT_GREEN << "[INFO] " << COUT_RESET << msg << std::endl;
#define HVG_WARNING(msg) std::cout << COUT_YELLOW << "[WARNING] " << COUT_RESET << msg << std::endl;
#define MAKE_ELEMENT(element, gs_name, name) \
element = gst_element_factory_make(gs_name, name); \
if (!element)\
{\
HVG_ERROR("Element '" << name << "' was not created!")\
return EXIT_FAILURE;\
}\
else\
{\
HVG_INFO("Element '" << name << "' was created successfully!") \
}
#define MAKE_PIPE(pipe, name) \
pipe = gst_pipeline_new(name); \
if (!pipe)\
{\
HVG_ERROR("Pipeline '" << name << "' was not created!")\
return EXIT_FAILURE;\
}\
else\
{\
HVG_INFO("Pipeline '" << name << "' was created successfully!") \
}
GST_DEBUG_CATEGORY(NVDS_APP);
#define MAX_DISPLAY_LEN 64
#define PGIE_CLASS_ID_VEHICLE 0
#define PGIE_CLASS_ID_PERSON 2
/* The muxer output resolution must be set if the input streams will be of
* different resolution. The muxer will scale all the input frames to this
* resolution. */
#define MUXER_OUTPUT_WIDTH 1920
#define MUXER_OUTPUT_HEIGHT 1080
/* Muxer batch formation timeout, e.g. 40 ms. Should ideally be set
 * based on the fastest source's framerate. */
#define MUXER_BATCH_TIMEOUT_USEC 40000
gint frame_number = 0;
gchar pgie_classes_str[4][32] = {"Vehicle", "TwoWheeler", "Person", "Roadsign"};
/* osd_sink_pad_buffer_probe will extract metadata received on OSD sink pad
* and update params for drawing rectangle, object information etc. */
static GstPadProbeReturn osd_sink_pad_buffer_probe(GstPad* pad, GstPadProbeInfo* info, gpointer u_data)
{
HVG_DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
GstBuffer* buf = (GstBuffer*)info->data;
guint num_rects = 0;
NvDsObjectMeta* obj_meta = nullptr;
guint vehicle_count = 0;
guint person_count = 0;
NvDsMetaList* l_frame = nullptr;
NvDsMetaList* l_obj = nullptr;
NvDsDisplayMeta* display_meta = nullptr;
NvDsBatchMeta* batch_meta = gst_buffer_get_nvds_batch_meta(buf);
if (!batch_meta)
{
return GST_PAD_PROBE_OK;
}
for (l_frame = batch_meta->frame_meta_list; l_frame != nullptr; l_frame = l_frame->next)
{
NvDsFrameMeta* frame_meta = (NvDsFrameMeta*)(l_frame->data);
int offset = 0;
for (l_obj = frame_meta->obj_meta_list; l_obj != nullptr; l_obj = l_obj->next)
{
obj_meta = (NvDsObjectMeta*)(l_obj->data);
if (obj_meta->class_id == PGIE_CLASS_ID_VEHICLE)
{
vehicle_count++;
num_rects++;
}
if (obj_meta->class_id == PGIE_CLASS_ID_PERSON)
{
person_count++;
num_rects++;
}
}
display_meta = nvds_acquire_display_meta_from_pool(batch_meta);
NvOSD_TextParams* txt_params = &display_meta->text_params[0];
display_meta->num_labels = 1;
txt_params->display_text = (char*)g_malloc0(MAX_DISPLAY_LEN);
offset = snprintf(txt_params->display_text, MAX_DISPLAY_LEN, "Person = %d ", person_count);
snprintf(txt_params->display_text + offset, MAX_DISPLAY_LEN - offset, "Vehicle = %d ", vehicle_count);
/* Now set the offsets where the string should appear */
txt_params->x_offset = 10;
txt_params->y_offset = 12;
/* Font , font-color and font-size */
txt_params->font_params.font_name = (gchar*)"Serif";
txt_params->font_params.font_size = 10;
txt_params->font_params.font_color.red = 1.0;
txt_params->font_params.font_color.green = 1.0;
txt_params->font_params.font_color.blue = 1.0;
txt_params->font_params.font_color.alpha = 1.0;
/* Text background color */
txt_params->set_bg_clr = 1;
txt_params->text_bg_clr.red = 0.0;
txt_params->text_bg_clr.green = 0.0;
txt_params->text_bg_clr.blue = 0.0;
txt_params->text_bg_clr.alpha = 1.0;
nvds_add_display_meta_to_frame(frame_meta, display_meta);
}
g_print("Frame Number = %d Number of objects = %d "
"Vehicle Count = %d Person Count = %d\n",
frame_number,
num_rects,
vehicle_count,
person_count);
frame_number++;
return GST_PAD_PROBE_OK;
}
static gboolean bus_call(GstBus* bus, GstMessage* msg, gpointer data)
{
GMainLoop* loop = (GMainLoop*)data;
switch (GST_MESSAGE_TYPE(msg))
{
case GST_MESSAGE_EOS:
g_print("End of stream\n");
g_main_loop_quit(loop);
break;
case GST_MESSAGE_ERROR:
{
gchar* debug;
GError* error;
gst_message_parse_error(msg, &error, &debug);
g_printerr("ERROR from element %s: %s\n", GST_OBJECT_NAME(msg->src), error->message);
if (debug)
g_printerr("Error details: %s\n", debug);
g_free(debug);
g_error_free(error);
g_main_loop_quit(loop);
break;
}
default:
break;
}
return TRUE;
}
int main(int argc, char* argv[])
{
const std::string FILE_H264 = "/data/deepstream/data/face_video.mp4";
const std::string CONFIG_INFER = "/data/deepstream/face_recognition_skytrack/build_docker/config_temp.txt";
const int WIDTH = 1280;
const int HEIGHT = 720;
/* Standard GStreamer initialization */
gst_init(&argc, &argv);
GMainLoop* loop = g_main_loop_new(nullptr, FALSE);
/* Create gstreamer elements */
/* Create Pipeline element that will form a connection of other elements */
GstElement *pipeline = nullptr;
MAKE_PIPE(pipeline, "dstest1-pipeline")
/* Source element for reading from the file */
GstElement *source = nullptr;
MAKE_ELEMENT(source, "filesrc", "file-source")
/* Since the data format in the input file is elementary h264 stream,
* we need a h264parser */
GstElement *h264parser = nullptr;
MAKE_ELEMENT(h264parser, "h264parse", "file-parser")
/* Use nvdec_h264 for hardware accelerated decode on GPU */
GstElement *decoder = nullptr;
MAKE_ELEMENT(decoder, "nvv4l2decoder", "nvv4l2-parser")
/* Create nvstreammux instance to form batches from one or more sources. */
GstElement *streammux = nullptr;
MAKE_ELEMENT(streammux, "nvstreammux", "stream-muxer")
/* Use nvinfer to run inferencing on decoder's output,
* behaviour of inferencing is set through config file */
GstElement *pgie = nullptr;
MAKE_ELEMENT(pgie, "nvinfer", "primary-nvinference-engine")
/* Use convertor to convert from NV12 to RGBA as required by nvosd */
GstElement *nvvidconv = nullptr;
MAKE_ELEMENT(nvvidconv, "nvvideoconvert", "nvvideo-converter")
/* Create OSD to draw on the converted RGBA buffer */
GstElement *nvosd = nullptr;
MAKE_ELEMENT(nvosd, "nvdsosd", "nv-onscreendisplay")
/* Finally render the osd output */
GstElement *nvsink = nullptr;
MAKE_ELEMENT(nvsink, "nveglglessink", "nvvideo-renderer")
/* we add a message handler */
GstBus* bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
guint bus_watch_id = gst_bus_add_watch(bus, bus_call, loop);
gst_object_unref(bus);
/* Set up the pipeline */
/* we add all elements into the pipeline */
gst_bin_add_many(GST_BIN(pipeline), source, h264parser, decoder, streammux, pgie, nvvidconv, nvosd, nvsink, nullptr);
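/* nvstreammux exposes request sink pads (sink_%u), so the decoder's src pad
 * is linked to a requested sink_0 pad manually below. */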
gchar pad_name_src[16] = "src";
GstPad* srcpad = gst_element_get_static_pad(decoder, pad_name_src);
if (!srcpad)
{
HVG_ERROR("Decoder request src pad failed. Exiting.");
return -1;
}
gchar pad_name_sink[16] = "sink_0";
GstPad* sinkpad = gst_element_get_request_pad(streammux, pad_name_sink);
if (!sinkpad)
{
HVG_ERROR("Streammux request sink pad failed. Exiting.");
return -1;
}
if (gst_pad_link(srcpad, sinkpad) != GST_PAD_LINK_OK)
{
HVG_ERROR("Failed to link decoder to stream muxer. Exiting.");
return -1;
}
gst_object_unref(sinkpad);
gst_object_unref(srcpad);
/* we link the elements together */
/* file-source -> h264-parser -> nvh264-decoder ->
* nvinfer -> nvvidconv -> nvosd -> video-renderer */
if (!gst_element_link_many(source, h264parser, decoder, nullptr))
{
HVG_ERROR("Elements could not be linked: 1. Exiting.");
return EXIT_FAILURE;
}
if (!gst_element_link_many(streammux, pgie, nvvidconv, nvosd, nvsink, nullptr))
{
HVG_ERROR("Elements could not be linked: 2. Exiting.");
return EXIT_FAILURE;
}
/* we set the input filename to the source element */
g_object_set(G_OBJECT(source), "location", FILE_H264.c_str(), nullptr);
g_object_set(G_OBJECT(streammux), "batch-size", 1, nullptr);
g_object_set(G_OBJECT(streammux),
"width",
WIDTH,
"height",
HEIGHT,
"batched-push-timeout",
MUXER_BATCH_TIMEOUT_USEC,
nullptr);
g_object_set(G_OBJECT(nvsink), "sync", FALSE, nullptr);
/* Set all the necessary properties of the nvinfer element, the necessary ones are : */
g_object_set(G_OBJECT(pgie), "config-file-path", CONFIG_INFER.c_str(), nullptr);
/* Let's add a probe to get informed of the metadata generated; we add the probe to
 * the sink pad of the osd element, since by that time the buffer will have
 * all the metadata. */
GstPad* osd_sink_pad = gst_element_get_static_pad(nvosd, "sink");
if (!osd_sink_pad)
{
HVG_WARNING("Unable to get sink pad");
}
else
{
gst_pad_add_probe(osd_sink_pad, GST_PAD_PROBE_TYPE_BUFFER, osd_sink_pad_buffer_probe, nullptr, nullptr);
gst_object_unref(osd_sink_pad);
}
/* Set the pipeline to "playing" state */
HVG_INFO("Now playing: " << FILE_H264.c_str());
gst_element_set_state(pipeline, GST_STATE_PLAYING);
/* Wait till pipeline encounters an error or EOS */
HVG_INFO("Running...");
g_main_loop_run(loop);
/* Out of the main loop, clean up nicely */
HVG_INFO("Returned, stopping playback");
gst_element_set_state(pipeline, GST_STATE_NULL);
HVG_INFO("Deleting pipeline");
gst_object_unref(GST_OBJECT(pipeline));
g_source_remove(bus_watch_id);
g_main_loop_unref(loop);
return 0;
}
# config_temp.txt
[property]
gpu-id=0
model-engine-file=model_info/retina-mobile0.25-288x320.engine
batch-size=1
net-scale-factor=1.0
offsets=104.0;117.0;123.0
force-implicit-batch-dim=1
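# 0=RGB, 1=BGR, 2=GRAY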
model-color-format=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
#process-mode=1
# workaround for getting topk detection
num-detected-classes=4
# number of consecutive batches to skip for inference
interval=0
# custom detection parser
parse-bbox-func-name=NvDsInferParseCustomRetinaFace
custom-lib-path=../../retinaface_parser/build_docker/libretinaface_parser.so
gie-unique-id=1
[class-attrs-all]
# bbox threshold
pre-cluster-threshold=0.6
# nms threshold
post-cluster-threshold=0.4
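// custom bbox parser, compiled into libretinaface_parser.so (see custom-lib-path above)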
#include "nvdsinfer_custom_impl.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
#include <gst/gst.h>
#include <fstream>
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#define CLIP(a, min, max) (MAX(MIN(a, max), min))
extern "C" bool NvDsInferParseCustomRetinaFace(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferObjectDetectionInfo>& objectList);
struct Bbox
{
int x1, y1, x2, y2;
float score;
};
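// Anchor box in normalized [0,1] coordinates: (cx, cy) is the center, (sx, sy) the width and height.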
struct anchorBox
{
float cx;
float cy;
float sx;
float sy;
};
void postprocessing(float* bbox,
float* conf,
float bbox_threshold,
float nms_threshold,
unsigned int topk,
int width,
int height,
std::vector<NvDsInferObjectDetectionInfo>& objectList);
void create_anchor_retinaface(std::vector<anchorBox>& anchor, int w, int h);
bool cmp(NvDsInferObjectDetectionInfo a, NvDsInferObjectDetectionInfo b);
void nms(std::vector<NvDsInferObjectDetectionInfo>& input_boxes, float NMS_THRESH);
void postprocessing(float* bbox,
float* conf,
float bbox_threshold,
float nms_threshold,
unsigned int topk,
int width,
int height,
std::vector<NvDsInferObjectDetectionInfo>& objectList)
{
std::vector<anchorBox> anchor;
create_anchor_retinaface(anchor, width, height);
for (unsigned int i = 0; i < anchor.size(); ++i)
{
if (*(conf + 1) > bbox_threshold)
{
anchorBox tmp = anchor[i];
anchorBox tmp1;
NvDsInferObjectDetectionInfo result;
result.classId = 0;
// decode bbox
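// (SSD-style decoding: center offsets are scaled by variance 0.1, sizes by exp(offset * 0.2))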
tmp1.cx = tmp.cx + *bbox * 0.1f * tmp.sx;
tmp1.cy = tmp.cy + *(bbox + 1) * 0.1f * tmp.sy;
tmp1.sx = tmp.sx * exp(*(bbox + 2) * 0.2f);
tmp1.sy = tmp.sy * exp(*(bbox + 3) * 0.2f);
result.left = (tmp1.cx - tmp1.sx / 2) * width;
result.top = (tmp1.cy - tmp1.sy / 2) * height;
result.width = (tmp1.cx + tmp1.sx / 2) * width - result.left;
result.height = (tmp1.cy + tmp1.sy / 2) * height - result.top;
// Clip object box coordinates to network resolution
result.left = CLIP(result.left, 0, width - 1);
result.top = CLIP(result.top, 0, height - 1);
result.width = CLIP(result.width, 0, width - 1);
result.height = CLIP(result.height, 0, height - 1);
result.detectionConfidence = *(conf + 1);
objectList.push_back(result);
}
bbox += 4;
conf += 2;
}
std::sort(objectList.begin(), objectList.end(), cmp);
nms(objectList, nms_threshold);
if (objectList.size() > topk)
objectList.resize(topk);
}
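/* Anchor generation for RetinaFace: strides 8/16/32 with two min-sizes per level.
 * Example, assuming a 320x288 (w x h) network input (reading the engine name
 * retina-mobile0.25-288x320 as H x W): feature maps are 36x40, 18x20 and 9x10,
 * giving (1440 + 360 + 90) * 2 = 3780 anchors, so the bbox and conf outputs are
 * read as 3780x4 and 3780x2 floats. */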
void create_anchor_retinaface(std::vector<anchorBox>& anchor, int w, int h)
{
anchor.clear();
std::vector<std::vector<int>> feature_map(3), min_sizes(3);
float steps[] = {8, 16, 32};
for (unsigned int i = 0; i < feature_map.size(); ++i)
{
feature_map[i].push_back(ceil(h / steps[i]));
feature_map[i].push_back(ceil(w / steps[i]));
}
std::vector<int> minsize1 = {10, 20};
min_sizes[0] = minsize1;
std::vector<int> minsize2 = {32, 64};
min_sizes[1] = minsize2;
std::vector<int> minsize3 = {128, 256};
min_sizes[2] = minsize3;
for (unsigned int k = 0; k < feature_map.size(); ++k)
{
std::vector<int> min_size = min_sizes[k];
for (int i = 0; i < feature_map[k][0]; ++i)
{
for (int j = 0; j < feature_map[k][1]; ++j)
{
for (unsigned int l = 0; l < min_size.size(); ++l)
{
float s_kx = static_cast<float>(min_size[l]) * 1.0f / static_cast<float>(w);
float s_ky = static_cast<float>(min_size[l]) * 1.0f / static_cast<float>(h);
float cx = (static_cast<float>(j) + 0.5f) * steps[k] / static_cast<float>(w);
float cy = (static_cast<float>(i) + 0.5f) * steps[k] / static_cast<float>(h);
anchorBox axil = {cx, cy, s_kx, s_ky};
anchor.push_back(axil);
}
}
}
}
}
bool cmp(NvDsInferObjectDetectionInfo a, NvDsInferObjectDetectionInfo b)
{
return a.detectionConfidence > b.detectionConfidence;
}
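// Greedy IoU-based NMS; assumes input_boxes is already sorted by descending confidence (done in postprocessing()).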
void nms(std::vector<NvDsInferObjectDetectionInfo>& input_boxes, float NMS_THRESH)
{
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
vArea[i] = (input_boxes.at(i).width + 1) * (input_boxes.at(i).height + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i)
{
for (int j = i + 1; j < int(input_boxes.size());)
{
float xx1 = std::max(input_boxes[i].left, input_boxes[j].left);
float yy1 = std::max(input_boxes[i].top, input_boxes[j].top);
float xx2 =
std::min(input_boxes[i].left + input_boxes[i].width, input_boxes[j].left + input_boxes[j].width);
float yy2 =
std::min(input_boxes[i].top + input_boxes[i].height, input_boxes[j].top + input_boxes[j].height);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
bool NvDsInferParseCustomRetinaFace(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferObjectDetectionInfo>& objectList)
{
// Get output indexes
static int bboxLayerIndex = -1;
static int confLayerIndex = -1;
for (unsigned int i = 0; i < outputLayersInfo.size(); i++)
{
if (strcmp(outputLayersInfo[i].layerName, "output_det0") == 0)
{
bboxLayerIndex = static_cast<int>(i);
}
else if (strcmp(outputLayersInfo[i].layerName, "output_det1") == 0)
{
confLayerIndex = static_cast<int>(i);
}
}
if ((bboxLayerIndex == -1) || (confLayerIndex == -1))
{
std::cerr << "Could not find output layer buffer while parsing" << std::endl;
return false;
}
// Host pointers to the raw bbox and confidence output buffers
auto* bbox = (float*)outputLayersInfo[bboxLayerIndex].buffer;
auto* conf = (float*)outputLayersInfo[confLayerIndex].buffer;
// Get thresholds and topk value
const float bbox_threshold = detectionParams.perClassPreclusterThreshold[0];
const float nms_threshold = detectionParams.perClassPostclusterThreshold[0];
const unsigned int topk = detectionParams.numClassesConfigured;
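// Note: numClassesConfigured (num-detected-classes in config_temp.txt) is reused as the top-k limit; this is the "workaround" mentioned in the config.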
// Do post processing
postprocessing(bbox, conf, bbox_threshold, nms_threshold, topk, networkInfo.width, networkInfo.height, objectList);
return true;
}
/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomRetinaFace);