• Hardware Platform (GPU)
• DeepStream Version 5.0
• TensorRT Version 7.0
• NVIDIA GPU Driver Version (valid for GPU only)
• Issue Type (questions)
• Able to load and infer the TensorRT RetinaFace model on DeepStream.
• Written a custom bbox parser that draws an appropriate bbox on the face.
Requirements
- Want to display the landmark points, on the OSD.
- Also want to use the (bbox and landmarks) -----> Affine-transformation----->intermediate output------> Secondary-GIE.
Here is the custom_bbox_parser file
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <unordered_map>
#include "nvdsinfer_custom_impl.h"
#include "gstnvdsmeta.h"
#define LNM_ARRAY_SIZE 11
/* Restrict val to the inclusive range [minVal, maxVal]. */
float clamp(const float val, const float minVal, const float maxVal)
{
    assert(minVal <= maxVal);
    if (val < minVal)
        return minVal;
    if (val > maxVal)
        return maxVal;
    return val;
}
/* Forward declaration of the custom RetinaFace bbox parser entry point
 * (C linkage so nvinfer can dlsym it); the definition appears later in
 * this file. */
extern "C" bool NvDsInferParseCustomRetinaFace(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList);
/* Copies the 10 landmark coordinates (arr[1]..arr[10]; arr[0] holds the
 * detection confidence) into a freshly allocated gfloat buffer suitable for
 * NvDsUserMeta::user_meta_data. The caller owns the returned buffer
 * (release with g_free, or via a user-meta release callback).
 *
 * Fix: the original loop wrote to user_metadata[1..10], which (a) left
 * slot 0 permanently zero and (b) wrote one element past the end of the
 * 10-float allocation — a heap buffer overflow. The landmarks are now
 * copied into user_metadata[0..9]. */
void *set_metadata_ptr(std::array<float, LNM_ARRAY_SIZE> & arr)
{
    gfloat *user_metadata = (gfloat*)g_malloc0(10*sizeof(gfloat));
    for(int i = 1; i < 11; i++) {
        user_metadata[i - 1] = arr[i];
    }
    return (void *)user_metadata;
}
// Orders detections by descending confidence (for std::sort).
static bool cmp(const NvDsInferParseObjectInfo & a, const NvDsInferParseObjectInfo & b) {
    return b.detectionConfidence < a.detectionConfidence;
}
// Orders landmark arrays by descending confidence; index 0 of each array
// stores the detection confidence, mirroring cmp() above so the two
// parallel vectors sort into the same order.
static bool cmp1(const std::array<float, LNM_ARRAY_SIZE> &a, const std::array<float, LNM_ARRAY_SIZE> &b) {
    return b[0] < a[0];
}
/* Intersection-over-union of two detections whose boxes are stored as
 * left/top/width/height. Returns 0 when the boxes do not overlap; a tiny
 * epsilon in the denominator guards against division by zero. */
static float iou( const NvDsInferParseObjectInfo & lbox, const NvDsInferParseObjectInfo & rbox) {
    const float xLeft   = std::max(lbox.left, rbox.left);
    const float xRight  = std::min(lbox.width + lbox.left, rbox.width + rbox.left);
    const float yTop    = std::max(lbox.top, rbox.top);
    const float yBottom = std::min(lbox.height + lbox.top, rbox.height + rbox.top);

    // No overlap in either axis.
    if (yTop > yBottom || xLeft > xRight)
        return 0.0f;

    const float interArea = (xRight - xLeft) * (yBottom - yTop);
    const float lArea = lbox.width * lbox.height;
    const float rArea = rbox.width * rbox.height;
    return interArea / (lArea + rArea - interArea + 0.000001f);
}
/* RetinaFace implementations */
/* Converts a corner-form box (x1,y1,x2,y2) into DeepStream's
 * left/top/width/height form, clipping every coordinate to the network
 * input resolution (netW x netH). */
static NvDsInferParseObjectInfo convertBBoxRetinaFace(const float& bx1, const float& by1, const float& bx2,
const float& by2, const uint& netW, const uint& netH)
{
    // Clip both corners to the network input frame first.
    const float left   = clamp(bx1, 0, netW);
    const float top    = clamp(by1, 0, netH);
    const float right  = clamp(bx2, 0, netW);
    const float bottom = clamp(by2, 0, netH);

    NvDsInferParseObjectInfo box;
    box.left   = left;
    box.top    = top;
    box.width  = clamp(right - left, 0, netW);
    box.height = clamp(bottom - top, 0, netH);
    return box;
}
/* Validates one decoded detection and appends it — together with its
 * landmark array — to the two parallel output vectors (binfo / i_lnm_vec),
 * which must stay index-aligned for the NMS stage.
 *
 * Fix: the 3rd/4th coordinates were named bw/bh, but the caller passes the
 * bottom-right corner (x2, y2), not a width/height — renamed to bx2/by2 to
 * match what convertBBoxRetinaFace actually receives. Calls are positional
 * in C++, so this is fully backward-compatible. */
static void addBBoxProposalRetinaFace(const float bx1, const float by1, const float bx2, const float by2,
                                      const uint& netW, const uint& netH, const int maxIndex,
                                      const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo,
                                      std::array<float, LNM_ARRAY_SIZE>& lnm,
                                      std::vector<std::array<float, LNM_ARRAY_SIZE>>& i_lnm_vec)
{
    NvDsInferParseObjectInfo bbi = convertBBoxRetinaFace(bx1, by1, bx2, by2, netW, netH);
    // Drop degenerate boxes so downstream consumers never see empty rects.
    if (bbi.width < 1 || bbi.height < 1) return;
    bbi.detectionConfidence = maxProb;
    bbi.classId = maxIndex;
    // Landmarks travel in a parallel vector because NvDsInferParseObjectInfo
    // has no landmark field; both pushes must happen together.
    binfo.push_back(bbi);
    i_lnm_vec.push_back(lnm);
}
/* Decodes the RetinaFace output tensor into DeepStream object proposals.
 *
 * Tensor layout (per the indexing below): boxes[0] = detection count, then
 * 15 floats per detection: [x1, y1, x2, y2, conf, 10 landmark values].
 * Landmark arrays are carried in a vector parallel to the box vector;
 * element 0 of each landmark array is the confidence, so cmp and cmp1 sort
 * both vectors with the same key and they stay index-aligned (exact
 * confidence ties could in principle desync them — rare in practice).
 *
 * Fix: the original built NvDsUserMeta entries here through an
 * UNINITIALIZED pointer (`NvDsUserMeta* um1; um1->user_meta_data = ...`) —
 * undefined behavior / crash — and appended them to a local GList that was
 * leaked and never attached to anything. That block has been removed:
 * user metadata cannot be attached from a bbox parser at all; it must be
 * added in a GStreamer pad probe where NvDsBatchMeta/NvDsObjectMeta are
 * available (acquire meta from the batch pool, fill user_meta_data via
 * set_metadata_ptr, attach with nvds_add_user_meta_to_obj). */
static std::vector<NvDsInferParseObjectInfo> decodeRetinaFaceTensor(
    const float* boxes,
    NvDsInferParseDetectionParams const& detectionParams,
    const uint& netW, const uint& netH)
{
    std::vector<NvDsInferParseObjectInfo> binfo;                    // pre-NMS boxes
    std::vector<std::array<float, LNM_ARRAY_SIZE>> i_lnm_vec;       // pre-NMS landmarks
    std::array<float, LNM_ARRAY_SIZE> lnm;

    for (uint b = 0; b < boxes[0]; b++) {
        const float* det = boxes + 15 * b + 1;    // this detection's 15 floats
        float maxProb = det[4];
        if (maxProb <= 0.1) continue;             // cheap confidence pre-filter
        // Corner-form bbox.
        float bx1 = det[0];
        float by1 = det[1];
        float bx2 = det[2];
        float by2 = det[3];
        // lnm[0] = confidence, lnm[1..10] = five (x, y) landmark pairs.
        for (uint i = 0; i < LNM_ARRAY_SIZE; i++)
            lnm[i] = det[4 + i];
        int maxIndex = 0;                          // single class: face
        if (maxProb > detectionParams.perClassPreclusterThreshold[maxIndex])
        {
            addBBoxProposalRetinaFace(bx1, by1, bx2, by2, netW, netH, maxIndex,
                                      maxProb, binfo, lnm, i_lnm_vec);
        }
    }

    // Sort both parallel vectors by descending confidence (same key).
    std::sort(binfo.begin(), binfo.end(), cmp);
    std::sort(i_lnm_vec.begin(), i_lnm_vec.end(), cmp1);
    assert(binfo.size() == i_lnm_vec.size());

    // Greedy NMS with IoU threshold 0.4, erasing suppressed entries from
    // BOTH vectors so boxes and landmarks stay in lock-step.
    std::vector<NvDsInferParseObjectInfo> res;
    std::vector<std::array<float, LNM_ARRAY_SIZE>> f_lnm_vec;
    for (size_t m = 0; m < binfo.size(); ++m) {
        auto& item = binfo[m];
        res.push_back(item);
        f_lnm_vec.push_back(i_lnm_vec[m]);
        for (size_t n = m + 1; n < binfo.size(); ++n) {
            if (iou(item, binfo[n]) > 0.4) {
                binfo.erase(binfo.begin() + n);
                i_lnm_vec.erase(i_lnm_vec.begin() + n);
                --n;
            }
        }
    }
    assert(res.size() == f_lnm_vec.size());

    // f_lnm_vec holds the surviving landmarks in the same order as res,
    // ready to be exported once a proper attachment point (pad probe) exists.
    (void)f_lnm_vec;

    return res;
}
extern "C" bool NvDsInferParseCustomRetinaFace(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>& objectList)
{
std::vector<NvDsInferParseObjectInfo> objects;
const NvDsInferLayerInfo &boxes = outputLayersInfo[0]; // num_boxes x 4
// 3 dimensional: [num_boxes, 1, 4]
assert(boxes.inferDims.numDims == 3);
//std::cout << "Network Info: " << networkInfo.height << " " << networkInfo.width << std::endl;
std::vector<NvDsInferParseObjectInfo> outObjs =
decodeRetinaFaceTensor(
(const float*)(boxes.buffer), detectionParams,
networkInfo.width, networkInfo.height);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
objectList = objects;
return true;
}
/* RetinaFace implementations end*/
/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomRetinaFace);