I am working with DeepStream 6.2, C++, and a Tesla T4. I am writing a custom postprocessing function to parse the output of a custom YOLO model.
The input size of the YOLO model is 1280x1280, while my nvstreammux is working on a size of 1280x720 (width x height).
Here’s the problem: if the object detected by yolo has points with the y coordinate greater than 720, the object will be discarded.
I have been able to prove this quite easily using the following code, by explicitly assigning values to NvDsInferParseObjectInfo
:
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <unordered_map>
#include <vector>

#include "nvdsinfer_custom_impl.h"
#include "trt_utils.h"
// Tunables for the YOLOv5 parser.
#define NMS_THRESH 0.5   // IoU threshold used during non-maximum suppression
#define CONF_THRESH 0.4  // minimum confidence for a detection to be kept
#define BATCH_SIZE 1     // parser is written for single-image batches
// Forward declaration with C linkage so the nvinfer plugin can resolve the
// parse function by its unmangled symbol name.
extern "C" bool NvDsInferParseCustomYoloV5(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Number of bbox coordinates per detection (cx, cy, w, h).
static constexpr int LOCATIONS = 4;
// One decoded detection, matching the per-record float layout of the model's
// output buffer.
// NOTE(review): nms() memcpy's raw floats directly onto this struct, so the
// field order and sizes here must match the network's output layout exactly.
struct alignas(float) Detection{
//center_x center_y w h
float bbox[LOCATIONS];
float conf; // bbox_conf * cls_conf
float class_id;
// float emptySpace[32]; // YoloV5s6 and YoloV5n6 would require this
};
// Clamp x into the inclusive range [lower, upper].
// Fix: the original called unqualified min/max, which do not resolve for
// scalar types such as float without `using namespace std;` (ADL does not
// apply) — qualify them explicitly so the template compiles.
template<class T>
const T& clamp(const T& x, const T& lower, const T& upper) {
    return std::min(upper, std::max(x, lower));
}
// Intersection-over-Union of two boxes in center format {cx, cy, w, h}.
// Returns 0 when the boxes do not overlap.
float iou(float lbox[4], float rbox[4]) {
    const float left   = std::max(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f);
    const float right  = std::min(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f);
    const float top    = std::max(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f);
    const float bottom = std::min(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f);

    // Disjoint in either axis -> no intersection.
    if (top > bottom || left > right)
        return 0.0f;

    const float inter = (right - left) * (bottom - top);
    // union = area(l) + area(r) - intersection
    return inter / (lbox[2] * lbox[3] + rbox[2] * rbox[3] - inter);
}
bool cmp(Detection& a, Detection& b) {
return a.conf > b.conf;
}
// Decode the raw network output buffer, filter by confidence, and run greedy
// per-class non-maximum suppression; survivors are appended to `res`.
//
// Buffer layout (as consumed here): output[0] holds the detection count,
// followed by `det_size + offset_between_detections` floats per detection;
// the first det_size floats of each record are memcpy'd straight onto a
// Detection {cx, cy, w, h, conf, class_id}.
// NOTE(review): uses std::map but the file only includes <unordered_map>;
// this presumably compiles via transitive includes — add <map> explicitly.
void nms(
std::vector<Detection>& res,
float *output,
float conf_thresh,
float nms_thresh = 0.5,
int offset_between_detections = 0)
{
int det_size = sizeof(Detection) / sizeof(float);
// Buckets detections by class id (key is the raw float class id).
std::map<float, std::vector<Detection>> m;
// Pass 1: confidence filter. output[0] is the count; 1000 is a safety cap.
for (int i = 0; i < output[0] && i < 1000; i++) {
// Offset 4 inside a record is the confidence field (after the 4 bbox floats).
if (output[1 + (det_size + offset_between_detections) * i + 4] <= conf_thresh) continue;
Detection det;
// Raw copy: assumes the record layout matches Detection exactly.
memcpy(&det, &output[1 + (det_size + offset_between_detections) * i], det_size * sizeof(float));
if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
m[det.class_id].push_back(det);
}
// Pass 2: per class, keep the highest-confidence box and erase any remaining
// box whose IoU with it exceeds nms_thresh; repeat down the sorted list.
for (auto it = m.begin(); it != m.end(); it++) {
//std::cout << it->second[0].class_id << " --- " << std::endl;
auto& dets = it->second;
std::sort(dets.begin(), dets.end(), cmp);
// NOTE(review): this loop index `m` shadows the map `m` above — it works
// (the shadow is scoped to the inner loop) but is worth renaming.
for (size_t m = 0; m < dets.size(); ++m) {
auto& item = dets[m];
res.push_back(item);
for (size_t n = m + 1; n < dets.size(); ++n) {
if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
dets.erase(dets.begin()+n);
--n; // stay on this index: erase shifted the next element into slot n
}
}
}
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/* This is a sample bounding box parsing function for the sample YoloV5m detector model */
/* This is a sample bounding box parsing function for the sample YoloV5m detector model */
// Runs NMS on the first output layer and converts each surviving Detection
// into an NvDsInferParseObjectInfo for the DeepStream pipeline.
// Fix: the parameter list was missing its closing ')' before the function
// body — a compile error in the original.
// NOTE(review): left/top/width/height are deliberately hard-coded to
// demonstrate that a box extending past the streammux frame height (e.g.
// height 721 on a 720-high stream) is silently dropped downstream; the
// clamped versions are kept commented out below.
static bool NvDsInferParseYoloV5(
    std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList,
    int offset_between_detections = 0)
{
    std::vector<Detection> res;
    // Decode + confidence filter + per-class NMS on the raw output buffer.
    nms(res, (float*)(outputLayersInfo[0].buffer), CONF_THRESH, NMS_THRESH, offset_between_detections);
    //std::cout<<"Nms done sucessfully----"<<std::endl;
    //std::cout << "Found " << res.size() << " objects after NMS" << std::endl;
    //std::cout << "Model height " << networkInfo.height << " width" << networkInfo.width << std::endl;
    for (auto& r : res) {
        NvDsInferParseObjectInfo oinfo;
        oinfo.classId = r.class_id;
        //oinfo.left = static_cast<unsigned int>(clamp(r.bbox[0]-r.bbox[2]*0.5f, 0.0f, networkInfo.width - 1));
        //oinfo.top = static_cast<unsigned int>(clamp(r.bbox[1]-r.bbox[3]*0.5f, 0.0f, networkInfo.height - 1));
        //oinfo.width = static_cast<unsigned int>(clamp(r.bbox[2], 0.0f, networkInfo.width - oinfo.left - 1));
        //oinfo.height = static_cast<unsigned int>(clamp(r.bbox[3], 0.0f, networkInfo.height - oinfo.top - 1));
        oinfo.left = 0;
        oinfo.top = 0;
        oinfo.width = 1280;
        oinfo.height = 720;
        oinfo.detectionConfidence = r.conf;
        std::cout << r.class_id << " " << r.conf << " | "
                  << oinfo.left << ";" << oinfo.top << ";"
                  << oinfo.width << ";" << oinfo.height << ";"
                  << std::endl;
        objectList.push_back(oinfo);
    }
    std::cout << "Returning " << objectList.size() << " objects after postprocessing" << std::endl;
    return true;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/* C-linkage to prevent name-mangling */
// Public entry point registered with nvinfer; forwards to the static
// implementation with no extra padding between detection records.
extern "C" bool NvDsInferParseCustomYoloV5(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
return NvDsInferParseYoloV5(
outputLayersInfo, networkInfo, detectionParams, objectList, 0);
}
/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV5);
The code above works fine, but if you switch line oinfo.height = 720;
with oinfo.height = 721;
it stops working. I mean, the code doesn’t raise any error, no object is actually received by the rest of the deepstream pipeline.
I used the following Python probe, attached to the pgie element, to see if the pipeline was receiving any object. When oinfo.height = 720;
, the Python probe prints “Counted 1 objects”. When oinfo.height = 721;
, it prints “Counted 0 objects”.
def pgie_src_pad_buffer_probe(self, pad, info, u_data):
    """Pad probe on pgie's src pad that counts the objects attached to each frame.

    Walks the batch meta's frame list and each frame's object meta list,
    printing the per-frame object count.

    Fixes: restored the indentation lost in the paste, and the early-exit
    path now returns a Gst.PadProbeReturn value instead of bare ``return``
    (a pad probe callback must return a Gst.PadProbeReturn).
    """
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        logger.error("Unable to get GstBuffer ")
        # Keep the pipeline flowing even when the buffer is missing.
        return Gst.PadProbeReturn.OK
    # Retrieve batch metadata from the gst_buffer.
    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
    # C address of gst_buffer as input, which is obtained with hash(gst_buffer).
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    # Acquire lock
    pyds.nvds_acquire_meta_lock(batch_meta)
    # Get frame list
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            # l_frame.data needs a cast to pyds.NvDsFrameMeta; the cast keeps
            # ownership of the underlying memory in the C code, so the Python
            # garbage collector will leave it alone.
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break
        l_obj = frame_meta.obj_meta_list
        objects_count = 0
        while l_obj is not None:
            try:
                # Casting l_obj.data to pyds.NvDsObjectMeta
                obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break
            # Count object
            objects_count += 1
            try:
                l_obj = l_obj.next
            except StopIteration:
                break
        print(f"Counted {objects_count} objects")
        try:
            l_frame = l_frame.next
        except StopIteration:
            break
    # Release lock
    pyds.nvds_release_meta_lock(batch_meta)
    return Gst.PadProbeReturn.OK
To recap, the code above proves that if an object has any y coordinate > nvstreammux height
, the deepstream pipeline will discard the object without raising any error.
The obvious solution would be to clip the value of all the y coordinates of an object such that 0 <= y <= nvstreammux height
. However, how can I access nvstreammux width from the NvDsInferParseYoloV5 function in C++? I found a way to access the height and width of the model, which are 1280x1280, but I can’t find a way to get the nvstreammux coordinates which are 1280x720.
My goal is simply to avoid deepstream discarding any object.
Thank you