I am working with DeepStream 6.2, C++, and a Tesla T4. I am writing a custom postprocessing function to parse the output of a custom YOLO model.
The input size of the YOLO model is 1280x1280, while my nvstreammux is working on a size of 1280x720 (width x height).
Here’s the problem: if the object detected by yolo has points with the y coordinate greater than 720, the object will be discarded.
I have been able to prove this quite easily using the following code, by explicitly assigning values to NvDsInferParseObjectInfo
:
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <unordered_map>
#include <vector>

#include "nvdsinfer_custom_impl.h"
#include "trt_utils.h"
// Tunables for the YOLOv5 parser.
#define NMS_THRESH 0.5   // IoU threshold used during non-maximum suppression
#define CONF_THRESH 0.4  // minimum confidence for a detection to be kept
#define BATCH_SIZE 1     // parser is written for single-image batches
// Forward declaration with C linkage so the nvinfer plugin can resolve the
// parse function by its unmangled symbol name.
extern "C" bool NvDsInferParseCustomYoloV5(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Number of bbox coordinates per detection (cx, cy, w, h).
static constexpr int LOCATIONS = 4;
// One decoded detection, matching the per-record float layout of the model's
// output buffer.
// NOTE(review): nms() memcpy's raw floats directly onto this struct, so the
// field order and sizes here must match the network's output layout exactly.
struct alignas(float) Detection{
//center_x center_y w h
float bbox[LOCATIONS];
float conf; // bbox_conf * cls_conf
float class_id;
// float emptySpace[32]; // YoloV5s6 and YoloV5n6 would require this
};
// Clamp x into the inclusive range [lower, upper].
// Fix: the original called unqualified min/max, which do not resolve for
// scalar types such as float without `using namespace std;` (ADL does not
// apply) — qualify them explicitly so the template compiles.
template<class T>
const T& clamp(const T& x, const T& lower, const T& upper) {
    return std::min(upper, std::max(x, lower));
}
// Intersection-over-Union of two boxes in center format {cx, cy, w, h}.
// Returns 0 when the boxes do not overlap.
float iou(float lbox[4], float rbox[4]) {
    const float left   = std::max(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f);
    const float right  = std::min(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f);
    const float top    = std::max(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f);
    const float bottom = std::min(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f);

    // Disjoint in either axis -> no intersection.
    if (top > bottom || left > right)
        return 0.0f;

    const float inter = (right - left) * (bottom - top);
    // union = area(l) + area(r) - intersection
    return inter / (lbox[2] * lbox[3] + rbox[2] * rbox[3] - inter);
}
bool cmp(Detection& a, Detection& b) {
return a.conf > b.conf;
}
// Decode the raw network output buffer, filter by confidence, and run greedy
// per-class non-maximum suppression; survivors are appended to `res`.
//
// Buffer layout (as consumed here): output[0] holds the detection count,
// followed by `det_size + offset_between_detections` floats per detection;
// the first det_size floats of each record are memcpy'd straight onto a
// Detection {cx, cy, w, h, conf, class_id}.
// NOTE(review): uses std::map but the file only includes <unordered_map>;
// this presumably compiles via transitive includes — add <map> explicitly.
void nms(
std::vector<Detection>& res,
float *output,
float conf_thresh,
float nms_thresh = 0.5,
int offset_between_detections = 0)
{
int det_size = sizeof(Detection) / sizeof(float);
// Buckets detections by class id (key is the raw float class id).
std::map<float, std::vector<Detection>> m;
// Pass 1: confidence filter. output[0] is the count; 1000 is a safety cap.
for (int i = 0; i < output[0] && i < 1000; i++) {
// Offset 4 inside a record is the confidence field (after the 4 bbox floats).
if (output[1 + (det_size + offset_between_detections) * i + 4] <= conf_thresh) continue;
Detection det;
// Raw copy: assumes the record layout matches Detection exactly.
memcpy(&det, &output[1 + (det_size + offset_between_detections) * i], det_size * sizeof(float));
if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
m[det.class_id].push_back(det);
}
// Pass 2: per class, keep the highest-confidence box and erase any remaining
// box whose IoU with it exceeds nms_thresh; repeat down the sorted list.
for (auto it = m.begin(); it != m.end(); it++) {
//std::cout << it->second[0].class_id << " --- " << std::endl;
auto& dets = it->second;
std::sort(dets.begin(), dets.end(), cmp);
// NOTE(review): this loop index `m` shadows the map `m` above — it works
// (the shadow is scoped to the inner loop) but is worth renaming.
for (size_t m = 0; m < dets.size(); ++m) {
auto& item = dets[m];
res.push_back(item);
for (size_t n = m + 1; n < dets.size(); ++n) {
if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
dets.erase(dets.begin()+n);
--n; // stay on this index: erase shifted the next element into slot n
}
}
}
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/* This is a sample bounding box parsing function for the sample YoloV5m detector model */
/* This is a sample bounding box parsing function for the sample YoloV5m detector model */
// Runs NMS on the first output layer and converts each surviving Detection
// into an NvDsInferParseObjectInfo for the DeepStream pipeline.
// Fix: the parameter list was missing its closing ')' before the function
// body — a compile error in the original.
// NOTE(review): left/top/width/height are deliberately hard-coded to
// demonstrate that a box extending past the streammux frame height (e.g.
// height 721 on a 720-high stream) is silently dropped downstream; the
// clamped versions are kept commented out below.
static bool NvDsInferParseYoloV5(
    std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList,
    int offset_between_detections = 0)
{
    std::vector<Detection> res;
    // Decode + confidence filter + per-class NMS on the raw output buffer.
    nms(res, (float*)(outputLayersInfo[0].buffer), CONF_THRESH, NMS_THRESH, offset_between_detections);
    //std::cout<<"Nms done sucessfully----"<<std::endl;
    //std::cout << "Found " << res.size() << " objects after NMS" << std::endl;
    //std::cout << "Model height " << networkInfo.height << " width" << networkInfo.width << std::endl;
    for (auto& r : res) {
        NvDsInferParseObjectInfo oinfo;
        oinfo.classId = r.class_id;
        //oinfo.left = static_cast<unsigned int>(clamp(r.bbox[0]-r.bbox[2]*0.5f, 0.0f, networkInfo.width - 1));
        //oinfo.top = static_cast<unsigned int>(clamp(r.bbox[1]-r.bbox[3]*0.5f, 0.0f, networkInfo.height - 1));
        //oinfo.width = static_cast<unsigned int>(clamp(r.bbox[2], 0.0f, networkInfo.width - oinfo.left - 1));
        //oinfo.height = static_cast<unsigned int>(clamp(r.bbox[3], 0.0f, networkInfo.height - oinfo.top - 1));
        oinfo.left = 0;
        oinfo.top = 0;
        oinfo.width = 1280;
        oinfo.height = 720;
        oinfo.detectionConfidence = r.conf;
        std::cout << r.class_id << " " << r.conf << " | "
                  << oinfo.left << ";" << oinfo.top << ";"
                  << oinfo.width << ";" << oinfo.height << ";"
                  << std::endl;
        objectList.push_back(oinfo);
    }
    std::cout << "Returning " << objectList.size() << " objects after postprocessing" << std::endl;
    return true;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/* C-linkage to prevent name-mangling */
// Public entry point registered with nvinfer; forwards to the static
// implementation with no extra padding between detection records.
extern "C" bool NvDsInferParseCustomYoloV5(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
return NvDsInferParseYoloV5(
outputLayersInfo, networkInfo, detectionParams, objectList, 0);
}
/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV5);
The code above works fine, but if you switch line oinfo.height = 720;
with oinfo.height = 721;
it stops working. I mean, the code doesn’t raise any error, no object is actually received by the rest of the deepstream pipeline.
I used the following Python probe, attached to the pgie element, to see if the pipeline was receiving any object. When oinfo.height = 720;
, the Python probe prints “Counted 1 objects”. When oinfo.height = 721;
, it prints “Counted 0 objects”.
def pgie_src_pad_buffer_probe(self, pad, info, u_data):
    """Pad probe on pgie's src pad that counts the objects attached to each frame.

    Walks the batch meta's frame list and each frame's object meta list,
    printing the per-frame object count.

    Fixes: restored the indentation lost in the paste, and the early-exit
    path now returns a Gst.PadProbeReturn value instead of bare ``return``
    (a pad probe callback must return a Gst.PadProbeReturn).
    """
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        logger.error("Unable to get GstBuffer ")
        # Keep the pipeline flowing even when the buffer is missing.
        return Gst.PadProbeReturn.OK
    # Retrieve batch metadata from the gst_buffer.
    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
    # C address of gst_buffer as input, which is obtained with hash(gst_buffer).
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    # Acquire lock
    pyds.nvds_acquire_meta_lock(batch_meta)
    # Get frame list
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            # l_frame.data needs a cast to pyds.NvDsFrameMeta; the cast keeps
            # ownership of the underlying memory in the C code, so the Python
            # garbage collector will leave it alone.
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break
        l_obj = frame_meta.obj_meta_list
        objects_count = 0
        while l_obj is not None:
            try:
                # Casting l_obj.data to pyds.NvDsObjectMeta
                obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break
            # Count object
            objects_count += 1
            try:
                l_obj = l_obj.next
            except StopIteration:
                break
        print(f"Counted {objects_count} objects")
        try:
            l_frame = l_frame.next
        except StopIteration:
            break
    # Release lock
    pyds.nvds_release_meta_lock(batch_meta)
    return Gst.PadProbeReturn.OK
To recap, the code above proves that if an object has any y coordinate > nvstreammux height
, the deepstream pipeline will discard the object without raising any error.
The obvious solution would be to clip the value of all the y coordinates of an object such that 0 <= y <= nvstreammux height
. However, how can I access nvstreammux width from the NvDsInferParseYoloV5 function in C++? I found a way to access the height and width of the model, which are 1280x1280, but I can’t find a way to get the nvstreammux coordinates which are 1280x720.
My goal is simply to avoid deepstream discarding any object.
Thank you