Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU): Jetson Xavier
• DeepStream Version: 6.3.0
hbrain@ubuntu:~/DeepStream-Yolo$ deepstream-app --version-all
deepstream-app version 6.3.0
DeepStreamSDK 6.3.0
CUDA Driver Version: 11.4
CUDA Runtime Version: 11.4
TensorRT Version: 8.5
cuDNN Version: 8.6
libNVWarp360 Version: 2.0.1d3
hbrain@ubuntu:~/DeepStream-Yolo$
• JetPack Version: 5.1.3
• TensorRT Version: 8.5
• NVIDIA GPU Driver Version (valid for GPU only): N/A (running on Jetson)
• Issue Type: ERROR from src_elem: Internal data stream error.
I am trying to do object detection with YOLOv8 in DeepStream 6.3.0.
Hi, I am working on deploying a YOLOv8 model to an NVIDIA Jetson Xavier.
I am using the GitHub repo marcoslucianops/DeepStream-Yolo (NVIDIA DeepStream SDK 7.1 / 7.0 / 6.4 / 6.3 / 6.2 / 6.1.1 / 6.1 / 6.0.1 / 6.0 / 5.1 implementation for YOLO models).
I could not fully follow the logic in the nvdsparsebbox_Yolo.cpp file. What I want is to print the bbox results to the terminal and then send them via MQTT, so I added print statements to inspect the bbox information:
#include "nvdsinfer_custom_impl.h"
#include "utils.h"
#include <iomanip>
extern "C" bool
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
static NvDsInferParseObjectInfo
convertBBox(const float& bx1, const float& by1, const float& bx2, const float& by2, const uint& netW, const uint& netH)
{
  NvDsInferParseObjectInfo b;
  float x1 = bx1;
  float y1 = by1;
  float x2 = bx2;
  float y2 = by2;

  // 🔍 DEBUG: Print raw YOLO output coordinates (still in network input space)
  std::cout << "🔍 RAW YOLO OUTPUT:" << std::endl;
  std::cout << " → Network Resolution: " << netW << "x" << netH << std::endl;
  std::cout << " → Raw coordinates: (" << bx1 << ", " << by1 << ") to (" << bx2 << ", " << by2 << ")" << std::endl;
  std::cout << " → Raw size: " << std::fixed << std::setprecision(1) << (bx2 - bx1) << "x" << (by2 - by1) << " pixels" << std::endl;

  // Clamp the corner points to the network input resolution
  x1 = clamp(x1, 0, netW);
  y1 = clamp(y1, 0, netH);
  x2 = clamp(x2, 0, netW);
  y2 = clamp(y2, 0, netH);

  // Convert the (x1, y1, x2, y2) corners to left/top/width/height
  b.left = x1;
  b.width = clamp(x2 - x1, 0, netW);
  b.top = y1;
  b.height = clamp(y2 - y1, 0, netH);

  // 🔍 DEBUG: Print processed coordinates
  std::cout << "🔍 PROCESSED BBOX (" << netW << "x" << netH << "):" << std::endl;
  std::cout << " → Final BBox: left=" << (int)b.left << ", top=" << (int)b.top << ", width=" << (int)b.width << ", height=" << (int)b.height << std::endl;
  std::cout << " → Top-Left: (" << (int)b.left << ", " << (int)b.top << ")" << std::endl;
  std::cout << " → Bottom-Right: (" << (int)(b.left + b.width) << ", " << (int)(b.top + b.height) << ")" << std::endl;

  // 🔍 DEBUG: Show what this would look like scaled to 1920x1080 (plain per-axis scaling)
  if (netW == 640 && netH == 640) {
    float scale_x = 1920.0f / 640.0f; // 3.0
    float scale_y = 1080.0f / 640.0f; // 1.6875
    std::cout << "🔍 SCALED TO 1920x1080:" << std::endl;
    std::cout << " → Scaled BBox: left=" << (int)(b.left * scale_x) << ", top=" << (int)(b.top * scale_y)
        << ", width=" << (int)(b.width * scale_x) << ", height=" << (int)(b.height * scale_y) << std::endl;
    std::cout << " → Scaled Top-Left: (" << (int)(b.left * scale_x) << ", " << (int)(b.top * scale_y) << ")" << std::endl;
    std::cout << " → Scaled Bottom-Right: (" << (int)((b.left + b.width) * scale_x) << ", " << (int)((b.top + b.height) * scale_y) << ")" << std::endl;
  }
  std::cout << " ═══════════════════════════════════════════════════════════════" << std::endl;

  return b;
}
static void
addBBoxProposal(const float bx1, const float by1, const float bx2, const float by2, const uint& netW, const uint& netH,
    const int maxIndex, const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
{
  std::cout << "\n📦 BBOX PROPOSAL #" << (binfo.size() + 1) << ":" << std::endl;
  std::cout << " → Resolution Context: " << netW << "x" << netH << std::endl;
  std::cout << " → Class: " << maxIndex << " | Confidence: " << std::fixed << std::setprecision(3) << maxProb << std::endl;

  NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);

  // Reject degenerate boxes (zero or negative extent after clamping)
  if (bbi.width < 1 || bbi.height < 1) {
    std::cout << "❌ REJECTED: BBox too small (width=" << bbi.width << ", height=" << bbi.height << ")" << std::endl;
    std::cout << "───────────────────────────────────────────────────────────────" << std::endl;
    return;
  }

  bbi.detectionConfidence = maxProb;
  bbi.classId = maxIndex;
  binfo.push_back(bbi);

  std::cout << "✅ ADDED TO DEEPSTREAM QUEUE" << std::endl;
  std::cout << "───────────────────────────────────────────────────────────────" << std::endl;
}
static std::vector<NvDsInferParseObjectInfo>
decodeTensorYolo(const float* output, const uint& outputSize, const uint& netW, const uint& netH,
    const std::vector<float>& preclusterThreshold)
{
  std::vector<NvDsInferParseObjectInfo> binfo;

  std::cout << "\n🔍 YOLO TENSOR DECODING:" << std::endl;
  std::cout << " → Output size: " << outputSize << " detections" << std::endl;
  std::cout << " → Network resolution: " << netW << "x" << netH << std::endl;
  std::cout << " → Confidence threshold: " << preclusterThreshold[0] << std::endl;
  std::cout << "═══════════════════════════════════════════════════════════════════════════════════════" << std::endl;

  int valid_detections = 0;
  // Each detection occupies 6 floats: x1, y1, x2, y2, confidence, classId
  for (uint b = 0; b < outputSize; ++b) {
    float maxProb = output[b * 6 + 4];
    int maxIndex = (int) output[b * 6 + 5];

    if (maxProb < preclusterThreshold[maxIndex]) {
      continue;
    }
    if (maxIndex != 0) { // Only process the person class
      continue;
    }

    valid_detections++;
    float bx1 = output[b * 6 + 0];
    float by1 = output[b * 6 + 1];
    float bx2 = output[b * 6 + 2];
    float by2 = output[b * 6 + 3];

    std::cout << "\n🎯 VALID DETECTION #" << valid_detections << " FOUND:" << std::endl;
    addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
  }

  std::cout << "\n📊 YOLO DECODING SUMMARY:" << std::endl;
  std::cout << " → Total raw outputs: " << outputSize << std::endl;
  std::cout << " → Valid detections: " << valid_detections << std::endl;
  std::cout << " → Final proposals: " << binfo.size() << std::endl;
  std::cout << "═══════════════════════════════════════════════════════════════════════════════════════" << std::endl;

  return binfo;
}
static bool
NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
  if (outputLayersInfo.empty()) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  std::cout << "\n🚀 YOLO PARSING STARTED:" << std::endl;
  std::cout << "╔══════════════════════════════════════════════════════════════════════════════╗" << std::endl;
  std::cout << "║ YOLO PARSING DEBUG ║" << std::endl;
  std::cout << "╠══════════════════════════════════════════════════════════════════════════════╣" << std::endl;
  std::cout << "║ Network Input Resolution : " << std::setw(4) << networkInfo.width << "x" << std::setw(4) << networkInfo.height << " ║" << std::endl;
  std::cout << "║ Model processes in : " << std::setw(4) << networkInfo.width << "x" << std::setw(4) << networkInfo.height << " coordinate space ║" << std::endl;
  std::cout << "║ DeepStream will display : scaled to output resolution ║" << std::endl;
  std::cout << "╚══════════════════════════════════════════════════════════════════════════════╝" << std::endl;

  std::vector<NvDsInferParseObjectInfo> objects;

  // Decode the first (and only) output layer; its first dimension is the detection count
  const NvDsInferLayerInfo& output = outputLayersInfo[0];
  const uint outputSize = output.inferDims.d[0];

  std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (output.buffer), outputSize,
      networkInfo.width, networkInfo.height, detectionParams.perClassPreclusterThreshold);
  objects.insert(objects.end(), outObjs.begin(), outObjs.end());

  // 🚀 FINAL DEBUG: What's actually being sent to DeepStream
  std::cout << "\n🚀 FINAL OUTPUT TO DEEPSTREAM:" << std::endl;
  std::cout << "╔══════════════════════════════════════════════════════════════════════════════╗" << std::endl;
  std::cout << "║ EXACT DATA SENT TO DEEPSTREAM OSD ║" << std::endl;
  std::cout << "╚══════════════════════════════════════════════════════════════════════════════╝" << std::endl;

  if (objects.size() == 0) {
    std::cout << "⚠️ NO OBJECTS TO SEND TO DEEPSTREAM - Nothing will be drawn" << std::endl;
  } else {
    for (size_t i = 0; i < objects.size(); i++) {
      const auto& obj = objects[i];
      std::cout << "\n🎯 DeepStream Object [" << (i+1) << "/" << objects.size() << "]:" << std::endl;
      std::cout << " → ClassID: " << obj.classId << " (Person)" << std::endl;
      std::cout << " → Confidence: " << std::fixed << std::setprecision(3) << obj.detectionConfidence << std::endl;
      std::cout << " → EXACT OSD Coordinates (" << networkInfo.width << "x" << networkInfo.height << "):" << std::endl;
      std::cout << " left=" << (int)obj.left << ", top=" << (int)obj.top << ", width=" << (int)obj.width << ", height=" << (int)obj.height << std::endl;
      std::cout << " → Top-Left: (" << (int)obj.left << ", " << (int)obj.top << ")" << std::endl;
      std::cout << " → Bottom-Right: (" << (int)(obj.left + obj.width) << ", " << (int)(obj.top + obj.height) << ")" << std::endl;

      // Show the expected display size for a 640x640 model (plain per-axis scaling)
      if (networkInfo.width == 640 && networkInfo.height == 640) {
        float scale_x = 1920.0f / 640.0f;
        float scale_y = 1080.0f / 640.0f;
        std::cout << " → Expected Display Size (if 1920x1080 output):" << std::endl;
        std::cout << " left=" << (int)(obj.left * scale_x) << ", top=" << (int)(obj.top * scale_y)
            << ", width=" << (int)(obj.width * scale_x) << ", height=" << (int)(obj.height * scale_y) << std::endl;
      }
      std::cout << " ═══════════════════════════════════════════════════════════════" << std::endl;
    }
  }

  std::cout << "\n📺 NEXT STEP: DeepStream OSD will draw " << objects.size() << " bounding boxes on video" << std::endl;
  std::cout << " → If display shows DIFFERENT sizes than above coordinates," << std::endl;
  std::cout << "   then tracker/DeepStream is modifying them post-processing." << std::endl;
  std::cout << " → If display shows SAME sizes as above coordinates," << std::endl;
  std::cout << "   then this is exactly what YOLO model detected." << std::endl;
  std::cout << "═══════════════════════════════════════════════════════════════════════════════════════\n" << std::endl;

  objectList = objects;
  return true;
}
extern "C" bool
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseCustomYolo(outputLayersInfo, networkInfo, detectionParams, objectList);
}
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
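For the MQTT part, this is roughly the publishing code I plan to use once the coordinates are right (a minimal standalone sketch assuming libmosquitto is available on the Jetson; the broker address, topic name, and JSON layout are placeholders I made up):

// Sketch: publish one bbox over MQTT with libmosquitto (link with -lmosquitto).
#include <mosquitto.h>
#include <cstdio>

int main()
{
  mosquitto_lib_init();
  struct mosquitto* mosq = mosquitto_new("bbox-publisher", true, nullptr);
  if (!mosq || mosquitto_connect(mosq, "localhost", 1883, 60) != MOSQ_ERR_SUCCESS) {
    std::fprintf(stderr, "MQTT connect failed\n");
    return 1;
  }

  // One detection from the debug output below, serialized as a small JSON payload.
  char payload[128];
  int len = std::snprintf(payload, sizeof(payload),
      "{\"classId\":0,\"conf\":0.471,\"left\":473,\"top\":317,\"width\":14,\"height\":47}");
  mosquitto_publish(mosq, nullptr, "detections/person", len, payload, 0, false);

  mosquitto_disconnect(mosq);
  mosquitto_destroy(mosq);
  mosquitto_lib_cleanup();
  return 0;
}

In the real pipeline I would publish from a pad probe or wherever the parsed objects land, not from main(); this is just to show the library calls I intend to use.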
The debug output from this parser looks like this:
🎯 DeepStream Object [1/4]:
→ ClassID: 0 (Person)
→ Confidence: 0.471
→ EXACT OSD Coordinates (640x640):
left=473, top=317, width=14, height=47
→ Top-Left: (473, 317)
→ Bottom-Right: (487, 364)
→ Expected Display Size (if 1920x1080 output):
left=1419, top=535, width=42, height=79
I then plotted this bbox info onto the image myself, but the box covers only about half of the object, whereas deepstream-app draws it over the whole object.
My image size is 1920x1080, so the detected object's bbox should be around left=1419, top=535, width=42, height=150.
There is no issue with the left, top, and width values, but the height comes out roughly half of what it should be (79 instead of ~150).
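For reference, here is my arithmetic check on the height (a standalone sketch, not part of the parser; it assumes plain per-axis scaling from the 640x640 network space to the 1920x1080 frame, the same math as my debug prints):

// Sketch: compare per-axis vs uniform scaling for the height of detection #1.
#include <cstdio>

int main()
{
  const float netW = 640.0f, netH = 640.0f;
  const float dispW = 1920.0f, dispH = 1080.0f;

  // Height of detection #1 above, in network input space.
  const float height = 47.0f;

  const float sx = dispW / netW; // 3.0
  const float sy = dispH / netH; // 1.6875

  // Per-axis scaling (what my code does): 47 * 1.6875 = 79.3 -> half the object
  std::printf("per-axis height: %.1f\n", height * sy);

  // Uniform scaling by the x factor: 47 * 3.0 = 141.0 -> close to the ~150 I expect
  std::printf("uniform height:  %.1f\n", height * sx);
  return 0;
}

Since height × 3.0 ≈ 141 is close to the ~150 I expect, I suspect my vertical scale factor is wrong (perhaps because nvinfer letterboxes the input when maintain-aspect-ratio=1, so both axes use the same factor plus a vertical offset), but I am not sure.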
Can anyone help me?