Description: I am using a YOLOv8 object detection model, which we trained to detect objects such as shoes and goggles, and I have integrated it into DeepStream for object detection.
Issue: I created a custom bounding-box parser for the YOLOv8 model and added it to DeepStream, but I am getting a black screen in the display window, as shown in the screenshot below:
Environment
TensorRT Version: 8.5.2
GPU Type: Jetson Xavier NX
Nvidia Driver Version: JetPack 5.1.4
CUDA Version: 11.5
Operating System + Version: Ubuntu 20.04
Python Version (if applicable): Python 3.8
Relevant Files
My parser code is:
#include "nvdsinfer_custom_impl.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>
#include <unordered_map>
#define DIVUP(x, y) (((x) + (y) - 1) / (y))
static const int NUM_CLASSES_YOLO = 5; // Number of classes for YOLOv8 object detection
float clamp(const float val, const float minVal, const float maxVal) {
assert(minVal <= maxVal);
return std::min(maxVal, std::max(minVal, val));
}
static NvDsInferParseObjectInfo
convertBBoxYolo(const float &bx, const float &by, const float &bw,
const float &bh, const int &stride, const uint &netW,
const uint &netH) {
NvDsInferParseObjectInfo b;
float xCenter = bx * stride;
float yCenter = by * stride;
float x0 = xCenter - bw / 2;
float y0 = yCenter - bh / 2;
float x1 = x0 + bw;
float y1 = y0 + bh;
x0 = clamp(x0, 0, netW);
y0 = clamp(y0, 0, netH);
x1 = clamp(x1, 0, netW);
y1 = clamp(y1, 0, netH);
b.left = x0;
b.width = clamp(x1 - x0, 0, netW);
b.top = y0;
b.height = clamp(y1 - y0, 0, netH);
return b;
}
static void addBBoxProposalYolo(const float bx, const float by, const float bw,
const float bh, const uint stride, const uint &netW, const uint &netH,
const int maxIndex, const float maxProb,
std::vector<NvDsInferParseObjectInfo> &binfo) {
NvDsInferParseObjectInfo bbi = convertBBoxYolo(bx, by, bw, bh, stride, netW, netH);
if (bbi.width < 1 || bbi.height < 1)
return;
bbi.detectionConfidence = maxProb;
bbi.classId = maxIndex;
binfo.push_back(bbi);
}
static bool
NvDsInferParseYoloV8(std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList) {
if (outputLayersInfo.empty()) {
std::cerr << "ERROR: No output layers found in bbox parsing." << std::endl;
return false;
}
const NvDsInferLayerInfo &layer = outputLayersInfo[0];
// Print tensor shape
std::cerr << "Input Tensor Shape: [";
for (int i = 0; i < layer.inferDims.numDims; i++) {
std::cerr << layer.inferDims.d[i] << (i < layer.inferDims.numDims - 1 ? ", " : "");
}
std::cerr << "]" << std::endl;
// Validate the output layer dimensions
if (layer.inferDims.numDims != 2 || layer.inferDims.d[0] != 9) {
std::cerr << "ERROR: Invalid output layer dimensions. Expected shape: [9, 8400]." << std::endl;
return false;
}
std::vector<NvDsInferParseObjectInfo> objects;
float *data = (float *)layer.buffer;
int dimensions = layer.inferDims.d[0]; // Should be 9 (4 box coords + 1 objectness + 4 class scores)
int rows = layer.inferDims.d[1]; // Should be 8400 (number of detections)
const uint gridSize = sqrt(rows); // Approximate grid size
const uint stride = DIVUP(networkInfo.width, gridSize);
std::cerr << "Parsing YOLOv8 output with dimensions: " << dimensions
<< " rows: " << rows << ", calculated stride: " << stride << std::endl;
for (int i = 0; i < rows; ++i) {
// Process each detection
float bx = data[i * dimensions + 0]; // x_center
float by = data[i * dimensions + 1]; // y_center
float bw = data[i * dimensions + 2]; // width
float bh = data[i * dimensions + 3]; // height
float objectness = data[i * dimensions + 4]; // objectness score
float *classes_scores = &data[i * dimensions + 5]; // class scores start at index 5
// Find the class with highest probability
float maxScore = 0;
int maxIndex = 0;
for (int j = 0; j < NUM_CLASSES_YOLO; j++) {
if (classes_scores[j] > maxScore) {
maxIndex = j;
maxScore = classes_scores[j];
}
}
// Combine objectness with class score for final confidence
float confidence = objectness * maxScore;
// Log the parsed bounding box and confidence
std::cerr << "Detection " << i << ": bx=" << bx << ", by=" << by
<< ", bw=" << bw << ", bh=" << bh
<< ", confidence=" << confidence << ", class=" << maxIndex << std::endl;
// Add to list if above threshold
if (confidence > detectionParams.perClassThreshold[maxIndex]) {
addBBoxProposalYolo(bx, by, bw, bh, stride, networkInfo.width,
networkInfo.height, maxIndex, confidence, objects);
}
}
std::cerr << "Number of objects detected: " << objects.size() << std::endl;
for (const auto &obj : objects) {
std::cerr << "Object: left=" << obj.left << ", top=" << obj.top
<< ", width=" << obj.width << ", height=" << obj.height
<< ", confidence=" << obj.detectionConfidence
<< ", classId=" << obj.classId << std::endl;
}
objectList = objects;
return true;
}
extern "C" bool NvDsInferParseCustomYoloV8(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList) {
try {
return NvDsInferParseYoloV8(outputLayersInfo, networkInfo, detectionParams,
objectList);
} catch (const std::exception &e) {
std::cerr << "ERROR: Exception in NvDsInferParseCustomYoloV8: " << e.what() << std::endl;
return false;
} catch (...) {
std::cerr << "ERROR: Unknown exception in NvDsInferParseCustomYoloV8." << std::endl;
return false;
}
}
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV8);
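For comparison, below is a minimal sketch of how I believe the same output buffer would be indexed if the engine kept the usual Ultralytics YOLOv8 export layout: channel-major [4 + num_classes, 8400], box values already decoded to network-input pixels, and no separate objectness channel. This is an assumption about the export that I have not verified against my engine, so the helper below is illustrative only and is not part of the parser above:
// Sketch only: channel-major reading of a [4 + num_classes, 8400] tensor.
// ASSUMPTION: boxes are already in network-input pixels and the scores are
// per-class sigmoid outputs with no objectness term. Uses the NvDsInfer
// types already included above.
static void parseChannelMajorSketch(const NvDsInferLayerInfo &layer,
                                    float confThreshold,
                                    std::vector<NvDsInferParseObjectInfo> &out) {
    const float *data = (const float *)layer.buffer;
    const int channels = layer.inferDims.d[0];   // 4 + num_classes (9 here)
    const int numAnchors = layer.inferDims.d[1]; // 8400 for a 640x640 input
    for (int i = 0; i < numAnchors; ++i) {
        // Channel d of anchor i lives at data[d * numAnchors + i].
        float bx = data[0 * numAnchors + i];
        float by = data[1 * numAnchors + i];
        float bw = data[2 * numAnchors + i];
        float bh = data[3 * numAnchors + i];
        float maxScore = 0.f;
        int maxIndex = 0;
        for (int c = 4; c < channels; ++c) {
            float score = data[c * numAnchors + i];
            if (score > maxScore) {
                maxScore = score;
                maxIndex = c - 4;
            }
        }
        if (maxScore < confThreshold)
            continue;
        NvDsInferParseObjectInfo obj;
        obj.left = bx - bw / 2; // already in input-pixel units in this layout
        obj.top = by - bh / 2;
        obj.width = bw;
        obj.height = bh;
        obj.detectionConfidence = maxScore;
        obj.classId = maxIndex;
        out.push_back(obj);
    }
}
In that layout the stride rescaling and the objectness term used in my parser would not be needed.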
And the output log of the DeepStream app after adding the parser is:
yolov8_parser_error.txt (1.5 MB)
My config file is:
[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
model-color-format=0
infer-dims=3;640;640
onnx-file=/home/atmecs/Documents/ppe detection/models/Goggles_Shoes/Goggels_Shoes.onnx
model-engine-file=/home/atmecs/Documents/ppe detection/models/Goggles_Shoes/Goggels_Shoes.engine
#int8-calib-file=calib.table
labelfile-path=/home/atmecs/Documents/labels.txt
batch-size=1
network-mode=0
num-detected-classes=5
interval=0
gie-unique-id=1
process-mode=1
network-type=0
# 1=DBSCAN, 2=NMS, 3=DBSCAN+NMS Hybrid, 4=None (no clustering)
cluster-mode=2
maintain-aspect-ratio=1
symmetric-padding=1
#workspace-size=2000
#custom-lib-path=/home/atmecs/Desktop/DeepStream-Yolov8-Jetson-Nano/nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so
custom-lib-path=/home/atmecs/Documents/ppe detection/yolov8 parser/yolov8n_goggle_shoe_parser.so
#parse-bbox-func-name=NvDsInferParseYolo
parse-bbox-func-name=NvDsInferParseCustomYoloV8
[class-attrs-all]
nms-iou-threshold=0.3
pre-cluster-threshold=0.25
topk=300
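For completeness, a custom parser library like this is typically built as a plain shared object against the DeepStream headers; a sketch of such a build command (the source file name and include paths are illustrative, adjust to your install):
g++ -Wall -shared -fPIC -o yolov8n_goggle_shoe_parser.so yolov8n_goggle_shoe_parser.cpp -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda/include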
The classes my model detects are:
0- Safety Goggle
1- ToeGuard
2- Non Safety Shoes
3- Safety Shoes
4- Non Safety Goggles
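For reference, the labels.txt referenced by labelfile-path is expected to list one label per line, in the same order as the class IDs above:
Safety Goggle
ToeGuard
Non Safety Shoes
Safety Shoes
Non Safety Goggles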