• Hardware Platform (GPU)
• DeepStream Version
Deepstream 7.0
Hi,
I am trying to integrate YOLOv9 running on DeepStream with MiDaS for depth estimation.
This is the GitHub file I was referring to:
I edited the nvdsinfer_yolo.cpp file in the nvdsinfer_yolo folder to pass data to my MiDaS Python file. I was able to get the x1, y1, x2, y2 bounding-box coordinates, but I also need to pass the video frame to the Python file.
I have been unable to get that video frame; please help me.
Below is my edited nvdsinfer_yolo.cpp file:
// NOTE(review): the header names were lost when this file was pasted (only the
// word "include" survived). The list below is reconstructed from what the file
// visibly uses - confirm against the original source.
//
// Python.h comes first, as the CPython embedding documentation requires it to
// precede any standard header. The build must also add the Python include
// path and link against libpython.
#include <Python.h>

#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <mutex>
#include <string>
#include <vector>

#include "nvdsinfer_custom_impl.h"
/* Smaller / larger of two values. Function-like macros: each argument may be
 * evaluated twice, so do not pass expressions with side effects. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* Restricts a to the closed range [min, max]. */
#define CLIP(a,min,max) (MAX(MIN(a, max), min))
/* C-linkage to prevent name-mangling */
extern “C”
bool NvDsInferYoloNMS (std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
extern “C” bool NvDsInferYoloMask(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
extern “C”
bool NvDsInferYoloNMS (std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList) {
if(outputLayersInfo.size() != 5)
{
std::cerr << “Mismatch in the number of output buffers.”
<< “Expected 4 output buffers, detected in the network :”
<< outputLayersInfo.size() << std::endl;
return false;
}
auto layerFinder = [&outputLayersInfo](const std::string &name)
-> const NvDsInferLayerInfo *{
for (auto &layer : outputLayersInfo) {
if (layer.layerName && name == layer.layerName) {
return &layer;
}
}
return nullptr;
};
const NvDsInferLayerInfo *num_detsLayer = layerFinder("num_dets");
const NvDsInferLayerInfo *boxesLayer = layerFinder("det_boxes");
const NvDsInferLayerInfo *scoresLayer = layerFinder("det_scores");
const NvDsInferLayerInfo *classesLayer = layerFinder("det_classes");
const NvDsInferLayerInfo *indicesLayer = layerFinder("det_indices");
if (!num_detsLayer || !boxesLayer || !scoresLayer || !classesLayer || !indicesLayer) {
if (!num_detsLayer) {
std::cerr << " - num_detsLayer: Missing or unsupported data type." << std::endl;
}
if (!boxesLayer) {
std::cerr << " - boxesLayer: Missing or unsupported data type." << std::endl;
}
if (!scoresLayer) {
std::cerr << " - scoresLayer: Missing or unsupported data type." << std::endl;
}
if (!classesLayer) {
std::cerr << " - classesLayer: Missing or unsupported data type." << std::endl;
}
if (!indicesLayer) {
std::cerr << " - indicesLayer: Missing or unsupported data type." << std::endl;
}
return false;
}
if(num_detsLayer->inferDims.numDims != 1U) {
std::cerr << "Network num_dets dims is : " <<
num_detsLayer->inferDims.numDims << " expect is 1"<< std::endl;
return false;
}
if(boxesLayer->inferDims.numDims != 2U) {
std::cerr << "Network det_boxes dims is : " <<
boxesLayer->inferDims.numDims << " expect is 2"<< std::endl;
return false;
}
if(scoresLayer->inferDims.numDims != 1U) {
std::cerr << "Network det_scores dims is : " <<
scoresLayer->inferDims.numDims << " expect is 1"<< std::endl;
return false;
}
if(classesLayer->inferDims.numDims != 1U) {
std::cerr << "Network det_classes dims is : " <<
classesLayer->inferDims.numDims << " expect is 1"<< std::endl;
return false;
}
if(indicesLayer->inferDims.numDims != 1U) {
std::cerr << "Network det_indices dims is : " <<
indicesLayer->inferDims.numDims << " expect is 1"<< std::endl;
return false;
}
const char* log_enable = std::getenv("ENABLE_DEBUG");
int* p_keep_count = (int *) num_detsLayer->buffer;
int* p_indices = (int *) indicesLayer->buffer;
float* p_bboxes = (float *) boxesLayer->buffer;
NvDsInferDims inferDims_p_bboxes = boxesLayer->inferDims;
int numElements_p_bboxes=inferDims_p_bboxes.numElements;
float* p_scores = (float *) scoresLayer->buffer;
unsigned int* p_classes = (unsigned int *) classesLayer->buffer;
const float threshold = detectionParams.perClassThreshold[0];
float max_bbox=0;
for (int i=0; i < numElements_p_bboxes; i++)
{
if ( max_bbox < p_bboxes[i] )
max_bbox=p_bboxes[i];
}
if (p_keep_count[0] > 0)
{
assert (!(max_bbox < 2.0));
for (int i = 0; i < p_keep_count[0]; i++) {
if ( p_scores[i] < threshold) continue;
if ((unsigned int) p_classes[i] >= detectionParams.numClassesConfigured) {
printf("Error: The number of classes configured in the GIE config-file (postprocess > num_detected_classes) is incorrect.\n");
printf("Detected class index: %u\n", (unsigned int) p_classes[i]);
}
assert((unsigned int) p_classes[i] < detectionParams.numClassesConfigured);
//std::cout << "class: " << p_classes[i] << std::endl;
NvDsInferObjectDetectionInfo object;
object.classId = (int) p_classes[i];
object.detectionConfidence = p_scores[i];
std::cout << object.detectionConfidence << std::endl;
object.left=p_bboxes[4*i];
object.top=p_bboxes[4*i+1];
object.width=(p_bboxes[4*i+2] - object.left);
object.height= (p_bboxes[4*i+3] - object.top);
//std::cout << "x1: " << object.left << ", y1: " << object.top << ", x2: " << p_bboxes[4*i+2] << ", y2: " << p_bboxes[4*i+3] << std::endl;
Py_Initialize()
Pyobject *name, *load_module, *func, *callfunc, *args;
name = PyUnicode_fromString((char*)"midas");
load_module = PyImport_Import(name);
func = PyObject_GetAttrString(load_module,(char*)"midas_1")
args = PyTuple_Pack(5, PyFloat_FromDouble(object.left),PyFloat_FromDouble(object.top),PyFloat_FromDouble(p_bboxes[4*i+2]),PyFloat_FromDouble(p_bboxes[4*i+]))
callfunc = PyObject_CallObject(func,args);
double midas_out = PyFloat_AsDouble(callfunc)
Py_Finalize();
std::cout<<midas_out<<std::endl
if(log_enable != NULL && std::stoi(log_enable)) {
std::cout << "idx/label/conf/ x/y w/h -- "
<< p_indices[i] << " "
<< p_classes[i] << " "
<< p_scores[i] << " "
<< object.left << " " << object.top << " " << object.width << " "<< object.height << " "
<< std::endl;
}
object.left=CLIP(object.left, 0, networkInfo.width - 1);
object.top=CLIP(object.top, 0, networkInfo.height - 1);
object.width=CLIP(object.width, 0, networkInfo.width - 1);
object.height=CLIP(object.height, 0, networkInfo.height - 1);
objectList.push_back(object);
}
}
return true;
}
extern “C” bool NvDsInferYoloMask(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList)
{
if (outputLayersInfo.size() != 5) {
std::cerr << “Mismatch in the number of output buffers.”
<< “Expected 5 output buffers, detected in the network :”
<< outputLayersInfo.size() << std::endl;
return false;
}
auto layerFinder = [&outputLayersInfo](const std::string &name)
-> const NvDsInferLayerInfo *{
for (auto &layer : outputLayersInfo) {
if (layer.layerName && name == layer.layerName) {
return &layer;
}
}
return nullptr;
};
const NvDsInferLayerInfo *num_detsLayer = layerFinder("num_dets");
const NvDsInferLayerInfo *boxesLayer = layerFinder("det_boxes");
const NvDsInferLayerInfo *scoresLayer = layerFinder("det_scores");
const NvDsInferLayerInfo *classesLayer = layerFinder("det_classes");
const NvDsInferLayerInfo *masksLayer = layerFinder("det_masks");
if (!num_detsLayer || !boxesLayer || !scoresLayer || !classesLayer || !masksLayer) {
if (!num_detsLayer) {
std::cerr << " - num_detsLayer: Missing or unsupported data type." << std::endl;
}
if (!boxesLayer) {
std::cerr << " - boxesLayer: Missing or unsupported data type." << std::endl;
}
if (!scoresLayer) {
std::cerr << " - scoresLayer: Missing or unsupported data type." << std::endl;
}
if (!classesLayer) {
std::cerr << " - classesLayer: Missing or unsupported data type." << std::endl;
}
if (!masksLayer) {
std::cerr << " - masksLayer: Missing or unsupported data type." << std::endl;
}
return false;
}
if(num_detsLayer->inferDims.numDims != 1U) {
std::cerr << "Network num_dets dims is : " <<
num_detsLayer->inferDims.numDims << " expect is 1"<< std::endl;
return false;
}
if(boxesLayer->inferDims.numDims != 2U) {
std::cerr << "Network det_boxes dims is : " <<
boxesLayer->inferDims.numDims << " expect is 2"<< std::endl;
return false;
}
if(scoresLayer->inferDims.numDims != 1U) {
std::cerr << "Network det_scores dims is : " <<
scoresLayer->inferDims.numDims << " expect is 1"<< std::endl;
return false;
}
if(classesLayer->inferDims.numDims != 1U) {
std::cerr << "Network det_classes dims is : " <<
classesLayer->inferDims.numDims << " expect is 1"<< std::endl;
return false;
}
if(masksLayer->inferDims.numDims != 2U) {
std::cerr << "Network det_masks dims is : " <<
masksLayer->inferDims.numDims << " expect is 2"<< std::endl;
return false;
}
const char* log_enable = std::getenv("ENABLE_DEBUG");
int* p_keep_count = (int *) num_detsLayer->buffer;
float* p_bboxes = (float *) boxesLayer->buffer;
float* p_scores = (float *) scoresLayer->buffer;
unsigned int* p_classes = (unsigned int *) classesLayer->buffer;
float *p_mask = (float *) masksLayer->buffer;
const float threshold = detectionParams.perClassThreshold[0];
NvDsInferDims inferDims_p_bboxes = boxesLayer->inferDims;
int numElements_p_bboxes=inferDims_p_bboxes.numElements;
const int mask_resolution = sqrt(masksLayer->inferDims.d[1]);
if(log_enable != NULL && std::stoi(log_enable)) {
std::cout << "keep cout: " << p_keep_count[0] << std::endl;
}
float max_bbox=0;
for (int i=0; i < numElements_p_bboxes; i++){
if ( max_bbox < p_bboxes[i] )
max_bbox=p_bboxes[i];
}
if (p_keep_count[0] > 0){
assert (!(max_bbox < 2.0));
for (int i = 0; i < p_keep_count[0]; i++) {
if ( p_scores[i] < threshold) continue;
if ((unsigned int) p_classes[i] >= detectionParams.numClassesConfigured) {
printf("Error: The number of classes configured in the GIE config-file (postprocess > num_detected_classes) is incorrect.\n");
printf("Detected class index: %u\n", (unsigned int) p_classes[i]);
continue;
}
//assert((unsigned int) p_classes[i] < detectionParams.numClassesConfigured);
NvDsInferInstanceMaskInfo object;
object.classId = (int) p_classes[i];
object.detectionConfidence = p_scores[i];
object.left=p_bboxes[4*i];
object.top=p_bboxes[4*i+1];
object.width=(p_bboxes[4*i+2] - object.left);
object.height= (p_bboxes[4*i+3] - object.top);
if (log_enable != NULL && std::stoi(log_enable)) {
std::cout << "label/conf/ x/y w/h -- "
<< p_classes[i] << " "
<< p_scores[i] << " "
<< object.left << " " << object.top << " " << object.width << " "<< object.height << " "
<< std::endl;
}
object.left=CLIP(object.left, 0, networkInfo.width - 1);
object.top=CLIP(object.top, 0, networkInfo.height - 1);
object.width=CLIP(object.width, 0, networkInfo.width - 1);
object.height=CLIP(object.height, 0, networkInfo.height - 1);
object.mask_size = sizeof(float) * mask_resolution * mask_resolution;
object.mask = new float[mask_resolution * mask_resolution];
object.mask_width = mask_resolution;
object.mask_height = mask_resolution;
const float* rawMask = reinterpret_cast<const float*>(p_mask + i * mask_resolution * mask_resolution);
//float *rawMask = reinterpret_cast<float *>(p_mask + mask_resolution * mask_resolution * i);
memcpy(object.mask, rawMask, sizeof(float) * mask_resolution * mask_resolution);
objectList.push_back(object);
}
}
return true;
}
/* Prototype checks for the two parsers defined in this file.
 * NOTE(review): both macros presumably come from nvdsinfer_custom_impl.h and
 * verify at compile time that each function matches the signature expected for
 * a custom bounding-box / instance-mask parser - confirm against that header. */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferYoloNMS);
CHECK_CUSTOM_INSTANCE_MASK_PARSE_FUNC_PROTOTYPE(NvDsInferYoloMask);
How can I pass the video frame from the NvDsInferYoloNMS function to the MiDaS Python file?