• Hardware Platform (dGPU)
• DeepStream Version 6.2
• NVIDIA GPU Driver Version (valid for GPU only) 525.85.12
• Issue Type( questions, new requirements, bugs) question
Hi, I have an onnx model for a custom yolov7 model which provides rotation apart from bounding boxes. The custom model has output layer as follow:
Due to the model modification, nms layer could not be added to it while obtaining onnx using the export.py
file. Hence I do it manually.
When I convert it to engine file and infer with a custom parser, I get NaN
as x,y,w,h
when the scale-factor=1/255… just like the python pre-processing. But when I experiment and change this value to 1, it gives reasonable values of x,y,w&h but the confidence is so low that no detection is obtained.
My custom parser is defined as follows:
extern "C"
bool NvDsInferParseCustomYoloV7Obb (std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferObjectDetectionInfo> &objectList) {
// std::map <int, string> layerName2id;
// for(int l=0; l<4; l++)
// {
// /*
// l: 0 502 3 48 80 193
// l: 1 537 3 24 40 193
// l: 2 572 3 12 20 193
// l: 3 output 15120 193
// */
// std::cout<< "\nl: " <<l << "\t" << outputLayersInfo[l].layerName;
// for(int jj=0; jj<outputLayersInfo[l].inferDims.numDims; jj++)
// std::cout<< " "<<outputLayersInfo[l].inferDims.d[jj]<<" ";
// }
float *buf = (float *)outputLayersInfo[3].buffer; // lname: output [15120, 193]
//std::cout<< "\nl: " <<4 << "\t" << outputLayersInfo[3].layerName;
if(outputLayersInfo.size() != 4)
{
std::cerr << "Mismatch in the number of output buffers."
<< "Expected 4 output buffers, detected in the network :"
<< outputLayersInfo.size() << std::endl;
return false;
}
std::vector<std::pair<float, float > > canchors;
std::vector<std::pair<int, int > > xy_offsets;
std::vector<int> strides;
for(int l=0; l<3; l++)
{
/*
l: 0 502
l: 1 537
l: 2 572
*/
//std::cout<< "\nl: " <<l << "\t" << outputLayersInfo[l].layerName;
// eg 3,20,20,193
int G1_MAX = outputLayersInfo[l].inferDims.d[1];
int G2_MAX = outputLayersInfo[l].inferDims.d[2];
for(int c=0; c<outputLayersInfo[l].inferDims.d[0]; c++)
{
for(int g1=0; g1<G1_MAX; g1++)
{
for(int g2=0; g2<G2_MAX; g2++)
{
xy_offsets.push_back({g2, g1});
strides.push_back(stride[l]);
float canchor_x = anchors[l][c][0] * stride[l];
float canchor_y = anchors[l][c][1] * stride[l];
canchors.push_back({canchor_x, canchor_y});
}
}
}
}
// Now add these offsets/values
for(int i = 0; i<15120; i++){ // TBI
int basic_pos = NUM_PROPOSALS * i;
if(buf[basic_pos + 4] < 0.2) continue;
double x = sigmoid(buf[basic_pos]);
double y = sigmoid(buf[basic_pos + 1]);
double w = sigmoid(buf[basic_pos + 2]);
double h = sigmoid(buf[basic_pos + 3]);
printf("\n old wh %f, %f \t new wh %f %f",buf[basic_pos + 1], buf[basic_pos + 2],w,h);
float conf = buf[basic_pos + 4];
return true;
// apply xy offset
x = (x * 2 - 0.5 + xy_offsets[i].first) * strides[i];
y = (y * 2 - 0.5 + xy_offsets[i].second) * strides[i];
// apply wh offset
w = pow((w * 2), 2) * canchors[i].first;
h = pow((h * 2), 2) * canchors[i].second;
int cls_id = 0;
float cls_score = 0.0;
for(int j=0; j<NUM_CLASSES; j++){
if(cls_score<buf[basic_pos + 5 + j]){
cls_score = buf[basic_pos + 5 + j];
cls_id = j;
}
}
// get rotation TODO
// generate bbox meta
int left = x-(w/2);
int top = y-(h/2);
NvDsInferObjectDetectionInfo res;
res.classId = cls_id;
res.detectionConfidence = conf;
res.left = left;
res.top = top;
res.width = w;
res.height = h;
if(res.height < 0 || res.width < 0)
continue;
objectList.push_back(res);
printf("\nid: %d xywh %f, %f, %f, %f",cls_id,x,y,w,h);
//std::cout<<"\n l "<<l<<" g1g2 "<<g1<<" "<<g2;
}
return true;
}
The engine i/p & o/p are detected as
INFO: [Implicit Engine Info]: layers num: 5
0 INPUT kHALF images 3x384x640
1 OUTPUT kHALF 502 3x48x80x193
2 OUTPUT kHALF 537 3x24x40x193
3 OUTPUT kHALF 572 3x12x20x193
4 OUTPUT kFLOAT output 15120x193
I use the last layer.
Please help me rectify this issue.