jetson-inference’s detectnet.cpp accepts multi-image (batch) input for detection.
Line 1141 of tensorNet.cpp has:
mMaxBatchSize = engine->getMaxBatchSize();
Then line 1208,
size_t inputSize = mMaxBatchSize * DIMS_C(inputDims) * DIMS_H(inputDims) * DIMS_W(inputDims) * sizeof(float);
and line 1269
size_t outputSize = mMaxBatchSize * DIMS_C(outputDims) * DIMS_H(outputDims) * DIMS_W(outputDims) * sizeof(float);
both multiply the buffer size by mMaxBatchSize, so the input and output buffers are allocated with room for multiple input images.
But the detection loop in detectnet.cpp:
// Run detection on the current image; entries [0, numDetections) of `detections` are logged below.
const int numDetections = net->Detect(image, input->GetWidth(), input->GetHeight(), &detections, overlayFlags);

if( numDetections > 0 )
{
	LogVerbose("%i objects detected\n", numDetections);

	// one pair of log lines per detection: class/confidence first, then the box geometry
	for( int idx = 0; idx < numDetections; ++idx )
	{
		LogVerbose("detected obj %i class #%u (%s) confidence=%f\n",
		           idx,
		           detections[idx].ClassID,
		           net->GetClassDesc(detections[idx].ClassID),
		           detections[idx].Confidence);

		LogVerbose("bounding box %i (%f, %f) (%f, %f) w=%f h=%f\n",
		           idx,
		           detections[idx].Left,
		           detections[idx].Top,
		           detections[idx].Right,
		           detections[idx].Bottom,
		           detections[idx].Width(),
		           detections[idx].Height());
	}
}
and detectNet::clusterDetections() in detectNet.cpp (lines 930-1011):
/**
 * Scan the coverage and bounding-box output grids and build a clustered list of detections.
 *
 * Every grid cell whose coverage exceeds mCoverageThreshold yields a rectangle, scaled from
 * network-input coordinates to the caller's image size. The rectangle is first offered to the
 * existing detections of the same class via Detection::Expand(); if none accepts it, it is
 * appended as a new detection.
 *
 * @param detections  output array that receives the detections. This function does not
 *                    bounds-check it, so the caller must provide enough entries.
 * @param width       width of the image the boxes should be scaled to.
 * @param height      height of the image the boxes should be scaled to.
 * @return            number of entries written to `detections`.
 *
 * Only the start of the coverage and bbox output buffers is read; nothing here indexes by
 * batch element.
 */
int detectNet::clusterDetections( Detection* detections, uint32_t width, uint32_t height )
{
	// raw network outputs (CPU-side pointers)
	float* coverageMap = mOutputs[OUTPUT_CVG].CPU;
	float* bboxMap     = mOutputs[OUTPUT_BBOX].CPU;

	// geometry of the output grid
	const int gridCols   = DIMS_W(mOutputs[OUTPUT_BBOX].dims);	// cells along X
	const int gridRows   = DIMS_H(mOutputs[OUTPUT_BBOX].dims);	// cells along Y
	const int gridCells  = gridCols * gridRows;			// cells per plane
	const int numClasses = GetNumClasses();				// planes in the coverage map

	// size of one grid cell in network-input pixels
	// NOTE(review): if GetInputWidth()/GetInputHeight() return integers, these divisions truncate before the float conversion - confirm that is intended
	const float cellW = /*width*/ GetInputWidth() / gridCols;
	const float cellH = /*height*/ GetInputHeight() / gridRows;

	// factors mapping network-input coordinates onto the caller's image
	const float toImageX = float(width) / float(GetInputWidth());
	const float toImageY = float(height) / float(GetInputHeight());

#ifdef DEBUG_CLUSTERING
	LogDebug(LOG_TRT "input width %i height %i\n", (int)DIMS_W(mInputDims), (int)DIMS_H(mInputDims));
	LogDebug(LOG_TRT "cells x %i y %i\n", gridCols, gridRows);
	LogDebug(LOG_TRT "cell width %f height %f\n", cellW, cellH);
	LogDebug(LOG_TRT "scale x %f y %f\n", toImageX, toImageY);
#endif

	int numDetections = 0;

	// walk every (class, row, column) cell of the coverage map
	for( uint32_t classIdx = 0; classIdx < numClasses; classIdx++ )
	{
		for( uint32_t row = 0; row < gridRows; row++ )
		{
			for( uint32_t col = 0; col < gridCols; col++ )
			{
				// offset of this cell inside a single plane
				const uint32_t cellIdx = row * gridCols + col;

				const float coverage = coverageMap[classIdx * gridCells + cellIdx];

				// skip cells that do not pass the coverage threshold
				if( !(coverage > mCoverageThreshold) )
					continue;

				// top-left corner of the cell in network-input pixels
				const float originX = col * cellW;
				const float originY = row * cellH;

				// bbox planes are ordered left, top, right, bottom and are relative to the cell origin
				const float left   = (bboxMap[0 * gridCells + cellIdx] + originX) * toImageX;
				const float top    = (bboxMap[1 * gridCells + cellIdx] + originY) * toImageY;
				const float right  = (bboxMap[2 * gridCells + cellIdx] + originX) * toImageX;
				const float bottom = (bboxMap[3 * gridCells + cellIdx] + originY) * toImageY;

#ifdef DEBUG_CLUSTERING
				LogDebug(LOG_TRT "rect x=%u y=%u cvg=%f %f %f %f %f \n", col, row, coverage, left, right, top, bottom);
#endif

				// offer the rectangle to existing detections of the same class, stopping at the first that takes it
				bool absorbed = false;

				for( uint32_t idx = 0; idx < numDetections && !absorbed; idx++ )
					absorbed = (detections[idx].ClassID == classIdx) && detections[idx].Expand(left, top, right, bottom);

				if( absorbed )
					continue;

				// nothing took it, so it starts a new detection
				Detection& slot = detections[numDetections];

				slot.Instance   = numDetections;
				slot.ClassID    = classIdx;
				slot.Confidence = coverage;
				slot.Left       = left;
				slot.Top        = top;
				slot.Right      = right;
				slot.Bottom     = bottom;

				numDetections++;
			}
		}
	}

	return numDetections;
}
do not separate the results for the different images in a batch.
How can I get the detection outputs separated per image when running batch inference?