Jetson-inference's detectnet.cpp: running detection on a batch of images

jetson-inference’s detectnet.cpp appears to accept multi-image (batch) input for detection.
Line 1141 of tensorNet.cpp has mMaxBatchSize = engine->getMaxBatchSize();

Then line 1208
size_t inputSize = mMaxBatchSize * DIMS_C(inputDims) * DIMS_H(inputDims) * DIMS_W(inputDims) * sizeof(float);

and line 1269
size_t outputSize = mMaxBatchSize * DIMS_C(outputDims) * DIMS_H(outputDims) * DIMS_W(outputDims) * sizeof(float);

both scale their allocations by mMaxBatchSize, i.e. they reserve room for multiple input images.

But the detection loop in detectnet.cpp

       // run the detector on the current frame; the results are written to 'detections'
       const int numDetections = net->Detect(image, input->GetWidth(), input->GetHeight(), &detections, overlayFlags);

		// only report when at least one object was returned
		if( numDetections > 0 )
		{
			LogVerbose("%i objects detected\n", numDetections);

			// one pair of log lines per detection: class/confidence, then the box geometry
			for( int idx=0; idx < numDetections; idx++ )
			{
				LogVerbose("detected obj %i  class #%u (%s)  confidence=%f\n",
						 idx,
						 detections[idx].ClassID,
						 net->GetClassDesc(detections[idx].ClassID),
						 detections[idx].Confidence);

				LogVerbose("bounding box %i  (%f, %f)  (%f, %f)  w=%f  h=%f\n",
						 idx,
						 detections[idx].Left,
						 detections[idx].Top,
						 detections[idx].Right,
						 detections[idx].Bottom,
						 detections[idx].Width(),
						 detections[idx].Height());
			}
		}

and detectNet.cpp (lines 930–1011)

/*
 * Build the detection list from the coverage and bounding-box outputs.
 *
 * Every grid cell of every class whose coverage exceeds mCoverageThreshold
 * yields a candidate box, rescaled from network-input coordinates to the
 * caller's width x height.  A candidate is first offered to the existing
 * detections of the same class via Detection::Expand(); if none accepts it,
 * it is appended as a new entry.
 *
 * Note: both output buffers are indexed from offset 0 with no per-image
 * (batch) term, so only the first width*height grid of each buffer is read.
 *
 * @param detections  caller-provided array that receives the results
 * @param width       width the boxes are scaled to
 * @param height      height the boxes are scaled to
 * @returns           the number of entries written to 'detections'
 */
int detectNet::clusterDetections( Detection* detections, uint32_t width, uint32_t height )
{
	// host-side views of the two network outputs
	float* coverageMap = mOutputs[OUTPUT_CVG].CPU;
	float* bboxGrid    = mOutputs[OUTPUT_BBOX].CPU;

	// geometry of the bbox grid and the number of coverage planes
	const int gridCols   = DIMS_W(mOutputs[OUTPUT_BBOX].dims);	// cells along X
	const int gridRows   = DIMS_H(mOutputs[OUTPUT_BBOX].dims);	// cells along Y
	const int gridCells  = gridCols * gridRows;					// cells per plane
	const int numClasses = GetNumClasses();

	// size of one grid cell in network-input pixels
	// (division kept exactly as written -- presumably integral operands, TODO confirm)
	const float cell_width  = GetInputWidth() / gridCols;
	const float cell_height = GetInputHeight() / gridRows;

	// factors mapping network-input coordinates onto the requested output size
	const float scale_x = float(width) / float(GetInputWidth());
	const float scale_y = float(height) / float(GetInputHeight());

#ifdef DEBUG_CLUSTERING
	LogDebug(LOG_TRT "input width %i height %i\n", (int)DIMS_W(mInputDims), (int)DIMS_H(mInputDims));
	LogDebug(LOG_TRT "cells x %i  y %i\n", gridCols, gridRows);
	LogDebug(LOG_TRT "cell width %f  height %f\n", cell_width, cell_height);
	LogDebug(LOG_TRT "scale x %f  y %f\n", scale_x, scale_y);
#endif

	int numDetections = 0;

	for( uint32_t classIdx=0; classIdx < numClasses; classIdx++ )
	{
		for( uint32_t row=0; row < gridRows; row++ )
		{
			for( uint32_t col=0; col < gridCols; col++ )
			{
				// offset of this cell inside a single plane
				const uint32_t cell = row * gridCols + col;

				const float coverage = coverageMap[classIdx * gridCells + cell];

				// skip cells that do not exceed the coverage threshold
				if( !(coverage > mCoverageThreshold) )
					continue;

				// origin of the cell in network-input pixels
				const float originX = col * cell_width;
				const float originY = row * cell_height;

				// the bbox output stores four planes: left, top, right, bottom
				const float left   = (bboxGrid[cell] + originX) * scale_x;
				const float top    = (bboxGrid[gridCells + cell] + originY) * scale_y;
				const float right  = (bboxGrid[2 * gridCells + cell] + originX) * scale_x;
				const float bottom = (bboxGrid[3 * gridCells + cell] + originY) * scale_y;

			#ifdef DEBUG_CLUSTERING
				LogDebug(LOG_TRT "rect x=%u y=%u  cvg=%f  %f %f   %f %f \n", col, row, coverage, left, right, top, bottom);
			#endif

				// offer the box to the same-class detections collected so far,
				// stopping at the first one whose Expand() accepts it
				bool absorbed = false;

				for( uint32_t n=0; n < numDetections && !absorbed; n++ )
					absorbed = (detections[n].ClassID == classIdx) && detections[n].Expand(left, top, right, bottom);

				if( absorbed )
					continue;

				// nothing accepted the box, so it starts a new detection
				Detection& entry = detections[numDetections];

				entry.Instance   = numDetections;
				entry.ClassID    = classIdx;
				entry.Confidence = coverage;

				entry.Left   = left;
				entry.Top    = top;
				entry.Right  = right;
				entry.Bottom = bottom;

				numDetections++;
			}
		}
	}

	return numDetections;
}

do not separate the detections belonging to the different images in a batch.

How can I get the detection outputs separated per image when running batch inference?

Hi @edit_or,
I recommend that you raise this issue at https://github.com/dusty-nv/jetson-inference/issues .
Thanks!