DetectNet_v2 C++ TensorRT inference: no result

I generated the TensorRT engine file using the following command:
./tlt-converter resnet34_peoplenet_pruned.etlt -k tlt_encode -o output_cov/Sigmoid,output_bbox/BiasAdd -d 3,544,960 -i nchw -t fp16 -e resnet34_peoplenet_pruned.etlt_b1_gpu0_fp16.engine -m 1 -b 1

I am using the following code to run inference, but I get no outputs:

```cpp

/* OpenCV headers */
#include <opencv2/core/core.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgcodecs/imgcodecs.hpp>

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <cudnn.h>
#include "NvInfer.h"
#include "NvInferPlugin.h"

//#define MIN(a,b) ((a) < (b) ? (a) : (b))
//#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define CLIP(a,min,max) (MAX(MIN(a, max), min))
#define DIVIDE_AND_ROUND_UP(a, b) ((a + b - 1) / b)
using namespace cv;
using namespace std;

class Logger : public nvinfer1::ILogger
	void log(nvinfer1::ILogger::Severity severity, const char* msg) override
		// suppress info-level messages
		if (severity == Severity::kINFO) return;

		switch (severity)
			case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
			case Severity::kERROR: std::cerr << "ERROR: "; break;
			case Severity::kWARNING: std::cerr << "WARNING: "; break;
			case Severity::kINFO: std::cerr << "INFO: "; break;
			default: std::cerr << "UNKNOWN: "; break;
		std::cerr << msg << std::endl;

int main()
	std::string engineFilePath = "/opt/nvidia/deepstream/deepstream-5.0/samples/models/tlt_pretrained_models/peoplenet/resnet34_peoplenet_pruned.etlt_b1_gpu0_fp16.engine";
	std::string imagePath = "test.jpg";

	// General parameters
	uint16_t m_InputH, m_OutputW;
	uint16_t m_InputW, m_OutputH;
	uint16_t m_InputC;
	uint64_t m_InputSize, m_OutputBBoxSize, m_OutputConfidenceSize;
	uint16_t m_NumOutputClasses;

	// TRT specific parameters
	uint16_t m_maxBatchSize = 1;
	int m_InputIndex = -1;
	int m_OutputBboxIndex = -1, m_OutputClassIndex = -1;

	Logger m_Logger;
	nvinfer1::ICudaEngine* m_Engine;
	nvinfer1::IExecutionContext* m_Context;
	nvinfer1::IRuntime* runtime;

	std::vector<void*> m_Bindings;
	std::vector<float*> m_TrtOutputBuffers;  
	cudaStream_t m_CudaStream;

	m_InputW = 960;
	m_InputH = 544;
	m_InputC = 3;
	m_OutputW = 60;
	m_OutputH = 34;
	m_NumOutputClasses = 3;

	m_InputSize = m_InputW * m_InputH * m_InputC;
	m_OutputBBoxSize = m_OutputW * m_OutputH * m_NumOutputClasses * 4;
	m_OutputConfidenceSize = m_OutputW * m_OutputH * m_NumOutputClasses;
	// Deserializing engine	
	// reading the model in memory
	std::cout << "[Info] Loading TRT Engine...\n";
	std::stringstream trtModelStream;
	trtModelStream.seekg(0, trtModelStream.beg);
	std::ifstream cache(engineFilePath);
	trtModelStream << cache.rdbuf();

	// calculating model size
	trtModelStream.seekg(0, std::ios::end);
	const int modelSize = trtModelStream.tellg();
	trtModelStream.seekg(0, std::ios::beg);
	void* modelMem = malloc(modelSize);*) modelMem, modelSize);

	runtime = nvinfer1::createInferRuntime(m_Logger);
	m_Engine = runtime->deserializeCudaEngine(modelMem, modelSize, nullptr);
	std::cout << "[Info] Loading Complete!\n";

	if(m_Engine == nullptr)
		std::cout << "[Error] TensorRT engine loading failed\n";
		return -1;	

	m_Context = m_Engine->createExecutionContext();
	if(m_Context == nullptr)
		std::cout << "[Error] TensorRT getting context failed\n";
		return -2;	

	// Get the bindings
	std::cout << "[Info] Getting the Bindings...\n";
	m_Bindings.resize(m_Engine->getNbBindings(), nullptr);
	m_TrtOutputBuffers.resize(m_Engine->getNbBindings() - 1, nullptr);	
	m_InputIndex = m_Engine->getBindingIndex("input_1");
	m_OutputBboxIndex = m_Engine->getBindingIndex("output_bbox/BiasAdd");
	m_OutputClassIndex = m_Engine->getBindingIndex("output_cov/Sigmoid");
	if (m_InputIndex == -1 || m_OutputBboxIndex == -1 || m_OutputClassIndex == -1)
		std::cout << "[Error] TensorRT binding not found\n";
		return -3;
	std::cout << "[Info] Bindings size : " << m_Engine->getNbBindings() << "\n";
	std::cout << "[Info] Bindings " << m_InputIndex << " " << m_OutputBboxIndex << " " << m_OutputClassIndex << "\n";
	// Allocate Buffers	
	(cudaMalloc(&, m_maxBatchSize * m_InputSize * sizeof(float)));
	(cudaMalloc(&, m_maxBatchSize * m_OutputBBoxSize * sizeof(float)));
	(cudaMalloc(&, m_maxBatchSize * m_OutputConfidenceSize * sizeof(float)));
	(cudaMallocHost(&m_TrtOutputBuffers[0], m_OutputBBoxSize * m_maxBatchSize * sizeof(float)));
	(cudaMallocHost(&m_TrtOutputBuffers[1], m_OutputConfidenceSize * m_maxBatchSize * sizeof(float)));

	// Loading input image to device
	std::cout << "[Info] Loading input image\n";
	Mat inputImage = imread(imagePath);
	//cv::cvtColor(inputImage, inputImage, cv::COLOR_BGR2RGB);
	Mat inferImage = cv::dnn::blobFromImage(inputImage, 0.0039215697906911373, cv::Size(m_InputW, m_InputH), cv::Scalar(0.0, 0.0, 0.0), true, false);
	//Mat inferImage = cv::dnn::blobFromImage(inputImage, 1.0, cv::Size(m_InputW, m_InputH), cv::Scalar(0.0, 0.0, 0.0), false, false);
								  m_maxBatchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice,

	// Running Inference
	std::cout << "[Info] Running Inference\n";
	m_Context->enqueue(m_maxBatchSize,, m_CudaStream, nullptr);
								  m_maxBatchSize * m_OutputBBoxSize * sizeof(float),
								  cudaMemcpyDeviceToHost, m_CudaStream);
								  m_maxBatchSize * m_OutputConfidenceSize * sizeof(float),
								  cudaMemcpyDeviceToHost, m_CudaStream);

	// Decoding output buffers
	std::cout << "[Info] Decoding the output Buffers\n";
	int gridW = m_OutputW;
	int gridH = m_OutputH;
	int gridSize = gridW * gridH;
	float gcCentersX[gridW];
	float gcCentersY[gridH];
	float bboxNormX = 35.0;
	float bboxNormY = 35.0;
	float* outputBboxBuf = &[0];
	float* outputCovBuf = &[0];

	int strideX = DIVIDE_AND_ROUND_UP(m_InputW, gridW);
	int strideY = DIVIDE_AND_ROUND_UP(m_InputH, gridH);

	for (int i = 0; i < gridW; i++)
		gcCentersX[i] = (float)(i * strideX + 0.5);
		gcCentersX[i] /= (float)bboxNormX;
	for (int i = 0; i < gridH; i++)
		gcCentersY[i] = (float)(i * strideY + 0.5);
		gcCentersY[i] /= (float)bboxNormY;

	for (int c = 0; c < m_NumOutputClasses; c++)
		float *outputX1 = outputBboxBuf + (c * 4 * gridW * gridH);

		float *outputY1 = outputX1 + gridSize;
		float *outputX2 = outputY1 + gridSize;
		float *outputY2 = outputX2 + gridSize;

		float threshold = 0.1;//detectionParams.perClassPreclusterThreshold[c];
		for (int h = 0; h < gridH; h++)
			for (int w = 0; w < gridW; w++)
				int i = w + h * gridW;
				float confidence = outputCovBuf[c * gridSize + i];
				if (confidence >= threshold)
					//NvDsInferObjectDetectionInfo object;
					float rectX1f, rectY1f, rectX2f, rectY2f;

					rectX1f = (outputX1[w + h * gridW] - gcCentersX[w]) * -bboxNormX;
					rectY1f = (outputY1[w + h * gridW] - gcCentersY[h]) * -bboxNormY;
					rectX2f = (outputX2[w + h * gridW] + gcCentersX[w]) * bboxNormX;
					rectY2f = (outputY2[w + h * gridW] + gcCentersY[h]) * bboxNormY;

					//object.classId = c;
					//object.detectionConfidence = outputCovBuf[c * gridSize + i];

					/* Clip object box co-ordinates to network resolution */
					rectX1f = CLIP(rectX1f, 0, m_InputW - 1);
					rectY1f = CLIP(rectY1f, 0, m_InputH - 1);
					rectX2f = CLIP(rectX2f, 0, m_InputW - 1);
					rectY2f = CLIP(rectY2f, 0, m_InputH - 1);

					//Prevent underflows
					if(((rectX2f - rectX1f) < 0) || ((rectY2f - rectY1f) < 0))

					// Detected boxes
					std::cout << "[Info] ClassIdx : " << c << " BBox : " << rectX1f << "," << rectY1f << "," << (rectX2f) << "," << (rectY2f) << "," << confidence << "\n";          
	//imshow("Display", inputImage);

	return 0;

Can you first debug your code to check which stages produce output and which do not?

What's the difference between enqueue and execute? With execute it works (using TensorRT).

See TensorRT: nvinfer1::IExecutionContext Class Reference
