I generated the TensorRT engine file with the following command:
./tlt-converter resnet34_peoplenet_pruned.etlt -k tlt_encode -o output_cov/Sigmoid,output_bbox/BiasAdd -d 3,544,960 -i nchw -t fp16 -e resnet34_peoplenet_pruned.etlt_b1_gpu0_fp16.engine -m 1 -b 1
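For reference, -d 3,544,960 matches the 3x544x960 network input that the code below hard-codes, and -o names the two output tensors (output_cov/Sigmoid, output_bbox/BiasAdd) that are later looked up as bindings.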
I am running inference with the following code, but I get no detections:
//g++ inferPeopleNet.cpp `pkg-config --cflags --libs opencv4` -I /usr/local/cuda-10.2/include -I /usr/include/aarch64-linux-gnu/ -L /usr/lib/aarch64-linux-gnu -lnvinfer -lnvinfer_plugin -L /usr/local/cuda-10.2/lib64 -lcudart -lcublas -lcurand
/* OpenCV headers */
#include <opencv2/core/core.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <cassert>
#include <cstdlib>
#include <cuda_runtime_api.h>
#include "NvInfer.h"
#include "NvInferPlugin.h"
// MIN and MAX are already defined by the OpenCV core headers (cvdef.h)
#define CLIP(a,min,max) (MAX(MIN(a, max), min))
#define DIVIDE_AND_ROUND_UP(a, b) ((a + b - 1) / b)
using namespace cv;
using namespace std;
class Logger : public nvinfer1::ILogger
{
public:
void log(nvinfer1::ILogger::Severity severity, const char* msg) override
{
// suppress info-level messages
if (severity == Severity::kINFO) return;
switch (severity)
{
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
case Severity::kERROR: std::cerr << "ERROR: "; break;
case Severity::kWARNING: std::cerr << "WARNING: "; break;
case Severity::kINFO: std::cerr << "INFO: "; break;
default: std::cerr << "UNKNOWN: "; break;
}
std::cerr << msg << std::endl;
}
};
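// The bare parentheses around the CUDA calls in main() look like the remnant
// of a stripped status-check macro; a minimal sketch of such a helper (my
// addition, not part of the original code) the calls could be wrapped in:
#define CHECK_CUDA(call) \
do { \
cudaError_t err_ = (call); \
if (err_ != cudaSuccess) \
{ \
std::cerr << "CUDA error: " << cudaGetErrorString(err_) \
<< " at " << __FILE__ << ":" << __LINE__ << "\n"; \
std::exit(1); \
} \
} while (0)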
int main()
{
std::string engineFilePath = "/opt/nvidia/deepstream/deepstream-5.0/samples/models/tlt_pretrained_models/peoplenet/resnet34_peoplenet_pruned.etlt_b1_gpu0_fp16.engine";
std::string imagePath = "test.jpg";
// General parameters
uint16_t m_InputH, m_OutputW;
uint16_t m_InputW, m_OutputH;
uint16_t m_InputC;
uint64_t m_InputSize, m_OutputBBoxSize, m_OutputConfidenceSize;
uint16_t m_NumOutputClasses;
// TRT specific parameters
uint16_t m_maxBatchSize = 1;
int m_InputIndex = -1;
int m_OutputBboxIndex = -1, m_OutputClassIndex = -1;
Logger m_Logger;
nvinfer1::ICudaEngine* m_Engine;
nvinfer1::IExecutionContext* m_Context;
nvinfer1::IRuntime* runtime;
std::vector<void*> m_Bindings;
std::vector<float*> m_TrtOutputBuffers;
cudaStream_t m_CudaStream;
m_InputW = 960;
m_InputH = 544;
m_InputC = 3;
m_OutputW = 60;
m_OutputH = 34;
m_NumOutputClasses = 3;
m_InputSize = m_InputW * m_InputH * m_InputC;
m_OutputBBoxSize = m_OutputW * m_OutputH * m_NumOutputClasses * 4;
m_OutputConfidenceSize = m_OutputW * m_OutputH * m_NumOutputClasses;
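// For reference: the 60x34 grid gives m_OutputConfidenceSize = 60*34*3 = 6120
// floats and m_OutputBBoxSize = 60*34*3*4 = 24480 floats per batch.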
// Deserializing engine
// reading the model in memory
std::cout << "[Info] Loading TRT Engine...\n";
std::ifstream cache(engineFilePath, std::ios::binary); // engine files are binary
assert(cache.good());
std::stringstream trtModelStream;
trtModelStream << cache.rdbuf();
cache.close();
// calculating model size
trtModelStream.seekg(0, std::ios::end);
const int modelSize = trtModelStream.tellg();
trtModelStream.seekg(0, std::ios::beg);
void* modelMem = malloc(modelSize);
trtModelStream.read((char*) modelMem, modelSize);
initLibNvInferPlugins(&m_Logger, ""); // register TRT plugins (harmless if the engine uses none)
runtime = nvinfer1::createInferRuntime(m_Logger);
m_Engine = runtime->deserializeCudaEngine(modelMem, modelSize, nullptr);
free(modelMem);
runtime->destroy();
std::cout << "[Info] Loading Complete!\n";
if(m_Engine == nullptr)
{
std::cout << "[Error] TensorRT engine loading failed\n";
return -1;
}
m_Context = m_Engine->createExecutionContext();
if(m_Context == nullptr)
{
std::cout << "[Error] TensorRT getting context failed\n";
return -2;
}
// Get the bindings
std::cout << "[Info] Getting the Bindings...\n";
m_Bindings.resize(m_Engine->getNbBindings(), nullptr);
m_TrtOutputBuffers.resize(m_Engine->getNbBindings() - 1, nullptr);
m_InputIndex = m_Engine->getBindingIndex("input_1");
m_OutputBboxIndex = m_Engine->getBindingIndex("output_bbox/BiasAdd");
m_OutputClassIndex = m_Engine->getBindingIndex("output_cov/Sigmoid");
if (m_InputIndex == -1 || m_OutputBboxIndex == -1 || m_OutputClassIndex == -1)
{
std::cout << "[Error] TensorRT binding not found\n";
return -3;
}
std::cout << "[Info] Bindings size : " << m_Engine->getNbBindings() << "\n";
std::cout << "[Info] Bindings " << m_InputIndex << " " << m_OutputBboxIndex << " " << m_OutputClassIndex << "\n";
// Allocate Buffers
(cudaMalloc(&m_Bindings.at(m_InputIndex), m_maxBatchSize * m_InputSize * sizeof(float)));
(cudaMalloc(&m_Bindings.at(m_OutputBboxIndex), m_maxBatchSize * m_OutputBBoxSize * sizeof(float)));
(cudaMalloc(&m_Bindings.at(m_OutputClassIndex), m_maxBatchSize * m_OutputConfidenceSize * sizeof(float)));
(cudaMallocHost(&m_TrtOutputBuffers[0], m_OutputBBoxSize * m_maxBatchSize * sizeof(float)));
(cudaMallocHost(&m_TrtOutputBuffers[1], m_OutputConfidenceSize * m_maxBatchSize * sizeof(float)));
(cudaStreamCreate(&m_CudaStream));
// Loading input image to device
std::cout << "[Info] Loading input image\n";
Mat inputImage = imread(imagePath);
if (inputImage.empty())
{
std::cout << "[Error] Failed to read image " << imagePath << "\n";
return -4;
}
//cv::cvtColor(inputImage, inputImage, cv::COLOR_BGR2RGB);
Mat inferImage = cv::dnn::blobFromImage(inputImage, 1.0 / 255.0, cv::Size(m_InputW, m_InputH), cv::Scalar(0.0, 0.0, 0.0), true, false);
//Mat inferImage = cv::dnn::blobFromImage(inputImage, 1.0, cv::Size(m_InputW, m_InputH), cv::Scalar(0.0, 0.0, 0.0), false, false);
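// Sanity check (my addition): blobFromImage returns a CV_32F NCHW blob, so it
// should hold exactly m_InputSize floats for batch 1.
assert(inferImage.isContinuous() && inferImage.total() == m_InputSize);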
cudaMemcpyAsync(m_Bindings.at(m_InputIndex), inferImage.data,
m_maxBatchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice,
m_CudaStream);
// Running Inference
std::cout << "[Info] Running Inference\n";
m_Context->enqueue(m_maxBatchSize, m_Bindings.data(), m_CudaStream, nullptr);
//m_Context->execute(m_maxBatchSize, m_Bindings.data());
cudaMemcpyAsync(m_TrtOutputBuffers.at(0), m_Bindings.at(m_OutputBboxIndex),
m_maxBatchSize * m_OutputBBoxSize * sizeof(float),
cudaMemcpyDeviceToHost, m_CudaStream);
cudaMemcpyAsync(m_TrtOutputBuffers.at(1), m_Bindings.at(m_OutputClassIndex),
m_maxBatchSize * m_OutputConfidenceSize * sizeof(float),
cudaMemcpyDeviceToHost, m_CudaStream);
// Block until inference and both copies have finished before reading the host buffers
cudaStreamSynchronize(m_CudaStream);
// Decoding output buffers
std::cout << "[Info] Decoding the output Buffers\n";
int gridW = m_OutputW;
int gridH = m_OutputH;
int gridSize = gridW * gridH;
std::vector<float> gcCentersX(gridW);
std::vector<float> gcCentersY(gridH);
float bboxNormX = 35.0;
float bboxNormY = 35.0;
float* outputBboxBuf = &m_TrtOutputBuffers.at(0)[0];
float* outputCovBuf = &m_TrtOutputBuffers.at(1)[0];
int strideX = DIVIDE_AND_ROUND_UP(m_InputW, gridW);
int strideY = DIVIDE_AND_ROUND_UP(m_InputH, gridH);
for (int i = 0; i < gridW; i++)
{
gcCentersX[i] = (float)(i * strideX + 0.5);
gcCentersX[i] /= (float)bboxNormX;
}
for (int i = 0; i < gridH; i++)
{
gcCentersY[i] = (float)(i * strideY + 0.5);
gcCentersY[i] /= (float)bboxNormY;
}
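// Worked example of the encoding: strideX = ceil(960/60) = 16, so
// gcCentersX[1] = (1*16 + 0.5)/35 ≈ 0.471; the loop below inverts
// DetectNet_v2's grid-relative, stride-normalized box offsets.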
for (int c = 0; c < m_NumOutputClasses; c++)
{
float *outputX1 = outputBboxBuf + (c * 4 * gridW * gridH);
float *outputY1 = outputX1 + gridSize;
float *outputX2 = outputY1 + gridSize;
float *outputY2 = outputX2 + gridSize;
float threshold = 0.1;//detectionParams.perClassPreclusterThreshold[c];
for (int h = 0; h < gridH; h++)
{
for (int w = 0; w < gridW; w++)
{
int i = w + h * gridW;
float confidence = outputCovBuf[c * gridSize + i];
if (confidence >= threshold)
{
//NvDsInferObjectDetectionInfo object;
float rectX1f, rectY1f, rectX2f, rectY2f;
rectX1f = (outputX1[w + h * gridW] - gcCentersX[w]) * -bboxNormX;
rectY1f = (outputY1[w + h * gridW] - gcCentersY[h]) * -bboxNormY;
rectX2f = (outputX2[w + h * gridW] + gcCentersX[w]) * bboxNormX;
rectY2f = (outputY2[w + h * gridW] + gcCentersY[h]) * bboxNormY;
//object.classId = c;
//object.detectionConfidence = outputCovBuf[c * gridSize + i];
/* Clip object box co-ordinates to network resolution */
rectX1f = CLIP(rectX1f, 0, m_InputW - 1);
rectY1f = CLIP(rectY1f, 0, m_InputH - 1);
rectX2f = CLIP(rectX2f, 0, m_InputW - 1);
rectY2f = CLIP(rectY2f, 0, m_InputH - 1);
//Prevent underflows
if(((rectX2f - rectX1f) < 0) || ((rectY2f - rectY1f) < 0))
continue;
// Detected boxes
std::cout << "[Info] ClassIdx : " << c << " BBox : " << rectX1f << "," << rectY1f << "," << (rectX2f) << "," << (rectY2f) << "," << confidence << "\n";
}
}
}
}
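// Note (my addition): DetectNet_v2 emits one candidate box per grid cell, so
// overlapping duplicate detections are expected here; DeepStream clusters them
// (e.g. DBSCAN/NMS). A minimal sketch with OpenCV's NMS, assuming the boxes
// and confidences were collected into vectors inside the loop above:
// std::vector<cv::Rect> boxes; std::vector<float> scores; std::vector<int> keep;
// cv::dnn::NMSBoxes(boxes, scores, /*scoreThreshold=*/0.1f, /*nmsThreshold=*/0.45f, keep);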
//imshow("Display", inputImage);
//waitKey();
// Release CUDA and TensorRT resources
cudaStreamDestroy(m_CudaStream);
for (void* buf : m_Bindings) cudaFree(buf);
for (float* buf : m_TrtOutputBuffers) cudaFreeHost(buf);
m_Context->destroy();
m_Engine->destroy();
return 0;
}