Hi @NVES
When I used my method for batching, I was able to get n
outputs but they are gibberish UNLESS I set the BATCH =1. Please help me verify if I’m reading the output of the models correctly. I’m following the ONNXMNIST c++ code sample.
The modified build function:
bool SampleInference::build()
{
std::vector<char> trtModelStream_;
size_t size{ 0 };
std::ifstream file("/media/31A079936F39FBF9/onnx_cache_trt/model.trt", std::ios::binary);
if (file.good())
{
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream_.resize(size);
file.read(trtModelStream_.data(), size);
file.close();
}
IRuntime* runtime = createInferRuntime(sample::gLogger);
mEngine_midas_hq = std::shared_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtModelStream_.data(), size, nullptr), samplesCommon::InferDeleter());
if (!mEngine_midas_hq)
{
return false;
}
context_iExecutionContext = (mEngine_midas_hq->createExecutionContext());
context_midas_hq = SampleUniquePtr<nvinfer1::IExecutionContext>(context_iExecutionContext);
nvinfer1::Dims4 input_dimensions(BATCH,3,384,1120)
context_midas_hq->setBindingDimensions(0,input_dimensions);
return true;
}
The slightly modified Infer function
vector<cv::Mat> SampleInference::infer(vector<cv::Mat> &inputs_fin)
{
samplesCommon::BufferManager buffers(mEngine_midas_hq, 0, context_iExecutionContext);
//cudaStream_t stream;
//cudaStreamCreate(&stream);
bool status_processInput = processInput(buffers,inputs_fin);
//buffers.copyInputToDeviceAsync();
buffers.copyInputToDevice();
//bool status_inference = context_midas_hq->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr);
bool status_inference = context_midas_hq->executeV2(buffers.getDeviceBindings().data());
//buffers.copyOutputToHostAsync();
buffers.copyOutputToHost();
//cudaStreamSynchronize(stream);
//cudaStreamDestroy(stream);
vector<cv::Mat> output_fin = processOutput(buffers);
return output_fin;
}
The modified processInput function:
bool SampleInference::processInput(const samplesCommon::BufferManager& buffers, vector<cv::Mat>& input)
{
int batch = BATCH; // correct output only when BATCH=1
float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer("INPUTS"));
for (int batch_i = 0; batch_i < batch; batch_i++)
{
cv::Mat input1; //bgr image, hwc
cv::resize(input[batch_i], input1, cv::Size(1120, 384), 0, 0, cv::INTER_CUBIC);
Normalizer normalizer; // bgr -> rgb, hwc -> chw, normalize
cv::Mat refined = normalizer.Process(input1);
cv::Mat linear_refined = refined.reshape(1,refined.total()*refined.channels());
for (int i = (int)(linear_refined.rows)*batch_i; i < (batch_i + 1)*(int)(linear_refined.rows); i++)
{
hostDataBuffer[i] = (float)linear_refined.at<float>(cv::Point(i-(int)(linear_refined.rows)*batch_i,0));
}
}
return true;
}
The modified verifyOutput function:
vector<cv::Mat> SampleInference::processOutput(const samplesCommon::BufferManager& buffers)
{
int batch = BATCH; //correct output only when BATCH=1
vector<cv::Mat> out;
float* output = static_cast<float*>(buffers.getHostBuffer("OUTPUTS"));
for(int batch_i=0; batch_i < batch; batch_i++)
{
float* output_i = output+batch_i*(384*1120);
cv::Mat outputs = cv::Mat(384, 1120, CV_32FC1, output_i);
out.push_back(600000.0f*(1.0f/outputs));//Changed 6000 to 600000 because of outputs*100.0f
}
return out;
}