Hey there, thanks for the response.
I don’t have any makefiles, as I was just hacking on the samples source. You can drop these into your samples project and build them the way the samples are normally built; the clang++ one-liner I used is also in a comment near the top of each file.
Here’s the hacked sampleMNIST.cpp:
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//! \file sampleMNIST.cpp
//! \brief This file contains the implementation of the MNIST sample.
//!
//! It builds a TensorRT engine by importing a trained MNIST Caffe model. It uses the engine to run
//! inference on an input image of a digit.
//! It can be run with the following command line:
//! Command: ./sample_mnist [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
/*
clang++ /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7
/usr/local/cuda-10.2/targets/x86_64-linux/lib/libcudart.so.10.2 sampleMNIST.cpp -I ../../common/ -I
/home/tom/projects/TensorRT/include/ -I /usr/local/cuda-10.2/targets/x86_64-linux/include/ -lnvinfer -lnvparsers
-lpthread
*/
#include "argsParser.h"
#include "common.h"
#include "NvCaffeParser.h"
#include "NvInfer.h"
#include <algorithm>
#include <cassert>
#include <cmath>
//#include <cuda_runtime_api.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <thread>
const std::string gSampleName = "TensorRT.sample_mnist";
namespace
{
// No-op logger: deliberately discards every TensorRT message so the repro output stays quiet.
class TomLogger : public nvinfer1::ILogger
{
public:
    void log(nvinfer1::ILogger::Severity severity, const char* msg) override {}
} g_logger;
} // namespace
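// If you want the TensorRT diagnostics back while reproducing, a severity-filtered
// log() body is the obvious swap (my sketch, not part of the original hack):
//
//     void log(nvinfer1::ILogger::Severity severity, const char* msg) override
//     {
//         if (severity <= nvinfer1::ILogger::Severity::kWARNING)
//             std::cerr << msg << std::endl;
//     }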
//!
//! \brief The SampleMNIST class implements the MNIST sample
//!
//! \details It creates the network using a trained Caffe MNIST classification model
//!
class SampleMNIST
{
template <typename T>
using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;
public:
SampleMNIST(const samplesCommon::CaffeSampleParams& params)
: mParams(params)
{
}
//!
//! \brief Builds the network engine
//!
bool build();
//!
//! \brief Used to clean up any state created in the sample class
//!
bool teardown();
private:
//!
//! \brief uses a Caffe parser to create the MNIST Network and marks the
//! output layers
//!
void constructNetwork(
SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser, SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network
samplesCommon::CaffeSampleParams mParams; //!< The parameters for the sample.
nvinfer1::Dims mInputDims; //!< The dimensions of the input to the network.
SampleUniquePtr<nvcaffeparser1::IBinaryProtoBlob>
    mMeanBlob; //!< The mean blob, which we need to keep around until build is done
};
//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the MNIST network by parsing the caffe model and builds
//! the engine that will be used to run MNIST (mEngine)
//!
//! \return Returns true if the engine was created successfully and false otherwise
//!
bool SampleMNIST::build()
{
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(g_logger));
if (!builder)
{
return false;
}
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
if (!network)
{
return false;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
if (!config)
{
return false;
}
auto parser = SampleUniquePtr<nvcaffeparser1::ICaffeParser>(nvcaffeparser1::createCaffeParser());
if (!parser)
{
return false;
}
constructNetwork(parser, network);
builder->setMaxBatchSize(mParams.batchSize);
config->setMaxWorkspaceSize(16_MiB);
config->setFlag(BuilderFlag::kGPU_FALLBACK);
config->setFlag(BuilderFlag::kSTRICT_TYPES);
if (mParams.fp16)
{
config->setFlag(BuilderFlag::kFP16);
}
if (mParams.int8)
{
config->setFlag(BuilderFlag::kINT8);
}
samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
if (!mEngine)
{
    return false;
}
assert(network->getNbInputs() == 1);
mInputDims = network->getInput(0)->getDimensions();
assert(mInputDims.nbDims == 3);
return true;
}
//!
//! \brief Uses a caffe parser to create the MNIST Network and marks the
//! output layers
//!
//! \param network Pointer to the network that will be populated with the MNIST network
//!
//! \param builder Pointer to the engine builder
//!
void SampleMNIST::constructNetwork(
SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser, SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(
mParams.prototxtFileName.c_str(), mParams.weightsFileName.c_str(), *network, nvinfer1::DataType::kFLOAT);
for (auto& s : mParams.outputTensorNames)
{
network->markOutput(*blobNameToTensor->find(s.c_str()));
}
// add mean subtraction to the beginning of the network
nvinfer1::Dims inputDims = network->getInput(0)->getDimensions();
mMeanBlob
= SampleUniquePtr<nvcaffeparser1::IBinaryProtoBlob>(parser->parseBinaryProto(mParams.meanFileName.c_str()));
nvinfer1::Weights meanWeights{nvinfer1::DataType::kFLOAT, mMeanBlob->getData(), inputDims.d[1] * inputDims.d[2]};
// For this sample, a large range based on the mean data is chosen and applied to the head of the network.
// After the mean subtraction occurs, the range is expected to be between -127 and 127, so the rest of the network
// is given a generic range.
// The preferred method is use scales computed based on a representative data set
// and apply each one individually based on the tensor. The range here is large enough for the
// network, but is chosen for example purposes only.
float maxMean
= samplesCommon::getMaxValue(static_cast<const float*>(meanWeights.values), samplesCommon::volume(inputDims));
auto mean = network->addConstant(nvinfer1::Dims3(1, inputDims.d[1], inputDims.d[2]), meanWeights);
mean->getOutput(0)->setDynamicRange(-maxMean, maxMean);
network->getInput(0)->setDynamicRange(-maxMean, maxMean);
auto meanSub = network->addElementWise(*network->getInput(0), *mean->getOutput(0), ElementWiseOperation::kSUB);
meanSub->getOutput(0)->setDynamicRange(-maxMean, maxMean);
network->getLayer(0)->setInput(0, *meanSub->getOutput(0));
samplesCommon::setAllTensorScales(network.get(), 127.0f, 127.0f);
}
//!
//! \brief Used to clean up any state created in the sample class
//!
bool SampleMNIST::teardown()
{
//! Clean up the libprotobuf files as the parsing is complete
//! \note It is not safe to use any other part of the protocol buffers library after
//! ShutdownProtobufLibrary() has been called.
nvcaffeparser1::shutdownProtobufLibrary();
return true;
}
//!
//! \brief Initializes members of the params struct using the command line args
//!
samplesCommon::CaffeSampleParams initializeSampleParams(const samplesCommon::Args& args)
{
samplesCommon::CaffeSampleParams params;
if (args.dataDirs.empty()) //!< Use default directories if user hasn't provided directory paths
{
params.dataDirs.push_back("data/mnist/");
params.dataDirs.push_back("data/samples/mnist/");
}
else //!< Use the data directory provided by the user
{
params.dataDirs = args.dataDirs;
}
params.prototxtFileName = locateFile("mnist.prototxt", params.dataDirs);
params.weightsFileName = locateFile("mnist.caffemodel", params.dataDirs);
params.meanFileName = locateFile("mnist_mean.binaryproto", params.dataDirs);
params.inputTensorNames.push_back("data");
params.batchSize = 1;
params.outputTensorNames.push_back("prob");
params.dlaCore = args.useDLACore;
params.int8 = args.runInInt8;
params.fp16 = args.runInFp16;
return params;
}
//!
//! \brief Prints the help information for running this sample
//!
void printHelpInfo()
{
std::cout
<< "Usage: ./sample_mnist [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>]\n";
std::cout << "--help Display help information\n";
std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
"multiple times to add multiple directories. If no data directories are given, the default is to use "
"(data/samples/mnist/, data/mnist/)"
<< std::endl;
std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
"where n is the number of DLA engines on the platform."
<< std::endl;
std::cout << "--int8 Run in Int8 mode.\n";
std::cout << "--fp16 Run in FP16 mode.\n";
}
int main(int argc, char** argv)
{
samplesCommon::Args args;
bool argsOK = samplesCommon::parseArgs(args, argc, argv);
if (!argsOK)
{
std::cerr << "Invalid arguments" << std::endl;
printHelpInfo();
return EXIT_FAILURE;
}
if (args.help)
{
printHelpInfo();
return EXIT_SUCCESS;
}
samplesCommon::CaffeSampleParams params = initializeSampleParams(args);
auto buildSample = [params](int const id) {
    SampleMNIST sample(params);
    // std::cout << "Building and running a GPU inference engine for MNIST: " << id << std::endl;
    bool const ok = sample.build();
    assert(ok); // hoisted out of assert() so NDEBUG builds still build the engine
};
int const numEngines = 128; // number of engine builds to launch concurrently
std::vector<std::thread> initThreads;
for (int ix = 0; ix < numEngines; ++ix)
{
initThreads.emplace_back(buildSample, ix);
}
for (auto& t : initThreads)
{
t.join();
}
return 0;
}
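Relative to the stock sample, the hacks are: a no-op logger, inference and teardown stripped out, and main() fanning 128 engine builds out across threads. If it helps narrow things down, the serialized comparison I’d run is just this (a sketch, swapping the thread loop for a plain loop):

    // Same 128 builds, one at a time on the main thread; if this behaves
    // while the threaded loop doesn't, the difference is the concurrency.
    for (int ix = 0; ix < numEngines; ++ix)
    {
        buildSample(ix);
    }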
And here’s the hacked sampleFasterRCNN.cpp:
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//!
//! sampleFasterRCNN.cpp
//! This file contains the implementation of the FasterRCNN sample. It creates the network using
//! the FasterRCNN caffe model.
//! It can be run with the following command line:
//! Command: ./sample_fasterRCNN [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
//!
// clang++ /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7
// /usr/local/cuda-10.2/targets/x86_64-linux/lib/libcudart.so.10.2 sampleFasterRCNN.cpp -I ../../common/ -I
// /home/tom/projects/TensorRT/include/ -I /usr/local/cuda-10.2/targets/x86_64-linux/include/ -lnvinfer -lnvparsers
// -lpthread
#include "argsParser.h"
#include "buffers.h"
#include "common.h"
//#include "logger.h"
#include "NvCaffeParser.h"
#include "NvInfer.h"
#include <cuda_runtime_api.h>
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <thread>
#include <vector>
namespace
{
// Same no-op logger as in the MNIST file: all TensorRT messages are discarded.
class TomLogger : public nvinfer1::ILogger
{
public:
    void log(nvinfer1::ILogger::Severity severity, const char* msg) override {}
} g_logger;
} // namespace
const std::string gSampleName = "TensorRT.sample_fasterRCNN";
//!
//! \brief The SampleFasterRCNNParams structure groups the additional parameters required by
//! the FasterRCNN sample.
//!
struct SampleFasterRCNNParams : public samplesCommon::CaffeSampleParams
{
int outputClsSize; //!< The number of output classes
int nmsMaxOut; //!< The maximum number of detection post-NMS
};
//! \brief The SampleFasterRCNN class implements the FasterRCNN sample
//!
//! \details It creates the network using a caffe model
//!
class SampleFasterRCNN
{
template <typename T>
using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;
public:
SampleFasterRCNN(const SampleFasterRCNNParams& params)
: mParams(params)
, mEngine(nullptr)
{
}
//!
//! \brief Function builds the network engine
//!
bool build();
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
bool infer();
//!
//! \brief Cleans up any state created in the sample class
//!
bool teardown();
private:
SampleFasterRCNNParams mParams; //!< The parameters for the sample.
nvinfer1::Dims mInputDims; //!< The dimensions of the input to the network.
static const int kIMG_CHANNELS = 3;
static const int kIMG_H = 375;
static const int kIMG_W = 500;
std::vector<samplesCommon::PPM<kIMG_CHANNELS, kIMG_H, kIMG_W>> mPPMs; //!< PPMs of test images
std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The TensorRT engine used to run the network
//!
//! \brief Parses a Caffe model for FasterRCNN and creates a TensorRT network
//!
void constructNetwork(SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser,
SampleUniquePtr<nvinfer1::IBuilder>& builder, SampleUniquePtr<nvinfer1::INetworkDefinition>& network,
SampleUniquePtr<nvinfer1::IBuilderConfig>& config);
//!
//! \brief Reads the input and mean data, preprocesses, and stores the result in a managed buffer
//!
bool processInput(const samplesCommon::BufferManager& buffers);
//!
//! \brief Filters output detections, handles post-processing of bounding boxes and verify results
//!
bool verifyOutput(const samplesCommon::BufferManager& buffers);
//!
//! \brief Performs inverse bounding box transform and clipping
//!
void bboxTransformInvAndClip(const float* rois, const float* deltas, float* predBBoxes, const float* imInfo,
const int N, const int nmsMaxOut, const int numCls);
//!
//! \brief Performs non maximum suppression on final bounding boxes
//!
std::vector<int> nonMaximumSuppression(std::vector<std::pair<float, int>>& scoreIndex, float* bbox,
const int classNum, const int numClasses, const float nmsThreshold);
};
//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the FasterRCNN network by parsing the caffe model and builds
//! the engine that will be used to run FasterRCNN (mEngine)
//!
//! \return Returns true if the engine was created successfully and false otherwise
//!
bool SampleFasterRCNN::build()
{
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(g_logger));
if (!builder)
{
return false;
}
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
if (!network)
{
return false;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
if (!config)
{
return false;
}
auto parser = SampleUniquePtr<nvcaffeparser1::ICaffeParser>(nvcaffeparser1::createCaffeParser());
if (!parser)
{
return false;
}
constructNetwork(parser, builder, network, config);
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
if (!mEngine)
{
return false;
}
assert(network->getNbInputs() == 2);
mInputDims = network->getInput(0)->getDimensions();
assert(mInputDims.nbDims == 3);
return true;
}
//!
//! \brief Uses a caffe parser to create the FasterRCNN network and marks the
//! output layers
//!
//! \param network Pointer to the network that will be populated with the FasterRCNN network
//!
//! \param builder Pointer to the engine builder
//!
void SampleFasterRCNN::constructNetwork(SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser,
SampleUniquePtr<nvinfer1::IBuilder>& builder, SampleUniquePtr<nvinfer1::INetworkDefinition>& network,
SampleUniquePtr<nvinfer1::IBuilderConfig>& config)
{
const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor
= parser->parse(locateFile(mParams.prototxtFileName, mParams.dataDirs).c_str(),
locateFile(mParams.weightsFileName, mParams.dataDirs).c_str(), *network, nvinfer1::DataType::kFLOAT);
for (auto& s : mParams.outputTensorNames)
{
network->markOutput(*blobNameToTensor->find(s.c_str()));
}
builder->setMaxBatchSize(mParams.batchSize);
config->setMaxWorkspaceSize(16_MiB);
samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
}
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates the buffer,
//! sets inputs and executes the engine.
//!
bool SampleFasterRCNN::infer()
{
// Create RAII buffer manager object
samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
return false;
}
// Read the input data into the managed buffers
assert(mParams.inputTensorNames.size() == 2);
if (!processInput(buffers))
{
return false;
}
// Memcpy from host input buffers to device input buffers
buffers.copyInputToDevice();
bool status = context->execute(mParams.batchSize, buffers.getDeviceBindings().data());
if (!status)
{
return false;
}
// Memcpy from device output buffers to host output buffers
buffers.copyOutputToHost();
// Post-process detections and verify results
if (!verifyOutput(buffers))
{
return false;
}
return true;
}
//!
//! \brief Cleans up any state created in the sample class
//!
bool SampleFasterRCNN::teardown()
{
//! Clean up the libprotobuf files as the parsing is complete
//! \note It is not safe to use any other part of the protocol buffers library after
//! ShutdownProtobufLibrary() has been called.
nvcaffeparser1::shutdownProtobufLibrary();
return true;
}
//!
//! \brief Reads the input and mean data, preprocesses, and stores the result in a managed buffer
//!
bool SampleFasterRCNN::processInput(const samplesCommon::BufferManager& buffers)
{
const int inputC = mInputDims.d[0];
const int inputH = mInputDims.d[1];
const int inputW = mInputDims.d[2];
const int batchSize = mParams.batchSize;
// Available images
const std::vector<std::string> imageList = {"000456.ppm", "000542.ppm", "001150.ppm", "001763.ppm", "004545.ppm"};
mPPMs.resize(batchSize);
assert(mPPMs.size() <= imageList.size());
// Fill im_info buffer
float* hostImInfoBuffer = static_cast<float*>(buffers.getHostBuffer("im_info"));
for (int i = 0; i < batchSize; ++i)
{
readPPMFile(locateFile(imageList[i], mParams.dataDirs), mPPMs[i]);
hostImInfoBuffer[i * 3] = float(mPPMs[i].h); // Number of rows
hostImInfoBuffer[i * 3 + 1] = float(mPPMs[i].w); // Number of columns
hostImInfoBuffer[i * 3 + 2] = 1; // Image scale
}
// Fill data buffer
float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer("data"));
    // Pixel mean used by the Faster R-CNN authors
const float pixelMean[3]{102.9801f, 115.9465f, 122.7717f}; // Also in BGR order
for (int i = 0, volImg = inputC * inputH * inputW; i < batchSize; ++i)
{
for (int c = 0; c < inputC; ++c)
{
// The color image to input should be in BGR order
for (unsigned j = 0, volChl = inputH * inputW; j < volChl; ++j)
hostDataBuffer[i * volImg + c * volChl + j] = float(mPPMs[i].buffer[j * inputC + 2 - c]) - pixelMean[c];
}
}
return true;
}
//!
//! \brief Filters output detections and handles post-processing of bounding boxes, verify result
//!
//! \return whether the detection output matches expectations
//!
bool SampleFasterRCNN::verifyOutput(const samplesCommon::BufferManager& buffers)
{
const int batchSize = mParams.batchSize;
const int nmsMaxOut = mParams.nmsMaxOut;
const int outputClsSize = mParams.outputClsSize;
const int outputBBoxSize = mParams.outputClsSize * 4;
const float* imInfo = static_cast<const float*>(buffers.getHostBuffer("im_info"));
const float* deltas = static_cast<const float*>(buffers.getHostBuffer("bbox_pred"));
const float* clsProbs = static_cast<const float*>(buffers.getHostBuffer("cls_prob"));
float* rois = static_cast<float*>(buffers.getHostBuffer("rois"));
// Unscale back to raw image space
for (int i = 0; i < batchSize; ++i)
{
for (int j = 0; j < nmsMaxOut * 4 && imInfo[i * 3 + 2] != 1; ++j)
{
rois[i * nmsMaxOut * 4 + j] /= imInfo[i * 3 + 2];
}
}
std::vector<float> predBBoxes(batchSize * nmsMaxOut * outputBBoxSize, 0);
bboxTransformInvAndClip(rois, deltas, predBBoxes.data(), imInfo, batchSize, nmsMaxOut, outputClsSize);
const float nmsThreshold = 0.3f;
const float score_threshold = 0.8f;
const std::vector<std::string> classes{"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
"cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
"train", "tvmonitor"};
// The sample passes if there is at least one detection for each item in the batch
bool pass = true;
for (int i = 0; i < batchSize; ++i)
{
float* bbox = predBBoxes.data() + i * nmsMaxOut * outputBBoxSize;
const float* scores = clsProbs + i * nmsMaxOut * outputClsSize;
int numDetections = 0;
for (int c = 1; c < outputClsSize; ++c) // Skip the background
{
std::vector<std::pair<float, int>> scoreIndex;
for (int r = 0; r < nmsMaxOut; ++r)
{
if (scores[r * outputClsSize + c] > score_threshold)
{
scoreIndex.push_back(std::make_pair(scores[r * outputClsSize + c], r));
std::stable_sort(scoreIndex.begin(), scoreIndex.end(),
[](const std::pair<float, int>& pair1, const std::pair<float, int>& pair2) {
return pair1.first > pair2.first;
});
}
}
// Apply NMS algorithm
const std::vector<int> indices = nonMaximumSuppression(scoreIndex, bbox, c, outputClsSize, nmsThreshold);
numDetections += static_cast<int>(indices.size());
// Show results
for (unsigned k = 0; k < indices.size(); ++k)
{
const int idx = indices[k];
const std::string storeName
= classes[c] + "-" + std::to_string(scores[idx * outputClsSize + c]) + ".ppm";
std::cout << "Detected " << classes[c] << " in " << mPPMs[i].fileName << " with confidence "
<< scores[idx * outputClsSize + c] * 100.0f << "% "
<< " (Result stored in " << storeName << ")." << std::endl;
const samplesCommon::BBox b{bbox[idx * outputBBoxSize + c * 4], bbox[idx * outputBBoxSize + c * 4 + 1],
bbox[idx * outputBBoxSize + c * 4 + 2], bbox[idx * outputBBoxSize + c * 4 + 3]};
writePPMFileWithBBox(storeName, mPPMs[i], b);
}
}
pass &= numDetections >= 1;
}
return pass;
}
//!
//! \brief Performs inverse bounding box transform
//!
void SampleFasterRCNN::bboxTransformInvAndClip(const float* rois, const float* deltas, float* predBBoxes,
const float* imInfo, const int N, const int nmsMaxOut, const int numCls)
{
for (int i = 0; i < N * nmsMaxOut; ++i)
{
float width = rois[i * 4 + 2] - rois[i * 4] + 1;
float height = rois[i * 4 + 3] - rois[i * 4 + 1] + 1;
float ctr_x = rois[i * 4] + 0.5f * width;
float ctr_y = rois[i * 4 + 1] + 0.5f * height;
const float* imInfo_offset = imInfo + i / nmsMaxOut * 3;
for (int j = 0; j < numCls; ++j)
{
float dx = deltas[i * numCls * 4 + j * 4];
float dy = deltas[i * numCls * 4 + j * 4 + 1];
float dw = deltas[i * numCls * 4 + j * 4 + 2];
float dh = deltas[i * numCls * 4 + j * 4 + 3];
float pred_ctr_x = dx * width + ctr_x;
float pred_ctr_y = dy * height + ctr_y;
float pred_w = exp(dw) * width;
float pred_h = exp(dh) * height;
predBBoxes[i * numCls * 4 + j * 4]
= std::max(std::min(pred_ctr_x - 0.5f * pred_w, imInfo_offset[1] - 1.f), 0.f);
predBBoxes[i * numCls * 4 + j * 4 + 1]
= std::max(std::min(pred_ctr_y - 0.5f * pred_h, imInfo_offset[0] - 1.f), 0.f);
predBBoxes[i * numCls * 4 + j * 4 + 2]
= std::max(std::min(pred_ctr_x + 0.5f * pred_w, imInfo_offset[1] - 1.f), 0.f);
predBBoxes[i * numCls * 4 + j * 4 + 3]
= std::max(std::min(pred_ctr_y + 0.5f * pred_h, imInfo_offset[0] - 1.f), 0.f);
}
}
}
//!
//! \brief Performs non maximum suppression on final bounding boxes
//!
std::vector<int> SampleFasterRCNN::nonMaximumSuppression(std::vector<std::pair<float, int>>& scoreIndex, float* bbox,
const int classNum, const int numClasses, const float nmsThreshold)
{
auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float {
if (x1min > x2min)
{
std::swap(x1min, x2min);
std::swap(x1max, x2max);
}
return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min;
};
auto computeIoU = [&overlap1D](float* bbox1, float* bbox2) -> float {
float overlapX = overlap1D(bbox1[0], bbox1[2], bbox2[0], bbox2[2]);
float overlapY = overlap1D(bbox1[1], bbox1[3], bbox2[1], bbox2[3]);
float area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]);
float area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]);
float overlap2D = overlapX * overlapY;
float u = area1 + area2 - overlap2D;
return u == 0 ? 0 : overlap2D / u;
};
std::vector<int> indices;
for (auto i : scoreIndex)
{
const int idx = i.second;
bool keep = true;
for (unsigned k = 0; k < indices.size(); ++k)
{
if (keep)
{
const int kept_idx = indices[k];
float overlap = computeIoU(
&bbox[(idx * numClasses + classNum) * 4], &bbox[(kept_idx * numClasses + classNum) * 4]);
keep = overlap <= nmsThreshold;
}
else
{
break;
}
}
if (keep)
{
indices.push_back(idx);
}
}
return indices;
}
//!
//! \brief Initializes members of the params struct using the command line args
//!
SampleFasterRCNNParams initializeSampleParams(const samplesCommon::Args& args)
{
SampleFasterRCNNParams params;
if (args.dataDirs.empty()) //!< Use default directories if user hasn't provided directory paths
{
params.dataDirs.push_back("data/faster-rcnn/");
params.dataDirs.push_back("data/samples/faster-rcnn/");
}
else //!< Use the data directory provided by the user
{
params.dataDirs = args.dataDirs;
}
params.prototxtFileName = "faster_rcnn_test_iplugin.prototxt";
params.weightsFileName = "VGG16_faster_rcnn_final.caffemodel";
params.inputTensorNames.push_back("data");
params.inputTensorNames.push_back("im_info");
params.batchSize = 5;
params.outputTensorNames.push_back("bbox_pred");
params.outputTensorNames.push_back("cls_prob");
params.outputTensorNames.push_back("rois");
params.dlaCore = args.useDLACore;
params.outputClsSize = 21;
params.nmsMaxOut
= 300; // This value needs to be changed as per the nmsMaxOut value set in RPROI plugin parameters in prototxt
return params;
}
//!
//! \brief Prints the help information for running this sample
//!
void printHelpInfo()
{
std::cout
<< "Usage: ./sample_fasterRCNN [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>]"
<< std::endl;
std::cout << "--help Display help information" << std::endl;
std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
"multiple times to add multiple directories. If no data directories are given, the default is to use "
"data/samples/faster-rcnn/ and data/faster-rcnn/"
<< std::endl;
std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
"where n is the number of DLA engines on the platform."
<< std::endl;
}
int main(int argc, char** argv)
{
samplesCommon::Args args;
bool argsOK = samplesCommon::parseArgs(args, argc, argv);
if (!argsOK)
{
std::cerr << "Invalid arguments" << std::endl;
printHelpInfo();
return EXIT_FAILURE;
}
if (args.help)
{
printHelpInfo();
return EXIT_SUCCESS;
}
initLibNvInferPlugins(&g_logger, "");
auto buildSample = [&]() -> void {
    SampleFasterRCNN sample(initializeSampleParams(args));
    std::cout << "Building and running a GPU inference engine for FasterRCNN" << std::endl;
    bool const ok = sample.build();
    assert(ok); // hoisted out of assert() so NDEBUG builds still build the engine
};
int const numEngines = 8; // number of engine builds to launch concurrently
std::vector<std::thread> initThreads;
for (int ix = 0; ix < numEngines; ++ix)
{
initThreads.emplace_back(buildSample);
}
for (auto& t : initThreads)
{
t.join();
}
return EXIT_SUCCESS;
}
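Same pattern here: no-op logger, build-only, and a thread fan-out in main() (8 threads this time instead of 128). One more variant that might help triage: keep the threads but serialize the builds behind a mutex. Rough sketch only (add #include <mutex> at the top; gBuildMutex is my own name, and I’m not claiming the lock’s scope is where the problem lives):

    std::mutex gBuildMutex; // covers everything from parsing through buildEngineWithConfig()
    auto buildSample = [&]() -> void {
        std::lock_guard<std::mutex> lock(gBuildMutex);
        SampleFasterRCNN sample(initializeSampleParams(args));
        bool const ok = sample.build();
        assert(ok);
    };

If the guarded run behaves differently from the unguarded one, whatever the lock covers is the interesting part.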