Hi, I followed the “sampleUffSSD” example to create my own plugin.
Step 1.
I converted the .pb file to a UFF file with the following command:
python3 convert_to_uff.py --input-file myNet.pb -O output -p config.py
The content of config.py is:
import graphsurgeon as gs
import tensorflow as tf
# Plugin node that stands in for the TensorFlow SpaceToDepth op; the op name
# must match the plugin type registered on the C++ side ("SpaceToDepth_TRT").
trt_SpaceToDepth = gs.create_plugin_node(
    name="spaceToDepth",
    op="SpaceToDepth_TRT",
    blockSize=2,
)

# Maps each TensorFlow namespace to the plugin node that replaces it.
namespace_plugin_map = {"net/SpaceToDepth": trt_SpaceToDepth}
def preprocess(dynamic_graph):
    """Collapse the namespaces listed in ``namespace_plugin_map`` on ``dynamic_graph``.

    Presumably called by the UFF converter with the loaded graph when this
    file is passed via ``-p`` (confirm against the converter documentation).
    """
    dynamic_graph.collapse_namespaces(namespace_plugin_map)
The converter printed the following output:
UFF Version 0.5.5
=== Automatically deduced input nodes ===
[name: "image_placeholder"
op: "Placeholder"
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
attr {
key: "shape"
value {
shape {
dim {
size: -1
}
dim {
size: -1
}
dim {
size: -1
}
dim {
size: 3
}
}
}
}
]
=========================================
Using output node output
Converting to UFF graph
Warning: No conversion function registered for layer: SpaceToDepth_TRT yet.
Converting spaceToDepth as custom op: SpaceToDepth_TRT
No. nodes: 167
UFF Output written to myNet.uff
Step 2.
I modified sampleUffSSD.cpp as shown below:
#include <cassert>
#include <cstring>
#include <iostream>
#include <string>
#include "NvUffParser.h"
#include "NvInferPlugin.h"
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "common.h"
// Names of the network's input and output tensors; used to look up the
// corresponding engine bindings.
const char* INPUT_BLOB_NAME = "image_placeholder";
const char* OUTPUT_BLOB_NAME = "output";

// Geometry of one input image: channels, height and width.
constexpr int INPUT_C{3};
constexpr int INPUT_H{480};
constexpr int INPUT_W{480};
// Logger instance shared by this file: it is handed to the TensorRT builder,
// the runtime and the plugin library, and used directly for error messages.
static Logger gLogger;
std::vector<std::pair<int64_t, DataType>>
calculateBindingBufferSizes(const ICudaEngine& engine, int nbBindings, int batchSize)
{
std::vector<std::pair<int64_t, DataType>> sizes;
for (int i = 0; i < nbBindings; ++i)
{
Dims dims = engine.getBindingDimensions(i);
DataType dtype = engine.getBindingDataType(i);
int64_t eltCount = samplesCommon::volume(dims) * batchSize;
sizes.push_back(std::make_pair(eltCount, dtype));
}
return sizes;
}
/// Parses a UFF model, builds a TensorRT engine from it and serializes the engine.
///
/// The parser passed in is destroyed by this function on every path, so the
/// caller must not use it afterwards.
///
/// @param uffFile         Path of the UFF model file.
/// @param maxBatchSize    Maximum batch size the engine is built for.
/// @param parser          UFF parser with inputs/outputs already registered; destroyed here.
/// @param trtModelStream  Receives the serialized engine; left as nullptr on failure.
/// @return The built engine, or nullptr when parsing or building fails.
ICudaEngine* loadModelAndCreateEngine(const char* uffFile, int maxBatchSize,
                                      nvuffparser::IUffParser* parser, IHostMemory*& trtModelStream)
{
    trtModelStream = nullptr;

    // Create the builder.
    IBuilder* builder = createInferBuilder(gLogger);
    if (!builder)
    {
        gLogger.log(ILogger::Severity::kERROR, "Unable to create builder");
        parser->destroy();
        nvuffparser::shutdownProtobufLibrary();
        return nullptr;
    }

    INetworkDefinition* network = builder->createNetwork();

    // Teardown shared by the failure paths. Without it a failed parse/build
    // used to fall through and dereference a null engine further down.
    const auto releaseAll = [&]() {
        network->destroy();
        parser->destroy();
        builder->destroy();
        nvuffparser::shutdownProtobufLibrary();
    };

    // Parse the UFF model to populate the network.
    std::cout << "Begin parsing model..." << std::endl;
    if (!parser->parse(uffFile, *network, nvinfer1::DataType::kFLOAT))
    {
        gLogger.log(ILogger::Severity::kERROR, "Fail to parse");
        releaseAll();
        return nullptr;
    }
    std::cout << "End parsing model..." << std::endl;

    // Configure the builder.
    builder->setMaxBatchSize(maxBatchSize);
    // The _GB literal operator is defined in common/common.h
    builder->setMaxWorkspaceSize(1_GB);
    builder->setHalf2Mode(false);

    // Build the engine.
    std::cout << "Begin building engine..." << std::endl;
    ICudaEngine* engine = builder->buildCudaEngine(*network);
    if (!engine)
    {
        gLogger.log(ILogger::Severity::kERROR, "Unable to create engine");
        releaseAll();
        return nullptr;
    }
    std::cout << "End building engine..." << std::endl;

    // The network and the parser are no longer needed once the engine exists.
    network->destroy();
    parser->destroy();

    // Serialize the engine, then close everything down.
    trtModelStream = engine->serialize();
    builder->destroy();
    nvuffparser::shutdownProtobufLibrary();
    return engine;
}
/// Runs one synchronous inference pass and copies the result back to the host.
///
/// @param context       Execution context of the engine to run.
/// @param inputData     Host buffer holding batchSize * INPUT_C * INPUT_H * INPUT_W floats.
/// @param detectionOut  Host buffer receiving batchSize * 15*15*5*12 floats.
/// @param batchSize     Number of samples in the batch.
///
/// Returns without running anything (after logging an error) when the input
/// or output binding cannot be resolved.
void doInference(nvinfer1::IExecutionContext& context, float* inputData, float* detectionOut, int batchSize)
{
    const ICudaEngine& engine = context.getEngine();
    const int nbBindings = engine.getNbBindings();

    // Resolve the bindings before allocating anything: an unknown tensor name
    // must not be used as an index into the buffer table.
    const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
    if (inputIndex < 0 || inputIndex >= nbBindings || outputIndex < 0 || outputIndex >= nbBindings)
    {
        gLogger.log(ILogger::Severity::kERROR, "Input or output binding not found in engine");
        return;
    }

    // One device buffer per binding, sized for the whole batch.
    std::vector<void*> buffers(nbBindings);
    const std::vector<std::pair<int64_t, nvinfer1::DataType>> buffersSizes
        = calculateBindingBufferSizes(engine, nbBindings, batchSize);
    for (int i = 0; i < nbBindings; ++i)
    {
        const auto& bindingSize = buffersSizes[i];
        buffers[i] = samplesCommon::safeCudaMalloc(bindingSize.first * samplesCommon::getElementSize(bindingSize.second));
    }

    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // DMA the input to the GPU, execute the batch, and DMA the result back.
    CHECK(cudaMemcpyAsync(buffers[inputIndex], inputData, batchSize * INPUT_C * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    const auto t_start = std::chrono::high_resolution_clock::now();
    const bool executed = context.execute(batchSize, &buffers[0]);
    const auto t_end = std::chrono::high_resolution_clock::now();
    const float total = std::chrono::duration<float, std::milli>(t_end - t_start).count();
    std::cout << "Time taken for inference is " << total << " ms." << std::endl;

    if (executed)
    {
        // The copy size has to match the host buffer the caller allocated.
        CHECK(cudaMemcpyAsync(detectionOut, buffers[outputIndex], batchSize * 15*15*5*12 * sizeof(float), cudaMemcpyDeviceToHost, stream));
    }
    else
    {
        gLogger.log(ILogger::Severity::kERROR, "Inference execution failed");
    }
    CHECK(cudaStreamSynchronize(stream));

    // Release the stream and every buffer that was allocated above (the
    // original only freed the input and output buffers).
    CHECK(cudaStreamDestroy(stream));
    for (void* buffer : buffers)
    {
        CHECK(cudaFree(buffer));
    }
}
/// Serialization helper: copies the bytes of `val` to `buffer` and advances
/// `buffer` past them.
///
/// Uses memcpy rather than a store through a casted pointer, because the
/// write position inside a serialization buffer is not guaranteed to be
/// suitably aligned for T.
///
/// @param buffer  Write cursor; advanced by sizeof(T).
/// @param val     Value to store (expected to be trivially copyable).
template<typename T>
void writeToBuffer(char*& buffer, const T& val)
{
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}
/// Deserialization helper: reads a T from `buffer` and advances `buffer`
/// past it.
///
/// Uses memcpy rather than a load through a casted pointer, because the read
/// position inside a serialized blob is not guaranteed to be suitably aligned
/// for T.
///
/// @param buffer  Read cursor; advanced by sizeof(T).
/// @return The value stored at the original cursor position.
template<typename T>
T readFromBuffer(const char*& buffer)
{
    T val;
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
    return val;
}
class SpaceToDepthPlugin: public nvinfer1::IPluginV2
{
public:
// Ctor
SpaceToDepthPlugin(int block_size): mBlockSize(block_size)
{
}
SpaceToDepthPlugin(const void* data, size_t length)
{
// Deserialize in the same order as serialization
const char *d = static_cast<const char *>(data);
const char *a = d;
mBlockSize = readFromBuffer<int>(d);
assert(d == (a + length));
}
// IPluginV2 public function
const char* getPluginType() const override
{
return "SpaceToDepth_TRT";
}
const char* getPluginVersion() const override
{
return "1";
}
int getNbOutputs() const override
{
return 1;
}
Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) override
{
assert(nbInputDims == 1);
assert(index == 0);
assert(inputs[index].nbDims == 3);
int in_depth = inputs[0].d[0];
int in_height = inputs[0].d[1];
int in_width = inputs[0].d[2];
assert(in_height % mBlockSize == 0);
assert(in_width % mBlockSize == 0);
int out_depth = in_depth * mBlockSize*mBlockSize;
int out_height = in_height / mBlockSize;
int out_width = in_width / mBlockSize;
return DimsCHW(out_depth, out_height, out_width);
}
void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override
{
assert(nbOutputs == 1);
assert(type == DataType::kFLOAT);
assert(format == PluginFormat::kNCHW);
size_t volume = 1;
for(int i = 0; i < inputDims->nbDims; i++) {
volume *= inputDims->d[i];
}
mInputVolume = volume;
}
bool supportsFormat(DataType type, PluginFormat format) const override
{
if(type == DataType::kFLOAT && format == PluginFormat::kNCHW) {
return true;
}
else {
return false;
}
}
int initialize() override { return 0; }
void terminate() override
{
// if something was initialized in "initialize()"
// it must be terminated here
}
IPluginV2* clone() const override
{
return new SpaceToDepthPlugin(mBlockSize);
}
size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }
int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override
{
CHECK(cudaMemcpyAsync(outputs[0], inputs[0], mInputVolume * batchSize * sizeof(float), cudaMemcpyDeviceToDevice, stream));
return 0;
}
size_t getSerializationSize() const override
{
return sizeof(int);
}
void serialize(void *buffer) const override
{
char *d = static_cast<char *>(buffer);
const char *a = d;
writeToBuffer(d, mBlockSize);
assert(d == a + getSerializationSize());
}
void destroy() override
{
delete this;
}
void setPluginNamespace(const char *pluginNamespace) override
{
mNamespace = pluginNamespace;
}
const char* getPluginNamespace() const override
{
return mNamespace.c_str();
}
private:
int mBlockSize;
DimsCHW mOutDim;
size_t mInputVolume;
std::string mNamespace;
};
// Identification strings reported by the SpaceToDepth plugin creator.
namespace
{
// Plugin type name; the custom op in the UFF graph uses the same string.
const char* SPACETODEPTH_PLUGIN_NAME = "SpaceToDepth_TRT";
// Plugin version string.
const char* SPACETODEPTH_PLUGIN_VERSION = "1";
} // namespace
class SpaceToDepthCreator: public IPluginCreator
{
public:
SpaceToDepthCreator()
{
// Describe SpaceToDepth's required PluginField arguments
mPluginAttributes.emplace_back(PluginField("blockSize", nullptr, PluginFieldType::kINT32, 1));
// Fill PluginFieldCollection with PluginField arguments metadata
mFC.nbFields = mPluginAttributes.size();
mFC.fields = mPluginAttributes.data();
}
~SpaceToDepthCreator() {}
const char* getPluginName() const override { return SPACETODEPTH_PLUGIN_NAME; }
const char* getPluginVersion() const override { return SPACETODEPTH_PLUGIN_VERSION; }
const PluginFieldCollection* getFieldNames() override { return &mFC; }
IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override
{
int blockSize;
const PluginField* fields = fc->fields;
// Parse fields from PluginFieldCollection
assert(fc->nbFields == 1); // only block_size
for(int i = 0; i < fc->nbFields; ++i) {
if(strcmp(fields[i].name, "blockSize") == 0) {
assert(fields[i].type == PluginFieldType::kINT32);
blockSize = *(static_cast<const int*>(fields[i].data));
}
}
return new SpaceToDepthPlugin(blockSize);
}
IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override
{
// This object will be deleted when the network is destroyed, which will
// call SpaceToDepth::destroy()
return new SpaceToDepthPlugin(serialData, serialLength);
}
void setPluginNamespace(const char* pluginNamespace) override { mNamespace = pluginNamespace; }
const char* getPluginNamespace() const override { return mNamespace.c_str(); }
private:
static PluginFieldCollection mFC;
static std::vector<PluginField> mPluginAttributes;
std::string mNamespace;
};
// Static class fields initialization
// NOTE(review): keep these definitions ahead of the registration below — the
// creator's constructor writes to both of them, and the registration macro
// presumably constructs a creator during static initialization (confirm).
PluginFieldCollection SpaceToDepthCreator::mFC{};
std::vector<PluginField> SpaceToDepthCreator::mPluginAttributes;
// Makes SpaceToDepthCreator known to TensorRT via the registration macro.
REGISTER_TENSORRT_PLUGIN(SpaceToDepthCreator);
int main(int argc, char* argv[])
{
initLibNvInferPlugins(&gLogger, "");
std::string uff_path = "myNet.uff";
std::string img_file = "test_1.png";
int batchSize = 1;
cv::Mat cv_img = cv::imread(img_file);
cv::resize(cv_img, cv_img, cv::Size(INPUT_H, INPUT_W));
int depth = cv_img.channels();
vector<float> inputData(batchSize * INPUT_C * INPUT_H * INPUT_W);
for(int y = 0; y < cv_img.rows; y++) {
for(int x = 0; x < cv_img.cols; x++) {
cv::Vec3b pixel = cv_img.at<cv::Vec3b>(y,x);
inputData[y*cv_img.cols*depth + x*depth + 0] = static_cast<float>(pixel.val[0]) / 255.0;
inputData[y*cv_img.cols*depth + x*depth + 1] = static_cast<float>(pixel.val[1]) / 255.0;
inputData[y*cv_img.cols*depth + x*depth + 2] = static_cast<float>(pixel.val[2]) / 255.0;
}
}
auto parser = nvuffparser::createUffParser();
parser->registerInput("image_placeholder", DimsCHW(3, 480,480), nvuffparser::UffInputOrder::kNCHW);
parser->registerOutput("output");
IHostMemory* trtModelStream{nullptr};
ICudaEngine* tmpEngine = loadModelAndCreateEngine(uff_path.c_str(), batchSize, parser, trtModelStream);
assert(tmpEngine != nullptr);
assert(trtModelStream != nullptr);
tmpEngine->destroy();
// Deserialize the engine.
std::cout << "*** Desserializing ***\n";
IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), nullptr);
assert(engine != nullptr);
trtModelStream->destroy();
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
// Host memory for outputs
int nBboxParams = 5;
int nClass = 7;
int nAnchor = 5;
std::vector<float> detectionOut(batchSize * 15 * 15 * nAnchor * (nBboxParams+nClass));
// Run inference
doInference(*context, &inputData[0], &detectionOut[0], batchSize);
}
But I got the following error message after running the program:
Begin parsing model...
End parsing model...
Begin building engine...
End building engine...
*** Desserializing ***
ERROR: getPluginCreator could not find plugin SpaceToDepth_TRTspaceToDepth version 1 namespace spaceToDepth
ERROR: Cannot deserialize plugin SpaceToDepth_TRT
Segmentation fault (core dumped)
So the engine builds, but the plugin cannot be found when the engine is deserialized.
Could anyone tell me how to fix it?
Thanks