Hi,
thanks for replying.
Yes, we had already tested the solution provided in that forum, without success.
The implementation of the plugin in question is in the code below.
The whole project is pretty large, and I’m afraid I cannot disclose the architecture implementation.
As mentioned, it has always worked on GPUs, with both TensorRT 6 and 7.
Hope these details help.
Thanks again,
f
#include <string.h>
#include <iostream>
#include <cassert>
#include <vector>
#include <functional>
#include <numeric>
#include <algorithm>
//#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "cuda_runtime_api.h"
// Launcher for the box-decoding computation. It is only declared here; its
// definition is not part of this file. BoxDecodingLayer::enqueue() calls it with:
//   batch_size      - the batch size handed to enqueue()
//   map_size        - a 3-element array {mapHeight, mapWidth, mapDepth}
//   thr             - the plugin's score threshold
//   data_in_l, data_in_b, data_in_l_pool
//                   - plugin inputs 0, 1 and 2, in that order
//   data_out_b      - plugin output 0 (labelled "boxes" in getOutputDimensions)
//   data_out_s      - plugin output 1 (labelled "scores" in getOutputDimensions)
//   stream          - the CUDA stream handed to enqueue()
// NOTE(review): the buffers are presumably device memory and the work is
// presumably queued on `stream` - confirm against the launcher's implementation.
void boxDecoderLauncher(const int batch_size, const int *map_size, const float thr,
const float *data_in_l, const float *data_in_b, const float *data_in_l_pool, float *data_out_b, float *data_out_s, cudaStream_t stream);
// Pulls every nvinfer1 name (IPluginV2, Dims, DataType, ...) into the global namespace for the rest of the file.
using namespace nvinfer1;
class BoxDecodingLayer : public IPluginV2
{
public:
BoxDecodingLayer(const float score_threshold, const int map_height, const int map_width, const int map_depth)
{
scoreThreshold = score_threshold;
mapHeight = map_height;
mapWidth = map_width;
mapDepth = map_depth;
}
BoxDecodingLayer(const void* data, size_t length)
{
const char* d = static_cast<const char*>(data);
scoreThreshold = read<float>(d);
mapHeight = read<int>(d);
mapWidth = read<int>(d);
mapDepth = read<int>(d);
}
// It makes no sense to construct UffPoolPluginV2 without arguments.
BoxDecodingLayer() = delete;
virtual ~BoxDecodingLayer() {}
int getNbOutputs() const override
{
return 2;
}
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
{
assert(nbInputDims == 3);
assert(inputs[0].nbDims == 3);
assert(inputs[1].nbDims == 3);
assert(inputs[2].nbDims == 3);
if (index == 0) // boxes
return DimsCHW(mapHeight * mapWidth, mapDepth, 4);
if (index == 1) // scores
return Dims2(mapHeight * mapWidth, mapDepth);
return DimsCHW(mapHeight * mapWidth, mapDepth, 4);
}
int initialize() override { return 0; }
void terminate() override { ; }
size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }
int enqueue(int batch_size, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override
{
int map_size[] {mapHeight, mapWidth, mapDepth};
float thr = scoreThreshold;
float *data_in_l = (float*)inputs[0];
float *data_in_b = (float*)inputs[1];
float *data_in_l_pool = (float*)inputs[2];
float *data_out_b = (float*)outputs[0];
float *data_out_s = (float*)outputs[1];
boxDecoderLauncher(batch_size, map_size, thr, data_in_l, data_in_b, data_in_l_pool, data_out_b, data_out_s, stream);
return 0;
}
size_t getSerializationSize() const { return sizeof(float) + 3 * sizeof(int); }
void serialize(void* buffer) const
{
char *d = reinterpret_cast<char*>(buffer);
write(d, scoreThreshold);
write(d, mapHeight);
write(d, mapWidth);
write(d, mapDepth);
}
void configureWithFormat(const Dims* inputs, int nbInputs, const Dims* outputDims, int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) override
{
assert(nbOutputs == 2);
assert(inputs[0].nbDims == 3);
assert(inputs[1].nbDims == 3);
assert(inputs[2].nbDims == 3);
for (int i = 0; i < nbInputs; ++i)
{
assert(inputs[i].d[1] == mapHeight);
assert(inputs[i].d[2] == mapWidth);
}
}
bool supportsFormat(DataType type, PluginFormat format) const override { return (type == DataType::kFLOAT && format == PluginFormat::kNCHW); }
const char* getPluginType() const override { return "BoxDecoding_TRT"; }
const char* getPluginVersion() const override { return "1"; }
void destroy() override { delete this; }
IPluginV2* clone() const { return new BoxDecodingLayer(scoreThreshold, mapHeight, mapWidth, mapDepth); }
void setPluginNamespace(const char* libNamespace) override { mNamespace = libNamespace; }
const char* getPluginNamespace() const override { return mNamespace.c_str(); }
private:
template <typename T>
void write(char*& buffer, const T& val) const
{
*reinterpret_cast<T*>(buffer) = val;
buffer += sizeof(T);
}
template <typename T>
T read(const char*& buffer)
{
T val = *reinterpret_cast<const T*>(buffer);
buffer += sizeof(T);
return val;
}
float scoreThreshold;
int mapHeight;
int mapWidth;
int mapDepth;
std::string mNamespace;
};
namespace
{
// Version string returned by BoxDecodingLayerPluginCreator::getPluginVersion().
const char* BOXDECODINGLAYER_PLUGIN_VERSION = "1";

// Plugin name returned by BoxDecodingLayerPluginCreator::getPluginName().
const char* BOXDECODINGLAYER_PLUGIN_NAME = "BoxDecoding_TRT";
} // namespace
/// IPluginCreator for BoxDecodingLayer.
///
/// Advertises four single-valued attributes (score_threshold as FLOAT32;
/// map_height, map_width and map_depth as INT32) and builds plugins either
/// from such a field collection or from serialized bytes.
class BoxDecodingLayerPluginCreator : public IPluginCreator
{
public:
    /// Fills the advertised attribute list and points mFC at it.
    BoxDecodingLayerPluginCreator()
    {
        mPluginAttributes.emplace_back(PluginField("score_threshold", nullptr, PluginFieldType::kFLOAT32, 1));
        mPluginAttributes.emplace_back(PluginField("map_height", nullptr, PluginFieldType::kINT32, 1));
        mPluginAttributes.emplace_back(PluginField("map_width", nullptr, PluginFieldType::kINT32, 1));
        mPluginAttributes.emplace_back(PluginField("map_depth", nullptr, PluginFieldType::kINT32, 1));
        mFC.nbFields = static_cast<int>(mPluginAttributes.size());
        // The vector is never resized after this point, so the pointer stays valid.
        mFC.fields = mPluginAttributes.data();
    }

    ~BoxDecodingLayerPluginCreator() {}

    const char* getPluginName() const override { return BOXDECODINGLAYER_PLUGIN_NAME; }

    const char* getPluginVersion() const override { return BOXDECODINGLAYER_PLUGIN_VERSION; }

    const PluginFieldCollection* getFieldNames() override { return &mFC; }

    /// Builds a BoxDecodingLayer from the attributes found in `fc`.
    ///
    /// Attributes are parsed into locals, so a call never picks up values left
    /// over from an earlier call. An attribute that is absent from `fc`, or
    /// whose name or data pointer is null, is left at 0. Unknown attribute
    /// names are ignored.
    /// The returned plugin carries this creator's namespace and is owned by
    /// the caller.
    IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override
    {
        float scoreThreshold = 0.0f;
        int mapHeight = 0;
        int mapWidth = 0;
        int mapDepth = 0;

        if (fc != nullptr && fc->fields != nullptr)
        {
            for (int i = 0; i < fc->nbFields; ++i)
            {
                const PluginField& field = fc->fields[i];
                if (field.name == nullptr || field.data == nullptr)
                {
                    continue; // nothing usable to read from this entry
                }
                if (!strcmp(field.name, "score_threshold"))
                {
                    assert(field.type == PluginFieldType::kFLOAT32);
                    scoreThreshold = *static_cast<const float*>(field.data);
                }
                else if (!strcmp(field.name, "map_height"))
                {
                    assert(field.type == PluginFieldType::kINT32);
                    mapHeight = *static_cast<const int*>(field.data);
                }
                else if (!strcmp(field.name, "map_width"))
                {
                    assert(field.type == PluginFieldType::kINT32);
                    mapWidth = *static_cast<const int*>(field.data);
                }
                else if (!strcmp(field.name, "map_depth"))
                {
                    assert(field.type == PluginFieldType::kINT32);
                    mapDepth = *static_cast<const int*>(field.data);
                }
            }
        }

        BoxDecodingLayer* plugin = new BoxDecodingLayer(scoreThreshold, mapHeight, mapWidth, mapDepth);
        plugin->setPluginNamespace(mNamespace.c_str());
        return plugin;
    }

    /// Rebuilds a BoxDecodingLayer from `serialLength` bytes at `serialData`
    /// and remembers `name` (null is stored as the empty string).
    /// The returned plugin carries this creator's namespace and is owned by
    /// the caller.
    IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override
    {
        BoxDecodingLayer* plugin = new BoxDecodingLayer(serialData, serialLength);
        plugin->setPluginNamespace(mNamespace.c_str());
        mPluginName = (name != nullptr) ? name : "";
        return plugin;
    }

    /// Stores the namespace; a null pointer is treated as the empty namespace.
    void setPluginNamespace(const char* libNamespace) override
    {
        mNamespace = (libNamespace != nullptr) ? libNamespace : "";
    }

    const char* getPluginNamespace() const override { return mNamespace.c_str(); }

private:
    std::string mNamespace;
    std::string mPluginName;
    std::vector<PluginField> mPluginAttributes; // backing storage for mFC.fields
    PluginFieldCollection mFC;
};
// Hands the creator class to the REGISTER_TENSORRT_PLUGIN macro, which comes from
// the TensorRT headers rather than this file.
// NOTE(review): presumably this registers one creator instance with the TensorRT
// plugin registry during static initialization - confirm against the TensorRT headers.
REGISTER_TENSORRT_PLUGIN(BoxDecodingLayerPluginCreator);