Environment
TensorRT Version: 7.1.3-1
GPU Type: GTX1060Ti
Nvidia Driver Version: 440.64
CUDA Version: cuda10.2
CUDNN Version: 8.0.0.180-1
Operating System + Version: ubuntu18.04
Description
When using my plugin to convert fp16 engine, I met
Signal: SIGABRT (Aborted)
setFlag(nvinfer1::BuilderFlag::kFP16)
successfully convert to fp16
setFp16Mode(true)
failed to convert to fp16
Code as following:
// trt_.builder_ is with a type of nvinfer1::IBuilder*
nvinfer1::IBuilderConfig* config = trt_.builder_->createBuilderConfig();
config->setMaxWorkspaceSize(MAX_WORKSPACE_SIZE);
if (dataType == nvinfer1::DataType::kHALF)
{
// 1. successfully convert fp16
config->setFlag(nvinfer1::BuilderFlag::kFP16);
// 2. failed to convert fp16
trt_.builder_->setFp16Mode(true);
}
My plugin is used to copy half amount of data of Input layer as output. In configureWithFormat
, I found the dimension of inputDims is different between the two kinds of converting to fp16 above. And there are 3 layers of my pulgin, so the print info have 3 times.
// Configure the layer with input and output data types.
void ChunkV2::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs,
DataType type, PluginFormat format, int maxBatchSize)
{
_n_size_split = inputDims->d[0] / 2 * inputDims->d[1] * inputDims->d[2] *sizeof(float);
std::cerr << inputDims->d[0] << " " << inputDims->d[1] << " " << inputDims->d[2] << std::endl;
std::cerr << "size of split is " << _n_size_split << std::endl;
}
Print info as following:
// print info of correct dimension
//1. setFlag(nvinfer1::BuilderFlag::kFP16)
64 152 152
size of split is 2957312
128 76 76
size of split is 1478656
256 38 38
size of split is 739328
// print info of wrong dimension
//2. trt_.builder_->setFp16Mode(true)
64 152 152
size of split is 2957312
64 152 152 //difference
size of split is 2957312 //difference
Process finished with exit code 134 (interrupted by signal 6: SIGABRT)
plugin.hpp
Click to expand
//plugin.hpp
#ifndef ChunkV2_H_
#define ChunkV2_H_
#include <string>
#include <vector>
#include "NvInfer.h"
namespace nvinfer1
{
class ChunkV2 : public IPluginV2
{
public:
ChunkV2();
ChunkV2(const void* buffer, size_t length);
~ChunkV2()final;
const char* getPluginType() const override;
const char* getPluginVersion() const override;
int32_t getNbOutputs () const override;
Dims getOutputDimensions (int32_t index, const Dims *inputs, int32_t nbInputDims) override;
bool supportsFormat (DataType type, PluginFormat format)const override;
void configureWithFormat (const Dims *inputDims, int32_t nbInputs, const Dims *outputDims,
int32_t nbOutputs, DataType type, PluginFormat format, int32_t maxBatchSize) override;
int32_t initialize() override;
void terminate() override;
size_t getWorkspaceSize(int maxBatchSize) const override;
int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
size_t getSerializationSize() const override;
void serialize(void* buffer) const override;
void destroy() override;
IPluginV2 * clone() const override;
void setPluginNamespace(const char* pluginNamespace) override;
const char* getPluginNamespace() const override;
private:
std::string _s_plugin_namespace;
int _n_size_split;
};
class ChunkV2PluginCreator : public IPluginCreator
{
public:
ChunkV2PluginCreator();
~ChunkV2PluginCreator() override = default;
const char* getPluginName()const override;
const char* getPluginVersion() const override;
const PluginFieldCollection* getFieldNames() override;
IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override;
IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
void setPluginNamespace(const char* libNamespace) override;
const char* getPluginNamespace() const override;
private:
std::string _s_name_space;
static PluginFieldCollection _fc;
static std::vector<PluginField> _vec_plugin_attributes;
};
};//nampespace nvinfer1
#endif
plugin.cpp
Click to expand
//plugin.cpp
#include <cmath>
#include <stdio.h>
#include <cassert>
#include <iostream>
#include "chunk_V2.h"
#include <cuda_runtime.h>
#include "cuda_utils.hpp"
namespace nvinfer1
{
ChunkV2::ChunkV2()
{
}
ChunkV2::ChunkV2(const void* buffer, size_t size)
{
assert(size == sizeof(_n_size_split));
_n_size_split = *reinterpret_cast<const int*>(buffer);
}
ChunkV2::~ChunkV2()
{
}
int ChunkV2::getNbOutputs() const
{
return 2;
}
Dims ChunkV2::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
{
assert(nbInputDims == 1);
assert(index == 0 || index == 1);
return Dims3(inputs[0].d[0] / 2, inputs[0].d[1], inputs[0].d[2]);
}
int ChunkV2::initialize()
{
return 0;
}
void ChunkV2::terminate()
{
}
size_t ChunkV2::getWorkspaceSize(int maxBatchSize) const
{
return 0;
}
size_t ChunkV2::getSerializationSize() const
{
return sizeof(_n_size_split);
}
void ChunkV2::serialize(void *buffer)const
{
*reinterpret_cast<int*>(buffer) = _n_size_split;
}
bool ChunkV2::supportsFormat(DataType type, PluginFormat format) const
{
return ((type == DataType::kFLOAT || type == DataType::kHALF || type == DataType::kINT8) &&
(format == PluginFormat::kNCHW));
}
// Set plugin namespace
void ChunkV2::setPluginNamespace(const char* pluginNamespace)
{
_s_plugin_namespace = pluginNamespace;
}
const char* ChunkV2::getPluginNamespace() const
{
return _s_plugin_namespace.c_str();
}
// Configure the layer with input and output data types.
void ChunkV2::configureWithFormat(
const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs,
DataType type, PluginFormat format, int maxBatchSize){
_n_size_split = inputDims->d[0] / 2 * inputDims->d[1] * inputDims->d[2] *sizeof(float);
std::cerr << inputDims->d[0] << " " << inputDims->d[1] << " " << inputDims->d[2] << std::endl;
std::cerr << "size of split is " << _n_size_split << std::endl;
ASSERT(format == PluginFormat::kNCHW);
ASSERT(type == DataType::kFLOAT || type == DataType::kHALF);
//mDataType = type;
ASSERT(inputDims[0].nbDims >= 1); // number of dimensions of the input tensor must be >=1
}
const char* ChunkV2::getPluginType()const
{
return "CHUNK_TRT";
}
const char* ChunkV2::getPluginVersion() const
{
return "1";
}
void ChunkV2::destroy()
{
delete this;
}
// Clone the plugin
IPluginV2* ChunkV2::clone() const
{
ChunkV2 *p = new ChunkV2();
p->_n_size_split = _n_size_split;
p->setPluginNamespace(_s_plugin_namespace.c_str());
return p;
}
int ChunkV2::enqueue(int batchSize,
const void* const* inputs,
void** outputs,
void* workspace,
cudaStream_t stream)
{
for (int b = 0; b < batchSize; ++b)
{
NV_CUDA_CHECK(cudaMemcpy((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice));
NV_CUDA_CHECK(cudaMemcpy((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice));
}
return 0;
}
PluginFieldCollection ChunkV2PluginCreator::_fc{};
std::vector<PluginField> ChunkV2PluginCreator::_vec_plugin_attributes;
ChunkV2PluginCreator::ChunkV2PluginCreator()
{
_vec_plugin_attributes.clear();
_fc.nbFields = _vec_plugin_attributes.size();
_fc.fields = _vec_plugin_attributes.data();
}
const char* ChunkV2PluginCreator::getPluginName() const
{
return "CHUNK_TRT";
}
const char* ChunkV2PluginCreator::getPluginVersion() const
{
return "1";
}
const PluginFieldCollection* ChunkV2PluginCreator::getFieldNames()
{
return &_fc;
}
IPluginV2* ChunkV2PluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
{
ChunkV2* obj = new ChunkV2();
obj->setPluginNamespace(_s_name_space.c_str());
return obj;
}
IPluginV2* ChunkV2PluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
{
ChunkV2* obj = new ChunkV2(serialData,serialLength);
obj->setPluginNamespace(_s_name_space.c_str());
return obj;
}
void ChunkV2PluginCreator::setPluginNamespace(const char* libNamespace)
{
_s_name_space = libNamespace;
}
const char* ChunkV2PluginCreator::getPluginNamespace() const
{
return _s_name_space.c_str();
}
REGISTER_TENSORRT_PLUGIN(ChunkV2PluginCreator);
}//namespace nvinfer1
Much appreciate!