Failed to convert engine to fp16, by using setFp16Mode

Environment

TensorRT Version: 7.1.3-1
GPU Type: GTX1060Ti
Nvidia Driver Version: 440.64
CUDA Version: cuda10.2
CUDNN Version: 8.0.0.180-1
Operating System + Version: ubuntu18.04

Description

When using my plugin while building an fp16 engine, I encountered

Signal: SIGABRT (Aborted)

setFlag(nvinfer1::BuilderFlag::kFP16) successfully convert to fp16
setFp16Mode(true) failed to convert to fp16
Code as following:

// trt_.builder_ is with a  type of nvinfer1::IBuilder*
nvinfer1::IBuilderConfig* config = trt_.builder_->createBuilderConfig();
config->setMaxWorkspaceSize(MAX_WORKSPACE_SIZE);

if (dataType == nvinfer1::DataType::kHALF)
{
// 1. successfully convert fp16
// NOTE(review): when the engine is built with buildEngineWithConfig, all
// precision settings must go through the IBuilderConfig object.
    config->setFlag(nvinfer1::BuilderFlag::kFP16);

// 2. failed to convert fp16
// NOTE(review): setFp16Mode is a deprecated IBuilder API; it is ignored by
// config-based builds, so this path does not enable FP16 — presumably the
// cause of the failure described above. TODO confirm against TRT 7 docs.
    trt_.builder_->setFp16Mode(true);
}

My plugin copies half of the input layer's data to each output. In configureWithFormat, I found that the inputDims dimensions differ between the two ways of converting to fp16 shown above. There are 3 layers of my plugin, so the print info appears 3 times.

	// Configure the layer with input and output data types.
	// NOTE(review): the byte size below is hard-coded with sizeof(float);
	// if TensorRT ever selects DataType::kHALF for this plugin, the computed
	// split size would be twice the real tensor size — verify per-type
	// element size handling.
void ChunkV2::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs,
			DataType type, PluginFormat format, int maxBatchSize)
{
	// d[0] = channels, d[1]/d[2] = spatial dims (CHW, implicit batch).
	_n_size_split = inputDims->d[0] / 2 * inputDims->d[1] * inputDims->d[2] *sizeof(float);
	std::cerr << inputDims->d[0] << " " << inputDims->d[1] << " " << inputDims->d[2] << std::endl;
    std::cerr << "size of split is " <<  _n_size_split << std::endl;
}

Print info as following:

// print info of correct dimension
//1. setFlag(nvinfer1::BuilderFlag::kFP16)
64 152 152
size of split is 2957312
128 76 76
size of split is 1478656
256 38 38
size of split is 739328

// print info of wrong dimension
//2.  trt_.builder_->setFp16Mode(true)
64 152 152
size of split is 2957312
64 152 152 //difference
size of split is 2957312 //difference

Process finished with exit code 134 (interrupted by signal 6: SIGABRT)

plugin.hpp

Click to expand
//plugin.hpp
#ifndef ChunkV2_H_
#define ChunkV2_H_

#include <string>
#include <vector>
#include "NvInfer.h"

namespace nvinfer1
{
	class ChunkV2 : public IPluginV2
	{
	public:
		ChunkV2();
		ChunkV2(const void* buffer, size_t length);
		~ChunkV2()final;

        const char* getPluginType() const override;
        const char* getPluginVersion() const override;
        int32_t	getNbOutputs () const override;
		Dims getOutputDimensions (int32_t index, const Dims *inputs, int32_t nbInputDims) override;
        bool supportsFormat (DataType type, PluginFormat format)const override;
        void configureWithFormat (const Dims *inputDims, int32_t nbInputs, const Dims *outputDims,
                int32_t nbOutputs, DataType type, PluginFormat format, int32_t maxBatchSize) override;
        int32_t initialize() override;
		void terminate() override;
		size_t getWorkspaceSize(int maxBatchSize) const override;
        int32_t	enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
		size_t getSerializationSize() const override;
		void serialize(void* buffer) const override;
		void destroy() override;
        IPluginV2 * clone() const override;

        void setPluginNamespace(const char* pluginNamespace) override;
		const char* getPluginNamespace() const override;
	private:
		std::string _s_plugin_namespace;
		int _n_size_split;
	};


    // Factory registered with the TensorRT plugin registry; creates and
    // deserializes ChunkV2 instances (see REGISTER_TENSORRT_PLUGIN in the
    // .cpp file).
    class ChunkV2PluginCreator : public IPluginCreator
    {
    public:
        ChunkV2PluginCreator();
        ~ChunkV2PluginCreator() override = default;
        const char* getPluginName()const override;
        const char* getPluginVersion() const override;
        // Returns the (empty) set of creation-time attributes.
        const PluginFieldCollection* getFieldNames() override;
		IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override;
		IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
        void setPluginNamespace(const char* libNamespace) override;
        const char* getPluginNamespace() const override;
    private:
        std::string _s_name_space;
        // Shared across all creator instances; the plugin has no fields.
        static PluginFieldCollection _fc;
        static std::vector<PluginField> _vec_plugin_attributes;
    };
};//nampespace nvinfer1

#endif

plugin.cpp

Click to expand
//plugin.cpp
#include "chunk_V2.h"

#include <stdio.h>

#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>

#include <cuda_runtime.h>

#include "cuda_utils.hpp"

namespace nvinfer1
{
	// Default constructor: used by createPlugin()/clone() before
	// configureWithFormat() runs, so give the split size a deterministic
	// value instead of leaving it indeterminate.
	ChunkV2::ChunkV2()
		: _n_size_split(0)
	{
	}
	// Deserialization constructor: restores _n_size_split from the bytes
	// written by serialize().
	ChunkV2::ChunkV2(const void* buffer, size_t size)
	{
		assert(size == sizeof(_n_size_split));
		// memcpy instead of reinterpret_cast + dereference: the serialized
		// buffer is not guaranteed to be aligned for int.
		std::memcpy(&_n_size_split, buffer, sizeof(_n_size_split));
	}
	// Nothing to release: all members are value types.
	ChunkV2::~ChunkV2() = default;

	// The chunk plugin always produces exactly two output tensors.
	int ChunkV2::getNbOutputs() const { return 2; }

	// Each output carries half the input channels with H and W unchanged.
	Dims ChunkV2::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
	{
		assert(nbInputDims == 1);
		assert(index == 0 || index == 1);
		const Dims& in = inputs[0];
		return Dims3(in.d[0] / 2, in.d[1], in.d[2]);
	}

	// No device resources to acquire; always succeeds.
	int ChunkV2::initialize() { return 0; }

	// Nothing was acquired in initialize(), so nothing to free here.
	void ChunkV2::terminate() {}

	// The plugin copies input to outputs directly and needs no scratch space.
	size_t ChunkV2::getWorkspaceSize(int maxBatchSize) const { return 0; }

	// Only the split byte size is persisted.
	size_t ChunkV2::getSerializationSize() const { return sizeof(_n_size_split); }

	// Write _n_size_split into the host buffer supplied by TensorRT.
	void ChunkV2::serialize(void *buffer) const
	{
		// memcpy instead of a reinterpret_cast store: the destination buffer
		// has no alignment guarantee for int.
		std::memcpy(buffer, &_n_size_split, sizeof(_n_size_split));
	}

	// Report which type/format combinations the plugin can run in.
	// BUG FIX: kINT8 was advertised here, but configureWithFormat() asserts
	// type is kFLOAT or kHALF, so an INT8 build would abort at configure
	// time. Only advertise what the plugin actually implements.
	bool ChunkV2::supportsFormat(DataType type, PluginFormat format) const
	{
		return ((type == DataType::kFLOAT || type == DataType::kHALF) &&
				(format == PluginFormat::kNCHW));
	}

	// Record the namespace assigned by the plugin registry.
	void ChunkV2::setPluginNamespace(const char* pluginNamespace)
	{
		_s_plugin_namespace.assign(pluginNamespace);
	}

	// Expose the namespace previously stored by setPluginNamespace().
	const char* ChunkV2::getPluginNamespace() const { return _s_plugin_namespace.c_str(); }

	// Configure the layer with input and output data types.
	// Computes _n_size_split: the byte size of one output half, i.e. half the
	// input channels times H*W times the element size of the build precision.
	void ChunkV2::configureWithFormat(
			const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs,
			DataType type, PluginFormat format, int maxBatchSize){
		// Validate the negotiated type/format before using the dims.
		ASSERT(format == PluginFormat::kNCHW);
		ASSERT(type == DataType::kFLOAT || type == DataType::kHALF);
		ASSERT(inputDims[0].nbDims >= 1); // number of dimensions of the input tensor must be >=1
		// BUG FIX: the element size depends on the selected precision. The
		// old code always multiplied by sizeof(float), so an FP16 engine
		// would copy twice the real tensor size in enqueue().
		const size_t elem_size = (type == DataType::kHALF) ? 2 : sizeof(float);
		_n_size_split = inputDims->d[0] / 2 * inputDims->d[1] * inputDims->d[2] * elem_size;
		std::cerr << inputDims->d[0] << " " << inputDims->d[1] << " " << inputDims->d[2] << std::endl;
        std::cerr << "size of split is " <<  _n_size_split << std::endl;
	}

	// Type string; must match ChunkV2PluginCreator::getPluginName().
	const char* ChunkV2::getPluginType() const { return "CHUNK_TRT"; }

	// Version string; must match ChunkV2PluginCreator::getPluginVersion().
	const char* ChunkV2::getPluginVersion() const { return "1"; }

	// Called by TensorRT when it is done with this plugin instance.
	void ChunkV2::destroy() { delete this; }


	// Duplicate this plugin, carrying over the configured split size and the
	// registry namespace.
	IPluginV2* ChunkV2::clone() const
	{
		auto* copy = new ChunkV2();
		copy->_n_size_split = _n_size_split;
		copy->setPluginNamespace(_s_plugin_namespace.c_str());
		return copy;
	}

	int ChunkV2::enqueue(int batchSize,
		const void* const* inputs,
		void** outputs,
		void* workspace,
		cudaStream_t stream)
	{
		for (int b = 0; b < batchSize; ++b)
		{
			NV_CUDA_CHECK(cudaMemcpy((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice));
			NV_CUDA_CHECK(cudaMemcpy((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice));
		}
		return 0;
	}


    // Static storage shared by all creator instances: the plugin exposes no
    // creation-time attributes, so the field collection stays empty.
    PluginFieldCollection ChunkV2PluginCreator::_fc{};
    std::vector<PluginField> ChunkV2PluginCreator::_vec_plugin_attributes;

    // Publish the (empty) attribute list through _fc so getFieldNames()
    // can return a valid pointer.
    ChunkV2PluginCreator::ChunkV2PluginCreator()
    {
        _vec_plugin_attributes.clear();
        _fc.nbFields = _vec_plugin_attributes.size();
        _fc.fields = _vec_plugin_attributes.data();
    }

    // Registry lookup name; must match ChunkV2::getPluginType().
    const char* ChunkV2PluginCreator::getPluginName() const { return "CHUNK_TRT"; }

    // Registry version; must match ChunkV2::getPluginVersion().
    const char* ChunkV2PluginCreator::getPluginVersion() const { return "1"; }

    // The plugin takes no creation attributes, so _fc is an empty collection.
    const PluginFieldCollection* ChunkV2PluginCreator::getFieldNames() { return &_fc; }

	// Build a fresh, unconfigured ChunkV2; `fc` is unused because the plugin
	// declares no creation-time fields.
	IPluginV2* ChunkV2PluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
	{
		auto* plugin = new ChunkV2();
		plugin->setPluginNamespace(_s_name_space.c_str());
		return plugin;
	}

	// Recreate a plugin from the bytes produced by ChunkV2::serialize().
	IPluginV2* ChunkV2PluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
	{
		auto* plugin = new ChunkV2(serialData, serialLength);
		plugin->setPluginNamespace(_s_name_space.c_str());
		return plugin;
	}

    // Record the namespace handed down by the registry.
    void ChunkV2PluginCreator::setPluginNamespace(const char* libNamespace)
    {
        _s_name_space.assign(libNamespace);
    }

    // Expose the namespace previously stored by setPluginNamespace().
    const char* ChunkV2PluginCreator::getPluginNamespace() const { return _s_name_space.c_str(); }

	REGISTER_TENSORRT_PLUGIN(ChunkV2PluginCreator);
}//namespace nvinfer1

Much appreciate!

Hi @291087211,
When using buildEngineWithConfig you must use config to setup all builder configuration.
Also, setFp16Mode is a deprecated API.

Thanks

1 Like

With Trt5,

If I use buildCudaEngine and setFp16Mode(true), as follows:

trt_.engine_ = trt_.builder_->buildCudaEngine(*trt_.network_);

there is also an error:

ERROR: terminate called after throwing an instance of 'nvinfer1::CudaError'
  what():  std::exception

Hi @291087211,
TRT 5 support has been deprecated, hence we recommend you to use the latest TRT versions for better support.
Thanks!