A bus error occurs while using the addPlugin API in TensorRT

Hi, I want to use the “addPlugin” API in TensorRT. I’ve tried, but a bus error occurs. Has anyone been able to use this API? My source code is below. I’m using an NVIDIA Jetson TX2 with TensorRT 4.0.

    #include <stdio.h>
    #include <stdlib.h>
    #include <vector>
    #include <random>
    #include <cmath>
    #include <time.h>
    #include <iostream>
    #include <cuda_runtime_api.h>
    #include <NvInfer.h>
    #include <NvUtils.h>
    
    using namespace nvinfer1;
    
    class Logger: public nvinfer1::ILogger{
    	public:
    	void log(nvinfer1::ILogger::Severity severity, const char* msg) override{
    		if (severity == Severity::kINFO) return; // Suppress info-level messages
    		switch (severity){
    		    case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
    		    case Severity::kERROR: std::cerr << "ERROR: "; break;
    		    case Severity::kWARNING: std::cerr << "WARNING: "; break;
    		    case Severity::kINFO: std::cerr << "INFO: "; break;
    		    default: std::cerr << "UNKNOWN: "; break;
    		}
    		std::cerr << msg << std::endl;
    	}
    } gLogger;
    
    class testLayer : public IPlugin
    {
    public:
    	testLayer() {;};
    	testLayer(const void* buffer, size_t size) {;};
    
    	inline int getNbOutputs() const override {return 1;};
    	Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override {return DimsCHW(1, 1, 1);};
    
    	void configure(const Dims* inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override {;};
    	inline size_t getWorkspaceSize(int) const override {return 0;};
    
    	int initialize() override {return 0;};
    	int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override {return 0;};
    	void terminate() override {;};
    	
    	size_t getSerializationSize() override {return 0;};
    	void serialize(void* buffer) override {;};
    };
    
    void fc_network(INetworkDefinition* network){
    	float weight_array[10*10];
    	Weights weights{DataType::kFLOAT, weight_array, 10 * 1 * 1 * 10};
    	Weights bias;
    	bias = Weights{DataType::kFLOAT, nullptr, 0};
    
    	auto input = network->addInput("input", DataType::kFLOAT, DimsCHW{1, 1, 10});	
    	auto fc_1 = network->addFullyConnected(*input, 10, weights, bias);
    
    	testLayer testLayer1;
    	auto plugin1 = network->addPlugin(reinterpret_cast<ITensor* const*>(input), 1, *reinterpret_cast<IPlugin*>(&testLayer1));
    
    	fc_1->getOutput(0)->setName("API");
    	network->markOutput(*fc_1->getOutput(0));
    	plugin1->getOutput(0)->setName("Plugin");
    	network->markOutput(*plugin1->getOutput(0));
    }
    
    int main(){
    	float* input = new float[10];
    	float output_api [10];
    	float output_plugin [1];
    
    	void *inputDevice;
    	void *outputDevice_api;
    	void *outputDevice_plugin;
    	cudaMalloc(&inputDevice, 10 * sizeof(float));
    	cudaMalloc(&outputDevice_api, 10 * sizeof(float));
    	cudaMalloc(&outputDevice_plugin, 1 * sizeof(float));
    
    	IBuilder* builder = createInferBuilder(gLogger);
    	builder->setMaxBatchSize(1);
    	builder->setMaxWorkspaceSize(1 << 20);
    	INetworkDefinition* network = builder->createNetwork();
    	fc_network(network);
    	auto engine = builder->buildCudaEngine(*network);
    	IExecutionContext* context = engine->createExecutionContext();
    
    	cudaMemcpy(inputDevice, input, 10 * sizeof(float), cudaMemcpyHostToDevice);
    	context->execute(1, &inputDevice);
    	cudaMemcpy(&output_api, outputDevice_api, 10 * sizeof(float), cudaMemcpyDeviceToHost);
    	cudaMemcpy(&output_plugin, outputDevice_plugin, 1 * sizeof(float), cudaMemcpyDeviceToHost);
    
    	context->destroy();
    	engine->destroy();
    	network->destroy();
    	builder->destroy();
    	delete[] input;
    	cudaFree(inputDevice);
    	cudaFree(outputDevice_api);
    	cudaFree(outputDevice_plugin);
    
    	return 0;
    }

Please see the custom layer section of the TensorRT 4.0.1 Developer Guide: https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/tensorrt_401/tensorrt-developer-guide/index.html#add_custom_layer

With TRT4, although users extended the IPlugin class in previous versions of TensorRT, it is now recommended to extend IPluginExt instead, which adds versioning (to maintain plugin portability across future versions of TensorRT) and enables custom layers that support data formats besides NCHW and single precision.
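
For reference, a minimal (untested) sketch of the same do-nothing plugin written against IPluginExt could look like the following. The class name is illustrative, and supportsFormat/configureWithFormat are the methods IPluginExt adds on top of IPlugin; please double-check the exact signatures against the NvInfer.h shipped with your TensorRT release.

    #include <NvInfer.h>
    #include <cuda_runtime_api.h>
    
    using namespace nvinfer1;
    
    // Illustrative IPluginExt skeleton (does no real work in enqueue).
    class TestLayerExt : public IPluginExt
    {
    public:
        TestLayerExt() {}
        TestLayerExt(const void* buffer, size_t size) {} // deserialization constructor
    
        int getNbOutputs() const override { return 1; }
        Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override { return DimsCHW(1, 1, 1); }
    
        // Added by IPluginExt: report which data type / layout combinations are supported.
        bool supportsFormat(DataType type, PluginFormat format) const override
        {
            return type == DataType::kFLOAT && format == PluginFormat::kNCHW;
        }
    
        // Added by IPluginExt: replaces configure(); receives the type/format chosen by the builder.
        void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs,
                                 DataType type, PluginFormat format, int maxBatchSize) override {}
    
        int initialize() override { return 0; }
        void terminate() override {}
        size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }
    
        int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override
        {
            return 0; // a real plugin would launch its CUDA kernel here
        }
    
        size_t getSerializationSize() override { return 0; }
        void serialize(void* buffer) override {}
    };

Since IPluginExt derives from IPlugin, the instance can still be attached with addPlugin (some TensorRT releases also provide addPluginExt). Note that addPlugin expects an array of ITensor pointers (ITensor* const*), so the usual pattern is to pass the address of the input tensor pointer, e.g. &input, rather than a reinterpret_cast of the tensor itself, and the plugin object (and any Weights memory) must remain valid until buildCudaEngine has finished.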