Hi, I want to use “addPlugin” API in TensorRT. I’ve tried but a bus error had been occurred. Anyone can use this API? my source code is here. I used nVidia Jetson TX2 and TensorRT 4.0.
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <random>
#include <cmath>
#include <time.h>
#include <iostream>
#include <cuda_runtime_api.h>
#include <NvInfer.h>
#include <NvUtils.h>
using namespace nvinfer1;
class Logger: public nvinfer1::ILogger{
public:
void log(nvinfer1::ILogger::Severity severity, const char* msg) override{
if (severity == Severity::kINFO) return; // Suppress info-level messages
switch (severity){
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
case Severity::kERROR: std::cerr << "ERROR: "; break;
case Severity::kWARNING: std::cerr << "WARNING: "; break;
case Severity::kINFO: std::cerr << "INFO: "; break;
default: std::cerr << "UNKNOWN: "; break;
}
std::cerr << msg << std::endl;
}
} gLogger;
class testLayer : public IPlugin
{
public:
testLayer() {;};
testLayer(const void* buffer, size_t size) {;};
inline int getNbOutputs() const override {return 1;};
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override {return DimsCHW(1, 1, 1);};
void configure(const Dims* inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override {;};
inline size_t getWorkspaceSize(int) const override {return 0;};
int initialize() override {return 0;};
int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override {return 0;};
void terminate() override {;};
size_t getSerializationSize() override {return 0;};
void serialize(void* buffer) override {;};
};
void fc_network(INetworkDefinition* network){
float weight_array[10*10];
Weights weights{DataType::kFLOAT, weight_array, 10 * 1 * 1 * 10};
Weights bias;
bias = Weights{DataType::kFLOAT, nullptr, 0};
auto input = network->addInput("input", DataType::kFLOAT, DimsCHW{1, 1, 10});
auto fc_1 = network->addFullyConnected(*input, 10, weights, bias);
testLayer testLayer1;
auto plugin1 = network->addPlugin(reinterpret_cast<ITensor* const*>(input), 1, *reinterpret_cast<IPlugin*>(&testLayer1));
fc_1->getOutput(0)->setName("API");
network->markOutput(*fc_1->getOutput(0));
plugin1->getOutput(0)->setName("Plugin");
network->markOutput(*plugin1->getOutput(0));
}
int main(){
float* input = new float[10];
float output_api [10];
float output_plugin [1];
void *inputDevice;
void *outputDevice_api;
void *outputDevice_plugin;
cudaMalloc(&inputDevice, 10 * sizeof(float));
cudaMalloc(&outputDevice_api, 10 * sizeof(float));
cudaMalloc(&outputDevice_plugin, 1 * sizeof(float));
IBuilder* builder = createInferBuilder(gLogger);
builder->setMaxBatchSize(1);
builder->setMaxWorkspaceSize(1 << 20);
INetworkDefinition* network = builder->createNetwork();
fc_network(network);
auto engine = builder->buildCudaEngine(*network);
IExecutionContext* context = engine->createExecutionContext();
cudaMemcpy(inputDevice, input, 10 * sizeof(float), cudaMemcpyHostToDevice);
context->execute(1, &inputDevice);
cudaMemcpy(&output_api, outputDevice_api, 10 * sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(&output_plugin, outputDevice_plugin, 1 * sizeof(float), cudaMemcpyDeviceToHost);
context->destroy();
engine->destroy();
network->destroy();
builder->destroy();
delete[] input;
cudaFree(inputDevice);
cudaFree(outputDevice_api);
cudaFree(outputDevice_plugin);
return 0;
}