Serialized engine contains plugin, but no plugin factory was provided

Please provide complete information as applicable to your setup.

• Hardware Platform (Jetson / GPU) -> dGPU (AWS T4)
• DeepStream Version -> 5.0
• TensorRT Version -> 7+
• NVIDIA GPU Driver Version (valid for GPU only) -> 440.82

I created a TensorRT engine using the TensorRT API, and I was trying to use the engine file to run inference for RetinaFace detection. Below is my config file:

[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
model-engine-file=../../../samples/models/RetinaFace_Detector/retina_r50.engine

# create labels file
# labelfile-path=./labels.txt
batch-size=1
model-color-format=0
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=1
interval=0
gie-unique-id=1
is-classifier=0
custom-lib-path=./libdecodeplugin.so

[class-attrs-all]
threshold=0.2
group-threshold=1
## Set eps=0.7 and minBoxes for enable-dbscan=1
eps=0.2
#minBoxes=3
roi-top-offset=0
roi-bottom-offset=0
detected-min-w=0
detected-min-h=0
detected-max-w=1920
detected-max-h=1080

And this is the custom plugin that I defined while creating the TensorRT engine file:

#ifndef _DECODE_CU_H
#define _DECODE_CU_H

#include "decode.h"
#include "stdio.h"

namespace nvinfer1
{
    DecodePlugin::DecodePlugin()
    {
    }

    DecodePlugin::~DecodePlugin()
    {
    }

    // create the plugin at runtime from a byte stream
    DecodePlugin::DecodePlugin(const void* data, size_t length)
    {
    }

    void DecodePlugin::serialize(void* buffer) const
    {
    }

    size_t DecodePlugin::getSerializationSize() const
    {  
        return 0;
    }

    int DecodePlugin::initialize()
    { 
        return 0;
    }

    Dims DecodePlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
    {
        //output the result to channel
        int totalCount = 0;
        totalCount += decodeplugin::INPUT_H / 8 * decodeplugin::INPUT_W / 8 * 2 * sizeof(decodeplugin::Detection) / sizeof(float);
        totalCount += decodeplugin::INPUT_H / 16 * decodeplugin::INPUT_W / 16 * 2 * sizeof(decodeplugin::Detection) / sizeof(float);
        totalCount += decodeplugin::INPUT_H / 32 * decodeplugin::INPUT_W / 32 * 2 * sizeof(decodeplugin::Detection) / sizeof(float);

        return Dims3(totalCount + 1, 1, 1);
    }

    // Set plugin namespace
    void DecodePlugin::setPluginNamespace(const char* pluginNamespace)
    {
        mPluginNamespace = pluginNamespace;
    }

    const char* DecodePlugin::getPluginNamespace() const
    {
        return mPluginNamespace;
    }

    // Return the DataType of the plugin output at the requested index
    DataType DecodePlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
    {
        return DataType::kFLOAT;
    }

    // Return true if output tensor is broadcast across a batch.
    bool DecodePlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
    {
        return false;
    }

    // Return true if plugin can use input that is broadcast across batch without replication.
    bool DecodePlugin::canBroadcastInputAcrossBatch(int inputIndex) const
    {
        return false;
    }

    void DecodePlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
    {
    }

    // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
    void DecodePlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
    {
    }

    // Detach the plugin object from its execution context.
    void DecodePlugin::detachFromContext() {}

    const char* DecodePlugin::getPluginType() const
    {
        return "Decode_TRT";
    }

    const char* DecodePlugin::getPluginVersion() const
    {
        return "1";
    }

    void DecodePlugin::destroy()
    {
        delete this;
    }

    // Clone the plugin
    IPluginV2IOExt* DecodePlugin::clone() const
    {
        DecodePlugin *p = new DecodePlugin();
        p->setPluginNamespace(mPluginNamespace);
        return p;
    }

    __device__ float Logist(float data) { return 1. / (1. + expf(-data)); }

    __global__ void CalDetection(const float *input, float *output, int num_elem, int step, int anchor) {
    
        int idx = threadIdx.x + blockDim.x * blockIdx.x;
        if (idx >= num_elem) return;

        int h = decodeplugin::INPUT_H / step;
        int w = decodeplugin::INPUT_W / step;
        int y = idx / w;
        int x = idx % w;
        const float *bbox_reg = &input[0];
        const float *cls_reg = &input[2 * 4 * num_elem];
        const float *lmk_reg = &input[2 * 4 * num_elem + 2 * 2 * num_elem];

        for (int k = 0; k < 2; ++k) {
            float conf1 = cls_reg[idx + k * num_elem * 2];
            float conf2 = cls_reg[idx + k * num_elem * 2 + num_elem];
            conf2 = expf(conf2) / (expf(conf1) + expf(conf2));
            if (conf2 <= 0.02) continue;

            float *res_count = output;
            int count = (int)atomicAdd(res_count, 1);
            char* data = (char *)res_count + sizeof(float) + count * sizeof(decodeplugin::Detection);
            decodeplugin::Detection* det = (decodeplugin::Detection*)(data);

            float prior[4];
            prior[0] = ((float)x + 0.5) / w;
            prior[1] = ((float)y + 0.5) / h;
            prior[2] = (float)anchor * (k + 1) / decodeplugin::INPUT_W;
            prior[3] = (float)anchor * (k + 1) / decodeplugin::INPUT_H;

            //Location
            det->bbox[0] = prior[0] + bbox_reg[idx + k * num_elem * 4] * 0.1 * prior[2];
            det->bbox[1] = prior[1] + bbox_reg[idx + k * num_elem * 4 + num_elem] * 0.1 * prior[3];
            det->bbox[2] = prior[2] * expf(bbox_reg[idx + k * num_elem * 4 + num_elem * 2] * 0.2);
            det->bbox[3] = prior[3] * expf(bbox_reg[idx + k * num_elem * 4 + num_elem * 3] * 0.2);
            det->bbox[0] -= det->bbox[2] / 2;
            det->bbox[1] -= det->bbox[3] / 2;
            det->bbox[2] += det->bbox[0];
            det->bbox[3] += det->bbox[1];
            det->bbox[0] *= decodeplugin::INPUT_W;
            det->bbox[1] *= decodeplugin::INPUT_H;
            det->bbox[2] *= decodeplugin::INPUT_W;
            det->bbox[3] *= decodeplugin::INPUT_H;
            det->class_confidence = conf2;
            for (int i = 0; i < 10; i += 2) {
                det->landmark[i] = prior[0] + lmk_reg[idx + k * num_elem * 10 + num_elem * i] * 0.1 * prior[2];
                det->landmark[i+1] = prior[1] + lmk_reg[idx + k * num_elem * 10 + num_elem * (i + 1)] * 0.1 * prior[3];
                det->landmark[i] *= decodeplugin::INPUT_W;
                det->landmark[i+1] *= decodeplugin::INPUT_H;
            }
        }
    }

    void DecodePlugin::forwardGpu(const float *const * inputs, float * output, cudaStream_t stream, int batchSize) 
    {
        int num_elem = 0;
        int base_step = 8;
        int base_anchor = 16;
        int thread_count;
        // Zero the detection counter asynchronously on the same stream the kernels use.
        cudaMemsetAsync(output, 0, sizeof(float), stream);
        for (unsigned int i = 0; i < 3; ++i)
        {
            num_elem = decodeplugin::INPUT_H / base_step * decodeplugin::INPUT_W / base_step;
            thread_count = (num_elem < thread_count_) ? num_elem : thread_count_;
            // Launch on the stream passed to enqueue(), not the default stream.
            CalDetection<<< (num_elem + thread_count - 1) / thread_count, thread_count, 0, stream>>>
                (inputs[i], output, num_elem, base_step, base_anchor);
            base_step *= 2;
            base_anchor *= 4;
        }
    }

    int DecodePlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
    {
        //assert(batchSize == 1);
        //GPU
        //CUDA_CHECK(cudaStreamSynchronize(stream));
        forwardGpu((const float *const *)inputs,(float *)outputs[0],stream,batchSize);

        return 0;
    }

    PluginFieldCollection DecodePluginCreator::mFC{};
    std::vector<PluginField> DecodePluginCreator::mPluginAttributes;

    DecodePluginCreator::DecodePluginCreator()
    {
        mPluginAttributes.clear();

        mFC.nbFields = mPluginAttributes.size();
        mFC.fields = mPluginAttributes.data();
    }

    const char* DecodePluginCreator::getPluginName() const
    {
        return "Decode_TRT";
    }

    const char* DecodePluginCreator::getPluginVersion() const
    {
        return "1";
    }

    const PluginFieldCollection* DecodePluginCreator::getFieldNames()
    {
        return &mFC;
    }

    IPluginV2IOExt* DecodePluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
    {
        DecodePlugin* obj = new DecodePlugin();
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }

    IPluginV2IOExt* DecodePluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
    {
        // This object will be deleted when the network is destroyed, which will
        // call DecodePlugin::destroy()
        DecodePlugin* obj = new DecodePlugin(serialData, serialLength);
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }

}

From this plugin I built the libdecodeplugin.so that is referenced by custom-lib-path in the config file above.
Deserializing the engine file with this plugin works when I test it in the TensorRT docker container, but it fails when DeepStream deserializes the engine using this .so library. I haven't found any resources on this yet.
How do I include this plugin so that DeepStream can deserialize the TensorRT engine file successfully?
And this is the terminal output:

root@73183ab47661:/opt/nvidia/deepstream/deepstream-5.0/sources/apps/deepstream-retinaface-multistream# ./deepstream-retinaface-multistream-app ../../../samples/streams/sample_720p.mp4 

(gst-plugin-scanner:26): GStreamer-WARNING **: 07:09:47.376: Failed to load plugin '/usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_inferserver.so': libtrtserver.so: cannot open shared object file: No such file or directory
Warn: 'threshold' parameter has been deprecated. Use 'pre-cluster-threshold' instead.
Now playing: ../../../samples/streams/sample_720p.mp4
ERROR: ../nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: INVALID_ARGUMENT: getPluginCreator could not find plugin Decode_TRT version 1
ERROR: ../nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: safeDeserializationUtils.cpp (293) - Serialization Error in load: 0 (Cannot deserialize plugin since corresponding IPluginCreator not found in Plugin Registry)
ERROR: ../nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: INVALID_STATE: std::exception
ERROR: ../nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: INVALID_CONFIG: Deserialize the cuda engine failed.
ERROR: ../nvdsinfer/nvdsinfer_model_builder.cpp:1452 Deserialize engine failed from file: /opt/nvidia/deepstream/deepstream-5.0/sources/apps/deepstream-retinaface-multistream/tensorrt_engines_awsT4/retina_r50.engine
0:00:04.315405593    25 0x5618a017b8d0 WARN                 nvinfer gstnvinfer.cpp:599:gst_nvinfer_logger:<primary-nvinference-engine> NvDsInferContext[UID 1]: Warning from NvDsInferContextImpl::deserializeEngineAndBackend() <nvdsinfer_context_impl.cpp:1566> [UID = 1]: deserialize engine from file :/opt/nvidia/deepstream/deepstream-5.0/sources/apps/deepstream-retinaface-multistream/tensorrt_engines_awsT4/retina_r50.engine failed
0:00:04.315444719    25 0x5618a017b8d0 WARN                 nvinfer gstnvinfer.cpp:599:gst_nvinfer_logger:<primary-nvinference-engine> NvDsInferContext[UID 1]: Warning from NvDsInferContextImpl::generateBackendContext() <nvdsinfer_context_impl.cpp:1673> [UID = 1]: deserialize backend context from engine from file :/opt/nvidia/deepstream/deepstream-5.0/sources/apps/deepstream-retinaface-multistream/tensorrt_engines_awsT4/retina_r50.engine failed, try rebuild
0:00:04.315464105    25 0x5618a017b8d0 INFO                 nvinfer gstnvinfer.cpp:602:gst_nvinfer_logger:<primary-nvinference-engine> NvDsInferContext[UID 1]: Info from NvDsInferContextImpl::buildModel() <nvdsinfer_context_impl.cpp:1591> [UID = 1]: Trying to create engine from model files
ERROR: ../nvdsinfer/nvdsinfer_model_builder.cpp:934 failed to build network since there is no model file matched.
ERROR: ../nvdsinfer/nvdsinfer_model_builder.cpp:872 failed to build network.
0:00:04.315759096    25 0x5618a017b8d0 ERROR                nvinfer gstnvinfer.cpp:596:gst_nvinfer_logger:<primary-nvinference-engine> NvDsInferContext[UID 1]: Error in NvDsInferContextImpl::buildModel() <nvdsinfer_context_impl.cpp:1611> [UID = 1]: build engine file failed
0:00:04.315783396    25 0x5618a017b8d0 ERROR                nvinfer gstnvinfer.cpp:596:gst_nvinfer_logger:<primary-nvinference-engine> NvDsInferContext[UID 1]: Error in NvDsInferContextImpl::generateBackendContext() <nvdsinfer_context_impl.cpp:1697> [UID = 1]: build backend context failed
0:00:04.315800708    25 0x5618a017b8d0 ERROR                nvinfer gstnvinfer.cpp:596:gst_nvinfer_logger:<primary-nvinference-engine> NvDsInferContext[UID 1]: Error in NvDsInferContextImpl::initialize() <nvdsinfer_context_impl.cpp:1024> [UID = 1]: generate backend failed, check config file settings
0:00:04.315966590    25 0x5618a017b8d0 WARN                 nvinfer gstnvinfer.cpp:781:gst_nvinfer_start:<primary-nvinference-engine> error: Failed to create NvDsInferContext instance
0:00:04.315979631    25 0x5618a017b8d0 WARN                 nvinfer gstnvinfer.cpp:781:gst_nvinfer_start:<primary-nvinference-engine> error: Config file path: retinaface_pgie_config.txt, NvDsInfer Error: NVDSINFER_CONFIG_FAILED
Running...
ERROR from element primary-nvinference-engine: Failed to create NvDsInferContext instance
Error details: gstnvinfer.cpp(781): gst_nvinfer_start (): /GstPipeline:dstest1-pipeline/GstNvInfer:primary-nvinference-engine:
Config file path: retinaface_pgie_config.txt, NvDsInfer Error: NVDSINFER_CONFIG_FAILED
Returned, stopping playback
Deleting pipeline

@y14uc339

There are some items to be clarified.

  1. How did you build your TensorRT engine with the APIs? Do you mind showing me the code? You can just give me the minimum implementation that can reproduce this error.

  2. Could you please show me all source code updates you have done to TensorRT/parsers/ and TensorRT/plugin/ as well as your commands & operations to reproduce this fault step by step?

  3. It would be great if you could send me a packed file including your config files, updated source code files, and the workspace itself (e.g. deepstream-retinaface-multistream). You can give me a link to OneDrive, Google Drive, or Baidu drive so that I can download it.

Hi @ersheng, I don't really understand what you mean by TensorRT/parsers. Everything I have done is in the drive link below, which contains both the DeepStream app and the RetinaFace TensorRT code I used to create the serialized engine file. I have placed the engine file and the libdecodeplugin.so generated during TensorRT serialization inside the DeepStream app, but if you want to regenerate them you can always do so using the README attached in the TensorRT folder. Please let me know if you need any help.

https://drive.google.com/open?id=1Yb_vDTWC2BoFb_TqDcc7XVrOfS6G-F52

To start and reproduce the error in the DeepStream docker container:

  1. Copy and paste the deepstream app in /opt/nvidia/deepstream/deepstream-5.0/sources/apps
  2. $ make
  3. $ ./deepstream-retinaface-multistream-app ../../../samples/streams/sample_720p.mp4

@y14uc339

A parser is needed only if TensorRT has to convert ONNX, UFF, or Caffe models into engine files.
Since you build the engine manually with the APIs, you probably don't need the parsers or any code changes inside TensorRT/parsers.

Okay @ersheng! Yes, I am building the engine manually with the APIs. Also, are you able to access the drive folder? Let me know if you need any help!

@y14uc339
I sent an access request that needs your approval.

@ersheng approved like 2 mins ago.

@ersheng I made a small update to decode.cu in the TensorRT folder and added this line at the end, in the parent namespace of that file:

REGISTER_TENSORRT_PLUGIN(DecodePluginCreator);

Then I generated libdecodeplugin.so again and placed it inside the DeepStream app. The error after this is mentioned here:

If this is of any help.
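For readers hitting the same error, a placement note: REGISTER_TENSORRT_PLUGIN expands to a static registrar object that adds the creator to the global plugin registry when the shared library is loaded, so it must sit where that static object is legal and the creator name resolves. A sketch of how the tail of decode.cu might look after this change (based on the code posted above; this is an illustration, not a verified fix for this thread's error):

```cpp
// Tail of decode.cu after the update (sketch).
namespace nvinfer1
{
    // ... DecodePlugin and DecodePluginCreator definitions as posted above ...
} // namespace nvinfer1

// At file scope, outside the namespace: expands to a static registrar whose
// constructor registers DecodePluginCreator with the global plugin registry
// at the moment libdecodeplugin.so is loaded (e.g. via dlopen).
REGISTER_TENSORRT_PLUGIN(nvinfer1::DecodePluginCreator);
```

Note the qualified name `nvinfer1::DecodePluginCreator`: when the macro is placed outside the namespace, the creator class must be referred to by its full name.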

@y14uc339

There is maybe another option: you could build your plugin alongside the other existing plugins under <TensorRT_OSS_root>/plugin/, so that your plugin is compiled into libnvinfer_plugin.so and you avoid missing-dependency problems.
