Could not open lib: /opt/nvidia/deepstream/deepstream-5.0/sources/apps/myapp/libdecodeplugin.so, error string: undefined symbol: getPluginRegistry

y14uc339 · June 8, 2020, 7:54am

Please provide complete information as applicable to your setup.

• Hardware Platform (Jetson / GPU) → aws dGPU T4
• DeepStream Version → SDK 5.0
• TensorRT Version → 7
• NVIDIA GPU Driver Version (valid for GPU only) → 440.82

Created custom layers which are written into the library libdecodeplugin.so. The code for custom layers which use IPluginCreator to create plugins is below:

#include "decode.h"
#include "stdio.h"

namespace nvinfer1
{
    DecodePlugin::DecodePlugin()
    {
    }

    DecodePlugin::~DecodePlugin()
    {
    }

    // create the plugin at runtime from a byte stream
    DecodePlugin::DecodePlugin(const void* data, size_t length)
    {
    }

    void DecodePlugin::serialize(void* buffer) const
    {
    }

    size_t DecodePlugin::getSerializationSize() const
    {  
        return 0;
    }

    int DecodePlugin::initialize()
    { 
        return 0;
    }

    Dims DecodePlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
    {
        //output the result to channel
        int totalCount = 0;
        totalCount += decodeplugin::INPUT_H / 8 * decodeplugin::INPUT_W / 8 * 2 * sizeof(decodeplugin::Detection) / sizeof(float);
        totalCount += decodeplugin::INPUT_H / 16 * decodeplugin::INPUT_W / 16 * 2 * sizeof(decodeplugin::Detection) / sizeof(float);
        totalCount += decodeplugin::INPUT_H / 32 * decodeplugin::INPUT_W / 32 * 2 * sizeof(decodeplugin::Detection) / sizeof(float);

        return Dims3(totalCount + 1, 1, 1);
    }

    // Set plugin namespace
    void DecodePlugin::setPluginNamespace(const char* pluginNamespace)
    {
        mPluginNamespace = pluginNamespace;
    }

    const char* DecodePlugin::getPluginNamespace() const
    {
        return mPluginNamespace;
    }

    // Return the DataType of the plugin output at the requested index
    DataType DecodePlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
    {
        return DataType::kFLOAT;
    }

    // Return true if output tensor is broadcast across a batch.
    bool DecodePlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
    {
        return false;
    }

    // Return true if plugin can use input that is broadcast across batch without replication.
    bool DecodePlugin::canBroadcastInputAcrossBatch(int inputIndex) const
    {
        return false;
    }

    void DecodePlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
    {
    }

    // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
    void DecodePlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
    {
    }

    // Detach the plugin object from its execution context.
    void DecodePlugin::detachFromContext() {}

    const char* DecodePlugin::getPluginType() const
    {
        return "Decode_TRT";
    }

    const char* DecodePlugin::getPluginVersion() const
    {
        return "1";
    }

    void DecodePlugin::destroy()
    {
        delete this;
    }

    // Clone the plugin
    IPluginV2IOExt* DecodePlugin::clone() const
    {
        DecodePlugin *p = new DecodePlugin();
        p->setPluginNamespace(mPluginNamespace);
        return p;
    }

    __device__ float Logist(float data){ return 1./(1. + expf(-data)); };

    __global__ void CalDetection(const float *input, float *output, int num_elem, int step, int anchor) {
    
        int idx = threadIdx.x + blockDim.x * blockIdx.x;
        if (idx >= num_elem) return;

        int h = decodeplugin::INPUT_H / step;
        int w = decodeplugin::INPUT_W / step;
        int y = idx / w;
        int x = idx % w;
        const float *bbox_reg = &input[0];
        const float *cls_reg = &input[2 * 4 * num_elem];
        const float *lmk_reg = &input[2 * 4 * num_elem + 2 * 2 * num_elem];

        for (int k = 0; k < 2; ++k) {
            float conf1 = cls_reg[idx + k * num_elem * 2];
            float conf2 = cls_reg[idx + k * num_elem * 2 + num_elem];
            conf2 = expf(conf2) / (expf(conf1) + expf(conf2));
            if (conf2 <= 0.02) continue;

            float *res_count = output;
            int count = (int)atomicAdd(res_count, 1);
            char* data = (char *)res_count + sizeof(float) + count * sizeof(decodeplugin::Detection);
            decodeplugin::Detection* det = (decodeplugin::Detection*)(data);

            float prior[4];
            prior[0] = ((float)x + 0.5) / w;
            prior[1] = ((float)y + 0.5) / h;
            prior[2] = (float)anchor * (k + 1) / decodeplugin::INPUT_W;
            prior[3] = (float)anchor * (k + 1) / decodeplugin::INPUT_H;

            //Location
            det->bbox[0] = prior[0] + bbox_reg[idx + k * num_elem * 4] * 0.1 * prior[2];
            det->bbox[1] = prior[1] + bbox_reg[idx + k * num_elem * 4 + num_elem] * 0.1 * prior[3];
            det->bbox[2] = prior[2] * expf(bbox_reg[idx + k * num_elem * 4 + num_elem * 2] * 0.2);
            det->bbox[3] = prior[3] * expf(bbox_reg[idx + k * num_elem * 4 + num_elem * 3] * 0.2);
            det->bbox[0] -= det->bbox[2] / 2;
            det->bbox[1] -= det->bbox[3] / 2;
            det->bbox[2] += det->bbox[0];
            det->bbox[3] += det->bbox[1];
            det->bbox[0] *= decodeplugin::INPUT_W;
            det->bbox[1] *= decodeplugin::INPUT_H;
            det->bbox[2] *= decodeplugin::INPUT_W;
            det->bbox[3] *= decodeplugin::INPUT_H;
            det->class_confidence = conf2;
            for (int i = 0; i < 10; i += 2) {
                det->landmark[i] = prior[0] + lmk_reg[idx + k * num_elem * 10 + num_elem * i] * 0.1 * prior[2];
                det->landmark[i+1] = prior[1] + lmk_reg[idx + k * num_elem * 10 + num_elem * (i + 1)] * 0.1 * prior[3];
                det->landmark[i] *= decodeplugin::INPUT_W;
                det->landmark[i+1] *= decodeplugin::INPUT_H;
            }
        }
    }

    void DecodePlugin::forwardGpu(const float *const * inputs, float * output, cudaStream_t stream, int batchSize) 
    {
        int num_elem = 0;
        int base_step = 8;
        int base_anchor = 16;
        int thread_count;
        cudaMemset(output, 0, sizeof(float));
        for (unsigned int i = 0; i < 3; ++i)
        {
            num_elem = decodeplugin::INPUT_H / base_step * decodeplugin::INPUT_W / base_step;
            thread_count = (num_elem < thread_count_) ? num_elem : thread_count_;
            CalDetection<<< (num_elem + thread_count - 1) / thread_count, thread_count>>>
                (inputs[i], output, num_elem, base_step, base_anchor);
            base_step *= 2;
            base_anchor *= 4;
        }
    }

    int DecodePlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
    {
        //assert(batchSize == 1);
        //GPU
        //CUDA_CHECK(cudaStreamSynchronize(stream));
        forwardGpu((const float *const *)inputs,(float *)outputs[0],stream,batchSize);

        return 0;
    };

    PluginFieldCollection DecodePluginCreator::mFC{};
    std::vector<PluginField> DecodePluginCreator::mPluginAttributes;

    DecodePluginCreator::DecodePluginCreator()
    {
        mPluginAttributes.clear();

        mFC.nbFields = mPluginAttributes.size();
        mFC.fields = mPluginAttributes.data();
    }

    const char* DecodePluginCreator::getPluginName() const
    {
        return DECODE_PLUGIN_NAME;
    }

    const char* DecodePluginCreator::getPluginVersion() const
    {
        return DECODE_PLUGIN_VERSION;
    }

    const PluginFieldCollection* DecodePluginCreator::getFieldNames()
    {
        return &mFC;
    }

    IPluginV2IOExt* DecodePluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
    {
        DecodePlugin* obj = new DecodePlugin();
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }

    IPluginV2IOExt* DecodePluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
    {
        // This object will be deleted when the network is destroyed, which will
        // call PReluPlugin::destroy()
        DecodePlugin* obj = new DecodePlugin(serialData, serialLength);
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }
    REGISTER_TENSORRT_PLUGIN(DecodePluginCreator);
}

saved in .cu extension file. Now when I deserialize the engine file in NVIDIA tensorrt container works perfectly fine. But when I use the libdecodeplugin.so to deserialize the generated engine file it doesn’t seem to work in NVIDIA DeepStream container. Below is the terminal output:

Warn: 'threshold' parameter has been deprecated. Use 'pre-cluster-threshold' instead.
Now playing: ../../../samples/streams/sample_720p.mp4
ERROR: ../nvdsinfer/nvdsinfer_func_utils.cpp:60 Could not open lib: /opt/nvidia/deepstream/deepstream-5.0/sources/apps/deepstream-retinaface-multistream/libdecodeplugin.so, error string: /opt/nvidia/deepstream/deepstream-5.0/sources/apps/deepstream-retinaface-multistream/libdecodeplugin.so: undefined symbol: getPluginRegistry
0:00:00.986501706   101 0x557a92760ed0 ERROR                nvinfer gstnvinfer.cpp:596:gst_nvinfer_logger:<primary-nvinference-engine> NvDsInferContext[UID 1]: Error in NvDsInferContextImpl::initialize() <nvdsinfer_context_impl.cpp:1015> [UID = 1]: Could not open custom lib: (null)
0:00:00.986549778   101 0x557a92760ed0 WARN                 nvinfer gstnvinfer.cpp:781:gst_nvinfer_start:<primary-nvinference-engine> error: Failed to create NvDsInferContext instance
0:00:00.986563826   101 0x557a92760ed0 WARN                 nvinfer gstnvinfer.cpp:781:gst_nvinfer_start:<primary-nvinference-engine> error: Config file path: retinaface_pgie_config.txt, NvDsInfer Error: NVDSINFER_CUSTOM_LIB_FAILED
Running...
ERROR from element primary-nvinference-engine: Failed to create NvDsInferContext instance
Error details: gstnvinfer.cpp(781): gst_nvinfer_start (): /GstPipeline:dstest1-pipeline/GstNvInfer:primary-nvinference-engine:
Config file path: retinaface_pgie_config.txt, NvDsInfer Error: NVDSINFER_CUSTOM_LIB_FAILED
Returned, stopping playback
Deleting pipeline

I did not find any good resources for this issue. Thanks in advance!

bcao · June 8, 2020, 2:02pm

Which TensorRT container are you using?
Could you share the result of “ldd libdecodeplugin.so” in your TensorRT container?

y14uc339 · June 8, 2020, 2:22pm

Using this TensorRT container →
nvcr.io/nvidia/tensorrt:20.03-py3

@bcao Result of ldd libdecodeplugin.so in TensorRT container:

root@84f80a226000:/home/retinaface_tensorrt/build# ldd libdecodeplugin.so 
    linux-vdso.so.1 (0x00007fff9c9ea000)
    libcudart.so.10.2 => /usr/local/cuda/lib64/libcudart.so.10.2 (0x00007f4fdb949000)
    libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007f4fdb5c0000)
    libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007f4fdb3a8000)
    libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f4fdafb7000)
    /lib64/ld-linux-x86-64.so.2 (0x00007f4fdbdcf000)
    libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007f4fdadb3000)
    libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007f4fdab94000)
    librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x00007f4fda98c000)
    libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007f4fda5ee000)

bcao · June 9, 2020, 2:21am

getPluginRegistry is defined in /usr/lib/x86_64-linux-gnu/libnvinfer.so.7 , I think you should link to the library.

ersheng · June 9, 2020, 5:36am

@y14uc339

Add an extra row to CMakeList.txt to link your libdecodeplugin.so with nvinfer:

target_link_libraries(decodeplugin nvinfer)
target_link_libraries(retina_50 nvinfer)
target_link_libraries(retina_50 cudart)
target_link_libraries(retina_50 decodeplugin)
target_link_libraries(retina_50 ${OpenCV_LIBRARIES})

And you’d better update cmake minimum to 3.13:

cmake_minimum_required(VERSION 3.13)

y14uc339 · June 9, 2020, 5:38am

Okay I’ll try this right away!

Topic		Replies	Views
Serialized engine contains plugin, but no plugin factory was provided DeepStream SDK	11	1721	October 12, 2021
Cannot find -ldecodeplugin TensorRT tensorrt	2	295	March 6, 2024
Failling in building sample from TLT-DEEPSTREAM TAO Toolkit	25	3851	October 12, 2021
Decodebin not picking NVIDIA decoder plugin on identical setup Computer Vision & Image Processing tensorrt , camera , cuda , ubuntu , gstreamer , deepstream	0	49	November 5, 2025
Error: Decodebin did not pick nvidia decoder plugin DeepStream SDK	10	878	February 29, 2024
Deepstream SDK 3 error DeepStream SDK	2	570	September 7, 2023
Deepstream-app: error while loading shared libraries: libnvinfer.so.6: cannot open shared object file + ubuntu 18.04 DeepStream SDK	27	8730	October 12, 2021
Error on running samples with deepstream-app DeepStream SDK	7	2236	October 12, 2021
No such element or plugin 'avdec_h264' in DeepStream 6.2 container DeepStream SDK	5	1872	February 23, 2023
Failed to load plugin '/usr/lib/x86_64-linux-gnu/gstreamer-1.0/deepstream/libnvdsgst_udp.so' DeepStream SDK gstreamer , deepstream61	4	7689	October 21, 2022

Could not open lib: /opt/nvidia/deepstream/deepstream-5.0/sources/apps/myapp/libdecodeplugin.so, error string: undefined symbol: getPluginRegistry

Related topics