Display facial landmarks on the OSD

• Hardware Platform (GPU)
• DeepStream Version 5.0
• TensorRT Version 7.0
• NVIDIA GPU Driver Version (valid for GPU only)
• Issue Type( questions)

•Able to load and infer tensorrt retina face model, on Deepstream.
•Written a custom bbox parser that draws appropriate bbox on the face

Requirements

  1. Want to display the landmark points, on the OSD.
  2. Also want to use the (bbox and landmarks) -----> Affine-transformation----->intermediate output------> Secondary-GIE.

Here is the custom_bbox_parser file


#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <unordered_map>
#include <vector>

#include "nvdsinfer_custom_impl.h"
#include "gstnvdsmeta.h"

#define LNM_ARRAY_SIZE 11

/* Clamp `val` into the inclusive range [minVal, maxVal]. */
float clamp(const float val, const float minVal, const float maxVal)
{
    assert(minVal <= maxVal);
    if (val < minVal)
        return minVal;
    if (val > maxVal)
        return maxVal;
    return val;
}

/* Forward declaration (C linkage) of the custom parser entry point so the
 * CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE check at the bottom of this file and the
 * nvinfer/nvinferserver plugin can resolve the symbol. */
extern "C" bool NvDsInferParseCustomRetinaFace(
    std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList);

/* Copy the 10 landmark coordinates (arr[1]..arr[10]; arr[0] holds the
 * detection confidence) into a freshly g_malloc0'ed buffer suitable as an
 * NvDsUserMeta payload.  The caller owns the returned buffer and must free
 * it with g_free() (typically via the user-meta release_func callback).
 *
 * BUGFIX: the original wrote indices 1..10 into a 10-element allocation
 * (valid indices 0..9), overflowing the heap buffer by one gfloat.  The
 * landmarks are now shifted down to occupy indices 0..9. */
void *set_metadata_ptr(std::array<float, LNM_ARRAY_SIZE> & arr)
{
    gfloat *user_metadata = (gfloat*)g_malloc0((LNM_ARRAY_SIZE - 1) * sizeof(gfloat));

    for (int i = 1; i < LNM_ARRAY_SIZE; i++) {
        user_metadata[i - 1] = arr[i];   // skip confidence stored at arr[0]
    }
    return (void *)user_metadata;
}

/* Descending-confidence ordering for std::sort: highest score first. */
static bool cmp(const NvDsInferParseObjectInfo & a, const NvDsInferParseObjectInfo & b) {
    return b.detectionConfidence < a.detectionConfidence;
}

/* Order landmark records by descending confidence (stored at index 0),
 * mirroring cmp() so the box and landmark vectors stay index-aligned
 * after both are sorted. */
static bool cmp1(const std::array<float, LNM_ARRAY_SIZE> &a, const std::array<float, LNM_ARRAY_SIZE> &b) {
    return b[0] < a[0];
}

/* Intersection-over-Union of two detections (left/top/width/height format).
 * Returns 0 when the rectangles do not overlap; a small epsilon in the
 * denominator guards against division by zero for degenerate boxes. */
static float iou( const NvDsInferParseObjectInfo & lbox, const NvDsInferParseObjectInfo & rbox) {
    const float xLeft   = std::max(lbox.left, rbox.left);
    const float xRight  = std::min(lbox.left + lbox.width,  rbox.left + rbox.width);
    const float yTop    = std::max(lbox.top,  rbox.top);
    const float yBottom = std::min(lbox.top + lbox.height, rbox.top + rbox.height);

    if (yTop > yBottom || xLeft > xRight)
        return 0.0f;

    const float interArea = (xRight - xLeft) * (yBottom - yTop);
    const float unionArea =
        lbox.width * lbox.height + rbox.width * rbox.height - interArea;
    return interArea / (unionArea + 0.000001f);
}



/* RetinaFace implementations */
/* Build an NvDsInferParseObjectInfo from two corner points, clipping the
 * rectangle to the network input resolution (netW x netH).  The input
 * coordinates are assumed to already be in network-input pixel space.
 * Confidence and class id are filled in by the caller. */
static NvDsInferParseObjectInfo convertBBoxRetinaFace(const float& bx1, const float& by1, const float& bx2,
                                     const float& by2, const uint& netW, const uint& netH)
{
    const float x1 = clamp(bx1, 0, netW);
    const float y1 = clamp(by1, 0, netH);
    const float x2 = clamp(bx2, 0, netW);
    const float y2 = clamp(by2, 0, netH);

    NvDsInferParseObjectInfo box;
    box.left   = x1;
    box.top    = y1;
    box.width  = clamp(x2 - x1, 0, netW);
    box.height = clamp(y2 - y1, 0, netH);
    return box;
}

static void addBBoxProposalRetinaFace(const float bx, const float by, const float bw, const float bh,
                     const uint& netW, const uint& netH, const int maxIndex,
                     const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo, std::array<float, LNM_ARRAY_SIZE>& lnm, 
                     std::vector<std::array<float, LNM_ARRAY_SIZE>>& i_lnm_vec)
{
    NvDsInferParseObjectInfo bbi = convertBBoxRetinaFace(bx, by, bw, bh, netW, netH);
    if (bbi.width < 1 || bbi.height < 1) return;
    bbi.detectionConfidence = maxProb;
    bbi.classId = maxIndex;
    //std::copy(std::begin(lnm), std::end(lnm), std::begin(bbi.landmarks));
    //bbi.landmarks = lnm;
    binfo.push_back(bbi);
    i_lnm_vec.push_back(lnm);
}

static std::vector<NvDsInferParseObjectInfo> decodeRetinaFaceTensor(
    const float* boxes,
    NvDsInferParseDetectionParams const& detectionParams,
    const uint& netW, const uint& netH)
{
    std::vector<NvDsInferParseObjectInfo> binfo;
    std::vector<NvDsInferParseObjectInfo> res;
    NvDsUserMetaList *obj_user_meta_list = NULL;
    std::array<float, LNM_ARRAY_SIZE> lnm;
    std::vector<std::array<float, LNM_ARRAY_SIZE>> i_lnm_vec;
    std::vector<std::array<float, LNM_ARRAY_SIZE>> f_lnm_vec;

    //std::cout << "Number of Detected boxes: " << boxes[0] << std::endl;
    for (uint b = 0; b < boxes[0]; b++){
        float maxProb = boxes[15 * b + 1 + 4];
        if (maxProb<=0.1) continue;
        
        //bbox
        float bx1 = boxes[15*b + 1];
        float by1 = boxes[15*b + 1 + 1];
        float bx2 = boxes[15*b + 1 + 2];
        float by2 = boxes[15*b + 1 + 3];
        
        //landmarks
        for (uint i = 0; i < LNM_ARRAY_SIZE; i++ )
            lnm[i]=boxes[15*b + 1 + 4 + i];

        int maxIndex = 0;
        if (maxProb > detectionParams.perClassPreclusterThreshold[maxIndex])
        {
            
            addBBoxProposalRetinaFace(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo, lnm, i_lnm_vec);
        }
    }

    
    std::sort(binfo.begin(), binfo.end(), cmp);
    std::sort(i_lnm_vec.begin(), i_lnm_vec.end(), cmp1);
    /*for ( size_t m = 0; m < binfo.size(); ++m )
    {
        NvDsInferParseObjectInfo item = binfo[m];
        std::array<float, LNM_ARRAY_SIZE> item1 = i_lnm_vec[m];
        assert( item.detectionConfidence == item1[0]);
        std::cout << "BBox Cords: " << item.left << " " << item.top << " " << (item.left+item.width) << " " << (item.top+item.height) << " " <<  item.detectionConfidence <<  std::endl;
        std::cout << "LNMs Cords: " << std::endl;
        for(auto i = item1.begin(); i != item1.end(); ++i)
        {
    	std::cout << *i << std::endl;
        }

    }*/
    assert( binfo.size() == i_lnm_vec.size());
    //std::cout << "Before IOU: " << binfo.size() << " " << i_lnm_vec.size() << std::endl;
    for (size_t m = 0; m < binfo.size(); ++m) {
        auto& item = binfo[m];
        auto& item1 = i_lnm_vec[m];
        res.push_back(item);
        f_lnm_vec.push_back(item1);
        for (size_t n = m + 1; n < binfo.size(); ++n) {
            if (iou(item, binfo[n]) > 0.4) { //nms threshold 0.4
                binfo.erase(binfo.begin()+n);
                i_lnm_vec.erase(i_lnm_vec.begin()+n);
                --n;
            }

        }
    }
    assert( res.size() == f_lnm_vec.size());
    //std::cout << "After IOU: " << res.size() << " " << f_lnm_vec.size() << std::endl;
    for ( size_t m = 0; m < f_lnm_vec.size(); ++m )
    {
        //NvDsInferParseObjectInfo item = binfo[m];
        std::array<float, LNM_ARRAY_SIZE> item1 = f_lnm_vec[m];
        NvDsUserMeta* um1;
        um1->user_meta_data = set_metadata_ptr(item1);
        //std::cout << "BBox Cords: " << item.left << " " << item.top << " " << (item.left+item.width) << " " << (item.top+item.height) << " " <<  item.detectionConfidence <<  std::endl;
        //std::cout << "LNMs Cords: " << std::endl;
        //for(auto i = item1.begin(); i != item1.end(); ++i)
        //{
    	//std::cout << *i << std::endl;
        //}
        obj_user_meta_list = g_list_append(obj_user_meta_list,um1);

    }

    
    return res;
}

/* Entry point called by nvinfer/nvinferserver to parse the RetinaFace output
 * layer into DeepStream object metadata.
 *
 * Improvement over the original: the decoded result is assigned to
 * `objectList` directly instead of being copied through two intermediate
 * vectors (`outObjs` -> `objects` -> `objectList`).  Always returns true. */
extern "C" bool NvDsInferParseCustomRetinaFace(
    std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
    // Single output layer: the decode plugin's packed detection tensor.
    const NvDsInferLayerInfo &boxes = outputLayersInfo[0];

    // Expected to be 3-dimensional (e.g. [num_boxes, 1, 15]).
    assert(boxes.inferDims.numDims == 3);

    objectList = decodeRetinaFaceTensor(
        (const float*)(boxes.buffer), detectionParams,
        networkInfo.width, networkInfo.height);

    return true;
}
/* RetinaFace implementations end*/


/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomRetinaFace);


1 Like

Hi @ronakchhatbar ,
You mean you are having issues in passing these landmarks further in the pipeline.
Could you share more details about this or a reproduction code?

Thanks!

@mchi,

Yes, we are having issues in passing the landmarks down the pipeline.

These are the steps we have followed:

  • Based on this repo we wrote the custom bbox function for Retinaface. We also added the NMS logic to filter the Bboxes. Below is the code for the custom bbox parse function:
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <unordered_map>
#include "nvdsinfer_custom_impl.h"

#define LNM_ARRAY_SIZE 11

/* Clamp `val` into the inclusive range [minVal, maxVal]. */
float clamp(const float val, const float minVal, const float maxVal)
{
    assert(minVal <= maxVal);
    if (val < minVal)
        return minVal;
    if (val > maxVal)
        return maxVal;
    return val;
}

/* Forward declaration (C linkage) of the custom parser entry point so the
 * CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE check at the bottom of this file and the
 * nvinfer/nvinferserver plugin can resolve the symbol. */
extern "C" bool NvDsInferParseCustomRetinaFace(
    std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList);


/* Descending-confidence ordering for std::sort: highest score first. */
static bool cmp(const NvDsInferParseObjectInfo & a, const NvDsInferParseObjectInfo & b) {
    return b.detectionConfidence < a.detectionConfidence;
}

/* Order landmark records by descending confidence (stored at index 0),
 * mirroring cmp() so the box and landmark vectors stay index-aligned
 * after both are sorted. */
static bool cmp1(const std::array<float, LNM_ARRAY_SIZE> &a, const std::array<float, LNM_ARRAY_SIZE> &b) {
    return b[0] < a[0];
}

/* Intersection-over-Union of two detections (left/top/width/height format).
 * Returns 0 when the rectangles do not overlap; a small epsilon in the
 * denominator guards against division by zero for degenerate boxes. */
static float iou( const NvDsInferParseObjectInfo & lbox, const NvDsInferParseObjectInfo & rbox) {
    const float xLeft   = std::max(lbox.left, rbox.left);
    const float xRight  = std::min(lbox.left + lbox.width,  rbox.left + rbox.width);
    const float yTop    = std::max(lbox.top,  rbox.top);
    const float yBottom = std::min(lbox.top + lbox.height, rbox.top + rbox.height);

    if (yTop > yBottom || xLeft > xRight)
        return 0.0f;

    const float interArea = (xRight - xLeft) * (yBottom - yTop);
    const float unionArea =
        lbox.width * lbox.height + rbox.width * rbox.height - interArea;
    return interArea / (unionArea + 0.000001f);
}



/* RetinaFace implementations */
/* Build an NvDsInferParseObjectInfo from two corner points, clipping the
 * rectangle to the network input resolution (netW x netH).  The input
 * coordinates are assumed to already be in network-input pixel space.
 * Confidence and class id are filled in by the caller. */
static NvDsInferParseObjectInfo convertBBoxRetinaFace(const float& bx1, const float& by1, const float& bx2,
                                     const float& by2, const uint& netW, const uint& netH)
{
    const float x1 = clamp(bx1, 0, netW);
    const float y1 = clamp(by1, 0, netH);
    const float x2 = clamp(bx2, 0, netW);
    const float y2 = clamp(by2, 0, netH);

    NvDsInferParseObjectInfo box;
    box.left   = x1;
    box.top    = y1;
    box.width  = clamp(x2 - x1, 0, netW);
    box.height = clamp(y2 - y1, 0, netH);
    return box;
}

static void addBBoxProposalRetinaFace(const float bx, const float by, const float bw, const float bh,
                     const uint& netW, const uint& netH, const int maxIndex,
                     const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo, std::array<float, LNM_ARRAY_SIZE>& lnm, 
                     std::vector<std::array<float, LNM_ARRAY_SIZE>>& i_lnm_vec)
{
    NvDsInferParseObjectInfo bbi = convertBBoxRetinaFace(bx, by, bw, bh, netW, netH);
    if (bbi.width < 1 || bbi.height < 1) return;
    bbi.detectionConfidence = maxProb;
    bbi.classId = maxIndex;
    binfo.push_back(bbi);
    i_lnm_vec.push_back(lnm);
}

/* Decode the RetinaFace output tensor.
 *
 * Tensor layout (as produced by the tensorrtx decode plugin):
 *   boxes[0]                  -> number of candidate detections
 *   per detection, 15 floats  -> [x1, y1, x2, y2, conf, 10 landmark coords]
 *
 * Performs confidence filtering and greedy NMS (IoU > 0.4), keeping the
 * landmark array index-aligned with its box throughout, and returns the
 * surviving boxes.
 *
 * Fix over the original: removed the unused `NvDsUserMetaList` local
 * (its type is not even declared in this translation unit's includes).
 */
static std::vector<NvDsInferParseObjectInfo> decodeRetinaFaceTensor(
    const float* boxes,
    NvDsInferParseDetectionParams const& detectionParams,
    const uint& netW, const uint& netH)
{
    std::vector<NvDsInferParseObjectInfo> binfo;
    std::vector<NvDsInferParseObjectInfo> res;
    std::array<float, LNM_ARRAY_SIZE> lnm;
    std::vector<std::array<float, LNM_ARRAY_SIZE>> i_lnm_vec;
    std::vector<std::array<float, LNM_ARRAY_SIZE>> f_lnm_vec;

    const uint numBoxes = static_cast<uint>(boxes[0]);
    for (uint b = 0; b < numBoxes; b++){
        const float* det = boxes + 15 * b + 1;   // 15 floats per detection
        const float maxProb = det[4];
        if (maxProb <= 0.1f) continue;           // cheap pre-filter

        // Corner-format bbox in network-input pixels.
        const float bx1 = det[0];
        const float by1 = det[1];
        const float bx2 = det[2];
        const float by2 = det[3];

        // lnm[0] = confidence, lnm[1..10] = 5 (x, y) landmark points.
        for (uint i = 0; i < LNM_ARRAY_SIZE; i++ )
            lnm[i] = det[4 + i];

        const int maxIndex = 0;                  // single class: face
        if (maxProb > detectionParams.perClassPreclusterThreshold[maxIndex])
        {
            addBBoxProposalRetinaFace(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo, lnm, i_lnm_vec);
        }
    }

    // Sort boxes and landmark records by descending confidence so the two
    // vectors stay index-aligned for the NMS pass below.
    std::sort(binfo.begin(), binfo.end(), cmp);
    std::sort(i_lnm_vec.begin(), i_lnm_vec.end(), cmp1);
    assert( binfo.size() == i_lnm_vec.size());

    // Greedy NMS: keep the current best box, erase any later box that
    // overlaps it with IoU > 0.4 (together with its paired landmarks).
    for (size_t m = 0; m < binfo.size(); ++m) {
        auto& item = binfo[m];
        auto& item1 = i_lnm_vec[m];
        res.push_back(item);
        f_lnm_vec.push_back(item1);
        for (size_t n = m + 1; n < binfo.size(); ++n) {
            if (iou(item, binfo[n]) > 0.4) { //nms threshold 0.4
                binfo.erase(binfo.begin()+n);
                i_lnm_vec.erase(i_lnm_vec.begin()+n);
                --n;
            }
        }
    }
    assert( res.size() == f_lnm_vec.size());

    // TODO(review): `f_lnm_vec` (the NMS-surviving landmarks) is computed but
    // discarded here — NvDsInferParseObjectInfo has no landmark field, so the
    // landmarks cannot be returned through `res`.  They must be exported some
    // other way (e.g. attached as user meta from a pad probe).
    return res;
}

/* Entry point called by nvinfer/nvinferserver to parse the RetinaFace output
 * layer into DeepStream object metadata.
 *
 * Improvement over the original: the decoded result is assigned to
 * `objectList` directly instead of being copied through two intermediate
 * vectors (`outObjs` -> `objects` -> `objectList`).  Always returns true. */
extern "C" bool NvDsInferParseCustomRetinaFace(
    std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList)
{
    // Single output layer: the decode plugin's packed detection tensor.
    const NvDsInferLayerInfo &boxes = outputLayersInfo[0];

    // Expected to be 3-dimensional (e.g. [num_boxes, 1, 15]).
    assert(boxes.inferDims.numDims == 3);

    objectList = decodeRetinaFaceTensor(
        (const float*)(boxes.buffer), detectionParams,
        networkInfo.width, networkInfo.height);

    return true;
}
/* RetinaFace implementations end*/


/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomRetinaFace);

  • We are able to pass the filtered bboxes further in the pipeline using the above function and visualize them in the OSD. The model config and the app config are attached for your reference.
    triton_config_retina.txt (2.6 KB) triton_deepstream_app_config_retina.txt (3.3 KB)

  • We want to also visualize the 5 Landmarks points for every bbox and which are stored in the
    f_lnm_vec vector for every frame. We are having issues in passing this vector further in the pipeline. We tried modifying the NvDsInferParseObjectInfo struct in nvdsinfer.h by adding a field for the landmarks. But, it gave SEG FAULTS and not advised as per this post.

  • We also tried passing the landmarks using the set_meta_ptr function which is adopted from the deepstream_user_metadata_app.c in sources/sample_apps. But, still getting SEG FAULTS.

{
    std::cout << "Malloc before" <<std::endl;
    gfloat *user_metadata = (gfloat*)g_malloc0(10*sizeof(gfloat));
    for(int i = 1; i < 11; i++) {
       user_metadata[i] = arr[i];
    }
    return (void *)user_metadata;
}

Function call:

// Attach each surviving landmark record as user metadata.
for ( size_t m = 0; m < f_lnm_vec.size(); ++m )
    {
        std::array<float, LNM_ARRAY_SIZE> item1 = f_lnm_vec[m];
        // BUG: `um1` is declared but never initialized; the write through it
        // below dereferences an indeterminate pointer — this is the reported
        // segfault.  Allocate it first (e.g. g_malloc0(sizeof(NvDsUserMeta)))
        // or, better, acquire it with nvds_acquire_user_meta_from_pool().
        NvDsUserMeta* um1;
        std::cout << "set m pointer" << std::endl;
        um1->user_meta_data = set_metadata_ptr(item1);
        obj_user_meta_list = g_list_append(obj_user_meta_list,um1);

    }

We have added the above code in custom bbox.
nvdsparsebbox_retina.cpp (6.8 KB)

Hello @mchi,

Is something more required from my end? If so, do let me know.

Thanks,
Ronak.

Hi @ronakchhatbar ,
Can you just share us a package? WIth this package, we can build, run and reproduce this issue.

Not sure how and where to apply the changes you mentioned above for the repo.

Hi, Ronakchhatbar
Run into error with your package, please see below, failed at loading model, i see it under directory, models/retinaface/1/model.plan, but from readme doc,
cp tensorrtx/retinaface/build/retina_50.trt cwd/models/retinaface/1/model.plan
do i use the right model file?
besides, from the readme,
Copy the libdecodeplugin.so file generated in tensorrtx in the current directory
i see you use deepstream-app, did you use standard or did some changes? if standard, the preload library should be not necessary.
thanks.

root@643292f8e640:/opt/nvidia/deepstream/deepstream-5.0/sources/DS_retina/DS_retina# LD_PRELOAD=libdecodeplugin.so deepstream-app -c triton_deepstream_app_config_retina.txt
ERROR: ld.so: object ‘libdecodeplugin.so’ from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
2021-02-19 14:06:14.455132: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
I0219 14:06:16.900867 122 metrics.cc:164] found 2 GPUs supporting NVML metrics
I0219 14:06:16.906390 122 metrics.cc:173] GPU 0: Tesla T4
I0219 14:06:16.912214 122 metrics.cc:173] GPU 1: Tesla P4
I0219 14:06:16.912392 122 server.cc:120] Initializing Triton Inference Server
E0219 14:06:17.016108 122 server.cc:138] failed to enable peer access for some device pairs
I0219 14:06:17.023506 122 server_status.cc:55] New status tracking for model ‘retinaface’
I0219 14:06:17.023640 122 model_repository_manager.cc:680] loading: retinaface:1
E0219 14:06:17.026535 122 logging.cc:43] Parameter check failed at: runtime.cpp::deserializeCudaEngine::30, condition: (blob) != nullptr
E0219 14:06:17.026604 122 model_repository_manager.cc:840] failed to load ‘retinaface’ version 1: Internal: unable to create TensorRT engine
ERROR: infer_trtis_server.cpp:617 TRTIS: failed to load model retinaface, trtis_err_str:INTERNAL, err_msg:failed to load ‘retinaface’, versions that are not available: 1
ERROR: infer_trtis_backend.cpp:42 failed to load model: retinaface, nvinfer error:NVDSINFER_TRTIS_ERROR
ERROR: infer_trtis_backend.cpp:184 failed to initialize backend while ensuring model:retinaface ready, nvinfer error:NVDSINFER_TRTIS_ERROR
0:00:03.250426991 122 0x5630adf5bec0 ERROR nvinferserver gstnvinferserver.cpp:362:gst_nvinfer_server_logger:<primary_gie> nvinferserver[UID 1]: Error in createNNBackend() <infer_trtis_context.cpp:223> [UID = 1]: failed to initialize trtis backend for model:retinaface, nvinfer error:NVDSINFER_TRTIS_ERROR
I0219 14:06:17.027039 122 server.cc:179] Waiting for in-flight inferences to complete.
I0219 14:06:17.027062 122 server.cc:194] Timeout 30: Found 0 live models and 0 in-flight requests
0:00:03.250607910 122 0x5630adf5bec0 ERROR nvinferserver gstnvinferserver.cpp:362:gst_nvinfer_server_logger:<primary_gie> nvinferserver[UID 1]: Error in initialize() <infer_base_context.cpp:78> [UID = 1]: create nn-backend failed, check config file settings, nvinfer error:NVDSINFER_TRTIS_ERROR
0:00:03.250626609 122 0x5630adf5bec0 WARN nvinferserver gstnvinferserver_impl.cpp:439:start:<primary_gie> error: Failed to initialize InferTrtIsContext
0:00:03.250634506 122 0x5630adf5bec0 WARN nvinferserver gstnvinferserver_impl.cpp:439:start:<primary_gie> error: Config file path: /opt/nvidia/deepstream/deepstream-5.0/sources/DS_retina/DS_retina/triton_config_retina.txt
0:00:03.251217817 122 0x5630adf5bec0 WARN nvinferserver gstnvinferserver.cpp:460:gst_nvinfer_server_start:<primary_gie> error: gstnvinferserver_impl start failed
** ERROR: main:655: Failed to set pipeline to PAUSED
Quitting

@amycao

Did you generate the tensorrt engine and copy to 1/? That engine file has to be generated from tensorrtx/retinaface/ for the GPU you are using, and then to be copied.

The model file I have shared is just a place holder.
Also, the libdecodeplugin.so file has to be used, that is generated in the tensorrtx/retinaface/build/

Please give full path for libdecodeplugin.so file

LD_PRELOAD=/fullpath/libdecodeplugin.so deepstream-app -c triton_deepstream_app_config_retina.txt

Hi @ronakchhatbar ,
I followed tensorrtx/retinaface at master · wang-xinyu/tensorrtx · GitHub to generate retina_r50.engine (FP16 prevision) and libdecodeplugin.so under tensorrtx/retinaface/build/ , and put them under
DS_retina/libdecodeplugin.so
DS_retina/models/retinaface/1/model.plan ($ cp retina_r50.engine DS_retina/models/retinaface/1/model.plan)

then execute command below, it still failed.
I’m confused that, since you already have tensorrt engine, why do you run nvinferserver instead of nvinfer ?
Now, I’m trying to read your post processing code to understand the issue. But, could you share me the failure log since I can’t reproduce the issue on my side.

$ LD_PRELOAD=/root/DS_retina/libdecodeplugin.so deepstream-app -c triton_deepstream_app_config_retina.txt
LD_PRELOAD=/root/DS_retina/libdecodeplugin.so deepstream-app -c triton_deepstream_app_config_retina.txt
2021-02-23 04:45:00.645274: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
I0223 04:45:03.115607 2059 metrics.cc:164] found 2 GPUs supporting NVML metrics
I0223 04:45:03.121257 2059 metrics.cc:173] GPU 0: Tesla T4
I0223 04:45:03.127130 2059 metrics.cc:173] GPU 1: Tesla P4
I0223 04:45:03.127302 2059 server.cc:120] Initializing Triton Inference Server
E0223 04:45:03.233991 2059 server.cc:138] failed to enable peer access for some device pairs
I0223 04:45:03.244209 2059 server_status.cc:55] New status tracking for model ‘retinaface’
I0223 04:45:03.244318 2059 model_repository_manager.cc:680] loading: retinaface:1
I0223 04:45:05.425665 2059 plan_backend.cc:267] Creating instance retinaface_0_0_gpu0 on GPU 0 (7.5) using model.plan
W0223 04:45:05.429412 2059 logging.cc:46] Current optimization profile is: 0. Please ensure there are no enqueued operations pending in this context prior to switching profiles
I0223 04:45:05.429756 2059 plan_backend.cc:599] Created instance retinaface_0_0_gpu0 on GPU 0 with stream priority 0
E0223 04:45:07.457735 2059 logging.cc:43] INVALID_CONFIG: The engine plan file is generated on an incompatible device, expecting compute 6.1 got compute 7.5, please rebuild.
E0223 04:45:07.457788 2059 logging.cc:43] engine.cpp (1407) - Serialization Error in deserialize: 0 (Core engine deserialization failure)
E0223 04:45:07.457896 2059 logging.cc:43] INVALID_STATE: std::exception
E0223 04:45:07.457932 2059 logging.cc:43] INVALID_CONFIG: Deserialize the cuda engine failed.
E0223 04:45:08.198888 2059 model_repository_manager.cc:840] failed to load ‘retinaface’ version 1: Internal: unable to create TensorRT engine
ERROR: infer_trtis_server.cpp:617 TRTIS: failed to load model retinaface, trtis_err_str:INTERNAL, err_msg:failed to load ‘retinaface’, versions that are not available: 1
ERROR: infer_trtis_backend.cpp:42 failed to load model: retinaface, nvinfer error:NVDSINFER_TRTIS_ERROR
ERROR: infer_trtis_backend.cpp:184 failed to initialize backend while ensuring model:retinaface ready, nvinfer error:NVDSINFER_TRTIS_ERROR
0:00:08.197391100 2059 0x558f7f4b50c0 ERROR nvinferserver gstnvinferserver.cpp:362:gst_nvinfer_server_logger:<primary_gie> nvinferserver[UID 1]: Error in createNNBackend() <infer_trtis_context.cpp:223> [UID = 1]: failed to initialize trtis backend for model:retinaface, nvinfer error:NVDSINFER_TRTIS_ERROR
I0223 04:45:08.200177 2059 server.cc:179] Waiting for in-flight inferences to complete.
I0223 04:45:08.200229 2059 server.cc:194] Timeout 30: Found 0 live models and 0 in-flight requests
0:00:08.197798057 2059 0x558f7f4b50c0 ERROR nvinferserver gstnvinferserver.cpp:362:gst_nvinfer_server_logger:<primary_gie> nvinferserver[UID 1]: Error in initialize() <infer_base_context.cpp:78> [UID = 1]: create nn-backend failed, check config file settings, nvinfer error:NVDSINFER_TRTIS_ERROR
0:00:08.197836691 2059 0x558f7f4b50c0 WARN nvinferserver gstnvinferserver_impl.cpp:439:start:<primary_gie> error: Failed to initialize InferTrtIsContext
0:00:08.197852318 2059 0x558f7f4b50c0 WARN nvinferserver gstnvinferserver_impl.cpp:439:start:<primary_gie> error: Config file path: /root/DS_retina/triton_config_retina.txt
0:00:08.198915563 2059 0x558f7f4b50c0 WARN nvinferserver gstnvinferserver.cpp:460:gst_nvinfer_server_start:<primary_gie> error: gstnvinferserver_impl start failed
** ERROR: main:655: Failed to set pipeline to PAUSED
Quitting
ERROR from primary_gie: Failed to initialize InferTrtIsContext
Debug info: gstnvinferserver_impl.cpp(439): start (): /GstPipeline:pipeline/GstBin:primary_gie_bin/GstNvInferServer:primary_gie:
Config file path: /root/DS_retina/triton_config_retina.txt
ERROR from primary_gie: gstnvinferserver_impl start failed
Debug info: gstnvinferserver.cpp(460): gst_nvinfer_server_start (): /GstPipeline:pipeline/GstBin:primary_gie_bin/GstNvInferServer:primary_gie
App run failed

I’m confused that, since you already have tensorrt engine, why do you run nvinferserver instead of nvinfer ?

We were facing some issues on loading nvinfer so we went with nvinferserver.

Now, I’m trying to read your post processing code to understand the issue. But, could you share me the failure log since I can’t reproduce the issue on my side.

The error we are facing.

# LD_PRELOAD=//yolo/yolov4_deepstream/deepstream_yolov4/libdecodeplugin.so deepstream-app -c triton_deepstream_app_config_retina.txt
2021-02-23 07:32:51.811133: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
I0223 07:32:54.516730 790 metrics.cc:164] found 1 GPUs supporting NVML metrics
I0223 07:32:54.522272 790 metrics.cc:173]   GPU 0: Tesla P100-SXM2-16GB
I0223 07:32:54.522570 790 server.cc:120] Initializing Triton Inference Server
I0223 07:32:54.654536 790 server_status.cc:55] New status tracking for model 'retinaface'
I0223 07:32:54.654646 790 model_repository_manager.cc:680] loading: retinaface:1
I0223 07:32:56.353702 790 plan_backend.cc:267] Creating instance retinaface_0_0_gpu0 on GPU 0 (6.0) using model.plan
W0223 07:32:56.358840 790 logging.cc:46] Current optimization profile is: 0. Please ensure there are no enqueued operations pending in this context prior to switching profiles
I0223 07:32:56.359280 790 plan_backend.cc:599] Created instance retinaface_0_0_gpu0 on GPU 0 with stream priority 0
I0223 07:32:56.363486 790 model_repository_manager.cc:837] successfully loaded 'retinaface' version 1
INFO: infer_trtis_backend.cpp:206 TrtISBackend id:1 initialized model: retinaface

Runtime commands:
	h: Print this help
	q: Quit

	p: Pause
	r: Resume

** INFO: <bus_callback:181>: Pipeline ready

W0223 07:32:56.524954 790 metrics.cc:276] failed to get energy consumption for GPU 0, NVML_ERROR 3
** INFO: <bus_callback:167>: Pipeline running

After IOU: 4 4
got lnm data
set m pointer
Malloc before
Malloc aftre
Test
Segmentation fault (core dumped)

This is the error we are facing.

BTW, should we create our custom data structure, to pass our customized data, like in apps/sample_apps/deepstream-user-metadata-test?

@mchi, @amycao

Any luck?

Sorry for the delay, will look when free.