Modifying NvDsInferObjectDetectionInfo to get the mask from Mask RCNN

Hello,

I am trying to get the mask from Mask RCNN, so I modified the NvDsInferObjectDetectionInfo structure in the nvdsinfer.h file. I also changed a few things in the nvdsinfer_custombboxparser_mrcnn_uff.cpp file. I built the .so file successfully, but when I run it in DeepStream, it fails. I think my nvdsinfer_custombboxparser_mrcnn_uff.cpp file has a problem. How can I fix that?

nvdsinfer_custombboxparser_mrcnn_uff.cpp

#include <cstring>
#include <iostream>
#include <vector>

#include <fstream>

#include "nvdsinfer_custom_impl.h"
#include <cassert>

// Helper macros. Every argument is parenthesized so that compound
// expressions (e.g. `x + 1`) expand with the intended precedence.
// NOTE: arguments may be evaluated more than once; avoid side effects.
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
// Clamp `a` into the inclusive range [min, max].
#define CLIP(a,min,max) (MAX(MIN((a), (max)), (min)))
// Integer ceiling division: smallest q such that q * b >= a (positive operands).
#define DIVIDE_AND_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

using namespace std;

// Upper bound on the number of detections read from the detection layer
static constexpr int DETECTION_MAX_INSTANCES{100};

// Total class count: one background class plus one foreground class
static constexpr int NUM_CLASSES{1 + 1};

// Mask pooling size; a raw mask has (2 * MASK_POOL_SIZE) values per side
static constexpr int MASK_POOL_SIZE{14};
// Network input shape; decodeOutput reads d[1] as the height and d[2] as the width.
static const nvinfer1::DimsCHW INPUT_SHAPE{3, 1024, 1024};
// Shapes of the detection and mask output layers, kept for reference only (disabled):
//static const Dims2 MODEL_DETECTION_SHAPE{DETECTION_MAX_INSTANCES, 6};
//static const Dims4 MODEL_MASK_SHAPE{DETECTION_MAX_INSTANCES, NUM_CLASSES, 28, 28};

// Bounding box stored as two corner coordinates: (x1, y1) and (x2, y2).
struct MRCNNBBox {
    float x1;
    float y1;
    float x2;
    float y2;
};

// Flat storage for one mask: (2 * MASK_POOL_SIZE) x (2 * MASK_POOL_SIZE) floats.
struct MRCNNMask {
    float raw[(MASK_POOL_SIZE * 2) * (MASK_POOL_SIZE * 2)];
};

// One decoded detection: its box, class label, confidence and mask pointer.
struct MRCNNBBoxInfo {
    MRCNNBBox box;      // left uninitialized by default, as before
    int label{-1};      // -1 means "no label assigned yet"
    float prob{0.0f};

    // Raw pointer to the mask data; this struct never allocates or frees it.
    MRCNNMask* mask{nullptr};
};

// One row of the detection output as six consecutive floats.
// The member order must not change: decodeOutput overlays this struct
// directly on the raw float buffer.
struct RawDetection {
    float y1;
    float x1;
    float y2;
    float x2;
    float class_id;
    float score;
};


/* Sample bounding-box parsing function for the sample Mask R-CNN detector
 * model provided with the SDK.
 *
 * Reads the network output layers in outputLayersInfo and appends one
 * NvDsInferObjectDetectionInfo per detection to objectList.
 * Returns false when a required output layer cannot be found, true otherwise. */

/* Declared with C linkage to prevent name mangling. */
extern "C"
bool NvDsInferParseCustomMrcnnUff (
    std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
    NvDsInferNetworkInfo  const &networkInfo,
    NvDsInferParseDetectionParams const &detectionParams,
    std::vector<NvDsInferObjectDetectionInfo> &objectList);

//static int64_t volume(const nvinfer1::Dims& d) {
//    return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
//}

std::vector<MRCNNBBoxInfo> decodeOutput(void* detectionsHost, void* masksHost) {
    int input_dim_h = INPUT_SHAPE.d[1];
    int input_dim_w = INPUT_SHAPE.d[2];
    assert(input_dim_h == input_dim_w);

    std::vector<MRCNNBBoxInfo> binfo;

    //int detectionOffset = volume(MODEL_DETECTION_SHAPE); // (100,6)
    //int maskOffset = volume(MODEL_MASK_SHAPE);           // (100, 81, 28, 28)

    RawDetection* detections = reinterpret_cast<RawDetection*>((float*) detectionsHost);
    MRCNNMask* masks = reinterpret_cast<MRCNNMask*>((float*) masksHost);

    // ofstream myfile;
    // myfile.open("/home/nvidia/Desktop/mask.txt");

    for (int det_id = 0; det_id < DETECTION_MAX_INSTANCES; det_id++) {
        RawDetection cur_det = detections[det_id];
        MRCNNMask cur_mask = masks[det_id];

        int label = (int) cur_det.class_id;
        if (label <= 0)
            continue;
        
        MRCNNBBoxInfo det;
        det.label = label;
        det.prob = cur_det.score;

        det.box.x1 = cur_det.x1 ;
        det.box.y1 = cur_det.y1 ;
        det.box.x2 = cur_det.x2 ;
        det.box.y2 = cur_det.y2 ;

        if (det.box.x2 <= det.box.x1 || det.box.y2 <= det.box.y1)
            continue;

        // det.mask = masks + det_id * NUM_CLASSES + label;

        // for(int raw_id=0; raw_id < MASK_POOL_SIZE * 2 * MASK_POOL_SIZE * 2; raw_id++){
        //     cout << "mask " << cur_mask.raw[raw_id] << endl;
        //     myfile << cur_mask.raw[raw_id] << " ";
        // }
        det.mask = &cur_mask;
        binfo.push_back(det);

    }

    return binfo;
}

extern "C"
bool NvDsInferParseCustomMrcnnUff (std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
                                   NvDsInferNetworkInfo  const &networkInfo,
                                   NvDsInferParseDetectionParams const &detectionParams,
                                   std::vector<NvDsInferObjectDetectionInfo> &objectList) {

    static int detIndex = -1;
    static int maskIndex = -1;

    /* Find the detection layer */
    if (detIndex == -1) {
        for (unsigned int i = 0; i < outputLayersInfo.size(); i++) {
            if (strcmp(outputLayersInfo[i].layerName, "mrcnn_detection") == 0) {
                detIndex = i;
                break;
            }
        }
        if (detIndex == -1) {
            std::cerr << "Could not find detection layer buffer while parsing" << std::endl;
            return false;
        }
    }

    /* Find the mask layer */
    if (maskIndex == -1) {
        for (unsigned int i = 0; i < outputLayersInfo.size(); i++) {
            if (strcmp(outputLayersInfo[i].layerName, "mrcnn_mask/Sigmoid") == 0) {
                maskIndex = i;
                break;
            }
        }
        if (maskIndex == -1) {
            std::cerr << "Could not find mask layer buffer while parsing" << std::endl;
            return false;
        }
    }

    float* out_det = (float *) outputLayersInfo[detIndex].buffer;
    float* out_mask = (float *) outputLayersInfo[maskIndex].buffer;

    std::vector<MRCNNBBoxInfo> binfo = decodeOutput(out_det, out_mask);

    for (unsigned int roi_id = 0; roi_id < binfo.size(); roi_id++) {
        NvDsInferObjectDetectionInfo object;
        object.classId = binfo[roi_id].label;
        object.detectionConfidence = binfo[roi_id].prob;

        /* Clip object box co-ordinates to network resolution */
        object.left = CLIP(binfo[roi_id].box.x1 * networkInfo.width, 0, networkInfo.width - 1);
        object.top = CLIP(binfo[roi_id].box.y1 * networkInfo.height, 0, networkInfo.height - 1);
        object.width = CLIP((binfo[roi_id].box.x2 - binfo[roi_id].box.x1) * networkInfo.width, 0, networkInfo.width - 1);
        object.height = CLIP((binfo[roi_id].box.y2 - binfo[roi_id].box.y1) * networkInfo.height, 0, networkInfo.height - 1);

        for(int raw_id=0; raw_id < MASK_POOL_SIZE * 2 * MASK_POOL_SIZE * 2; raw_id++){
            object.mask_cus[raw_id] = binfo[roi_id].mask->raw[raw_id];

        objectList.push_back(object);
        
        }
    }

    return true;
}

/* Check that the custom parse function has been defined with the expected
 * prototype (the macro is presumably provided by nvdsinfer_custom_impl.h). */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomMrcnnUff);

Thanks !

Hi
Can you refer to this sample, in case it helps:

Thanks

Hi SunilJB,

Thanks for your help! But do you have any solution for showing the object's mask in DeepStream without dumping the mask to a PPM file?

Thanks

Maybe this sample will help in that case:

Thanks

Hi SunilJB,

Yes. I modified the code from your link; it is the “nvdsinfer_custombboxparser_mrcnn_uff.cpp” file I attached in my first comment. I try to put the mask into the object here:

for(int raw_id=0; raw_id < MASK_POOL_SIZE * 2 * MASK_POOL_SIZE * 2; raw_id++){
            object.mask_cus[raw_id] = binfo[roi_id].mask->raw[raw_id];

        objectList.push_back(object);

But maybe something is wrong, because when I run it in DeepStream, it fails.

Thanks

The NvDsInferObjectDetectionInfo struct doesn’t have a “mask_cus” field.
https://docs.nvidia.com/metropolis/deepstream/4.0/dev-guide/DeepStream_Development_Guide/baggage/structNvDsInferObjectDetectionInfo.html

Thanks

Hi SunilJB,

I modified nvdsinfer.h and updated the DeepStream app, which is located at /usr/bin, like here

Thanks

Please raise this issue in the DeepStream SDK forum:
https://forums.developer.nvidia.com/c/accelerated-computing/intelligent-video-analytics/deepstream-sdk/15

Thanks

Thank you.