Run inference on video files using Mask_Rcnn

Dear @Morganh

I want to run inference on video using a Mask R-CNN model trained with TAO. My requirement is to run the Mask R-CNN model on a video file using DeepStream; we have already created a script that performs inference on images, and that could be updated for video easily.

To run this model on deepstream container I tried below approach.

1. With tao command

root@smarg:~/data/Pritam/Script/SEGMENTATION/DS_SEGMENTATION/MULTI_STREAM_SEGMENTATION/deepstream_tao_apps-release-tao3.0_ds6.1ga/apps/tao_segmentation# ./ds-tao-segmentation -c ../../../SCRIPT/BOX-SEGMENT-CONFIG-TAO.txt -i /root/data/Pritam/TAO/Model-Training/BOX-SEGMENTATION_V1.0/mask_rcnn/VIDEO/CONTAINER_1.h264 -b 1 -d 1

BOX-SEGMENT-CONFIG-TAO.txt

[property]
net-scale-factor=0.017507
offsets=123.675;116.28;103.53

gpu-id=0
net-scale-factor=1.0
model-color-format=0
tlt-model-key=
uff-file=/root/data/Pritam/Script/SEGMENTATION/MODELS/model.epoch-24.uff
model-engine-file=/root/data/Pritam/Script/SEGMENTATION/MODELS/model.epoch-24.uff_b1_gpu0_fp16.engine
infer-dims=3;640;640
uff-input-order=0
uff-input-blob-name=Input
batch-size=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=2
interval=0
gie-unique-id=1
network-type=3
#output-blob-names=generate_detections;mask_fcn_logits/BiasAdd
output-blob-names=generate_detections
output-tensor-meta=0
segmentation-threshold=0.0

#parse-bbox-func-name=NvDsInferParseCustomSSD
#custom-lib-path=nvdsinfer_custom_impl_ssd/libnvdsinfer_custom_impl_ssd.so
#scaling-filter=0
#scaling-compute-hw=0
parse-bbox-instance-mask-func-name=NvDsInferParseCustomMrcnnTLTV2

custom-lib-path=/opt/nvidia/deepstream/deepstream/lib/libnvds_infercustomparser.so


cluster-mode=4
output-instance-mask=1

[class-attrs-all]
pre-cluster-threshold=0.6

OUTPUT:

I am unable to see a single output on the video, but images extracted from the same video do give output using our Mask R-CNN inference script; below is the output of that Mask R-CNN inference.

I also explored some other sample applications but am not getting a single detection using DeepStream or DeepStream-TAO. Can you please suggest how to run inference on video using Mask R-CNN inside DeepStream or DeepStream-TAO? If there is any sample application available, please point me to it.

Thanks.

For deepstream running mask_rcnn model, please refer to https://docs.nvidia.com/tao/tao-toolkit/text/ds_tao/mrcnn_ds.html#deploying-to-deepstream-mrcnn.

Hi @Morganh

I tried and followed the same way as explained but am unable to get output.

Below is the modified script.

import sys

sys.path.append('../')
import gi
import math
import cv2
import pyds
import numpy as np
import os.path
from os import path

gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst

from common.is_aarch_64 import is_aarch64
from common.bus_call import bus_call

# Maximum length of on-screen display text.
MAX_DISPLAY_LEN = 64
# nvstreammux output resolution; matches the model's 640x640 input.
MUXER_OUTPUT_WIDTH = 640
MUXER_OUTPUT_HEIGHT = 640
# How long (usec) the muxer waits to fill a batch before pushing it anyway.
MUXER_BATCH_TIMEOUT_USEC = 4000000
# Tiler output resolution (one tile per source).
TILED_OUTPUT_WIDTH = 640
TILED_OUTPUT_HEIGHT = 640
# BGR palette used to colorize segmentation class ids: class id i -> COLORS[i].
COLORS = [[128, 128, 64], [0, 0, 128], [0, 128, 128], [128, 0, 0],
          [128, 0, 128], [128, 128, 0], [0, 128, 0], [0, 0, 64],
          [0, 0, 192], [0, 128, 64], [0, 128, 192], [128, 0, 64],
          [128, 0, 192], [128, 128, 128]]

def map_mask_as_display_bgr(mask):
    """Convert a 2-D class-id mask into a BGR image for saving/display.

    Every distinct class id present in *mask* is painted with a fixed color
    from COLORS. Returns an (H, W, 3) uint8 array suitable for cv2.imwrite.
    """
    shp = mask.shape
    # uint8, not the float64 np.zeros default: image writers expect 8-bit pixels.
    bgr = np.zeros((shp[0], shp[1], 3), dtype=np.uint8)
    for idx in np.unique(mask):
        # Wrap ids beyond the palette instead of raising IndexError.
        bgr[mask == idx] = COLORS[int(idx) % len(COLORS)]
    return bgr

def seg_src_pad_buffer_probe(pad, info, u_data):
    """Pad probe: save each frame's segmentation mask as a color image.

    Walks the batch metadata attached by nvinfer, finds segmentation user
    meta (NVDSINFER_SEGMENTATION_META) on every frame, colorizes the mask
    and writes it to <folder_name>/<frame_number>.jpg.
    """
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer ")
        # A pad probe must return a Gst.PadProbeReturn value; returning
        # None (as the original did) is an invalid probe return.
        return Gst.PadProbeReturn.OK

    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break
        frame_number = frame_meta.frame_num
        l_user = frame_meta.frame_user_meta_list
        while l_user is not None:
            try:
                seg_user_meta = pyds.NvDsUserMeta.cast(l_user.data)
            except StopIteration:
                break
            if seg_user_meta and seg_user_meta.base_meta.meta_type == pyds.NVDSINFER_SEGMENTATION_META:
                try:
                    segmeta = pyds.NvDsInferSegmentationMeta.cast(seg_user_meta.user_meta_data)
                except StopIteration:
                    break
                # Copy the mask out of the GStreamer-owned buffer before use.
                masks = pyds.get_segmentation_masks(segmeta)
                masks = np.array(masks, copy=True, order='C')
                frame_image = map_mask_as_display_bgr(masks)
                cv2.imwrite(os.path.join(folder_name, str(frame_number) + ".jpg"), frame_image)
            try:
                l_user = l_user.next
            except StopIteration:
                break
        try:
            l_frame = l_frame.next
        except StopIteration:
            break
    return Gst.PadProbeReturn.OK

def cb_newpad(decodebin, decoder_src_pad, data):
    """pad-added callback: point the source bin's ghost pad at the
    decoder's newly created video pad."""
    print("In cb_newpad\n")
    caps = decoder_src_pad.get_current_caps()
    media_name = caps.get_structure(0).get_name()
    features = caps.get_features(0)
    source_bin = data

    # Ignore non-video pads (e.g. audio).
    if "video" not in media_name:
        return
    if not features.contains("memory:NVMM"):
        sys.stderr.write("Error: Decodebin did not pick nvidia decoder plugin.\n")
        return
    ghost_pad = source_bin.get_static_pad("src")
    if not ghost_pad.set_target(decoder_src_pad):
        sys.stderr.write("Failed to link decoder src pad to source bin ghost pad\n")

def decodebin_child_added(child_proxy, Object, name, user_data):
    """child-added callback: recurse into nested decodebins and, on dGPU,
    make the hardware decoder output CUDA device memory."""
    print("Decodebin child added:", name, "\n")
    if "decodebin" in name:
        Object.connect("child-added", decodebin_child_added, user_data)

    if "nvv4l2decoder" in name and not is_aarch64():
        # 2 = CUDA device memory (dGPU only; not applicable on Jetson).
        Object.set_property("cudadec-memtype", 2)

def create_source_bin(index, uri):
    """Wrap a uridecodebin for *uri* in a Gst.Bin exposing a ghost "src" pad.

    The ghost pad starts with no target; cb_newpad retargets it onto the
    decoder's video pad once that pad appears.
    """
    print("Creating source bin")

    bin_name = "source-bin-%02d" % index
    print(bin_name)
    nbin = Gst.Bin.new(bin_name)
    if not nbin:
        sys.stderr.write("Unable to create source bin \n")

    uri_decode_bin = Gst.ElementFactory.make("uridecodebin", "uri-decode-bin")
    if not uri_decode_bin:
        sys.stderr.write("Unable to create uri decode bin \n")
    uri_decode_bin.set_property("uri", uri)
    uri_decode_bin.connect("pad-added", cb_newpad, nbin)
    uri_decode_bin.connect("child-added", decodebin_child_added, nbin)

    Gst.Bin.add(nbin, uri_decode_bin)
    ghost = Gst.GhostPad.new_no_target("src", Gst.PadDirection.SRC)
    if not nbin.add_pad(ghost):
        sys.stderr.write("Failed to add ghost pad in source bin \n")
        return None
    return nbin

def main(args):
    """Build and run the pipeline:

        uridecodebin(s) -> nvstreammux -> nvinfer -> nvsegvisual -> sink

    args: [script, config_file, uri..., output_folder]. Saves one colorized
    mask image per frame into output_folder via seg_src_pad_buffer_probe.
    """
    if len(args) < 4:
        sys.stderr.write("usage: %s config_file <rtsp or video file> <path to save seg images>\n" % args[0])
        sys.exit(1)

    global folder_name
    folder_name = args[-1]
    if path.exists(folder_name):
        sys.stderr.write("The output folder %s already exists. Please remove it first.\n" % folder_name)
        sys.exit(1)

    os.mkdir(folder_name)

    config_file = args[1]
    num_sources = len(args) - 3
    Gst.init(None)

    pipeline = Gst.Pipeline()
    if not pipeline:
        sys.stderr.write("Unable to create Pipeline \n")

    streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
    if not streammux:
        sys.stderr.write("Unable to create NvStreamMux \n")

    streammux.set_property('width', MUXER_OUTPUT_WIDTH)
    streammux.set_property('height', MUXER_OUTPUT_HEIGHT)
    streammux.set_property('batch-size', num_sources)
    streammux.set_property('batched-push-timeout', MUXER_BATCH_TIMEOUT_USEC)

    pipeline.add(streammux)

    # One source bin per input URI, each linked to a requested muxer sink pad.
    for i in range(num_sources):
        uri_name = args[i + 2]
        source_bin = create_source_bin(i, uri_name)
        if not source_bin:
            sys.stderr.write("Unable to create source bin \n")
        pipeline.add(source_bin)
        padname = "sink_%u" % i
        sinkpad = streammux.get_request_pad(padname)
        if not sinkpad:
            sys.stderr.write("Unable to create sink pad bin \n")
        srcpad = source_bin.get_static_pad("src")
        if not srcpad:
            sys.stderr.write("Unable to create src pad bin \n")
        srcpad.link(sinkpad)

    seg = Gst.ElementFactory.make("nvinfer", "primary-nvinference-engine")
    if not seg:
        sys.stderr.write("Unable to create primary inference engine\n")

    nvsegvisual = Gst.ElementFactory.make("nvsegvisual", "nvsegvisual")
    if not nvsegvisual:
        sys.stderr.write("Unable to create nvsegvisual\n")

    sink = Gst.ElementFactory.make("nveglglessink", "nvvideo-renderer")
    if not sink:
        sys.stderr.write("Unable to create egl sink \n")

    seg.set_property('config-file-path', config_file)
    seg.set_property("batch-size", num_sources)
    nvsegvisual.set_property('batch-size', num_sources)
    nvsegvisual.set_property('width', 640)
    nvsegvisual.set_property('height', 640)
    sink.set_property("qos", 0)

    pipeline.add(seg)
    pipeline.add(nvsegvisual)
    pipeline.add(sink)

    streammux.link(seg)
    seg.link(nvsegvisual)
    nvsegvisual.link(sink)

    # BUG FIX: the probe that actually saves the masks was defined but never
    # attached, so no output was ever produced. Attach it to the inference
    # element's src pad (same as NVIDIA's deepstream-segmentation sample).
    seg_src_pad = seg.get_static_pad("src")
    if not seg_src_pad:
        sys.stderr.write("Unable to get src pad of primary inference engine\n")
    else:
        seg_src_pad.add_probe(Gst.PadProbeType.BUFFER, seg_src_pad_buffer_probe, 0)

    loop = GLib.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", bus_call, loop)

    # Start playback and block in the GLib main loop until EOS/error/Ctrl-C.
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except:
        pass

    pipeline.set_state(Gst.State.NULL)

if __name__ == '__main__':
    # Script entry point: forward CLI args and propagate the exit status.
    sys.exit(main(sys.argv))


Config:

################################################################################
# SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

# Following properties are mandatory when engine files are not specified:
#   int8-calib-file(Only in INT8), model-file-format
#   Caffemodel mandatory properties: model-file, proto-file, output-blob-names
#   UFF: uff-file, input-dims, uff-input-blob-name, output-blob-names
#   ONNX: onnx-file
#
# Mandatory properties for detectors:
#   num-detected-classes
#
# Optional properties for detectors:
#   cluster-mode(Default=Group Rectangles), interval(Primary mode only, Default=0)
#   custom-lib-path,
#   parse-bbox-func-name
#
# Mandatory properties for classifiers:
#   classifier-threshold, is-classifier
#
# Optional properties for classifiers:
#   classifier-async-mode(Secondary mode only, Default=false)
#
# Optional properties in secondary mode:
#   operate-on-gie-id(Default=0), operate-on-class-ids(Defaults to all classes),
#   input-object-min-width, input-object-min-height, input-object-max-width,
#   input-object-max-height
#
# Following properties are always recommended:
#   batch-size(Default=1)
#
# Other optional properties:
#   net-scale-factor(Default=1), network-mode(Default=0 i.e FP32),
#   model-color-format(Default=0 i.e. RGB) model-engine-file, labelfile-path,
#   mean-file, gie-unique-id(Default=0), offsets, process-mode (Default=1 i.e. primary),
#   custom-lib-path, network-mode(Default=0 i.e FP32)
#
# The values in the config file are overridden by values set through GObject
# properties.

[property]
net-scale-factor=0.017507
offsets=123.675;116.28;103.53

gpu-id=0
net-scale-factor=1.0
model-color-format=0
uff-file=/root/data/Pritam/Script/SEGMENTATION/MODELS/model.epoch-24.uff
model-engine-file=/root/data/Pritam/Script/SEGMENTATION/MODELS/model.epoch-24.uff_b1_gpu0_fp16.engine
infer-dims=3;640;640
uff-input-order=0
uff-input-blob-name=Input
batch-size=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=2
interval=0
gie-unique-id=1
network-type=3
output-blob-names=generate_detections;mask_fcn_logits/BiasAdd
output-tensor-meta=0
segmentation-threshold=0.0

#parse-bbox-func-name=NvDsInferParseCustomSSD
#custom-lib-path=nvdsinfer_custom_impl_ssd/libnvdsinfer_custom_impl_ssd.so
#scaling-filter=0
#scaling-compute-hw=0
parse-bbox-instance-mask-func-name=NvDsInferParseCustomMrcnnTLTV2

custom-lib-path=/opt/nvidia/deepstream/deepstream-7.0/lib/libnvds_infercustomparser.so


cluster-mode=4
output-instance-mask=1

[class-attrs-all]
roi-top-offset=0
roi-bottom-offset=0
pre-cluster-threshold=0.2
detected-min-w=0
detected-min-h=0
detected-max-w=0
detected-max-h=0



It is not giving bounding boxes or segmentation output.

This is related to deepstream. Could you please create a topic to deepstream forum for better help?

Yes, I had created one but didn’t get clarity there.

Please track with that topic since the latest error is related to deepstream.