Create Custom Bounding boxes

I’m creating an alarm system that has a “detection area” to prevent many alarms from being fired by the same event.

Everything is working fine; I would like to show that “detection area” in the output video stream, for testing and to understand it better.

Is it possible to do this? I’m working with the Python deepstream-imagedata-multistream example as a code base.

I’m using DeepStream SDK 5.0.

Thanks!

Would you mind sharing your setup?

Hi janduano,

Is this still an issue to support? Could you share your setup?

I’m not sure what you mean by setup, but I’m using a Jetson Nano with DeepStream SDK 5.0.

The code is this:

import sys
sys.path.append('../')
import gi
import configparser
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst
from gi.repository import GLib
from ctypes import *
import time
import sys
import math
import platform
from common.is_aarch_64 import is_aarch64
from common.bus_call import bus_call
from common.FPS import GETFPS
from datetime import datetime
import numpy as np
import pyds
import cv2
import os
import os.path
from os import path
from shapely.geometry import Polygon
from pprint import pprint

from datetime import datetime

import pymongo
from pymongo import MongoClient

import logging
import sys

import json


# Per-stream runtime state.
# fps_streams is filled in init_configuration() with one GETFPS counter per
# source, keyed "stream<index>".
fps_streams={}
# frame_count / saved_count are initialised in create_pipeline() with keys of
# the form "stream_<source id>".
frame_count={}
saved_count={}
# NOTE(review): `global` at module scope has no effect; these two statements
# look like leftovers and can probably be dropped.
global PGIE_CLASS_ID_VEHICLE
global PGIE_CLASS_ID_PERSON

MAX_DISPLAY_LEN = 64
# Class ids of the primary detector; they index pgie_classes_str below.
PGIE_CLASS_ID_VEHICLE = 0
PGIE_CLASS_ID_BICYCLE = 1
PGIE_CLASS_ID_PERSON = 2
PGIE_CLASS_ID_ROADSIGN = 3
MUXER_OUTPUT_WIDTH = 640
MUXER_OUTPUT_HEIGHT = 480
MUXER_BATCH_TIMEOUT_USEC = 4000000
# Size of the tiled output, applied to the tiler in create_pipeline().
TILED_OUTPUT_WIDTH = 640
TILED_OUTPUT_HEIGHT = 480
GST_CAPS_FEATURES_NVMM = "memory:NVMM"
# Human-readable class names, indexed by detector class id.
pgie_classes_str = ["Vehicle", "TwoWheeler", "Person", "RoadSign"]

OUTPUT_STREAM = False

# Module-level slot for a database client; starts out as None.
CLIENT_DB = None
# Host and port handed to MongoClient.
mongo_url = "localhost"
mongo_port = 27017
# Both durations are multiplied by FPS before being compared with frame
# numbers in validation_fire_alarm2(), i.e. they are expressed in seconds.
LIFE_DETECTION_AREA = 5*60
MAX_TIME_DETECTION_AREA = 8*60
# Frame rate used to convert between seconds and frame numbers.
FPS = 12
# save_detection_image() stores a frame only when the detection confidence is
# below CONFIDENCE_SAVE_IMAGE; below CONFIDENCE_VALIDATE_SAVE_IMAGE the frame
# goes to the "validate" subfolder.
CONFIDENCE_SAVE_IMAGE = 0.7
CONFIDENCE_VALIDATE_SAVE_IMAGE = 0.3
# Minimum fraction of the detection box that must lie inside the camera's
# detection area (see validate_detection_area()).
COF_INTERACTION_MIN_AREA = 0.6
FACTOR_DETECTION_AREA = 0.8
# Master switch for saving frames in save_detection_image().
TAKE_IMAGE = False
# Wall-clock time at import, printed and stored by create_sumery().
START_TIME = datetime.now().strftime("%H:%M:%S")

# Camera table. The list index is the stream id: the probe code indexes this
# list with frame_meta.pad_index, and create_pipeline() requests the muxer pad
# "sink_<index>" for each entry.
#   id             - short identifier, used for the frame_count/saved_count keys
#   name           - display name used in summaries and alarm logs
#   rtsp           - URI handed to uridecodebin
#   area           - vertices of the detection-area polygon
#                    (presumably pixel coordinates of the 1920x1080 muxer
#                    output - TODO confirm)
#   cant_alarms    - number of alarms fired so far
#   last_alarm     - frame number of the last fired alarm
#   last_detection - frame number of the last detection inside the area
#   alarms         - "%H:%M:%S" timestamps of the fired alarms
SOURCES_RTSP = [
    {
        'id': '26_plaza_park',
        'name': "26-Plaza Park ",
        'rtsp': "...",
        'area': [(1112, 49), (886, 983), (1693, 572), (1422, 42)],
        'cant_alarms': 0,
        'last_alarm': -800,
        'last_detection': -800,
        'alarms': []
    },
    {
        'id': '36_va_palermo',
        'name': "32-Ve Palermo ",
        'rtsp': "...",
        'area': [(141, 1062), (1032, 96), (1190, 178), (1417, 1067)],
        'cant_alarms': 0,
        'last_alarm': -800,
        'last_detection': -800,
        'alarms': []
    },
    {
        'id': '76_gariocho',
        'name': "76 - Gariocho ",
        'rtsp': "...",
        'area': [(1343, 39), (1372, 193), (1048, 1078), (675, 576)],
        'cant_alarms': 0,
        'last_alarm': -800,
        'last_detection': -800,
        'alarms': []
    },
    {
        'id': '112_alta_corte',
        'name': "112-Alta Corte",
        'rtsp': "...",
        'area': [(534, 1078), (865, 1078), (1599, 415), (1730, 6), (200, 649)],
        'cant_alarms': 0,
        'last_alarm': -800,
        'last_detection': -800,
        'alarms': []
    }
]


# tiler_sink_pad_buffer_probe is attached to the tiler sink pad. It walks the
# batch metadata of every buffer and runs the alarm logic for each detected object.

def tiler_sink_pad_buffer_probe(pad, info, u_data):
    """Pad probe that runs the alarm logic for every object in a batch.

    For each frame in the buffer's batch metadata, every detected object is
    passed to fire_alarm(); when an alarm fires, save_detection_image() is
    called for that object. A summary is written with create_sumery() on
    frames whose number is a multiple of 30 * FPS, after the first object of
    that frame has been processed.

    Args:
        pad: Pad the probe is attached to (unused).
        info: Gst.PadProbeInfo carrying the buffer.
        u_data: User data given to add_probe (unused).

    Returns:
        Gst.PadProbeReturn.OK in all cases, so the buffer keeps flowing.
    """
    global CLIENT_DB

    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer ")
        # A probe callback has to return a Gst.PadProbeReturn value.
        return Gst.PadProbeReturn.OK

    # The probe runs for every buffer, so the client is created once and
    # reused instead of opening a new connection per buffer.
    if CLIENT_DB is None:
        CLIENT_DB = MongoClient(mongo_url, mongo_port)
    mongo_db = CLIENT_DB.local

    # Retrieve batch metadata from the gst_buffer
    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
    # C address of gst_buffer as input, which is obtained with hash(gst_buffer)
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))

    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            # l_frame.data needs a cast to pyds.NvDsFrameMeta. The cast keeps
            # ownership of the underlying memory in the C code, so the Python
            # garbage collector will leave it alone.
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        frame_number = frame_meta.frame_num
        is_first_obj = True

        l_obj = frame_meta.obj_meta_list
        while l_obj is not None:
            try:
                # Casting l_obj.data to pyds.NvDsObjectMeta
                obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break

            if fire_alarm(mongo_db, obj_meta, frame_meta, frame_number):
                save_detection_image(obj_meta, gst_buffer, frame_meta, frame_number)

            # At most one summary per frame, every 30 * FPS frames.
            if is_first_obj and frame_number % (FPS * 30) == 0:
                is_first_obj = False
                create_sumery(mongo_db)

            try:
                l_obj = l_obj.next
            except StopIteration:
                break

        # Tick the FPS counter once per frame (not once per object), so the
        # reported rate does not depend on how many objects were detected.
        fps_streams["stream{0}".format(frame_meta.pad_index)].get_fps()

        try:
            l_frame = l_frame.next
        except StopIteration:
            break

    return Gst.PadProbeReturn.OK

def draw_bounding_boxes(image,obj_meta,confidence):
    """Draw the object's box and a text caption on `image` and return it.

    The caption holds the class name, the confidence with two decimals and
    the box values in the order (top,left)(width,height).
    """
    conf_text = '{0:.2f}'.format(confidence)
    box = obj_meta.rect_params
    top, left = int(box.top), int(box.left)
    width, height = int(box.width), int(box.height)
    label = pgie_classes_str[obj_meta.class_id]
    colour = (0, 0, 255, 0)

    image = cv2.rectangle(image, (left, top), (left + width, top + height), colour, 2)

    coords = "({},{})({},{})".format(top, left, width, height)
    caption = label + ',C=' + conf_text + "- xy=" + coords
    # Note that on some systems cv2.putText erroneously draws horizontal lines across the image
    image = cv2.putText(image, caption, (left - 10, top - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 2)
    return image


def cb_newpad(decodebin, decoder_src_pad,data):
    """Handle a new decodebin pad by targeting the source bin's ghost pad at it.

    Only video pads whose caps carry the "memory:NVMM" feature are linked;
    `data` is the source bin that owns the "src" ghost pad.
    """
    print("In cb_newpad\n")
    pad_caps = decoder_src_pad.get_current_caps()
    media_type = pad_caps.get_structure(0).get_name()
    target_bin = data
    caps_features = pad_caps.get_features(0)

    # Audio (or any other non-video) pads are ignored.
    if "video" not in media_type:
        return

    # Without NVMM memory the decodebin did not pick the nvidia decoder.
    if not caps_features.contains("memory:NVMM"):
        sys.stderr.write(" Error: Decodebin did not pick nvidia decoder plugin.\n")
        return

    ghost_pad = target_bin.get_static_pad("src")
    if not ghost_pad.set_target(decoder_src_pad):
        sys.stderr.write("Failed to link decoder src pad to source bin ghost pad\n")

def decodebin_child_added(child_proxy,Object,name,user_data):
    """Follow nested decodebins and configure the nvv4l2decoder on aarch64."""
    print("Decodebin child added:", name, "\n")

    # Nested decodebins report their own children through the same callback.
    if "decodebin" in name:
        Object.connect("child-added",decodebin_child_added,user_data)

    if not is_aarch64():
        return
    if "nvv4l2decoder" in name:
        print("Seting bufapi_version\n")
        Object.set_property("bufapi-version",True)

def create_source_bin(index, uri):
    """Build a source bin that decodes `uri` and exposes a "src" ghost pad.

    Args:
        index: Source index, used to name the bin "source-bin-NN".
        uri: URI handed to the uridecodebin element.

    Returns:
        The new Gst.Bin, or None when the bin, the uridecodebin element or the
        ghost pad could not be created (an error is written to stderr).
    """
    print("Creating source bin")

    # Create a source GstBin to abstract this bin's content from the rest of the
    # pipeline
    bin_name = "source-bin-%02d" % index

    nbin = Gst.Bin.new(bin_name)
    if not nbin:
        sys.stderr.write(" Unable to create source bin \n")
        # Nothing below can work without the bin; report failure to the caller.
        return None

    # Source element for reading from the uri.
    # We will use decodebin and let it figure out the container format of the
    # stream and the codec and plug the appropriate demux and decode plugins.
    uri_decode_bin = Gst.ElementFactory.make("uridecodebin", "uri-decode-bin")
    if not uri_decode_bin:
        sys.stderr.write(" Unable to create uri decode bin \n")
        # Avoid calling set_property()/connect() on None.
        return None

    # We set the input uri to the source element
    uri_decode_bin.set_property("uri", uri)
    # Connect to the "pad-added" signal of the decodebin which generates a
    # callback once a new pad for raw data has been created by the decodebin
    uri_decode_bin.connect("pad-added", cb_newpad, nbin)
    uri_decode_bin.connect("child-added", decodebin_child_added, nbin)

    # We need to create a ghost pad for the source bin which will act as a proxy
    # for the video decoder src pad. The ghost pad will not have a target right
    # now. Once the decode bin creates the video decoder and generates the
    # cb_newpad callback, we will set the ghost pad target to the video decoder
    # src pad.
    Gst.Bin.add(nbin, uri_decode_bin)
    bin_pad = nbin.add_pad(Gst.GhostPad.new_no_target("src", Gst.PadDirection.SRC))
    if not bin_pad:
        sys.stderr.write(" Failed to add ghost pad in source bin \n")
        return None
    return nbin


def create_sumery(mongo_db):
    """Print a per-camera alarm table and store the same data in MongoDB.

    The frame-based `last_alarm` / `last_detection` values are divided by FPS
    and rounded to one decimal. The summary document is inserted into
    `mongo_db.alarms_sumery` and returned.
    """
    now_text = datetime.now().strftime("%H:%M:%S")
    print("\n/--------  SOURCES_RTSP   ", START_TIME, ' | ', now_text, ' -------/')
    print('Camara          |  Nro Alarms | Cant Alarms | Last Alarm |  Last Detection')

    camaras = []
    for stream in SOURCES_RTSP:
        print("------------------------------------------------------------------------")
        alarm_total = stream['cant_alarms']
        last_alarm_s = round(stream['last_alarm'] / FPS, 1)
        last_detection_s = round(stream['last_detection'] / FPS, 1)
        print(stream['name'], ' |    ', alarm_total, '     |    ', alarm_total, '    |    ',
              last_alarm_s, '   |   ', last_detection_s)
        print(stream['alarms'])

        entry = {
            'camara': stream['name'],
            'cant_alarms': alarm_total,
            'last_alarm': last_alarm_s,
            'last_detection': last_detection_s,
            'alarms': stream['alarms'],
        }
        camaras.append(entry)
    print("_________________________________________________________________________________________")

    data = {
        "start_time": START_TIME,
        "camaras": camaras,
        'timestamp': datetime.now()
    }
    mongo_db.alarms_sumery.insert_one(data)
    return data

def fire_alarm(mongo_db, obj_meta, frame_meta, frame_number):
    """Fire an alarm for a detected person when the validation allows it.

    Returns True when an alarm was fired, otherwise False. Objects whose class
    name is not 'Person' never fire.
    """
    pad_idx = frame_meta.pad_index
    info = json_meta_info(obj_meta, obj_meta.confidence, str(pad_idx))

    # The validation is only evaluated for persons (short-circuit).
    should_fire = (
        info['obj_name'] == 'Person'
        and validation_fire_alarm2(info, pad_idx, frame_number)
    )
    if not should_fire:
        return False

    do_fire_alarm(mongo_db, info, pad_idx, frame_number)
    return True

def validation_fire_alarm2(detection, stream_id, frame_number):
    """Decide whether `detection` should raise a new alarm on this stream.

    Detections outside the stream's detection area never fire. While the last
    detection is younger than LIFE_DETECTION_AREA seconds, the detection only
    refreshes `last_detection` and fires again once MAX_TIME_DETECTION_AREA
    seconds have passed since the last alarm. Otherwise it fires.
    """
    camera = SOURCES_RTSP[stream_id]

    if not validate_detection_area(detection, stream_id):
        return False

    window_end = camera['last_detection'] + (LIFE_DETECTION_AREA * FPS)
    if not window_end > frame_number:
        # The previous detection has expired: treat this as a new event.
        return True

    # Still the same event: keep the window alive.
    camera['last_detection'] = frame_number
    return frame_number > camera['last_alarm'] + (MAX_TIME_DETECTION_AREA * FPS)


def validate_detection_area(detection, stream_id):
    """Return True when the detection lies sufficiently inside the stream's area.

    The detection box (built by create_polygon) is intersected with the
    polygon stored under SOURCES_RTSP[stream_id]['area']; the detection counts
    when the overlapping fraction of the box exceeds COF_INTERACTION_MIN_AREA.

    The former `if (False)` debug branch was removed: it could never run and
    referenced the undefined name DETECTION_AREA.
    """
    area = Polygon(SOURCES_RTSP[stream_id]['area'])
    detection_box = create_polygon(detection)
    overlap_ratio = conf_polygon_interseption_area(detection_box, area)
    return overlap_ratio > COF_INTERACTION_MIN_AREA


def do_fire_alarm(mongo_db, detection, stream_id, frame_number):
    """Store the alarm in `mongo_db.alarms`, update the stream's bookkeeping and log it."""
    mongo_db.alarms.insert_one(detection)

    camera = SOURCES_RTSP[stream_id]
    camera['last_detection'] = frame_number
    camera['last_alarm'] = frame_number
    camera['cant_alarms'] += 1
    camera['alarms'].append(detection['timestamp'].strftime("%H:%M:%S"))

    banner = "/************************************************/"
    stream_text = str(stream_id)
    print(banner)
    print("/------- ALARM FIRE  -------", stream_text, "----")
    print(stream_text, " - ", str(camera['name']), " - ", str(datetime.now()))
    print(banner)

def save_detection_image(obj_meta, gst_buffer, frame_meta, frame_number):
    """Save the current frame, with the object's box drawn on it, as a JPEG.

    Nothing happens unless TAKE_IMAGE is set and the object's confidence is
    below CONFIDENCE_SAVE_IMAGE. Frames whose confidence is also below
    CONFIDENCE_VALIDATE_SAVE_IMAGE go to the "validate" subfolder of
    `folder_name`. The per-stream counter in `saved_count` is incremented only
    when the file was written.
    """
    if not (TAKE_IMAGE and obj_meta.confidence < CONFIDENCE_SAVE_IMAGE):
        return

    # Getting Image data using nvbufsurface
    # the input should be address of buffer and batch_id
    n_frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
    # convert python array into numpy array format.
    frame_image = np.array(n_frame, copy=True, order='C')
    # convert the array into cv2 default color format
    frame_image = cv2.cvtColor(frame_image, cv2.COLOR_RGBA2BGRA)
    frame_image = draw_bounding_boxes(frame_image, obj_meta, obj_meta.confidence)

    folder_store = folder_name
    if obj_meta.confidence < CONFIDENCE_VALIDATE_SAVE_IMAGE:
        folder_store = os.path.join(folder_name, 'validate')
    # cv2.imwrite does not create missing directories.
    os.makedirs(folder_store, exist_ok=True)

    file_name = "stream_{}_frame_{}.jpg".format(frame_meta.pad_index, frame_number)
    target = os.path.join(folder_store, file_name)
    if not cv2.imwrite(target, frame_image):
        sys.stderr.write("Unable to write image %s\n" % target)
        return

    # create_pipeline() registers the counters as "stream_<source id>"; the
    # pad index is the position of the source in SOURCES_RTSP. Indexing
    # saved_count by pad index raised KeyError.
    counter_key = "stream_" + SOURCES_RTSP[frame_meta.pad_index]['id']
    saved_count[counter_key] = saved_count.get(counter_key, 0) + 1

def json_meta_info(obj_meta,confidence, stream_id):
    """Build the detection record (a plain dict) for one detected object.

    `stream_id` is stored as given under "camara" and converted with int() to
    look up the camera name; the confidence is stored as a two-decimal string.
    """
    box = obj_meta.rect_params

    record = {}
    record["camara"] = stream_id
    record["camara_name"] = SOURCES_RTSP[int(stream_id)]['name']
    record["obj_name"] = pgie_classes_str[obj_meta.class_id]
    record["confidence"] = '{0:.2f}'.format(confidence)
    record["top"] = int(box.top)
    record["left"] = int(box.left)
    record["width"] = int(box.width)
    record["height"] = int(box.height)
    record['timestamp'] = datetime.now()
    return record



def conf_polygon_interseption_area(poly, area):
    """Return the fraction of `poly` that lies inside `area`.

    Args:
        poly: Geometry of the detection; must provide `area`, `intersects()`
            and `intersection()`.
        area: Geometry of the detection area.

    Returns:
        intersection area divided by `poly.area`, or 0 when the geometries do
        not intersect or `poly` has no area.
    """
    # A zero-area detection box has no meaningful overlap ratio; dividing by
    # its area would raise ZeroDivisionError.
    if poly.area == 0:
        return 0
    if not poly.intersects(area):
        return 0
    return poly.intersection(area).area / poly.area

def polygon_interseption(poly, area):
    """Return the intersection of `poly` and `area`, or None when they do not intersect."""
    return poly.intersection(area) if poly.intersects(area) else None


def create_polygon(data):
    """Build the rectangle polygon of a detection record.

    Args:
        data: Mapping with the integer keys "left", "top", "width" and
            "height", as produced by json_meta_info().

    Returns:
        A Polygon with the corners top-left, top-right, bottom-right,
        bottom-left.
    """
    left, top = data["left"], data["top"]
    right = left + data["width"]
    # Image coordinates grow downwards, so the bottom edge is top + height
    # (the same convention draw_bounding_boxes uses). Subtracting the height
    # placed the polygon above the real box.
    bottom = top + data["height"]

    return Polygon([(left, top), (right, top), (right, bottom), (left, bottom)])


def create_pipeline():
    """Build the GStreamer pipeline for all cameras in SOURCES_RTSP.

    Element order: one source bin per camera -> nvstreammux -> nvinfer ->
    nvvideoconvert -> RGBA capsfilter -> nvmultistreamtiler -> nvvideoconvert
    -> nvdsosd -> (nvegltransform on aarch64) -> nveglglessink.
    tiler_sink_pad_buffer_probe is attached to the tiler sink pad.

    Returns:
        The assembled Gst.Pipeline (not yet started).
    """
    # One batch slot per configured camera. The former "len(...) - 1" made the
    # muxer/inference batch size and the tiler grid one source too small.
    number_sources = len(SOURCES_RTSP)
    # Standard GStreamer initialization
    GObject.threads_init()
    Gst.init(None)

    # Create gstreamer elements
    # Create Pipeline element that will form a connection of other elements
    print("Creating Pipeline \n ")
    pipeline = Gst.Pipeline()
    is_live = False

    if not pipeline:
        sys.stderr.write(" Unable to create Pipeline \n")
    print("Creating streamux \n ")

    # Create nvstreammux instance to form batches from one or more sources.
    streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
    if not streammux:
        sys.stderr.write(" Unable to create NvStreamMux \n")

    pipeline.add(streammux)

    # The enumerate index is the muxer pad index, which the probe later uses
    # to look the camera up in SOURCES_RTSP.
    for i, StreamRtsp in enumerate(SOURCES_RTSP):

        frame_count["stream_"+StreamRtsp['id']] = 0
        saved_count["stream_"+StreamRtsp['id']] = 0

        print("Creating source_bin ", str(StreamRtsp['id']), " \n ")

        is_live = True

        source_bin = create_source_bin(i, StreamRtsp['rtsp'])

        if source_bin is None:
            sys.stderr.write("Unable to create source bin \n")
            # Skip this camera but keep the index of the remaining ones.
            continue

        pipeline.add(source_bin)

        padname = "sink_%u" % i
        sinkpad = streammux.get_request_pad(padname)
        if not sinkpad:
            sys.stderr.write("Unable to create sink pad bin \n")

        srcpad = source_bin.get_static_pad("src")
        if not srcpad:
            sys.stderr.write("Unable to create src pad bin \n")
        srcpad.link(sinkpad)

    print("Creating Pgie \n ")
    pgie = Gst.ElementFactory.make("nvinfer", "primary-inference")
    if not pgie:
        sys.stderr.write(" Unable to create pgie \n")
    # Add nvvidconv1 and filter1 to convert the frames to RGBA
    # which is easier to work with in Python.
    print("Creating nvvidconv1 \n ")
    nvvidconv1 = Gst.ElementFactory.make("nvvideoconvert", "convertor1")
    if not nvvidconv1:
        sys.stderr.write(" Unable to create nvvidconv1 \n")
    print("Creating filter1 \n ")
    caps1 = Gst.Caps.from_string("video/x-raw(memory:NVMM), format=RGBA")
    filter1 = Gst.ElementFactory.make("capsfilter", "filter1")
    if not filter1:
        sys.stderr.write(" Unable to get the caps filter1 \n")
    filter1.set_property("caps", caps1)
    print("Creating tiler \n ")
    tiler = Gst.ElementFactory.make("nvmultistreamtiler", "nvtiler")
    if not tiler:
        sys.stderr.write(" Unable to create tiler \n")
    print("Creating nvvidconv \n ")
    nvvidconv = Gst.ElementFactory.make("nvvideoconvert", "convertor")
    if not nvvidconv:
        sys.stderr.write(" Unable to create nvvidconv \n")
    print("Creating nvosd \n ")

    # On-screen display element
    nvosd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")
    if not nvosd:
        sys.stderr.write(" Unable to create nvosd \n")

    if is_aarch64():
        print("Creating transform \n ")
        transform = Gst.ElementFactory.make("nvegltransform", "nvegl-transform")
        if not transform:
            sys.stderr.write(" Unable to create transform \n")

    print("Creating EGLSink \n")
    sink = Gst.ElementFactory.make("nveglglessink", "nvvideo-renderer")
    if not sink:
        sys.stderr.write(" Unable to create egl sink \n")

    if is_live:
        print("Atleast one of the sources is live")
        streammux.set_property('live-source', 1)

    streammux.set_property('width', 1920)
    streammux.set_property('height', 1080)
    streammux.set_property('batch-size', number_sources)
    streammux.set_property('batched-push-timeout', 4000000)
    pgie.set_property('config-file-path', "config.txt")
    pgie_batch_size = pgie.get_property("batch-size")

    if pgie_batch_size != number_sources:
        print("WARNING: Overriding infer-config batch-size", pgie_batch_size, " with number of sources ",
              number_sources, " \n")
        pgie.set_property("batch-size", number_sources)

    # Roughly square grid; at least one row so an empty source list does not
    # divide by zero.
    tiler_rows = max(1, int(math.sqrt(number_sources)))
    tiler_columns = int(math.ceil((1.0*number_sources)/tiler_rows))
    tiler.set_property("rows", tiler_rows)
    tiler.set_property("columns", tiler_columns)
    tiler.set_property("width", TILED_OUTPUT_WIDTH)
    tiler.set_property("height", TILED_OUTPUT_HEIGHT)

    sink.set_property("sync", 0)

    if not is_aarch64():
        # Use CUDA unified memory in the pipeline so frames
        # can be easily accessed on CPU in Python.
        mem_type = int(pyds.NVBUF_MEM_CUDA_UNIFIED)
        streammux.set_property("nvbuf-memory-type", mem_type)
        nvvidconv.set_property("nvbuf-memory-type", mem_type)
        nvvidconv1.set_property("nvbuf-memory-type", mem_type)
        tiler.set_property("nvbuf-memory-type", mem_type)

    print("Adding elements to Pipeline \n")
    pipeline.add(pgie)
    pipeline.add(tiler)
    pipeline.add(nvvidconv)
    pipeline.add(filter1)
    pipeline.add(nvvidconv1)

    pipeline.add(nvosd)

    if is_aarch64():
        pipeline.add(transform)
    pipeline.add(sink)

    print("Linking elements in the Pipeline \n")
    streammux.link(pgie)
    pgie.link(nvvidconv1)
    nvvidconv1.link(filter1)
    filter1.link(tiler)
    tiler.link(nvvidconv)
    nvvidconv.link(nvosd)
    if is_aarch64():
        nvosd.link(transform)
        transform.link(sink)
    else:
        nvosd.link(sink)

    tiler_sink_pad = tiler.get_static_pad("sink")
    if not tiler_sink_pad:
        # The message used to say "src pad" although the sink pad is requested.
        sys.stderr.write(" Unable to get sink pad \n")
    else:
        tiler_sink_pad.add_probe(Gst.PadProbeType.BUFFER, tiler_sink_pad_buffer_probe, 0)

    return pipeline

def init_configuration(args):
    """Initialise the per-stream FPS counters and the image output folder.

    Args:
        args: Command-line argument list; its last element is used as the
            output folder and stored in the module-level `folder_name`.
    """
    global folder_name

    # One FPS counter per configured source, keyed "stream<index>".
    for i in range(len(SOURCES_RTSP)):
        fps_streams["stream{0}".format(i)] = GETFPS(i)

    folder_name = args[-1]
    # Create the folder and its "validate" subfolder. The subfolder is also
    # created when the folder already exists, and nested paths are supported.
    # An existing non-directory path is left untouched, as before.
    if not path.exists(folder_name) or path.isdir(folder_name):
        os.makedirs(os.path.join(folder_name, 'validate'), exist_ok=True)

    print("Frames will be saved in ", folder_name)

def main(args):
    """Configure the application, run the pipeline until the loop ends, then clean up.

    Args:
        args: Command-line argument list, forwarded to init_configuration().
    """
    init_configuration(args)

    pipeline = create_pipeline()

    # create an event loop and feed gstreamer bus messages to it
    loop = GObject.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", bus_call, loop)

    # List the sources
    print("Now playing...")
    for i, source in enumerate(SOURCES_RTSP):
        print(i, ": ", source['name'])

    print("Starting pipeline \n")
    # start playback and listen to events
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the application.
        pass
    except Exception as exc:
        # Report unexpected failures instead of swallowing them silently.
        sys.stderr.write("Main loop stopped on error: %r\n" % (exc,))
    finally:
        # cleanup, also when the loop ends through an exception
        print("Exiting app\n")
        pipeline.set_state(Gst.State.NULL)

# Script entry point: run main() with the command-line arguments and hand its
# return value to sys.exit().
if __name__ == '__main__':
    sys.exit(main(sys.argv))

I think you need to add an NvOSD_RectParams for each object; refer to the DeepStream Python API Reference (DeepStream version 6.1.1 documentation).

1 Like

Thanks! I’m going to check it out. Is it possible to create a polygon, or only rectangles?

NvOSD_RectParams is for rectangles.

Can you please show a code example?
We cast the object with obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data).
And then do we read the height with height = pyds.NvOSD_RectParams(obj_meta).height?