Thank you for your response.
Previously, I was using P2PNet with a modified version of the RTSP-in-RTSP-out example. I am now modifying the DeepStream Python code to integrate a better model I found.
Below is the pipeline structure of the Python code I am currently using.
Gst.init(None)
mem_type = int(pyds.NVBUF_MEM_CUDA_UNIFIED)
loop = GLib.MainLoop()
pipeline = Gst.Pipeline()
if not pipeline:
    sys.stderr.write('ERROR: Failed to create pipeline\n')
    sys.exit(1)
streammux = Gst.ElementFactory.make('nvstreammux', 'nvstreammux')
if not streammux:
    sys.stderr.write('ERROR: Failed to create nvstreammux\n')
    sys.exit(1)
streammux.set_property("nvbuf-memory-type", mem_type)
pipeline.add(streammux)
tiler = Gst.ElementFactory.make('nvmultistreamtiler', 'nvtiler')
if not tiler:
    sys.stderr.write('ERROR: Failed to create nvmultistreamtiler\n')
    sys.exit(1)
tiler.set_property('rows', ROWS)
tiler.set_property('columns', COLUMNS)
# width/height are integer properties; floor division avoids passing a float
tiler.set_property('width', STREAMMUX_WIDTH // ROWS)
tiler.set_property('height', STREAMMUX_HEIGHT // COLUMNS)
pipeline.add(tiler)
pgie = Gst.ElementFactory.make('nvinfer', 'pgie')
if not pgie:
    sys.stderr.write('ERROR: Failed to create nvinfer\n')
    sys.exit(1)
converter = Gst.ElementFactory.make('nvvideoconvert', 'nvvideoconvert')
if not converter:
    sys.stderr.write('ERROR: Failed to create nvvideoconvert\n')
    sys.exit(1)
converter.set_property("nvbuf-memory-type", mem_type)
capsfilter = Gst.ElementFactory.make('capsfilter', 'capsfilter')
if not capsfilter:
    sys.stderr.write('ERROR: Failed to create capsfilter\n')
    sys.exit(1)
capsfilter.set_property('caps', Gst.Caps.from_string('video/x-raw(memory:NVMM), format=RGBA'))
pipeline.add(capsfilter)
osd = Gst.ElementFactory.make('nvdsosd', 'nvdsosd')
if not osd:
    sys.stderr.write('ERROR: Failed to create nvdsosd\n')
    sys.exit(1)
nvvidconv_postosd = Gst.ElementFactory.make("nvvideoconvert", "convertor_postosd")
if not nvvidconv_postosd:
    sys.stderr.write('ERROR: Failed to create nvvideoconvert\n')
    sys.exit(1)
caps = Gst.ElementFactory.make("capsfilter", "filter")
if not caps:
    sys.stderr.write('ERROR: Failed to create capsfilter\n')
    sys.exit(1)
caps.set_property(
    "caps", Gst.Caps.from_string("video/x-raw(memory:NVMM), format=I420, width=(int)1920, height=(int)1080")
)
# Make the encoder
encoder = Gst.ElementFactory.make("nvv4l2h264enc", "encoder")
if not encoder:
    sys.stderr.write('ERROR: Failed to create nvv4l2h264enc\n')
    sys.exit(1)
encoder.set_property("bitrate", bitrate)
encoder.set_property("iframeinterval", 60)
encoder.set_property("profile", 2)
# Make the payloader that packetizes the encoded video into RTP packets
rtppay = Gst.ElementFactory.make("rtph264pay", "rtppay")
if not rtppay:
    sys.stderr.write('ERROR: Failed to create rtph264pay\n')
    sys.exit(1)
# rtppay.set_property("mtu", 1400)
# Make the UDP sink
udpsink_port_num = 25400
sink = Gst.ElementFactory.make("udpsink", "udpsink")
if not sink:
    sys.stderr.write('ERROR: Failed to create udpsink\n')
    sys.exit(1)
sink.set_property("host", "127.0.0.1")
sink.set_property("port", udpsink_port_num)
sink.set_property("async", False)
sink.set_property("sync", True)
sink.set_property("buffer-size", buf_size)
streammux.set_property('batch-size', STREAMMUX_BATCH_SIZE)
streammux.set_property('batched-push-timeout', 25000)
streammux.set_property('width', STREAMMUX_WIDTH)
streammux.set_property('height', STREAMMUX_HEIGHT)
streammux.set_property('enable-padding', 0)
streammux.set_property('live-source', True)
streammux.set_property('attach-sys-ts', True)
pgie.set_property('config-file-path', CONFIG_INFER)
osd.set_property('process-mode', int(pyds.MODE_GPU))
osd.set_property('display-bbox', 1)
osd.set_property('display-text', 0)
pipeline.add(pgie)
pipeline.add(converter)
pipeline.add(osd)
pipeline.add(nvvidconv_postosd)
pipeline.add(caps)
pipeline.add(encoder)
pipeline.add(rtppay)
pipeline.add(sink)
streammux.link(tiler)
tiler.link(pgie)
pgie.link(converter)
converter.link(capsfilter)
capsfilter.link(osd)
osd.link(nvvidconv_postosd)
nvvidconv_postosd.link(caps)
caps.link(encoder)
encoder.link(rtppay)
rtppay.link(sink)
# Build the RTSP URIs, then create sources using the create_uridecode_bin function
sources = [
    f'rtsp://{cctv_id[idx]}:{cctv_pw[idx]}@{ip[idx]}:{port[idx]}/profile2/media.smp'
    for idx in range(4)
]
for idx, source in enumerate(sources):
    source_bin = create_uridecode_bin(idx, source, streammux)
    pipeline.add(source_bin)
    source_bin.sync_state_with_parent()
bus = pipeline.get_bus()
bus.add_signal_watch()
bus.connect('message', bus_call, loop)
# Connect custom probe function to pgie src pad
pgie_src_pad = pgie.get_static_pad('src')
if not pgie_src_pad:
    sys.stderr.write('ERROR: Failed to get pgie src pad\n')
    sys.exit(1)
pgie_src_pad.add_probe(Gst.PadProbeType.BUFFER, tracker_src_pad_buffer_probe, 0)
srcpad = capsfilter.get_static_pad("src")
if not srcpad:
    sys.stderr.write('ERROR: Failed to get capsfilter src pad\n')
    sys.exit(1)
reconnect_flags = [False, False, False, False]
srcpad.add_probe(Gst.PadProbeType.BUFFER, tiler_src_pad_buffer_probe, reconnect_flags)
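# RTSP server that re-serves the UDP H.264 stream at /ds-test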
rtsp_port_num = 18555
server = GstRtspServer.RTSPServer.new()
server.props.service = "%d" % rtsp_port_num
server.attach(None)
factory = GstRtspServer.RTSPMediaFactory.new()
factory.set_launch(
    '( udpsrc name=pay0 port=%d buffer-size=%d caps="application/x-rtp, media=video, clock-rate=90000, '
    'encoding-name=(string)%s, payload=96 " )'
    % (udpsink_port_num, buf_size, 'H264')
)
factory.set_shared(True)
server.get_mount_points().add_factory("/ds-test", factory)
print(
    "\n *** DeepStream: Launched RTSP Streaming at rtsp://localhost:%d/ds-test ***\n\n"
    % rtsp_port_num
)
pipeline.set_state(Gst.State.PLAYING)
try:
    # Run the GLib main loop created earlier; without this the script exits immediately
    loop.run()
except KeyboardInterrupt:
    pass
finally:
    pipeline.set_state(Gst.State.NULL)
The ROI method is not what I am looking for. Here is the paper for the new model I found: https://arxiv.org/pdf/2403.09281v1
To summarize the image processing method (a rough sketch follows the list):
- Image segmentation: the large image is divided into smaller blocks, and each block is processed individually, which preserves fine detail across the entire image.
- Block-level classification: each block is assigned to a discrete crowd-count class using the CLIP model, giving a more accurate picture of crowd density across the image.
- Synthesis: the block-level results are combined into a final density map for the entire image.
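If I understand the paper correctly, the per-frame flow would be roughly the following. This is a minimal NumPy sketch under my own assumptions: classify_block, BLOCK, and the way counts are read off are placeholders I made up, not the paper's actual implementation.

import numpy as np

BLOCK = 32  # hypothetical block size in pixels

def classify_block(block):
    # Placeholder for the paper's CLIP-based block classifier;
    # assumed to return the predicted count for one block
    raise NotImplementedError

def blockwise_density(frame):
    # frame: H x W x C array, with H and W assumed divisible by BLOCK
    h, w = frame.shape[:2]
    density = np.zeros((h // BLOCK, w // BLOCK), dtype=np.float32)
    for by in range(h // BLOCK):                      # 1) split into blocks
        for bx in range(w // BLOCK):
            block = frame[by * BLOCK:(by + 1) * BLOCK,
                          bx * BLOCK:(bx + 1) * BLOCK]
            density[by, bx] = classify_block(block)   # 2) classify each block
    return density, float(density.sum())              # 3) synthesize the map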
I am trying to implement this process in DeepStream, and I am curious whether the image segmentation (block-splitting) step can be implemented there.
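For the block-splitting step specifically, one approach I am considering is a buffer probe: since the pipeline already converts frames to RGBA in NVBUF_MEM_CUDA_UNIFIED memory, pyds.get_nvds_buf_surface can map each frame as a NumPy array, and the blocks could be sliced out there. A rough, untested sketch (block_split_probe is my own placeholder name):

import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst
import pyds

def block_split_probe(pad, info, u_data):
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        return Gst.PadProbeReturn.OK
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        # Works because the upstream buffers are RGBA in CUDA unified memory
        frame = pyds.get_nvds_buf_surface(hash(gst_buffer), frame_meta.batch_id)
        # frame is an H x W x 4 NumPy view; slice it into blocks here and
        # hand each block to the CLIP-based classifier
        try:
            l_frame = l_frame.next
        except StopIteration:
            break
    return Gst.PadProbeReturn.OK

# It could be attached to the existing capsfilter src pad, e.g.:
# srcpad.add_probe(Gst.PadProbeType.BUFFER, block_split_probe, None)

Does this look like a feasible direction, or is there a more standard DeepStream mechanism for per-block processing?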