• Hardware Platform (Jetson / GPU) GPU
• DeepStream Version 7.0 (docker image: nvcr.io/nvidia/deepstream:7.0-triton-multiarch )
• NVIDIA GPU Driver Version (valid for GPU only) 535.171.04
Hi,
I’ve been working on modifying the deepstream-3d-action-recognition
sample application located at /opt/nvidia/deepstream/deepstream/apps/sample_apps/deepstream-3d-action-recognition/
. My goal was to integrate the 3D ActionRecognitionNet model as a Secondary GIE (SGIE) within the pipeline.
To achieve this, I made the following modifications:
- Pipeline Modification: I modified the pipeline by adding a primary inference detector and a tracker before the action recognition model. You can view the updated pipeline graph here: deepstream-3d-action-recognition_graph.zip (7.6 KB)
- Code Changes: I modified the `main` function in the `deepstream_3d_action_recognition.cpp` file to incorporate the primary inference detector and the tracker. You can view the updated `main` function here:
/**
 * Entry point for the modified deepstream-3d-action-recognition sample.
 *
 * Builds and runs the pipeline:
 *   source bin(s) -> nvstreammux -> queue -> nvinfer (primary detector) ->
 *   nvtracker -> queue -> nvdspreprocess (per-object ROI sequence tensors) ->
 *   queue -> nvinfer/nvinferserver (3D action recognition, consuming the
 *   preprocess tensor meta) -> queue -> [nvmultistreamtiler -> queue ->
 *   nvvideoconvert -> queue -> nvdsosd -> queue ->] sink
 *
 * Usage: <app> -c <action_recognition_config.txt>
 * Returns 0 on clean shutdown, -1 on any setup/parse/link failure.
 */
int main(int argc, char *argv[])
{
  /* Dump a GraphViz .dot of the pipeline for debugging; consumed by
   * GST_DEBUG_BIN_TO_DOT_FILE below. */
  g_setenv("GST_DEBUG_DUMP_DOT_DIR", "/opt/nvidia/deepstream/deepstream/sources/apps/sample_apps/deepstream-3d-action-recognition", TRUE);
  GMainLoop *loop = NULL;
  GstElement *pipeline = NULL, *streammux = NULL, *sink = NULL, *pgie = NULL,
             *preprocess = NULL, *queue1 = NULL, *queue2 = NULL,
             *queue3 = NULL, *queue4 = NULL, *queue5 = NULL, *queue6 = NULL,
             *nvvidconv = NULL, *nvosd = NULL, *tiler = NULL;
  GstElement *queue_detector = NULL, *pgie_detector = NULL, *tracker = NULL;
  GstBus *bus = NULL;
  guint bus_watch_id;
  GstPad *pgie_src_pad = NULL;
  guint i, num_sources;
  guint tiler_rows, tiler_columns;
  int current_device = -1;
  /* FIX: the posted code had a mis-encoded character here
   * ("cudaGetDevice(¤t_device)"); it must take the address of
   * current_device. */
  cudaGetDevice(&current_device);
  struct cudaDeviceProp prop;
  /* prop.integrated distinguishes Jetson (integrated GPU) from dGPU below. */
  cudaGetDeviceProperties(&prop, current_device);
  /* Standard GStreamer initialization */
  gst_init(&argc, &argv);
  /* setup signal handler */
  _intr_setup();
  /* Check input arguments */
  if (argc < 3 || strncmp(argv[1], "-c", 3))
  {
    g_printerr("Usage: %s -c <action_recognition_config.txt>\n", argv[0]);
    return -1;
  }
  if (!parse_action_config(argv[2], gActionConfig)) {
    g_printerr("parse config file: %s failed.\n", argv[2]);
    return -1;
  }
  /* Verbose debug enables extra logging inside the custom sequence
   * preprocess library. */
  if (gActionConfig.debug >= kDebugVerbose) {
    setenv(ENV_CUSTOM_SEQUENC_DEBUG, "1", 1);
  } else {
    unsetenv(ENV_CUSTOM_SEQUENC_DEBUG);
  }
  num_sources = gActionConfig.uri_list.size();
  loop = g_main_loop_new(NULL, FALSE);
  /* Create gstreamer elements */
  /* Create Pipeline element that will form a connection of other elements */
  pipeline = gst_pipeline_new("preprocess-test-pipeline");
  /* Create nvstreammux instance to form batches from one or more sources. */
  streammux = gst_element_factory_make("nvstreammux", "stream-muxer");
  if (!pipeline || !streammux)
  {
    g_printerr("One element could not be created. Exiting.\n");
    return -1;
  }
  gst_bin_add(GST_BIN(pipeline), streammux);
  /* One source bin per URI; each bin's src pad is linked to a requested
   * streammux sink_%u pad. */
  for (i = 0; i < num_sources; i++)
  {
    GstPad *sinkpad, *srcpad;
    gchar pad_name[16] = {};
    GstElement *source_bin = create_source_bin(i, gActionConfig.uri_list[i].c_str());
    if (!source_bin)
    {
      g_printerr("Failed to create source bin. Exiting.\n");
      return -1;
    }
    gst_bin_add(GST_BIN(pipeline), source_bin);
    g_snprintf(pad_name, 15, "sink_%u", i);
    sinkpad = gst_element_request_pad_simple(streammux, pad_name);
    if (!sinkpad)
    {
      g_printerr("Streammux request sink pad failed. Exiting.\n");
      return -1;
    }
    srcpad = gst_element_get_static_pad(source_bin, "src");
    if (!srcpad)
    {
      g_printerr("Failed to get src pad of source bin. Exiting.\n");
      return -1;
    }
    if (gst_pad_link(srcpad, sinkpad) != GST_PAD_LINK_OK)
    {
      g_printerr("Failed to link source bin to stream muxer. Exiting.\n");
      return -1;
    }
    gst_object_unref(srcpad);
    gst_object_unref(sinkpad);
  }
  /* Detector + tracker stage inserted ahead of the action-recognition SGIE. */
  queue_detector = gst_element_factory_make("queue", "queue-detector");
  pgie_detector = gst_element_factory_make("nvinfer", "primary-detector");
  tracker = gst_element_factory_make ("nvtracker", "tracker");
  /* FIX: also check queue_detector; it is added and linked below, so a
   * creation failure here must be caught too. */
  if (!queue_detector || !pgie_detector || !tracker)
  {
    g_printerr("One element could not be created. Exiting.\n");
    return -1;
  }
  g_object_set(G_OBJECT(pgie_detector),
               "config-file-path", "dstest1_pgie_config.yml",
               NULL);
  g_object_set(G_OBJECT(tracker),
               "ll-config-file", "/opt/nvidia/deepstream/deepstream/samples/configs/deepstream-app/config_tracker_NvDCF_perf.yml",
               "ll-lib-file", "/opt/nvidia/deepstream/deepstream/lib/libnvds_nvmultiobjecttracker.so",
               NULL);
  /* to preprocess the rois and form a raw tensor for inferencing */
  preprocess = gst_element_factory_make("nvdspreprocess", "preprocess-plugin");
  /* Create inference plugin to inference batched frames. */
  if (!gActionConfig.triton_infer_config.empty()) {
    pgie = gst_element_factory_make("nvinferserver", "primary-triton-nvinference");
  } else {
    pgie = gst_element_factory_make("nvinfer", "primary-nvinference-engine");
  }
  /* Add queue elements between every two elements */
  queue1 = gst_element_factory_make("queue", "queue1");
  queue2 = gst_element_factory_make("queue", "queue2");
  queue3 = gst_element_factory_make("queue", "queue3");
  if (!preprocess || !pgie || !queue1 || !queue2 || !queue3)
  {
    g_printerr("One element could not be created. Exiting.\n");
    return -1;
  }
  if (gActionConfig.useFakeSink) {
    sink = gst_element_factory_make("fakesink", "nvvideo-sink");
    if (!sink)
    {
      g_printerr("element fakesink could not be created. Exiting.\n");
      return -1;
    }
  } else {
    queue4 = gst_element_factory_make("queue", "queue4");
    queue5 = gst_element_factory_make("queue", "queue5");
    queue6 = gst_element_factory_make("queue", "queue6");
    /* Use nvtiler to composite the batched frames into a 2D tiled array based
     * on the source of the frames. */
    tiler = gst_element_factory_make("nvmultistreamtiler", "nvtiler");
    /* Use convertor to convert from NV12 to RGBA as required by nvosd */
    nvvidconv = gst_element_factory_make("nvvideoconvert", "nvvideo-converter");
    /* Create OSD to draw on the converted RGBA buffer */
    nvosd = gst_element_factory_make("nvdsosd", "nv-onscreendisplay");
    /* Finally render the osd output: nv3dsink on integrated (Jetson) GPUs,
     * nveglglessink on x86 dGPU. */
    if (prop.integrated)
    {
      sink = gst_element_factory_make("nv3dsink", "nv3d-sink");
    } else {
#ifdef __aarch64__
      sink = gst_element_factory_make ("nv3dsink", "nvvideo-renderer");
#else
      sink = gst_element_factory_make ("nveglglessink", "nvvideo-renderer");
#endif
    }
    if (!tiler || !nvvidconv || !nvosd || !sink)
    {
      g_printerr("One element could not be created. Exiting.\n");
      return -1;
    }
    /* Near-square tile layout: rows = floor(sqrt(n)), columns = ceil(n/rows). */
    tiler_rows = (guint)sqrt(num_sources);
    tiler_columns = (guint)ceil(1.0 * num_sources / tiler_rows);
    /* we set the tiler properties here */
    g_object_set(G_OBJECT(tiler), "rows", tiler_rows, "columns", tiler_columns,
                 "width", gActionConfig.tiler_width, "height", gActionConfig.tiler_height, NULL);
    g_object_set(G_OBJECT(nvosd), "process-mode", OSD_PROCESS_MODE,
                 "display-text", OSD_DISPLAY_TEXT, NULL);
  }
  g_object_set(G_OBJECT(streammux), "batch-size", num_sources, NULL);
  g_object_set(G_OBJECT(streammux), "width", gActionConfig.muxer_width, "height",
               gActionConfig.muxer_height,
               "batched-push-timeout", gActionConfig.muxer_batch_timeout, NULL);
  g_object_set(G_OBJECT(preprocess), "config-file", gActionConfig.preprocess_config.c_str(), NULL);
  /* Configure the nvinfer element using the nvinfer config file.
   * input-tensor-meta=TRUE makes the SGIE consume the tensors attached by
   * nvdspreprocess instead of doing its own scaling/conversion. */
  g_object_set(G_OBJECT(pgie), "input-tensor-meta", TRUE,
               "config-file-path",
               (!gActionConfig.triton_infer_config.empty() ?
                    gActionConfig.triton_infer_config.c_str() :
                    gActionConfig.infer_config.c_str()), NULL);
  /* FIX: num_sources is a guint; use %u (the %d/%u mismatch is formally
   * undefined behavior). */
  g_print("num-sources = %u\n", num_sources);
  g_object_set(G_OBJECT(sink), "qos", 0, "sync", gActionConfig.display_sync, NULL);
  /* we add a message handler */
  bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
  bus_watch_id = gst_bus_add_watch(bus, bus_call, loop);
  gst_object_unref(bus);
  /* Set up the pipeline */
  /* we add all elements into the pipeline */
  if (gActionConfig.useFakeSink) {
    gst_bin_add_many(GST_BIN(pipeline), queue_detector, pgie_detector, tracker,
                     queue1, preprocess, queue2, pgie, queue3, sink, NULL);
    /* we link the elements together:
     * nvstreammux -> queue -> detector -> tracker -> queue -> preprocess ->
     * queue -> action-recognition gie -> queue -> fakesink */
    if (!gst_element_link_many(streammux, queue_detector, pgie_detector, tracker,
                               queue1, preprocess, queue2, pgie, queue3, sink, NULL))
    {
      g_printerr("Elements could not be linked. Exiting.\n");
      return -1;
    }
  }
  else
  {
    gst_bin_add_many(GST_BIN(pipeline), queue_detector, pgie_detector, tracker,
                     queue1, preprocess, queue2, pgie, queue3, tiler,
                     queue4, nvvidconv, queue5, nvosd, queue6, sink, NULL);
    /* we link the elements together:
     * nvstreammux -> queue -> detector -> tracker -> queue -> preprocess ->
     * queue -> action-recognition gie -> queue -> tiler -> queue ->
     * nvvidconv -> queue -> nvosd -> queue -> video-renderer */
    if (!gst_element_link_many(streammux, queue_detector, pgie_detector, tracker,
                               queue1, preprocess, queue2, pgie, queue3, tiler,
                               queue4, nvvidconv, queue5, nvosd, queue6, sink, NULL))
    {
      g_printerr("Elements could not be linked. Exiting.\n");
      return -1;
    }
  }
  /* Add a probe on the action-recognition GIE's src pad to read the
   * classification metadata it attaches to each buffer. */
  pgie_src_pad = gst_element_get_static_pad(pgie, "src");
  if (!pgie_src_pad) {
    g_print("Unable to get pgie src pad\n");
  } else {
    gst_pad_add_probe(pgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER,
                      pgie_src_pad_buffer_probe, NULL, NULL);
    /* FIX: only unref when the pad lookup succeeded; unreffing NULL raises a
     * GLib critical warning. */
    gst_object_unref(pgie_src_pad);
  }
  /* Set the pipeline to "playing" state */
  g_print("Now playing:");
  for (i = 0; i < num_sources; i++)
  {
    g_print(" %s,", gActionConfig.uri_list[i].c_str());
  }
  g_print("\n");
  GST_DEBUG_BIN_TO_DOT_FILE(GST_BIN(pipeline), GST_DEBUG_GRAPH_SHOW_ALL, "pipeline");
  gst_element_set_state(pipeline, GST_STATE_PLAYING);
  gPipeline = pipeline;
  /* Wait till pipeline encounters an error or EOS */
  g_print("Running...\n");
  g_main_loop_run(loop);
  gPipeline = nullptr;
  /* Out of the main loop, clean up nicely */
  g_print("Returned, stopping playback\n");
  gst_element_set_state(pipeline, GST_STATE_NULL);
  g_print("Deleting pipeline\n");
  gst_object_unref(GST_OBJECT(pipeline));
  g_source_remove(bus_watch_id);
  g_main_loop_unref(loop);
  return 0;
}
- Primary Detector Configuration: For the primary detector, I used the configuration file `dstest1_config.yml` from /opt/nvidia/deepstream/deepstream/sources/apps/sample_apps/deepstream-test1. The only change I made was setting the `gie-unique-id` to 2.
- Preprocessing Configuration: I adjusted the `config_preprocess_3d_custom.txt` file to perform preprocessing on object ROIs instead of entire frames. Here is the modified configuration:
[property]
enable=1
target-unique-ids=1
# 0=process on objects 1=process on frames
process-on-frame=0
# network-input-shape: batch, channel, sequence, height, width
# 3D sequence of 32 images
network-input-shape= 1;3;32;224;224
# 0=RGB, 1=BGR, 2=GRAY
network-color-format=0
# 0=NCHW, 1=NHWC, 2=CUSTOM
network-input-order=2
# 0=FP32, 1=UINT8, 2=INT8, 3=UINT32, 4=INT32, 5=FP16
tensor-data-type=0
tensor-name=input_rgb
processing-width=224
processing-height=224
# 0=NVBUF_MEM_DEFAULT 1=NVBUF_MEM_CUDA_PINNED 2=NVBUF_MEM_CUDA_DEVICE
# 3=NVBUF_MEM_CUDA_UNIFIED 4=NVBUF_MEM_SURFACE_ARRAY(Jetson)
scaling-pool-memory-type=0
# 0=NvBufSurfTransformCompute_Default 1=NvBufSurfTransformCompute_GPU
# 2=NvBufSurfTransformCompute_VIC(Jetson)
scaling-pool-compute-hw=0
# Scaling Interpolation method
# 0=NvBufSurfTransformInter_Nearest 1=NvBufSurfTransformInter_Bilinear 2=NvBufSurfTransformInter_Algo1
# 3=NvBufSurfTransformInter_Algo2 4=NvBufSurfTransformInter_Algo3 5=NvBufSurfTransformInter_Algo4
# 6=NvBufSurfTransformInter_Default
scaling-filter=0
# model input tensor pool size
tensor-buf-pool-size=8
custom-lib-path=/opt/nvidia/deepstream/deepstream/lib/libnvds_custom_sequence_preprocess.so
custom-tensor-preparation-function=CustomSequenceTensorPreparation
# 3D conv custom params
[user-configs]
channel-scale-factors=0.007843137;0.007843137;0.007843137
channel-mean-offsets=127.5;127.5;127.5
stride=1
subsample=0
[group-0]
src-ids=0
process-on-roi=0
#process only person
operate-on-class-ids=2
process-on-all-objects=1
- Input Source: I used a single input source and set the debug level to verbose in
deepstream_action_recognition_config.txt
:
...
# stream/file source list
uri-list=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_walk.mov
...
# Log debug level. 0: disabled. 1: debug. 2: verbose.
debug=2
- Batch Size: I also set the batch size to 1 in the
config_infer_primary_3d_action.txt
file.
Issue Encountered:
After running the application, it freezes after processing just a few frames.
To diagnose the problem, I compiled and ran the code in debug mode and discovered that the issue lies within the `custom_sequence_preprocess` library. Specifically, the process hangs after a few iterations of the `BufferManager::addRoi` function in the `sequence_image_process.cpp` file, at line 158, during the call to `newBlock->buf = _allocator->acquire();`.
Could anyone provide insights or suggestions on how to resolve this issue?
Thanks in advance.