I am trying to stream video from an IP camera to a Jetson Xavier. I have this working pipeline on the command line:
gst-launch-1.0 udpsrc port=5050 ! application/x-rtp,encoding-name=H264,payload=96 ! rtph264depay ! h264parse ! queue ! omxh264dec ! nvvidconv ! tee ! xvimagesink sync=false -e
I run it and the proper video stream pops up.
I am now trying to change the deepstream-yolo example in deepstream_reference_apps to take the RTP stream (received over UDP, as in the pipeline above) as its source instead of a file source. I am trying to code it up with the C API like this:
/* Create gstreamer elements */
/* NOTE(review): this fragment is the interior of the application's entry
 * point; the closing braces of the if/else blocks below appear to have been
 * lost when the code was pasted. */
/* Create Pipeline element that will form a connection of other elements */
pipeline = gst_pipeline_new ("ds-yolo-pipeline");
/* Source element: udpsrc, receiving the stream over UDP (this replaces the
 * file source of the original example) */
source = gst_element_factory_make ("udpsrc", "udpsrc");
/* RTP H264 depayloader and a queue; see the link order further below */
depay = gst_element_factory_make ("rtph264depay", "rtph264depay");
queue = gst_element_factory_make ("queue", "queue");
/* h264parse sits between the depayloader and the queue/decoder
 * (see the link order further below) */
h264parser = gst_element_factory_make ("h264parse", "h264-parser");
/* Use nvdec_h264/omxh264dec for hardware accelerated decode on GPU */
if (!g_strcmp0 ("Tesla", argv[1])) {
decoder = gst_element_factory_make ("nvdec_h264", "nvh264-decoder");
} else if (!g_strcmp0 ("Tegra", argv[1])) {
decoder = gst_element_factory_make ("omxh264dec", "openmax-decoder");
} else {
g_printerr ("Incorrect platform. Choose between Telsa/Tegra. Exiting.\n");
return -1;
/* Use converter to convert from NV12 to RGBA as required by nvosd and yolo plugins */
nvvidconv = gst_element_factory_make ("nvvidconv", "nvvideo-converter");
/* tee is linked between nvvidconv and the sink below */
tee = gst_element_factory_make ("tee", "tee");
/* Use yolo to run inference instead of pgie */
yolo = gst_element_factory_make ("nvyolo", "yolo-inference-engine");
/* Create OSD to draw on the converted RGBA buffer */
nvosd = gst_element_factory_make ("nvosd", "nv-onscreendisplay");
/* Finally render the osd output.
 * NOTE(review): the working gst-launch pipeline uses xvimagesink with
 * sync=false; no "sync" property is set on the sink created here -- worth
 * checking whether that explains the stall. */
if (!g_strcmp0 ("Tesla", argv[1])) {
sink = gst_element_factory_make ("nveglglessink", "nvvideo-renderer");
} else if (!g_strcmp0 ("Tegra", argv[1])) {
sink = gst_element_factory_make ("nvoverlaysink", "nvvideo-renderer");
} else {
g_printerr ("Incorrect platform. Choose between Telsa/Tegra. Exiting.\n");
return -1;
/* Two caps filters: filter1 receives the RTP caps set below (placed right
 * after udpsrc); filter2 receives the RGBA caps that nvosd expects as input */
filter1 = gst_element_factory_make ("capsfilter", "filter1");
filter2 = gst_element_factory_make ("capsfilter", "filter2");
/* Bail out if any element failed to be created */
if (!pipeline || !source || !h264parser || !decoder || !filter1 || !nvvidconv
|| !filter2 || !nvosd || !sink || !yolo || !tee || !depay || !queue ) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
/* Set the UDP port on the source element. The port is hard-coded to 5050;
 * argv[2] is only printed in the "Now playing" message further below. */
//g_object_set (G_OBJECT (source), "device", argv[2], NULL);
g_object_set (G_OBJECT (source), "port", (gint)5050, NULL);
/* The yolo config file path is taken from argv[3] */
g_object_set (G_OBJECT (yolo), "config-file-path", argv[3], NULL);
/* we set the osd properties here */
g_object_set (G_OBJECT (nvosd), "font-size", 15, NULL);
/* we add a message handler */
bus = gst_pipeline_get_bus (GST_PIPELINE (pipeline));
bus_watch_id = gst_bus_add_watch (bus, bus_call, loop);
gst_object_unref (bus);
/* Set up the pipeline */
/* we add all elements into the pipeline.
 * NOTE(review): filter2, yolo and nvosd are added to the bin here but are
 * not part of the gst_element_link_many() chain below, so they stay
 * unlinked inside the pipeline. */
gst_bin_add_many (GST_BIN (pipeline), source, filter1, depay, h264parser, queue, decoder, nvvidconv, filter2, tee, yolo, nvosd, sink, NULL);
/* filter1: RTP caps matching the working gst-launch pipeline */
caps1 = gst_caps_from_string ("application/x-rtp,encoding-name=H264,payload=96");
g_object_set (G_OBJECT (filter1), "caps", caps1, NULL);
gst_caps_unref (caps1);
/* filter2: RGBA in NVMM memory (filter2 is currently not linked, see above) */
caps2 = gst_caps_from_string ("video/x-raw(memory:NVMM), format=RGBA");
g_object_set (G_OBJECT (filter2), "caps", caps2, NULL);
gst_caps_unref (caps2);
/* we link the elements together */
/* udpsrc -> filter1 -> rtph264depay -> h264parse -> queue -> decoder ->
 * nvvidconv -> tee -> video-renderer
 * (filter2, yolo and nvosd are not linked in this version) */
if (gst_element_link_many (source, filter1, depay, h264parser, queue, decoder, nvvidconv, tee, sink, NULL) != TRUE)
g_print("error at link many\n");
return -1;
/* Lets add probe to get informed of the meta data generated, we add probe to
 * the sink pad of the osd element, since by that time, the buffer would have
 * had got all the metadata.
 * NOTE(review): nvosd is not linked into the chain above, so presumably no
 * buffers reach this pad in the current version -- confirm. */
osd_sink_pad = gst_element_get_static_pad (nvosd, "sink");
if (!osd_sink_pad)
g_print ("Unable to get sink pad\n");
osd_probe_id = gst_pad_add_probe (osd_sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
osd_sink_pad_buffer_probe, NULL, NULL);
/* Set the pipeline to "playing" state */
/* argv[2] is only echoed here; it is not used to configure the source */
g_print ("Now playing: %s\n", argv[2]);
gst_element_set_state (pipeline, GST_STATE_PLAYING);
/* Wait till pipeline encounters an error or EOS */
g_print ("Running...\n");
g_main_loop_run (loop);
Note that I didn’t even try to link yolo or nvosd into the pipeline, as I just wanted to get the video streaming first. When I run the app, this is my output:
nvidia@jetson-0423318033008:~/deepstream_reference_apps/yolo$ deepstream-yolo-app Tegra 5050 config/yolov2.txt
Now playing: 5050
Loading pre-trained weights...
Loading complete!
Total Number of weights read : 50578686
layer inp_size out_size weightPtr
(1) conv-bn-leaky 3 x 416 x 416 32 x 416 x 416 992
(2) maxpool 32 x 416 x 416 32 x 208 x 208 992
(3) conv-bn-leaky 32 x 208 x 208 64 x 208 x 208 19680
(4) maxpool 64 x 208 x 208 64 x 104 x 104 19680
(5) conv-bn-leaky 64 x 104 x 104 128 x 104 x 104 93920
(6) conv-bn-leaky 128 x 104 x 104 64 x 104 x 104 102368
(7) conv-bn-leaky 64 x 104 x 104 128 x 104 x 104 176608
(8) maxpool 128 x 104 x 104 128 x 52 x 52 176608
(9) conv-bn-leaky 128 x 52 x 52 256 x 52 x 52 472544
(10) conv-bn-leaky 256 x 52 x 52 128 x 52 x 52 505824
(11) conv-bn-leaky 128 x 52 x 52 256 x 52 x 52 801760
(12) maxpool 256 x 52 x 52 256 x 26 x 26 801760
(13) conv-bn-leaky 256 x 26 x 26 512 x 26 x 26 1983456
(14) conv-bn-leaky 512 x 26 x 26 256 x 26 x 26 2115552
(15) conv-bn-leaky 256 x 26 x 26 512 x 26 x 26 3297248
(16) conv-bn-leaky 512 x 26 x 26 256 x 26 x 26 3429344
(17) conv-bn-leaky 256 x 26 x 26 512 x 26 x 26 4611040
(18) maxpool 512 x 26 x 26 512 x 13 x 13 4611040
(19) conv-bn-leaky 512 x 13 x 13 1024 x 13 x 13 9333728
(20) conv-bn-leaky 1024 x 13 x 13 512 x 13 x 13 9860064
(21) conv-bn-leaky 512 x 13 x 13 1024 x 13 x 13 14582752
(22) conv-bn-leaky 1024 x 13 x 13 512 x 13 x 13 15109088
(23) conv-bn-leaky 512 x 13 x 13 1024 x 13 x 13 19831776
(24) conv-bn-leaky 1024 x 13 x 13 1024 x 13 x 13 29273056
(25) conv-bn-leaky 1024 x 13 x 13 1024 x 13 x 13 38714336
(26) route - 512 x 26 x 26 38714336
(27) conv-bn-leaky 512 x 26 x 26 64 x 26 x 26 38747360
(28) reorg 64 x 26 x 26 256 x 13 x 13 38747360
(29) route - 1280 x 13 x 13 38747360
(30) conv-bn-leaky 1280 x 13 x 13 1024 x 13 x 13 50547936
(31) conv-linear 1024 x 13 x 13 30 x 13 x 13 50578686
(32) region 30 x 13 x 13 30 x 13 x 13 50578686
Anchors are being converted to network input resolution i.e. Anchors x 32 (stride)
Output blob names :
Using previously generated plan file located at data/blob-yolov2-kFLOAT-kGPU-batch1.engine
Loading TRT Engine...
WARNING: Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.
Loading Complete!
NvMMLiteOpen : Block : BlockType = 261
NvMMLiteBlockCreate : Block : BlockType = 261
Allocating new output: 1920x1088 (x 12), ThumbnailMode = 0
Over-riding video dimension with display dimensionOPENMAX: HandleNewStreamFormat: 3528: Send OMX_EventPortSettingsChanged: nFrameWidth = 1920, nFrameHeight = 1080
---> NVMEDIA: Video-conferencing detected !!!!!!!!!
And then it does not progress any further. How would I go about changing the source to a stream? And do you have any advice on where to link yolo and nvosd once I get the streaming working? I’m new to GStreamer and DeepStream, so I am not sure what some of the output means.
Thank you so much!