Jetson h264 decoder flush deadlock

Hi,

I was able to reliably reproduce a deadlock in the decoder. I use 00_video_decode without patches (the original code, without adding a usleep to simulate a live stream) and start two 00_video_decode processes:

  • first process: while true; do ./video_decode H264 --disable-rendering --input-nalu ../../../video_samples/bunny.h264 ; done
  • secondary process: while true; do ./video_decode H264 --disable-rendering --input-nalu ../../../video_samples/transcoded_bunny_first_gop.h264 ; done

After a while, one of the processes freezes

It looks like dec_capture_loop_fcn is deadlocked in the capture_plane dqBuffer call:

(gdb) bt
#0  0x0000007fb7f332a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b308d3a8) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1  0x0000007fb7f332a4 in __pthread_cond_wait_common (abstime=0x0, mutex=0x55b308d350, cond=0x55b308d380) at pthread_cond_wait.c:502
#2  0x0000007fb7f332a4 in __pthread_cond_wait (cond=0x55b308d380, mutex=0x55b308d350) at pthread_cond_wait.c:655
#3  0x0000007fb7570fdc in  () at /usr/lib/aarch64-linux-gnu/tegra/libnvos.so
#4  0x0000007fb39eabf0 in TegraV4L2_Poll_CPlane () at /usr/lib/aarch64-linux-gnu/tegra/libtegrav4l2.so
#5  0x0000007fb73e52e4 in plugin_ioctl () at /usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvvideocodec.so
#6  0x0000007fb7e14d68 in v4l2_ioctl (fd=14, request=3227014673) at libv4l2.c:1152
#7  0x000000557328fcc0 in NvV4l2ElementPlane::dqBuffer(v4l2_buffer&, NvBuffer**, NvBuffer**, unsigned int) (this=0x55b2fc8468, v4l2_buf=..., buffer=0x7fa710d5b8, shared_buffer=0x0, num_retries=0)
    at NvV4l2ElementPlane.cpp:126
#8  0x0000005573258720 in dec_capture_loop_fcn(void*) (arg=0x7fd9a886e8) at video_decode_main.cpp:1055
#9  0x0000007fb7f2d088 in start_thread (arg=0x7fd9a884cf) at pthread_create.c:463
#10 0x0000007fb79a0ffc in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78
(gdb) frame 8
#8  0x0000005573258720 in dec_capture_loop_fcn (arg=0x7fd9a886e8) at video_decode_main.cpp:1055
1055                if (dec->capture_plane.dqBuffer(v4l2_buf, &dec_buffer, NULL, 0))
(gdb) list
1050                memset(&v4l2_buf, 0, sizeof(v4l2_buf));
1051                memset(planes, 0, sizeof(planes));
1052                v4l2_buf.m.planes = planes;
1053
1054                /* Dequeue a filled buffer. */
1055                if (dec->capture_plane.dqBuffer(v4l2_buf, &dec_buffer, NULL, 0))
1056                {
1057                    if (errno == EAGAIN)
1058                    {
1059                        usleep(1000);
(gdb) p ctx.got_eos
$2 = true

I think it doesn't matter whether the video is interlaced or progressive.

bunny.h264 and transcoded_bunny_first_gop.h264 are progressive H.264 streams from l4t2-demo/video_samples at main · maxlapshin/l4t2-demo · GitHub

Platform: Jetson Xavier NX, NVIDIA L4T SDK 32.6.1

# dpkg -l | grep l4t
ii  cuda-repo-l4t-10-2-local-10.2.89       1.0-1                                      arm64        cuda repository configuration files
ii  nvidia-l4t-3d-core                     32.6.1-20210916210945                      arm64        NVIDIA GL EGL Package
ii  nvidia-l4t-apt-source                  32.6.1-20210726122859                      arm64        NVIDIA L4T apt source list debian package
ii  nvidia-l4t-bootloader                  32.6.1-20210726122859                      arm64        NVIDIA Bootloader Package
ii  nvidia-l4t-camera                      32.6.1-20210916210945                      arm64        NVIDIA Camera Package
ii  nvidia-l4t-configs                     32.6.1-20210726122859                      arm64        NVIDIA configs debian package
ii  nvidia-l4t-core                        32.6.1-20210726122859                      arm64        NVIDIA Core Package
ii  nvidia-l4t-cuda                        32.6.1-20210916210945                      arm64        NVIDIA CUDA Package
ii  nvidia-l4t-firmware                    32.6.1-20210916210945                      arm64        NVIDIA Firmware Package
ii  nvidia-l4t-graphics-demos              32.6.1-20210916210945                      arm64        NVIDIA graphics demo applications
ii  nvidia-l4t-gstreamer                   32.6.1-20210916210945                      arm64        NVIDIA GST Application files
ii  nvidia-l4t-init                        32.6.1-20210916210945                      arm64        NVIDIA Init debian package
ii  nvidia-l4t-initrd                      32.6.1-20210726122859                      arm64        NVIDIA initrd debian package
ii  nvidia-l4t-jetson-io                   32.6.1-20210726122859                      arm64        NVIDIA Jetson.IO debian package
ii  nvidia-l4t-jetson-multimedia-api       32.6.1-20210916210945                      arm64        NVIDIA Jetson Multimedia API is a collection of lower-level APIs that support flexible application development.
ii  nvidia-l4t-kernel                      4.9.253-tegra-32.6.1-20210726122859        arm64        NVIDIA Kernel Package
ii  nvidia-l4t-kernel-dtbs                 4.9.253-tegra-32.6.1-20210726122859        arm64        NVIDIA Kernel DTB Package
ii  nvidia-l4t-kernel-headers              4.9.253-tegra-32.6.1-20210726122859        arm64        NVIDIA Linux Tegra Kernel Headers Package
ii  nvidia-l4t-libvulkan                   32.6.1-20210916210945                      arm64        NVIDIA Vulkan Loader Package
ii  nvidia-l4t-multimedia                  32.6.1-20210916210945                      arm64        NVIDIA Multimedia Package
ii  nvidia-l4t-multimedia-utils            32.6.1-20210916210945                      arm64        NVIDIA Multimedia Package
ii  nvidia-l4t-oem-config                  32.6.1-20210726122859                      arm64        NVIDIA OEM-Config Package
ii  nvidia-l4t-tools                       32.6.1-20210726122859                      arm64        NVIDIA Public Test Tools Package
ii  nvidia-l4t-wayland                     32.6.1-20210916210945                      arm64        NVIDIA Wayland Package
ii  nvidia-l4t-weston                      32.6.1-20210916210945                      arm64        NVIDIA Weston Package
ii  nvidia-l4t-x11                         32.6.1-20210916210945                      arm64        NVIDIA X11 Package
ii  nvidia-l4t-xusb-firmware               32.6.1-20210726122859                      arm64        NVIDIA USB Firmware Package

Hi,
It looks similar to
Decoder flush for interlaced source does not work - #14 by DaneLLL

Do you still see the issue with the patch applied? We don't observe the issue after applying the patch, so we would like to know if you observe something different.

Hi,

That patch was for multivideo_transcode, and I do not see how to apply a patch for 16_multivideo_transcode to 00_video_decode. Also, in that thread the deadlocks happened every time in the encoder output plane ioctl.

Also,

I cannot reproduce this deadlock with the video sample (sd.h264) from Decoder flush for interlaced source does not work - #4 by khizbulin, but it reproduces well with transcoded pieces of sd.h264:

ffmpeg -y -i sd.h264 -t 00:01 -c copy sd01.h264
ffmpeg -y -i sd.h264 -t 00:05 -c copy sd05.h264
multivideo_transcode num_files 1 sd01.h264 H264 sd01_transcoded.h264 H264
multivideo_transcode num_files 1 sd05.h264 H264 sd05_transcoded.h264 H264
while true; do  ./video_decode H264 --disable-rendering --input-nalu sd05_transcoded.h264 ; done &
while true; do  ./video_decode H264 --disable-rendering --input-nalu sd01_transcoded.h264 ; done &

After 30-40 minutes, the decoder process running sd01_transcoded.h264 freezes.
The --input-nalu parameter does not affect the result.

Maybe the deadlock only reproduces with progressive H.264, or with H.264 produced by the Jetson H.264 encoder.

Hi,
Could you check if you observe the issue with reference h264 stream:

/usr/src/jetson_multimedia_api/data/Video/sample_outdoor_car_1080p_10fps.h264

Hi,

sample_outdoor_car_1080p_10fps.h264 is too long for the test, so to reproduce the issue I also created a transcoded piece of sample_outdoor_car_1080p_10fps:

ffmpeg -y -i sample_outdoor_car_1080p_10fps.h264 -t 00:25 -c copy sample_outdoor_car_1080p_10fps_25.h264
./multivideo_transcode num_files 1 ../../data/Video/sample_outdoor_car_1080p_10fps_25.h264 H264 ../../data/Video/sample_outdoor_car_1080p_10fps_25_transcoded.h264

So, I am running multiple decoder instances, and after some time PID 19440 freezes:

# ps auxww | grep video_de
root      8621  0.3  0.6 8637220 52016 pts/5   Sl+  13:50   0:00 ./video_decode H264 --disable-rendering --input-nalu ../../data/Video/sample_outdoor_car_1080p_10fps_25_transcoded.h264
root     10494  101  0.6 8631100 48064 pts/3   Rl+  13:53   0:02 ./video_decode H264 --disable-rendering --input-nalu ../../data/Video/sample_outdoor_car_1080p_10fps.h264
root     10514  0.0  0.5 8631100 46872 pts/4   Sl+  13:53   0:00 ./video_decode H264 --disable-rendering --input-nalu ../../data/Video/sample_outdoor_car_1080p_10fps_25.h264
root     10524  0.0  0.0   6892   628 pts/7    S+   13:53   0:00 grep --color=auto video_de
root     19440  0.1  0.6 8631100 48036 pts/2   tl+  13:30   0:02 ./video_decode H264 --disable-rendering --input-nalu ../../data/Video/sample_outdoor_car_1080p_10fps.h264
# gdb -p 19440
GNU gdb (Ubuntu 8.1.1-0ubuntu1) 8.1.1
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "aarch64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word".
Attaching to process 19440
[New LWP 19442]
[New LWP 19443]
[New LWP 19444]
[New LWP 19445]
[New LWP 19455]
[New LWP 19456]
[New LWP 19457]
[New LWP 19458]
[New LWP 19464]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
0x0000007f94f83310 in __GI___pthread_timedjoin_ex (threadid=547676893648, thread_return=0x0, abstime=0x0, block=<optimized out>) at pthread_join_common.c:89
89      pthread_join_common.c: No such file or directory.
(gdb) info threads
  Id   Target Id         Frame
* 1    Thread 0x7f94fe7430 (LWP 19440) "DecOutPlane" 0x0000007f94f83310 in __GI___pthread_timedjoin_ex (threadid=547676893648, thread_return=0x0, abstime=0x0, block=<optimized out>)
    at pthread_join_common.c:89
  2    Thread 0x7f940a31d0 (LWP 19442) "drm_vbl" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b088a288)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
  3    Thread 0x7f938a21d0 (LWP 19443) "drm_pflip" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b088a258)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
  4    Thread 0x7f930a11d0 (LWP 19444) "drm_vbl" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b088a338)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
  5    Thread 0x7f928a01d0 (LWP 19445) "drm_pflip" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b088a308)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
  6    Thread 0x7f8747c1d0 (LWP 19455) "NVMDecBufProcT" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b08b526c)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
  7    Thread 0x7f86c7b1d0 (LWP 19456) "NVMDecDisplayT" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b08afec8)                      
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88                                                                                                                        
  8    Thread 0x7f8647a1d0 (LWP 19457) "NVMDecFrmStatsT" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b08aff3c)       
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88                                          
  9    Thread 0x7f849641d0 (LWP 19458) "V4L2_DecThread" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b095f3cc)        
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88                          
  10   Thread 0x7f841631d0 (LWP 19464) "DecCapPlane" 0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b095f358)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
(gdb) thread 10                                                              
[Switching to thread 10 (Thread 0x7f841631d0 (LWP 19464))]        
#0  0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b095f358) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
88      ../sysdeps/unix/sysv/linux/futex-internal.h: No such file or directory.
(gdb) list                                     
83      in ../sysdeps/unix/sysv/linux/futex-internal.h
(gdb) bt                                   
#0  0x0000007f94f882a4 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55b095f358) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1  0x0000007f94f882a4 in __pthread_cond_wait_common (abstime=0x0, mutex=0x55b095f300, cond=0x55b095f330) at pthread_cond_wait.c:502
#2  0x0000007f94f882a4 in __pthread_cond_wait (cond=0x55b095f330, mutex=0x55b095f300) at pthread_cond_wait.c:655
#3  0x0000007f945c5fdc in  () at /usr/lib/aarch64-linux-gnu/tegra/libnvos.so
#4  0x0000007f90a3fbf0 in TegraV4L2_Poll_CPlane () at /usr/lib/aarch64-linux-gnu/tegra/libtegrav4l2.so
#5  0x0000007f9443a2e4 in plugin_ioctl () at /usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvvideocodec.so
#6  0x0000007f94e69d68 in v4l2_ioctl (fd=14, request=3227014673) at libv4l2.c:1152
#7  0x000000557b13ccc0 in NvV4l2ElementPlane::dqBuffer(v4l2_buffer&, NvBuffer**, NvBuffer**, unsigned int) (this=0x55b089a488, v4l2_buf=..., buffer=0x7f841625b8, shared_buffer=0x0, num_retries=0) at NvV4l2ElementPlane.cpp:126
#8  0x000000557b105720 in dec_capture_loop_fcn(void*) (arg=0x7ffba68218) at video_decode_main.cpp:1055
#9  0x0000007f94f82088 in start_thread (arg=0x7ffba67fff) at pthread_create.c:463
#10 0x0000007f949f5ffc in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78
(gdb) frame 8  
#8  0x000000557b105720 in dec_capture_loop_fcn (arg=0x7ffba68218) at video_decode_main.cpp:1055
1055                if (dec->capture_plane.dqBuffer(v4l2_buf, &dec_buffer, NULL, 0))
(gdb) list                                   
1050                memset(&v4l2_buf, 0, sizeof(v4l2_buf));                
1051                memset(planes, 0, sizeof(planes));                                                                                                      
1052                v4l2_buf.m.planes = planes;          
1053              
1054                /* Dequeue a filled buffer. */
1055                if (dec->capture_plane.dqBuffer(v4l2_buf, &dec_buffer, NULL, 0))                                                                                                 
1056                {          
1057                    if (errno == EAGAIN)                                                                                                               
1058                    {                            
1059                        usleep(1000);                                                                                                                    
(gdb) p dec->capture_plane.num_queued_buffers        
$1 = 10                                                                                                                                                    
(gdb) p ctx.got_e                                    
There is no member named got_e.                                                                                                                              
(gdb) p ctx.got_eos                                  
$2 = true                                                                                                                                                         
(gdb)                                  

Could you please tell me whether you succeeded in reproducing this behavior?

Also, with three decoders and small pieces of sample_outdoor_car_1080p_10fps_25_transcoded.h264, the freeze is reproduced reliably:

ffmpeg -y -i ../../data/Video/sample_outdoor_car_1080p_10fps.h264 -t 00:05 -c copy ../../data/Video/sample_outdoor_car_1080p_10fps_05.h264
ffmpeg -y -i ../../data/Video/sample_outdoor_car_1080p_10fps.h264 -t 00:10 -c copy ../../data/Video/sample_outdoor_car_1080p_10fps_10.h264
while true; do ./video_decode H264 --disable-rendering --input-nalu ../../data/Video/sample_outdoor_car_1080p_10fps_05.h264 ; done &
while true; do ./video_decode H264 --disable-rendering --input-nalu ../../data/Video/sample_outdoor_car_1080p_10fps_05.h264 ; done &
while true; do ./video_decode H264 --disable-rendering --input-nalu ../../data/Video/sample_outdoor_car_1080p_10fps_10.h264 ; done &

Hi,
Thanks for the information. We will follow the steps to reproduce the issue.

Hi,
Please apply the patch and try again:

diff --git a/multimedia_api/ll_samples/samples/00_video_decode/video_decode.h b/multimedia_api/ll_samples/samples/00_video_decode/video_decode.h
index 4c81278..9d6ccfd 100644
--- a/multimedia_api/ll_samples/samples/00_video_decode/video_decode.h
+++ b/multimedia_api/ll_samples/samples/00_video_decode/video_decode.h
@@ -101,6 +101,7 @@ typedef struct
 
     pthread_t dec_capture_loop; // Decoder capture thread, created if running in blocking mode.
     bool got_error;
+    bool op_sent_eos; // Sent EoS to output plane
     bool got_eos;
     bool vp9_file_header_flag;
     bool vp8_file_header_flag;
diff --git a/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp b/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
index 8bb14a9..47b050c 100644
--- a/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
+++ b/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
@@ -1056,7 +1056,14 @@ dec_capture_loop_fcn(void *arg)
             {
                 if (errno == EAGAIN)
                 {
-                    usleep(1000);
+                    if (ctx->op_sent_eos)
+                    {
+                        usleep(16666);
+                    }
+                    else
+                    {
+                        usleep(1000);
+                    }
                 }
                 else
                 {
@@ -2055,6 +2062,7 @@ decode_proc(context_t& ctx, int argc, char *argv[])
         eos = decoder_proc_blocking(ctx, eos, current_file, current_loop, nalu_parse_buffer);
     else
         eos = decoder_proc_nonblocking(ctx, eos, current_file, current_loop, nalu_parse_buffer);
+    ctx.op_sent_eos = eos;
     /* After sending EOS, all the buffers from output plane should be dequeued.
        and after that capture plane loop should be signalled to stop. */
     if (ctx.blocking_mode)

It looks to be a race condition in handling EoS. We don't see the issue after applying the patch. Please give it a try.