A segfault occurs when creating NvVideoDecoder inside a child process

Hi :)
I am getting segmentation fault when I run a decoder inside a child process.
This only happens when I run the decoder in the child process. Without fork() the application works as expected.
Jetson AGX Xavier. L4T 32.5.1-20210519111140
Small example to reproduce the failure: NvFaultEx.zip (1.2 KB)

Backtrace:

Program received signal SIGSEGV, Segmentation fault.
tcache_get (tc_idx=2) at malloc.c:2952
2952	malloc.c: No such file or directory.
(gdb) bt
#0  tcache_get (tc_idx=2) at malloc.c:2952
#1  __GI___libc_malloc (bytes=bytes@entry=50) at malloc.c:3060
#2  0x0000007f84666584 in _dl_new_object (realname=0x5582330900 "/usr/lib/aarch64-linux-gnu/tegra/libnvmm_utils.so", realname@entry=0x7f83dbec60 "", libname=0x558234f7c0 "", 
    libname@entry=0x7f84667cb8 <openaux> "\375{\276\251\375\003", type=type@entry=0, loader=<optimized out>, loader@entry=0x558234f7c0, mode=mode@entry=-2147483648, nsid=nsid@entry=0) at dl-object.c:163
#3  0x0000007f846613bc in _dl_map_object_from_fd (name=0x7f84667cb8 <openaux> "\375{\276\251\375\003", name@entry=0x7f83dbf1b7 "libnvmm_utils.so", origname=origname@entry=0x0, fd=<optimized out>, 
    fbp=fbp@entry=0x7fe6303a68, realname=0x7f83dbec60 "", loader=loader@entry=0x558234f7c0, l_type=0, l_type@entry=2, mode=mode@entry=-2147483648, stack_endp=0x7fe6303a60, stack_endp@entry=0x7fe6303b00, 
    nsid=nsid@entry=0) at dl-load.c:998
#4  0x0000007f84663c68 in _dl_map_object (loader=0x558234f7c0, name=0x7f83dbf1b7 "libnvmm_utils.so", type=2, trace_mode=0, mode=-2147483648, nsid=0) at dl-load.c:2461
#5  0x0000007f84667cf0 in openaux (a=0x7fe63041e8) at dl-deps.c:63
#6  0x0000007f8424a694 in __GI__dl_catch_exception (exception=0x7f83dbf1b7, exception@entry=0x7f8466e5b4 <_dl_close_worker+1324>, operate=0x7fe6303e1c, operate@entry=0x7f84667cb8 <openaux>, args=0x7fe63041d0, 
    args@entry=0x7fe6304450) at dl-error-skeleton.c:196
#7  0x0000007f84668060 in _dl_map_object_deps (map=map@entry=0x558234f7c0, preloads=preloads@entry=0x0, npreloads=npreloads@entry=0, trace_mode=trace_mode@entry=0, open_mode=<optimized out>) at dl-deps.c:249
#8  0x0000007f8466d8d0 in dl_open_worker (a=0x7fe63044d8) at dl-open.c:278
#9  0x0000007f8424a694 in __GI__dl_catch_exception (exception=0xfffffffffffffffe, exception@entry=0x7fe63044c0, operate=0x7fe63042fc, operate@entry=0x7f8466d7e0 <dl_open_worker>, args=0x7fe63044c0, 
    args@entry=0x7fe63044d8) at dl-error-skeleton.c:196
#10 0x0000007f8466d420 in _dl_open (file=0x558234f720 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvvidconv.so", mode=-2147483647, caller_dlopen=0x7f844dea68, nsid=-2, argc=1, argv=0x7fe6304f98, 
    env=<optimized out>) at dl-open.c:605
#11 0x0000007f83ff1014 in dlopen_doit (a=0x7fe6304798) at dlopen.c:66
#12 0x0000007f8424a694 in __GI__dl_catch_exception (exception=0x7f846897a8 <__stack_chk_guard>, exception@entry=0x7fe6304730, operate=0x7fe630458c, operate@entry=0x7f83ff0fb0 <dlopen_doit>, args=0x7fe6304710, 
    args@entry=0x7fe6304798) at dl-error-skeleton.c:196
#13 0x0000007f8424a738 in __GI__dl_catch_error (objname=objname@entry=0x5582303270, errstring=errstring@entry=0x5582303278, mallocedp=mallocedp@entry=0x5582303268, operate=operate@entry=0x7f83ff0fb0 <dlopen_doit>, 
    args=args@entry=0x7fe6304798) at dl-error-skeleton.c:215
#14 0x0000007f83ff2780 in _dlerror_run (operate=operate@entry=0x7f83ff0fb0 <dlopen_doit>, args=0x7fe6304798, args@entry=0x7fe63047a8) at dlerror.c:162
--Type <RET> for more, q to quit, c to continue without paging--c
#15 0x0000007f83ff10e8 in __dlopen (file=<optimized out>, mode=<optimized out>) at dlopen.c:87
#16 0x0000007f844dea68 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#17 0x0000007f844da6c0 in v4l2_fd_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#18 0x0000007f844dae24 in v4l2_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#19 0x000000556b3c870c in NvV4l2Element::NvV4l2Element(char const*, char const*, int, int) ()
#20 0x000000556b3d3478 in NvVideoDecoder::NvVideoDecoder(char const*, int) ()
#21 0x000000556b3d34c4 in NvVideoDecoder::createVideoDecoder(char const*, int) ()
#22 0x000000556b3c3818 in main ()

P.S. Note: libv4l2_nvargus.so was moved to avoid malloc-delete mismatch error as suggested here Memory Leak (Alloc/free mismatch) in Tegra multimedia API (encoder) - #6 by DaneLLL

Hi,
The latest release is Jetpack 4.6(r32.6.1). Are you able to upgrade to this release and try?

It looks like nothing has changed after upgrading to version 32.6.1-20210726122859 :(

Backtrace with libv4l2_nvargus.so:

Program received signal SIGSEGV, Segmentation fault.
malloc_consolidate (av=av@entry=0x7fa7e3fa70 <main_arena>) at malloc.c:4469
4469	malloc.c: No such file or directory.
(gdb) bt
#0  malloc_consolidate (av=av@entry=0x7fa7e3fa70 <main_arena>) at malloc.c:4469
#1  0x0000007fa7d61a70 in _int_malloc (av=av@entry=0x7fa7e3fa70 <main_arena>, bytes=bytes@entry=1234) at malloc.c:3713
#2  0x0000007fa7d63ee0 in __libc_calloc (n=<optimized out>, elem_size=<optimized out>) at malloc.c:3446
#3  0x0000007fa8214334 in _dl_new_object (realname=0x55801832f0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", realname@entry=0x7fa82391b0 "", libname=0x0, 
    libname@entry=0x1 <error: Cannot access memory at address 0x1>, type=127, type@entry=-2147483647, loader=loader@entry=0x0, mode=mode@entry=-1879048191, nsid=nsid@entry=0) at dl-object.c:73
#4  0x0000007fa820f3bc in _dl_map_object_from_fd (name=0x1 <error: Cannot access memory at address 0x1>, 
    name@entry=0x55801836b0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", origname=0x7fd6a9c1e8 "\260\066\030\200U", 
    origname@entry=0x0, fd=<optimized out>, fbp=fbp@entry=0x7fd6a9bbd8, realname=0x7fa82391b0 "", loader=loader@entry=0x0, l_type=-2147483647, l_type@entry=2, mode=mode@entry=-1879048191, stack_endp=0x7fd6a9bbd0, 
    stack_endp@entry=0x7fd6a9bc70, nsid=nsid@entry=0) at dl-load.c:998
#5  0x0000007fa8211c60 in _dl_map_object (loader=0x0, 
    loader@entry=0x7fa8233a20, name=name@entry=0x55801836b0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", type=type@entry=2, trace_mode=trace_mode@entry=0, mode=mode@entry=-1879048191, nsid=0) at dl-load.c:2460
#6  0x0000007fa821b880 in dl_open_worker (a=0x7fd6a9c1e8) at dl-open.c:235
#7  0x0000007fa7df7694 in __GI__dl_catch_exception (exception=0xfffffffffffffffe, exception@entry=0x7fd6a9c1d0, operate=0x7fd6a9c00c, operate@entry=0x7fa821b7d8 <dl_open_worker>, args=0x7fd6a9c1d0, 
    args@entry=0x7fd6a9c1e8) at dl-error-skeleton.c:196
#8  0x0000007fa821b418 in _dl_open
    (file=0x55801836b0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", mode=-2147483647, caller_dlopen=0x7fa808ba68, nsid=-2, argc=1, argv=0x7fd6a9cca8, env=<optimized out>)
    at dl-open.c:605
#9  0x0000007fa7b9e014 in dlopen_doit (a=0x7fd6a9c4a8) at dlopen.c:66
#10 0x0000007fa7df7694 in __GI__dl_catch_exception (exception=0x7fa82377a8 <__stack_chk_guard>, exception@entry=0x7fd6a9c440, operate=0x7fd6a9c29c, operate@entry=0x7fa7b9dfb0 <dlopen_doit>, args=0x7fd6a9c420, 
    args@entry=0x7fd6a9c4a8) at dl-error-skeleton.c:196
#11 0x0000007fa7df7738 in __GI__dl_catch_error
    (objname=objname@entry=0x5580143270, errstring=errstring@entry=0x5580143278, mallocedp=mallocedp@entry=0x5580143268, operate=operate@entry=0x7fa7b9dfb0 <dlopen_doit>, args=args@entry=0x7fd6a9c4a8)
--Type <RET> for more, q to quit, c to continue without paging--c
    at dl-error-skeleton.c:215
#12 0x0000007fa7b9f780 in _dlerror_run (operate=operate@entry=0x7fa7b9dfb0 <dlopen_doit>, args=0x7fd6a9c4a8, args@entry=0x7fd6a9c4b8) at dlerror.c:162
#13 0x0000007fa7b9e0e8 in __dlopen (file=<optimized out>, mode=<optimized out>) at dlopen.c:87
#14 0x0000007fa808ba68 in  () at /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#15 0x0000007fa80876c0 in v4l2_fd_open () at /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#16 0x0000007fa8087e24 in v4l2_open () at /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#17 0x000000557ae08808 in NvV4l2Element::NvV4l2Element(char const*, char const*, int, int) ()
#18 0x000000557ae12574 in NvVideoDecoder::NvVideoDecoder(char const*, int) ()
#19 0x000000557ae125c0 in NvVideoDecoder::createVideoDecoder(char const*, int) ()
#20 0x000000557ae03818 in main ()

Backtrace without libv4l2_nvargus.so:

Program received signal SIGSEGV, Segmentation fault.
tcache_get (tc_idx=2) at malloc.c:2952
2952	malloc.c: No such file or directory.
(gdb) bt
#0  tcache_get (tc_idx=2) at malloc.c:2952
#1  __GI___libc_malloc (bytes=bytes@entry=50) at malloc.c:3060
#2  0x0000007f7901a57c in _dl_new_object (realname=0x559a674a10 "/usr/lib/aarch64-linux-gnu/tegra/libnvmm_utils.so", realname@entry=0x7f78771c60 "", libname=0x559a6948e0 "", 
    libname@entry=0x7f7901bcb0 <openaux> "\375{\276\251\375\003", type=type@entry=0, loader=<optimized out>, loader@entry=0x559a6948e0, mode=mode@entry=-2147483648, nsid=nsid@entry=0) at dl-object.c:163
#3  0x0000007f790153bc in _dl_map_object_from_fd (name=0x7f7901bcb0 <openaux> "\375{\276\251\375\003", name@entry=0x7f787721b7 "libnvmm_utils.so", origname=origname@entry=0x0, fd=<optimized out>, 
    fbp=fbp@entry=0x7fe56480e8, realname=0x7f78771c60 "", loader=loader@entry=0x559a6948e0, l_type=0, l_type@entry=2, mode=mode@entry=-2147483648, stack_endp=0x7fe56480e0, stack_endp@entry=0x7fe5648180, 
    nsid=nsid@entry=0) at dl-load.c:998
#4  0x0000007f79017c60 in _dl_map_object (loader=0x559a6948e0, name=0x7f787721b7 "libnvmm_utils.so", type=2, trace_mode=0, mode=-2147483648, nsid=0) at dl-load.c:2460
#5  0x0000007f7901bce8 in openaux (a=0x7fe5648868) at dl-deps.c:63
#6  0x0000007f78bfd694 in __GI__dl_catch_exception (exception=0x7f787721b7, exception@entry=0x7f790225ac <_dl_close_worker+1324>, operate=0x7fe564849c, operate@entry=0x7f7901bcb0 <openaux>, args=0x7fe5648850, 
    args@entry=0x7fe5648ad0) at dl-error-skeleton.c:196
#7  0x0000007f7901c058 in _dl_map_object_deps (map=map@entry=0x559a6948e0, preloads=preloads@entry=0x0, npreloads=npreloads@entry=0, trace_mode=trace_mode@entry=0, open_mode=<optimized out>) at dl-deps.c:249
#8  0x0000007f790218c8 in dl_open_worker (a=0x7fe5648b58) at dl-open.c:278
#9  0x0000007f78bfd694 in __GI__dl_catch_exception (exception=0xfffffffffffffffe, exception@entry=0x7fe5648b40, operate=0x7fe564897c, operate@entry=0x7f790217d8 <dl_open_worker>, args=0x7fe5648b40, 
    args@entry=0x7fe5648b58) at dl-error-skeleton.c:196
#10 0x0000007f79021418 in _dl_open (file=0x559a694840 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvvidconv.so", mode=-2147483647, caller_dlopen=0x7f78e91a68, nsid=-2, argc=1, argv=0x7fe5649618, 
    env=<optimized out>) at dl-open.c:605
#11 0x0000007f789a4014 in dlopen_doit (a=0x7fe5648e18) at dlopen.c:66
#12 0x0000007f78bfd694 in __GI__dl_catch_exception (exception=0x7f7903d7a8 <__stack_chk_guard>, exception@entry=0x7fe5648db0, operate=0x7fe5648c0c, operate@entry=0x7f789a3fb0 <dlopen_doit>, args=0x7fe5648d90, 
    args@entry=0x7fe5648e18) at dl-error-skeleton.c:196
#13 0x0000007f78bfd738 in __GI__dl_catch_error (objname=objname@entry=0x559a647270, errstring=errstring@entry=0x559a647278, mallocedp=mallocedp@entry=0x559a647268, operate=operate@entry=0x7f789a3fb0 <dlopen_doit>, 
    args=args@entry=0x7fe5648e18) at dl-error-skeleton.c:215
#14 0x0000007f789a5780 in _dlerror_run (operate=operate@entry=0x7f789a3fb0 <dlopen_doit>, args=0x7fe5648e18, args@entry=0x7fe5648e28) at dlerror.c:162
--Type <RET> for more, q to quit, c to continue without paging--
#15 0x0000007f789a40e8 in __dlopen (file=<optimized out>, mode=<optimized out>) at dlopen.c:87
#16 0x0000007f78e91a68 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#17 0x0000007f78e8d6c0 in v4l2_fd_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#18 0x0000007f78e8de24 in v4l2_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#19 0x000000557cb8f808 in NvV4l2Element::NvV4l2Element(char const*, char const*, int, int) ()
#20 0x000000557cb99574 in NvVideoDecoder::NvVideoDecoder(char const*, int) ()
#21 0x000000557cb995c0 in NvVideoDecoder::createVideoDecoder(char const*, int) ()
#22 0x000000557cb8a818 in main ()

Hi Keylost,

We tried your sample code on r32.6.1/Xavier, but saw no errors.
How often can you reproduce the error?
Please check whether our steps are correct. Thanks!

$ ./NvFaultEx 
Parent: BEGIN
Child PID: 21550
Child: BEGIN
Child process terminated by signal 11
Parent: END
Child process terminated by signal 11

No errors? Signal 11 means SIGSEGV.
If the decoder was successfully created, then you would receive the message: Decoder created

You could increase sleep time in line 49 and use GDB to connect to child pid to get backtrace.

Hi,
Please try the steps:

  1. Apply the patch to 00_video_decode:
diff --git a/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp b/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
index 8bb14a9..fcda7dd 100644
--- a/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
+++ b/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
@@ -39,6 +39,7 @@
 #include <fcntl.h>
 #include <poll.h>
 #include <nvbuf_utils.h>
+#include <sys/stat.h>
 
 #include "video_decode.h"
 #include "nvbuf_utils.h"
@@ -992,6 +993,8 @@ dec_capture_loop_fcn(void *arg)
     NvVideoDecoder *dec = ctx->dec;
     struct v4l2_event ev;
     int ret;
+    NvBufferSession session;
+    session = NvBufferSessionCreate();
 
     cout << "Starting decoder capture loop thread" << endl;
     /* Need to wait for the first Resolution change event, so that
@@ -1152,6 +1155,7 @@ dec_capture_loop_fcn(void *arg)
                 transform_params.transform_filter = NvBufferTransform_Filter_Nearest;
                 transform_params.src_rect = src_rect;
                 transform_params.dst_rect = dest_rect;
+		transform_params.session = session;
 
                 if(ctx->capture_plane_mem_type == V4L2_MEMORY_DMABUF)
                     dec_buffer->planes[0].fd = ctx->dmabuff_fd[v4l2_buf.index];
@@ -1162,7 +1166,6 @@ dec_capture_loop_fcn(void *arg)
                     cerr << "Transform failed" << endl;
                     break;
                 }
-
                 /* Write raw video frame to file. */
                 if (!ctx->stats && ctx->out_file)
                 {
@@ -1220,6 +1223,7 @@ dec_capture_loop_fcn(void *arg)
         }
     }
 #endif
+    NvBufferSessionDestroy(session);
     cout << "Exiting decoder capture loop thread" << endl;
     return NULL;
 }
@@ -1802,6 +1806,7 @@ decode_proc(context_t& ctx, int argc, char *argv[])
     ctx.in_file = (std::ifstream **)malloc(sizeof(std::ifstream *)*ctx.file_count);
     for (uint32_t i = 0 ; i < ctx.file_count ; i++)
     {
+	cout << "---> opening " << ctx.in_file_path[i] << endl;
         ctx.in_file[i] = new ifstream(ctx.in_file_path[i]);
         TEST_ERROR(!ctx.in_file[i]->is_open(), "Error opening input file", cleanup);
     }
@@ -2212,7 +2217,7 @@ cleanup:
   * @param argv : Argument Vector
   */
 int
-main(int argc, char *argv[])
+main_1(int argc, char *argv[])
 {
     /* create decoder context. */
     context_t ctx;
@@ -2239,3 +2244,29 @@ main(int argc, char *argv[])
 
     return ret;
 }
+
+static int daemonize(void)
+{
+pid_t pid;
+int rc;
+
+// daemon initialzation
+if ( (pid = fork()) < 0 )
+	return -1;
+else if (pid != 0)
+	exit(0);		// parent goes bye-bye
+	
+// child continues
+setsid();			// become session leader
+rc = chdir("/");			// change working directory
+umask(0);			// clear our file mode creation mask
+return rc;
+}
+
+int
+main(int argc, char *argv[])
+{
+	daemonize();
+	main_1(argc,argv);
+	return 0;
+}
  2. Rebuild the app and run:
$ ./video_decode H264 -o /home/nvidia/a.yuv --disable-rendering /usr/src/jetson_multimedia_api/data/Video/sample_outdoor_car_1080p_10fps.h264
1 Like

It seems to be working… Is something wrong with my sample? It doesn't look like there is any significant difference before the createVideoDecoder() call, and without fork() my sample works fine…

My sample works when compiled with the Makefile from the 00_video_decode sample… After further investigation, I discovered that the nvjpeg library must be linked for some unknown reason to make my sample work. (╯°□°）╯︵ ┻━┻
The "-Wl,--no-as-needed" flag is required because symbols from this library are not used in the sample, so the linker would otherwise drop it.

Workaround:

# Build script for the NvFaultEx reproducer: creates an NvVideoDecoder from the
# Jetson multimedia API sample classes and links the nvjpeg workaround library.
#
# project(... DESCRIPTION ...) was introduced in CMake 3.9, so 3.9 is the real
# minimum for this file.
cmake_minimum_required(VERSION 3.9)
set(PROJECT NvFaultEx)
project(${PROJECT}
    DESCRIPTION "nvidia multimedia api segfault example"
    LANGUAGES CXX)

# Install locations of the L4T libraries and the multimedia API sources.
# Defaults match the paths used on the Jetson; override with -D<VAR>=<path>.
set(TEGRA_LIB_DIR /usr/lib/aarch64-linux-gnu/tegra
    CACHE PATH "Directory containing the Tegra (L4T) shared libraries")
set(JETSON_MULTIMEDIA_API_DIR /usr/src/jetson_multimedia_api
    CACHE PATH "Root of the Jetson multimedia API sources")
set(CUDA_INCLUDE_DIR /usr/local/cuda/include
    CACHE PATH "CUDA header directory")

find_library(LIB_NVBUF nvbuf_utils PATHS "${TEGRA_LIB_DIR}")
find_library(LIB_V4L2 nvv4l2 PATHS "${TEGRA_LIB_DIR}")
find_library(NVJPEG_LIBRARY nvjpeg PATHS "${TEGRA_LIB_DIR}")
find_package(Threads REQUIRED)

# Stop at configure time with a readable message if a library is missing.
foreach(lib_var LIB_NVBUF LIB_V4L2 NVJPEG_LIBRARY)
    if(NOT ${lib_var})
        message(FATAL_ERROR
            "${lib_var} was not found; set TEGRA_LIB_DIR or ${lib_var} explicitly")
    endif()
endforeach()

# Helper classes shipped as sources with the multimedia API samples.
set(NV_COMMON_CLASSES_DIR "${JETSON_MULTIMEDIA_API_DIR}/samples/common/classes")

add_executable(${PROJECT} NvFaultEx.cpp
    "${NV_COMMON_CLASSES_DIR}/NvBuffer.cpp"
    "${NV_COMMON_CLASSES_DIR}/NvElement.cpp"
    "${NV_COMMON_CLASSES_DIR}/NvElementProfiler.cpp"
    "${NV_COMMON_CLASSES_DIR}/NvLogging.cpp"
    "${NV_COMMON_CLASSES_DIR}/NvV4l2Element.cpp"
    "${NV_COMMON_CLASSES_DIR}/NvV4l2ElementPlane.cpp"
    "${NV_COMMON_CLASSES_DIR}/NvVideoDecoder.cpp"
    "${NV_COMMON_CLASSES_DIR}/NvVideoEncoder.cpp"
)

# nvjpeg is linked only as a workaround for the segfault seen when the decoder
# is created in a forked child; the sample uses none of its symbols.
target_link_libraries(${PROJECT} PRIVATE
    Threads::Threads
    "${LIB_NVBUF}"
    "${LIB_V4L2}"
    "${NVJPEG_LIBRARY}"
)

# --no-as-needed is required because no nvjpeg symbols are referenced by the
# sample. LINK_FLAGS is used because target_link_options needs CMake 3.13.
set_target_properties(${PROJECT} PROPERTIES LINK_FLAGS "-Wl,--no-as-needed")

target_include_directories(${PROJECT} PRIVATE
    "${JETSON_MULTIMEDIA_API_DIR}/include"
    "${CUDA_INCLUDE_DIR}"
)

One more important detail:
The nvjpeg library solves the segfault issue, but the NvBufferSession should also be created, as DaneLLL pointed out. Without it, you will get errors like this in the child process:

[WARN] (/usr/src/jetson_multimedia_api/samples/common/classes/NvV4l2Element.cpp:119) <dec0> :Error while DQing event: Resource temporarily unavailable
[WARN] (/usr/src/jetson_multimedia_api/samples/common/classes/NvV4l2Element.cpp:119) <dec0> :Error while DQing event: Resource temporarily unavailable
NvMapMemCacheMaint:1075334668 failed [14]