A segfault occurs when creating NvVideoDecoder inside a child process

Hi :)
I am getting segmentation fault when I run a decoder inside a child process.
This only happens when I run the decoder in the child process. Without fork() the application works as expected.
Jetson AGX Xavier. L4T 32.5.1-20210519111140
Small example to reproduce the failure: NvFaultEx.zip (1.2 KB)

Backtrace:

Program received signal SIGSEGV, Segmentation fault.
tcache_get (tc_idx=2) at malloc.c:2952
2952	malloc.c: No such file or directory.
(gdb) bt
#0  tcache_get (tc_idx=2) at malloc.c:2952
#1  __GI___libc_malloc (bytes=bytes@entry=50) at malloc.c:3060
#2  0x0000007f84666584 in _dl_new_object (realname=0x5582330900 "/usr/lib/aarch64-linux-gnu/tegra/libnvmm_utils.so", realname@entry=0x7f83dbec60 "", libname=0x558234f7c0 "", 
    libname@entry=0x7f84667cb8 <openaux> "\375{\276\251\375\003", type=type@entry=0, loader=<optimized out>, loader@entry=0x558234f7c0, mode=mode@entry=-2147483648, nsid=nsid@entry=0) at dl-object.c:163
#3  0x0000007f846613bc in _dl_map_object_from_fd (name=0x7f84667cb8 <openaux> "\375{\276\251\375\003", name@entry=0x7f83dbf1b7 "libnvmm_utils.so", origname=origname@entry=0x0, fd=<optimized out>, 
    fbp=fbp@entry=0x7fe6303a68, realname=0x7f83dbec60 "", loader=loader@entry=0x558234f7c0, l_type=0, l_type@entry=2, mode=mode@entry=-2147483648, stack_endp=0x7fe6303a60, stack_endp@entry=0x7fe6303b00, 
    nsid=nsid@entry=0) at dl-load.c:998
#4  0x0000007f84663c68 in _dl_map_object (loader=0x558234f7c0, name=0x7f83dbf1b7 "libnvmm_utils.so", type=2, trace_mode=0, mode=-2147483648, nsid=0) at dl-load.c:2461
#5  0x0000007f84667cf0 in openaux (a=0x7fe63041e8) at dl-deps.c:63
#6  0x0000007f8424a694 in __GI__dl_catch_exception (exception=0x7f83dbf1b7, exception@entry=0x7f8466e5b4 <_dl_close_worker+1324>, operate=0x7fe6303e1c, operate@entry=0x7f84667cb8 <openaux>, args=0x7fe63041d0, 
    args@entry=0x7fe6304450) at dl-error-skeleton.c:196
#7  0x0000007f84668060 in _dl_map_object_deps (map=map@entry=0x558234f7c0, preloads=preloads@entry=0x0, npreloads=npreloads@entry=0, trace_mode=trace_mode@entry=0, open_mode=<optimized out>) at dl-deps.c:249
#8  0x0000007f8466d8d0 in dl_open_worker (a=0x7fe63044d8) at dl-open.c:278
#9  0x0000007f8424a694 in __GI__dl_catch_exception (exception=0xfffffffffffffffe, exception@entry=0x7fe63044c0, operate=0x7fe63042fc, operate@entry=0x7f8466d7e0 <dl_open_worker>, args=0x7fe63044c0, 
    args@entry=0x7fe63044d8) at dl-error-skeleton.c:196
#10 0x0000007f8466d420 in _dl_open (file=0x558234f720 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvvidconv.so", mode=-2147483647, caller_dlopen=0x7f844dea68, nsid=-2, argc=1, argv=0x7fe6304f98, 
    env=<optimized out>) at dl-open.c:605
#11 0x0000007f83ff1014 in dlopen_doit (a=0x7fe6304798) at dlopen.c:66
#12 0x0000007f8424a694 in __GI__dl_catch_exception (exception=0x7f846897a8 <__stack_chk_guard>, exception@entry=0x7fe6304730, operate=0x7fe630458c, operate@entry=0x7f83ff0fb0 <dlopen_doit>, args=0x7fe6304710, 
    args@entry=0x7fe6304798) at dl-error-skeleton.c:196
#13 0x0000007f8424a738 in __GI__dl_catch_error (objname=objname@entry=0x5582303270, errstring=errstring@entry=0x5582303278, mallocedp=mallocedp@entry=0x5582303268, operate=operate@entry=0x7f83ff0fb0 <dlopen_doit>, 
    args=args@entry=0x7fe6304798) at dl-error-skeleton.c:215
#14 0x0000007f83ff2780 in _dlerror_run (operate=operate@entry=0x7f83ff0fb0 <dlopen_doit>, args=0x7fe6304798, args@entry=0x7fe63047a8) at dlerror.c:162
--Type <RET> for more, q to quit, c to continue without paging--c
#15 0x0000007f83ff10e8 in __dlopen (file=<optimized out>, mode=<optimized out>) at dlopen.c:87
#16 0x0000007f844dea68 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#17 0x0000007f844da6c0 in v4l2_fd_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#18 0x0000007f844dae24 in v4l2_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#19 0x000000556b3c870c in NvV4l2Element::NvV4l2Element(char const*, char const*, int, int) ()
#20 0x000000556b3d3478 in NvVideoDecoder::NvVideoDecoder(char const*, int) ()
#21 0x000000556b3d34c4 in NvVideoDecoder::createVideoDecoder(char const*, int) ()
#22 0x000000556b3c3818 in main ()

P.S. Note: libv4l2_nvargus.so was moved to avoid malloc-delete mismatch error as suggested here Memory Leak (Alloc/free mismatch) in Tegra multimedia API (encoder) - #6 by DaneLLL

Hi,
The latest release is Jetpack 4.6(r32.6.1). Are you able to upgrade to this release and try?

It looks like nothing has changed after upgrading to version 32.6.1-20210726122859 :(

Backtrace with libv4l2_nvargus.so:

Program received signal SIGSEGV, Segmentation fault.
malloc_consolidate (av=av@entry=0x7fa7e3fa70 <main_arena>) at malloc.c:4469
4469	malloc.c: No such file or directory.
(gdb) bt
#0  malloc_consolidate (av=av@entry=0x7fa7e3fa70 <main_arena>) at malloc.c:4469
#1  0x0000007fa7d61a70 in _int_malloc (av=av@entry=0x7fa7e3fa70 <main_arena>, bytes=bytes@entry=1234) at malloc.c:3713
#2  0x0000007fa7d63ee0 in __libc_calloc (n=<optimized out>, elem_size=<optimized out>) at malloc.c:3446
#3  0x0000007fa8214334 in _dl_new_object (realname=0x55801832f0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", realname@entry=0x7fa82391b0 "", libname=0x0, 
    libname@entry=0x1 <error: Cannot access memory at address 0x1>, type=127, type@entry=-2147483647, loader=loader@entry=0x0, mode=mode@entry=-1879048191, nsid=nsid@entry=0) at dl-object.c:73
#4  0x0000007fa820f3bc in _dl_map_object_from_fd (name=0x1 <error: Cannot access memory at address 0x1>, 
    name@entry=0x55801836b0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", origname=0x7fd6a9c1e8 "\260\066\030\200U", 
    origname@entry=0x0, fd=<optimized out>, fbp=fbp@entry=0x7fd6a9bbd8, realname=0x7fa82391b0 "", loader=loader@entry=0x0, l_type=-2147483647, l_type@entry=2, mode=mode@entry=-1879048191, stack_endp=0x7fd6a9bbd0, 
    stack_endp@entry=0x7fd6a9bc70, nsid=nsid@entry=0) at dl-load.c:998
#5  0x0000007fa8211c60 in _dl_map_object (loader=0x0, 
    loader@entry=0x7fa8233a20, name=name@entry=0x55801836b0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", type=type@entry=2, trace_mode=trace_mode@entry=0, mode=mode@entry=-1879048191, nsid=0) at dl-load.c:2460
#6  0x0000007fa821b880 in dl_open_worker (a=0x7fd6a9c1e8) at dl-open.c:235
#7  0x0000007fa7df7694 in __GI__dl_catch_exception (exception=0xfffffffffffffffe, exception@entry=0x7fd6a9c1d0, operate=0x7fd6a9c00c, operate@entry=0x7fa821b7d8 <dl_open_worker>, args=0x7fd6a9c1d0, 
    args@entry=0x7fd6a9c1e8) at dl-error-skeleton.c:196
#8  0x0000007fa821b418 in _dl_open
    (file=0x55801836b0 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvcuvidvideocodec.so", mode=-2147483647, caller_dlopen=0x7fa808ba68, nsid=-2, argc=1, argv=0x7fd6a9cca8, env=<optimized out>)
    at dl-open.c:605
#9  0x0000007fa7b9e014 in dlopen_doit (a=0x7fd6a9c4a8) at dlopen.c:66
#10 0x0000007fa7df7694 in __GI__dl_catch_exception (exception=0x7fa82377a8 <__stack_chk_guard>, exception@entry=0x7fd6a9c440, operate=0x7fd6a9c29c, operate@entry=0x7fa7b9dfb0 <dlopen_doit>, args=0x7fd6a9c420, 
    args@entry=0x7fd6a9c4a8) at dl-error-skeleton.c:196
#11 0x0000007fa7df7738 in __GI__dl_catch_error
    (objname=objname@entry=0x5580143270, errstring=errstring@entry=0x5580143278, mallocedp=mallocedp@entry=0x5580143268, operate=operate@entry=0x7fa7b9dfb0 <dlopen_doit>, args=args@entry=0x7fd6a9c4a8)
--Type <RET> for more, q to quit, c to continue without paging--c
    at dl-error-skeleton.c:215
#12 0x0000007fa7b9f780 in _dlerror_run (operate=operate@entry=0x7fa7b9dfb0 <dlopen_doit>, args=0x7fd6a9c4a8, args@entry=0x7fd6a9c4b8) at dlerror.c:162
#13 0x0000007fa7b9e0e8 in __dlopen (file=<optimized out>, mode=<optimized out>) at dlopen.c:87
#14 0x0000007fa808ba68 in  () at /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#15 0x0000007fa80876c0 in v4l2_fd_open () at /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#16 0x0000007fa8087e24 in v4l2_open () at /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#17 0x000000557ae08808 in NvV4l2Element::NvV4l2Element(char const*, char const*, int, int) ()
#18 0x000000557ae12574 in NvVideoDecoder::NvVideoDecoder(char const*, int) ()
#19 0x000000557ae125c0 in NvVideoDecoder::createVideoDecoder(char const*, int) ()
#20 0x000000557ae03818 in main ()

Backtrace without libv4l2_nvargus.so:

Program received signal SIGSEGV, Segmentation fault.
tcache_get (tc_idx=2) at malloc.c:2952
2952	malloc.c: No such file or directory.
(gdb) bt
#0  tcache_get (tc_idx=2) at malloc.c:2952
#1  __GI___libc_malloc (bytes=bytes@entry=50) at malloc.c:3060
#2  0x0000007f7901a57c in _dl_new_object (realname=0x559a674a10 "/usr/lib/aarch64-linux-gnu/tegra/libnvmm_utils.so", realname@entry=0x7f78771c60 "", libname=0x559a6948e0 "", 
    libname@entry=0x7f7901bcb0 <openaux> "\375{\276\251\375\003", type=type@entry=0, loader=<optimized out>, loader@entry=0x559a6948e0, mode=mode@entry=-2147483648, nsid=nsid@entry=0) at dl-object.c:163
#3  0x0000007f790153bc in _dl_map_object_from_fd (name=0x7f7901bcb0 <openaux> "\375{\276\251\375\003", name@entry=0x7f787721b7 "libnvmm_utils.so", origname=origname@entry=0x0, fd=<optimized out>, 
    fbp=fbp@entry=0x7fe56480e8, realname=0x7f78771c60 "", loader=loader@entry=0x559a6948e0, l_type=0, l_type@entry=2, mode=mode@entry=-2147483648, stack_endp=0x7fe56480e0, stack_endp@entry=0x7fe5648180, 
    nsid=nsid@entry=0) at dl-load.c:998
#4  0x0000007f79017c60 in _dl_map_object (loader=0x559a6948e0, name=0x7f787721b7 "libnvmm_utils.so", type=2, trace_mode=0, mode=-2147483648, nsid=0) at dl-load.c:2460
#5  0x0000007f7901bce8 in openaux (a=0x7fe5648868) at dl-deps.c:63
#6  0x0000007f78bfd694 in __GI__dl_catch_exception (exception=0x7f787721b7, exception@entry=0x7f790225ac <_dl_close_worker+1324>, operate=0x7fe564849c, operate@entry=0x7f7901bcb0 <openaux>, args=0x7fe5648850, 
    args@entry=0x7fe5648ad0) at dl-error-skeleton.c:196
#7  0x0000007f7901c058 in _dl_map_object_deps (map=map@entry=0x559a6948e0, preloads=preloads@entry=0x0, npreloads=npreloads@entry=0, trace_mode=trace_mode@entry=0, open_mode=<optimized out>) at dl-deps.c:249
#8  0x0000007f790218c8 in dl_open_worker (a=0x7fe5648b58) at dl-open.c:278
#9  0x0000007f78bfd694 in __GI__dl_catch_exception (exception=0xfffffffffffffffe, exception@entry=0x7fe5648b40, operate=0x7fe564897c, operate@entry=0x7f790217d8 <dl_open_worker>, args=0x7fe5648b40, 
    args@entry=0x7fe5648b58) at dl-error-skeleton.c:196
#10 0x0000007f79021418 in _dl_open (file=0x559a694840 "/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvvidconv.so", mode=-2147483647, caller_dlopen=0x7f78e91a68, nsid=-2, argc=1, argv=0x7fe5649618, 
    env=<optimized out>) at dl-open.c:605
#11 0x0000007f789a4014 in dlopen_doit (a=0x7fe5648e18) at dlopen.c:66
#12 0x0000007f78bfd694 in __GI__dl_catch_exception (exception=0x7f7903d7a8 <__stack_chk_guard>, exception@entry=0x7fe5648db0, operate=0x7fe5648c0c, operate@entry=0x7f789a3fb0 <dlopen_doit>, args=0x7fe5648d90, 
    args@entry=0x7fe5648e18) at dl-error-skeleton.c:196
#13 0x0000007f78bfd738 in __GI__dl_catch_error (objname=objname@entry=0x559a647270, errstring=errstring@entry=0x559a647278, mallocedp=mallocedp@entry=0x559a647268, operate=operate@entry=0x7f789a3fb0 <dlopen_doit>, 
    args=args@entry=0x7fe5648e18) at dl-error-skeleton.c:215
#14 0x0000007f789a5780 in _dlerror_run (operate=operate@entry=0x7f789a3fb0 <dlopen_doit>, args=0x7fe5648e18, args@entry=0x7fe5648e28) at dlerror.c:162
--Type <RET> for more, q to quit, c to continue without paging--
#15 0x0000007f789a40e8 in __dlopen (file=<optimized out>, mode=<optimized out>) at dlopen.c:87
#16 0x0000007f78e91a68 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#17 0x0000007f78e8d6c0 in v4l2_fd_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#18 0x0000007f78e8de24 in v4l2_open () from /usr/lib/aarch64-linux-gnu/tegra/libv4l2.so.0
#19 0x000000557cb8f808 in NvV4l2Element::NvV4l2Element(char const*, char const*, int, int) ()
#20 0x000000557cb99574 in NvVideoDecoder::NvVideoDecoder(char const*, int) ()
#21 0x000000557cb995c0 in NvVideoDecoder::createVideoDecoder(char const*, int) ()
#22 0x000000557cb8a818 in main ()

Hi Keylost,

We tried your sample code on r32.6.1/Xavier, but no errors.
How often can you reproduce the error?
Please check whether our steps are correct. Thanks!

$ ./NvFaultEx 
Parent: BEGIN
Child PID: 21550
Child: BEGIN
Child process terminated by signal 11
Parent: END
Child process terminated by signal 11

No errors? Signal 11 means SIGSEGV.
If the decoder was successfully created, then you would receive the message: Decoder created

You could increase sleep time in line 49 and use GDB to connect to child pid to get backtrace.

Hi,
Please try the steps:

  1. Apply the patch to 00_video_decode:
diff --git a/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp b/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
index 8bb14a9..fcda7dd 100644
--- a/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
+++ b/multimedia_api/ll_samples/samples/00_video_decode/video_decode_main.cpp
@@ -39,6 +39,7 @@
 #include <fcntl.h>
 #include <poll.h>
 #include <nvbuf_utils.h>
+#include <sys/stat.h>
 
 #include "video_decode.h"
 #include "nvbuf_utils.h"
@@ -992,6 +993,8 @@ dec_capture_loop_fcn(void *arg)
     NvVideoDecoder *dec = ctx->dec;
     struct v4l2_event ev;
     int ret;
+    NvBufferSession session;
+    session = NvBufferSessionCreate();
 
     cout << "Starting decoder capture loop thread" << endl;
     /* Need to wait for the first Resolution change event, so that
@@ -1152,6 +1155,7 @@ dec_capture_loop_fcn(void *arg)
                 transform_params.transform_filter = NvBufferTransform_Filter_Nearest;
                 transform_params.src_rect = src_rect;
                 transform_params.dst_rect = dest_rect;
+		transform_params.session = session;
 
                 if(ctx->capture_plane_mem_type == V4L2_MEMORY_DMABUF)
                     dec_buffer->planes[0].fd = ctx->dmabuff_fd[v4l2_buf.index];
@@ -1162,7 +1166,6 @@ dec_capture_loop_fcn(void *arg)
                     cerr << "Transform failed" << endl;
                     break;
                 }
-
                 /* Write raw video frame to file. */
                 if (!ctx->stats && ctx->out_file)
                 {
@@ -1220,6 +1223,7 @@ dec_capture_loop_fcn(void *arg)
         }
     }
 #endif
+    NvBufferSessionDestroy(session);
     cout << "Exiting decoder capture loop thread" << endl;
     return NULL;
 }
@@ -1802,6 +1806,7 @@ decode_proc(context_t& ctx, int argc, char *argv[])
     ctx.in_file = (std::ifstream **)malloc(sizeof(std::ifstream *)*ctx.file_count);
     for (uint32_t i = 0 ; i < ctx.file_count ; i++)
     {
+	cout << "---> opening " << ctx.in_file_path[i] << endl;
         ctx.in_file[i] = new ifstream(ctx.in_file_path[i]);
         TEST_ERROR(!ctx.in_file[i]->is_open(), "Error opening input file", cleanup);
     }
@@ -2212,7 +2217,7 @@ cleanup:
   * @param argv : Argument Vector
   */
 int
-main(int argc, char *argv[])
+main_1(int argc, char *argv[])
 {
     /* create decoder context. */
     context_t ctx;
@@ -2239,3 +2244,29 @@ main(int argc, char *argv[])
 
     return ret;
 }
+
+static int daemonize(void)
+{
+pid_t pid;
+int rc;
+
+// daemon initialzation
+if ( (pid = fork()) < 0 )
+	return -1;
+else if (pid != 0)
+	exit(0);		// parent goes bye-bye
+	
+// child continues
+setsid();			// become session leader
+rc = chdir("/");			// change working directory
+umask(0);			// clear our file mode creation mask
+return rc;
+}
+
+int
+main(int argc, char *argv[])
+{
+	daemonize();
+	main_1(argc,argv);
+	return 0;
+}
  2. Rebuild the app and run:
$ ./video_decode H264 -o /home/nvidia/a.yuv --disable-rendering /usr/src/jetson_multimedia_api/data/Video/sample_outdoor_car_1080p_10fps.h264
1 Like

It seems to be working… Is something wrong with my sample? It doesn’t look like there is any significant difference before the createVideoDecoder() call and without fork() my sample works fine…

My sample works when compiled with the Makefile from 00_video_decode sample… After further investigation, I discovered that the nvjpeg library must be linked for some unknown reason to make my sample work. (╯°▔°)╯︵ ┻━┻
The "-Wl,--no-as-needed" flag is required because symbols from this library are not used in the sample, so the linker would otherwise drop the dependency on it.

Workaround:

# Build script for the NvFaultEx reproducer: a small program that creates an
# NvVideoDecoder (Jetson Multimedia API) inside a fork()ed child process.
#
# project(... DESCRIPTION ...) is only understood by CMake 3.9 and newer, so
# that is the real minimum version for this file.
cmake_minimum_required(VERSION 3.9)
set(PROJECT NvFaultEx)
project(${PROJECT}
    DESCRIPTION "nvidia multimedia api segfault example"
    LANGUAGES CXX
)

# Install locations. The defaults match a stock JetPack / L4T image; override
# them on the command line (-D<NAME>=<path>) for a non-standard layout.
set(JETSON_MULTIMEDIA_API_DIR "/usr/src/jetson_multimedia_api"
    CACHE PATH "Root directory of the Jetson Multimedia API")
set(TEGRA_LIBRARY_DIR "/usr/lib/aarch64-linux-gnu/tegra"
    CACHE PATH "Directory containing the Tegra multimedia libraries")
set(CUDA_INCLUDE_DIR "/usr/local/cuda/include"
    CACHE PATH "Directory containing the CUDA headers")

find_library(LIB_NVBUF nvbuf_utils PATHS "${TEGRA_LIBRARY_DIR}")
find_library(LIB_V4L2 nvv4l2 PATHS "${TEGRA_LIBRARY_DIR}")
find_library(NVJPEG_LIBRARY nvjpeg PATHS "${TEGRA_LIBRARY_DIR}")

# Stop at configure time with a clear message if a library is missing,
# instead of failing later with a generic "set to NOTFOUND" error.
foreach(required_lib LIB_NVBUF LIB_V4L2 NVJPEG_LIBRARY)
    if(NOT ${required_lib})
        message(FATAL_ERROR
            "${required_lib} was not found; searched ${TEGRA_LIBRARY_DIR}")
    endif()
endforeach()

find_package(Threads REQUIRED)

# The sample helper classes are compiled straight from the Multimedia API tree.
set(MMAPI_CLASSES_DIR "${JETSON_MULTIMEDIA_API_DIR}/samples/common/classes")

add_executable(${PROJECT} NvFaultEx.cpp
    "${MMAPI_CLASSES_DIR}/NvBuffer.cpp"
    "${MMAPI_CLASSES_DIR}/NvElement.cpp"
    "${MMAPI_CLASSES_DIR}/NvElementProfiler.cpp"
    "${MMAPI_CLASSES_DIR}/NvLogging.cpp"
    "${MMAPI_CLASSES_DIR}/NvV4l2Element.cpp"
    "${MMAPI_CLASSES_DIR}/NvV4l2ElementPlane.cpp"
    "${MMAPI_CLASSES_DIR}/NvVideoDecoder.cpp"
    "${MMAPI_CLASSES_DIR}/NvVideoEncoder.cpp"
)

# Workaround: nvjpeg is linked even though the program uses none of its
# symbols. Without it the decoder was observed to segfault when created in a
# forked child process (root cause unknown).
target_link_libraries(${PROJECT} PRIVATE
    Threads::Threads
    "${LIB_NVBUF}"
    "${LIB_V4L2}"
    "${NVJPEG_LIBRARY}"
)

# --no-as-needed keeps the linker from dropping nvjpeg as an unused dependency.
# LINK_FLAGS is placed before the libraries on the link line, so the flag
# applies to every library listed above.
set_target_properties(${PROJECT} PROPERTIES LINK_FLAGS "-Wl,--no-as-needed")

target_include_directories(${PROJECT} PRIVATE
    "${JETSON_MULTIMEDIA_API_DIR}/include"
    "${CUDA_INCLUDE_DIR}"
)

One more important detail:
The nvjpeg library solves the segfault issue, but the NvBufferSession should also be created, as DaneLLL pointed out. Without it, you will get errors like this in the child process:

[WARN] (/usr/src/jetson_multimedia_api/samples/common/classes/NvV4l2Element.cpp:119) <dec0> :Error while DQing event: Resource temporarily unavailable
[WARN] (/usr/src/jetson_multimedia_api/samples/common/classes/NvV4l2Element.cpp:119) <dec0> :Error while DQing event: Resource temporarily unavailable
NvMapMemCacheMaint:1075334668 failed [14]