Error when running 2 execution contexts in parallel in TensorRT 7

The following error is printed after running for about 1 minute:

[F] [TRT] Assertion failed: *refCount > 0
../rtSafe/WeightsPtr.cpp:20
Aborting...

[E] [TRT] FAILED_EXECUTION: std::exception

*** Error in `./main.out': double free or corruption (!prev): 0x000000000d24ba00 ***
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x777e5)[0x7f4b72b737e5]
/lib/x86_64-linux-gnu/libc.so.6(+0x8037a)[0x7f4b72b7c37a]
/lib/x86_64-linux-gnu/libc.so.6(cfree+0x4c)[0x7f4b72b8053c]
/docker_environment/usr/lib/x86_64-linux-gnu/libnvinfer.so.7(_ZN8nvinfer110WeightsPtr7releaseEv+0x60)[0x7f4b7403e9e0]
/docker_environment/usr/lib/x86_64-linux-gnu/libnvinfer.so.7(_ZNK8nvinfer12rt4cuda22CudnnConvolutionRunner7executeERKNS0_13CommonContextERKNS0_19ExecutionParametersE+0x123c)[0x7f4b740767cc]
/docker_environment/usr/lib/x86_64-linux-gnu/libnvinfer.so.7(_ZN8nvinfer12rt16ExecutionContext15enqueueInternalEPP10CUevent_st+0x3f1)[0x7f4b73de51b1]
/docker_environment/usr/lib/x86_64-linux-gnu/libnvinfer.so.7(_ZN8nvinfer12rt16ExecutionContext9enqueueV2EPPvP11CUstream_stPP10CUevent_st+0x220)[0x7f4b73de82b0]
./main.out(_Z3runSt6vectorIPN8nvinfer117IExecutionContextESaIS2_EEiS_IS_IiSaIiEESaIS6_EE+0x483)[0x510b83]
./main.out(_ZNSt6thread5_ImplISt12_Bind_simpleIFPFvSt6vectorIPN8nvinfer117IExecutionContextESaIS5_EEiS2_IS2_IiSaIiEESaIS9_EEES7_iSB_EEE6_M_runEv+0x8c)[0x5110ec]
/docker_environment/usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xb8c80)[0x7f4b73194c80]
/lib/x86_64-linux-gnu/libpthread.so.0(+0x76ba)[0x7f4b81a886ba]
/lib/x86_64-linux-gnu/libc.so.6(clone+0x6d)[0x7f4b72c0341d]
======= Memory map: ========
00400000-006b8000 r-xp 00000000 08:03 836208                             /media/tu/T4_DATA2/workspace/Tu/git/sc-core/object_detection_trt/build/app/main.out
008b7000-008bc000 r--p 002b7000 08:03 836208                             /media/tu/T4_DATA2/workspace/Tu/git/sc-core/object_detection_trt/build/app/main.out
008bc000-008bd000 rw-p 002bc000 08:03 836208                             /media/tu/T4_DATA2/workspace/Tu/git/sc-core/object_detection_trt/build/app/main.out
008bd000-008c0000 rw-p 00000000 00:00 0 
00c8a000-32ffe000 rw-p 00000000 00:00 0                                  [heap]
200000000-200100000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200100000-200104000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200104000-200120000 ---p 00000000 00:00 0 
200120000-200520000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200520000-200620000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200620000-200624000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200624000-200640000 ---p 00000000 00:00 0 
200640000-200a40000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200a40000-200a44000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200a44000-200a60000 ---p 00000000 00:00 0 
200a60000-200e60000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200e60000-200e64000 rw-s 00000000 00:06 570                              /dev/nvidiactl
200e64000-200e80000 ---p 00000000 00:00 0 
200e80000-201280000 rw-s 00000000 00:06 570                              /dev/nvidiactl
201280000-201284000 rw-s 00000000 00:06 570                              /dev/nvidiactl
201284000-2012a0000 ---p 00000000 00:00 0 
2012a0000-2016a0000 rw-s 00000000 00:06 570                              /dev/nvidiactl
2016a0000-2016a4000 rw-s 00000000 00:06 570                              /dev/nvidiactl
2016a4000-2016c0000 ---p 00000000 00:00 0 
2016c0000-201ac0000 rw-s 00000000 00:06 570                              /dev/nvidiactl
201ac0000-201ac4000 rw-s 00000000 00:06 570                              /dev/nvidiactl
201ac4000-201ae0000 ---p 00000000 00:00 0 
201ae0000-201ee0000 rw-s 00000000 00:06 570                              /dev/nvidiactl
201ee0000-201ee4000 rw-s 00000000 00:06 570                              /dev/nvidiactl
201ee4000-201f00000 ---p 00000000 00:00 0 
201f00000-202300000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202300000-202304000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202304000-202320000 ---p 00000000 00:00 0 
202320000-202720000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202720000-202724000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202724000-202740000 ---p 00000000 00:00 0 
202740000-202b40000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202b40000-202b44000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202b44000-202b60000 ---p 00000000 00:00 0 
202b60000-202f60000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202f60000-202f64000 rw-s 00000000 00:06 570                              /dev/nvidiactl
202f64000-202f80000 ---p 00000000 00:00 0 
202f80000-203380000 rw-s 00000000 00:06 570                              /dev/nvidiactl
203380000-203384000 rw-s 00000000 00:06 570                              /dev/nvidiactl
203384000-2033a0000 ---p 00000000 00:00 0 
2033a0000-2037a0000 rw-s 00000000 00:06 570                              /dev/nvidiactl
2037a0000-2037a4000 rw-s 00000000 00:06 570                              /dev/nvidiactl
2037a4000-2037c0000 ---p 00000000 00:00 0 
2037c0000-203bc0000 rw-s 00000000 00:06 570                              /dev/nvidiactl
203bc0000-203bc4000 rw-s 00000000 00:06 570                              /dev/nvidiactl
203bc4000-203be0000 ---p 00000000 00:00 0 
203be0000-203fe0000 rw-s 00000000 00:06 570                              /dev/nvidiactl
203fe0000-203fe4000 rw-s 00000000 00:06 570                              /dev/nvidiactl
203fe4000-204000000 ---p 00000000 00:00 0 
204000000-204400000 rw-s 00000000 00:06 570                              /dev/nvidiactl
204400000-204500000 rw-s 00000000 00:05 8762806                          /dev/zero (deleted)
204500000-204600000 rw-s 00000000 00:06 570                              /dev/nvidiactl
204600000-204700000 rw-s 00000000 00:05 8762807                          /dev/zero (deleted)
204700000-204800000 rw-s 00000000 00:06 570                              /dev/nvidiactl
204800000-204900000 rw-s 00000000 00:06 570                              /dev/nvidiactl
204900000-2049e0000 rw-s 00000000 00:06 570                              /dev/nvidiactl
2049e0000-204ae0000 rw-s 00000000 00:05 8762809                          /dev/zero (deleted)
204ae0000-204be0000 rw-s 00000000 00:05 8761890                          /dev/zero (deleted)
204be0000-d00000000 ---p 00000000 00:00 0 
7f4b24000000-7f4b24029000 rw-p 00000000 00:00 0 
7f4b24029000-7f4b28000000 ---p 00000000 00:00 0 
7f4b28000000-7f4b2802b000 rw-p 00000000 00:00 0 
7f4b2802b000-7f4b2c000000 ---p 00000000 00:00 0 
7f4b2c000000-7f4b2c021000 rw-p 00000000 00:00 0 
7f4b2c021000-7f4b30000000 ---p 00000000 00:00 0 
7f4b30000000-7f4b30001000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30001000-7f4b30002000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30002000-7f4b30003000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30003000-7f4b30004000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30004000-7f4b30005000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30005000-7f4b30006000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30006000-7f4b30007000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30007000-7f4b30008000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30008000-7f4b30009000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30009000-7f4b3000a000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b3000a000-7f4b3000b000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b3000b000-7f4b3000c000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b3000c000-7f4b3000d000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b3000d000-7f4b3000e000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b3000e000-7f4b3000f000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b3000f000-7f4b30010000 rw-s 00000000 00:06 619                        /dev/nvidia1
7f4b30010000-7f4b40000000 ---p 00000000 00:00 0 
7f4b40000000-7f4b40021000 rw-p 00000000 00:00 0 
7f4b40021000-7f4b44000000 ---p 00000000 00:00 0 
7f4b444b8000-7f4b444b9000 ---p 00000000 00:00 0 
7f4b444b9000-7f4b44cb9000 rw-p 00000000 00:00 0 
7f4b44cb9000-7f4b44cba000 ---p 00000000 00:00 0 
7f4b44cba000-7f4b45d44000 rw-p 00000000 00:00 0 
7f4b45dcd000-7f4b45dce000 ---p 00000000 00:00 0 
7f4b45dce000-7f4b46d28000 rw-p 00000000 00:00 0 
7f4b47482000-7f4b47a41000 rw-p 00000000 00:00 0 
7f4b48000000-7f4b48021000 rw-p 00000000 00:00 0 
7f4b48021000-7f4b4c000000 ---p 00000000 00:00 0 
7f4b4c3ae000-7f4b4c6fe000 rw-p 00000000 00:00 0 
7f4b4ca4e000-7f4b4cc2a000 rw-p 00000000 00:00 0 
7f4b4ce06000-7f4b4cf33000 rw-p 00000000 00:00 0 
7f4b4d060000-7f4b4d061000 ---p 00000000 00:00 0 
7f4b4d061000-7f4b4d861000 rw-p 00000000 00:00 0 
7f4b4d861000-7f4b4d862000 ---p 00000000 00:00 0 
7f4b4d862000-7f4b4e062000 rw-p 00000000 00:00 0 
7f4b4e062000-7f4b4e063000 ---p 00000000 00:00 0 
7f4b4e063000-7f4b4e863000 rw-p 00000000 00:00 0 
7f4b4e863000-7f4b54863000 ---p 00000000 00:00 0 
7f4b54863000-7f4b548a0000 r-xp 00000000 08:12 70257023                   /usr/lib/nvidia-410/libnvidia-fatbinaryloader.so.410.78
7f4b548a0000-7f4b54aa0000 ---p 0003d000 08:12 70257023                   /usr/lib/nvidia-410/libnvidia-fatbinaryloader.so.410.78
7f4b54aa0000-7f4b54aab000 rw-p 0003d000 08:12 70257023                   /usr/lib/nvidia-410/libnvidia-fatbinaryloader.so.410.78
7f4b54aab000-7f4b54ab0000 rw-p 00000000 00:00 0 
7f4b54ab0000-7f4b55832000 r-xp 00000000 08:12 70268995                   /usr/lib/x86_64-linux-gnu/libcuda.so.410.78
7f4b55832000-7f4b55a32000 ---p 00d82000 08:12 70268995                   /usr/lib/x86_64-linux-gnu/libcuda.so.410.78
7f4b55a32000-7f4b55ba0000 rw-p 00d82000 08:12 70268995                   /usr/lib/x86_64-linux-gnu/libcuda.so.410.78
7f4b55ba0000-7f4b55bb0000 rw-p 00000000 00:00 0 
7f4b55bb0000-7f4b55bc9000 r-xp 00000000 08:12 29626917                   /lib/x86_64-linux-gnu/libz.so.1.2.8
7f4b55bc9000-7f4b55dc8000 ---p 00019000 08:12 29626917                   /lib/x86_64-linux-gnu/libz.so.1.2.8
7f4b55dc8000-7f4b55dc9000 r--p 00018000 08:12 29626917                   /lib/x86_64-linux-gnu/libz.so.1.2.8
7f4b55dc9000-7f4b55dca000 rw-p 00019000 08:12 29626917                   /lib/x86_64-linux-gnu/libz.so.1.2.8
7f4b55dca000-7f4b55dd1000 r-xp 00000000 08:12 29622289                   /lib/x86_64-linux-gnu/librt-2.23.so
7f4b55dd1000-7f4b55fd0000 ---p 00007000 08:12 29622289                   /lib/x86_64-linux-gnu/librt-2.23.so
7f4b55fd0000-7f4b55fd1000 r--p 00006000 08:12 29622289                   /lib/x86_64-linux-gnu/librt-2.23.so
7f4b55fd1000-7f4b55fd2000 rw-p 00007000 08:12 29622289                   /lib/x86_64-linux-gnu/librt-2.23.so
7f4b55fd2000-7f4b5652b000 r-xp 00000000 08:12 3810871                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7.0.0
7f4b5652b000-7f4b5672b000 ---p 00559000 08:12 3810871                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7.0.0
7f4b5672b000-7f4b5672f000 r--p 00559000 08:12 3810871                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7.0.0
7f4b5672f000-7f4b56731000 rw-p 0055d000 08:12 3810871                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7.0.0
7f4b56731000-7f4b56732000 rw-p 00000000 00:00 0 
7f4b56732000-7f4b5683a000 r-xp 00000000 08:12 29626940                   /lib/x86_64-linux-gnu/libm-2.23.so
7f4b5683a000-7f4b56a39000 ---p 00108000 08:12 29626940                   /lib/x86_64-linux-gnu/libm-2.23.so
7f4b56a39000-7f4b56a3a000 r--p 00107000 08:12 29626940                   /lib/x86_64-linux-gnu/libm-2.23.so
7f4b56a3a000-7f4b56a3b000 rw-p 00108000 08:12 29626940                   /lib/x86_64-linux-gnu/libm-2.23.so
7f4b56a3b000-7f4b56a3e000 r-xp 00000000 08:12 29622354                   /lib/x86_64-linux-gnu/libdl-2.23.so
7f4b56a3e000-7f4b56c3d000 ---p 00003000 08:12 29622354                   /lib/x86_64-linux-gnu/libdl-2.23.so
7f4b56c3d000-7f4b56c3e000 r--p 00002000 08:12 29622354                   /lib/x86_64-linux-gnu/libdl-2.23.so
7f4b56c3e000-7f4b56c3f000 rw-p 00003000 08:12 29622354                   /lib/x86_64-linux-gnu/libdl-2.23.so
7f4b56c3f000-7f4b57ded000 r-xp 00000000 08:12 3823958                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libnvrtc.so.10.0.130
7f4b57ded000-7f4b57fec000 ---p 011ae000 08:12 3823958                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libnvrtc.so.10.0.130
7f4b57fec000-7f4b581a3000 rw-p 011ad000 08:12 3823958                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libnvrtc.so.10.0.130
7f4b581a3000-7f4b5825b000 rw-p 00000000 00:00 0 
7f4b5825b000-7f4b587e4000 r-xp 00000000 08:12 3810852                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libmyelin.so.1.0.0
7f4b587e4000-7f4b589e3000 ---p 00589000 08:12 3810852                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libmyelin.so.1.0.0
7f4b589e3000-7f4b589f1000 r--p 00588000 08:12 3810852                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libmyelin.so.1.0.0
7f4b589f1000-7f4b58a54000 rw-p 00596000 08:12 3810852                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libmyelin.so.1.0.0
7f4b58a54000-7f4b58a6c000 rw-p 00000000 00:00 0 
7f4b58a6c000-7f4b5cdad000 r-xp 00000000 08:12 3823857                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcublas.so.10.0.130
7f4b5cdad000-7f4b5cfac000 ---p 04341000 08:12 3823857                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcublas.so.10.0.130
7f4b5cfac000-7f4b5cff0000 rw-p 04340000 08:12 3823857                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcublas.so.10.0.130
7f4b5cff0000-7f4b5d002000 rw-p 00000000 00:00 0 
7f4b5d002000-7f4b72300000 r-xp 00000000 08:12 3808187                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libcudnn.so.7.5.0
7f4b72300000-7f4b72500000 ---p 152fe000 08:12 3808187                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libcudnn.so.7.5.0
7f4b72500000-7f4b72587000 rw-p 152fe000 08:12 3808187                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libcudnn.so.7.5.0
7f4b72587000-7f4b7261b000 rw-p 00000000 00:00 0 
7f4b7261b000-7f4b728d9000 r-xp 00000000 08:12 3801124                    /docker_environment_trt7/usr/local/lib/libgrpc.so.7.0.0
7f4b728d9000-7f4b72ad8000 ---p 002be000 08:12 3801124                    /docker_environment_trt7/usr/local/lib/libgrpc.so.7.0.0
7f4b72ad8000-7f4b72aef000 r--p 002bd000 08:12 3801124                    /docker_environment_trt7/usr/local/lib/libgrpc.so.7.0.0
7f4b72aef000-7f4b72af6000 rw-p 002d4000 08:12 3801124                    /docker_environment_trt7/usr/local/lib/libgrpc.so.7.0.0
7f4b72af6000-7f4b72afc000 rw-p 00000000 00:00 0 
7f4b72afc000-7f4b72cbc000 r-xp 00000000 08:12 29622369                   /lib/x86_64-linux-gnu/libc-2.23.so
7f4b72cbc000-7f4b72ebc000 ---p 001c0000 08:12 29622369                   /lib/x86_64-linux-gnu/libc-2.23.so
7f4b72ebc000-7f4b72ec0000 r--p 001c0000 08:12 29622369                   /lib/x86_64-linux-gnu/libc-2.23.so
7f4b72ec0000-7f4b72ec2000 rw-p 001c4000 08:12 29622369                   /lib/x86_64-linux-gnu/libc-2.23.so
7f4b72ec2000-7f4b72ec6000 rw-p 00000000 00:00 0 
7f4b72ec6000-7f4b72edc000 r-xp 00000000 08:12 29626960                   /lib/x86_64-linux-gnu/libgcc_s.so.1
7f4b72edc000-7f4b730db000 ---p 00016000 08:12 29626960                   /lib/x86_64-linux-gnu/libgcc_s.so.1
7f4b730db000-7f4b730dc000 rw-p 00015000 08:12 29626960                   /lib/x86_64-linux-gnu/libgcc_s.so.1
7f4b730dc000-7f4b7324e000 r-xp 00000000 08:12 3808989                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.21
7f4b7324e000-7f4b7344e000 ---p 00172000 08:12 3808989                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.21
7f4b7344e000-7f4b73458000 r--p 00172000 08:12 3808989                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.21
7f4b73458000-7f4b7345a000 rw-p 0017c000 08:12 3808989                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.21
7f4b7345a000-7f4b7345e000 rw-p 00000000 00:00 0 
7f4b7345e000-7f4b734d3000 r-xp 00000000 08:12 3823862                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudart.so.10.0.130
7f4b734d3000-7f4b736d3000 ---p 00075000 08:12 3823862                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudart.so.10.0.130
7f4b736d3000-7f4b736d7000 rw-p 00075000 08:12 3823862                    /docker_environment_trt7/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudart.so.10.0.130
7f4b736d7000-7f4b736d8000 rw-p 00000000 00:00 0 
7f4b736d8000-7f4b73934000 r-xp 00000000 08:12 3810848                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvonnxparser.so.7.0.0
7f4b73934000-7f4b73b33000 ---p 0025c000 08:12 3810848                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvonnxparser.so.7.0.0
7f4b73b33000-7f4b73b3a000 r--p 0025b000 08:12 3810848                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvonnxparser.so.7.0.0
7f4b73b3a000-7f4b73b3b000 rw-p 00262000 08:12 3810848                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvonnxparser.so.7.0.0
7f4b73b3b000-7f4b814da000 r-xp 00000000 08:12 3810860                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer.so.7.0.0
7f4b814da000-7f4b816da000 ---p 0d99f000 08:12 3810860                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer.so.7.0.0
7f4b816da000-7f4b81730000 r--p 0d99f000 08:12 3810860                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer.so.7.0.0
7f4b81730000-7f4b8173c000 rw-p 0d9f5000 08:12 3810860                    /docker_environment_trt7/usr/lib/x86_64-linux-gnu/libnvinfer.so.7.0.0
7f4b8173c000-7f4b817ec000 rw-p 00000000 00:00 0 
7f4b817ec000-7f4b8187c000 r-xp 00000000 08:12 3801104                    /docker_environment_trt7/usr/local/lib/libgrpc++.so.1.19.1
7f4b8187c000-7f4b81a7b000 ---p 00090000 08:12 3801104                    /docker_environment_trt7/usr/local/lib/libgrpc++.so.1.19.1
7f4b81a7b000-7f4b81a7f000 r--p 0008f000 08:12 3801104                    /docker_environment_trt7/usr/local/lib/libgrpc++.so.1.19.1
7f4b81a7f000-7f4b81a80000 rw-p 00093000 08:12 3801104                    /docker_environment_trt7/usr/local/lib/libgrpc++.so.1.19.1
7f4b81a80000-7f4b81a81000 rw-p 00000000 00:00 0 
7f4b81a81000-7f4b81a99000 r-xp 00000000 08:12 29622356                   /lib/x86_64-linux-gnu/libpthread-2.23.so
7f4b81a99000-7f4b81c98000 ---p 00018000 08:12 29622356                   /lib/x86_64-linux-gnu/libpthread-2.23.so
7f4b81c98000-7f4b81c99000 r--p 00017000 08:12 29622356                   /lib/x86_64-linux-gnu/libpthread-2.23.so
7f4b81c99000-7f4b81c9a000 rw-p 00018000 08:12 29622356                   /lib/x86_64-linux-gnu/libpthread-2.23.so
7f4b81c9a000-7f4b81c9e000 rw-p 00000000 00:00 0 
7f4b81c9e000-7f4b81cc4000 r-xp 00000000 08:12 29622355                   /lib/x86_64-linux-gnu/ld-2.23.so
7f4b81de2000-7f4b81ea6000 rw-p 00000000 00:00 0 
7f4b81eb1000-7f4b81eb2000 rw-p 00000000 00:00 0 
7f4b81eb2000-7f4b81eb3000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eb3000-7f4b81eb4000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eb4000-7f4b81eb5000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eb5000-7f4b81eb6000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eb6000-7f4b81eb7000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eb7000-7f4b81eb8000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eb8000-7f4b81eb9000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eb9000-7f4b81eba000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81eba000-7f4b81ebb000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ebb000-7f4b81ebc000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ebc000-7f4b81ebd000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ebd000-7f4b81ebe000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ebe000-7f4b81ebf000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ebf000-7f4b81ec0000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ec0000-7f4b81ec1000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ec1000-7f4b81ec2000 rw-s 00000000 00:06 570                        /dev/nvidiactl
7f4b81ec2000-7f4b81ec3000 rw-p 00000000 00:00 0 
7f4b81ec3000-7f4b81ec4000 r--p 00025000 08:12 29622355                   /lib/x86_64-linux-gnu/ld-2.23.so
7f4b81ec4000-7f4b81ec5000 rw-p 00026000 08:12 29622355                   /lib/x86_64-linux-gnu/ld-2.23.so
7f4b81ec5000-7f4b81ec6000 rw-p 00000000 00:00 0 
7fff7a0fe000-7fff7a120000 rw-p 00000000 00:00 0                          [stack]
7fff7a197000-7fff7a19a000 r--p 00000000 00:00 0                          [vvar]
7fff7a19a000-7fff7a19c000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
Aborted (core dumped)

code:

#include "NvInfer.h"
#include <iostream>
#include "NvUtils.h"
#include "NvOnnxParser.h"
using namespace nvinfer1;
#include <thread>

#include "common/logger.h"
#include "common/buffers.h"
// Path of the ONNX file handed to the parser in main().
std::string model_path = "detection_model.onnx";

/// Copies the extents stored in a TensorRT Dims into a std::vector<int>.
/// @param dims source dimensions; the first dims.nbDims entries of dims.d are read.
/// @param v    destination; resized to dims.nbDims and overwritten element by element.
void convert_dims_to_vect(const nvinfer1::Dims& dims, std::vector<int>& v){
    const int rank = dims.nbDims;
    v.resize(rank);
    int axis = 0;
    for (auto& extent : v)
        extent = dims.d[axis++];
}
/// Collects the binding dimensions currently reported by `context` for each
/// named tensor.
///
/// @param context          execution context whose engine and binding dimensions are queried.
/// @param tensorNames      binding names without any " [profile N]" suffix.
/// @param explicit_shapes  output; resized to tensorNames.size(), entry k receives the
///                         dimensions reported for tensorNames[k].
void make_explicit_shapes(IExecutionContext* context,const  std::vector<std::string>& tensorNames, std::vector<std::vector<int>>& explicit_shapes){
	const int count = tensorNames.size();
	explicit_shapes.resize(count);

	// For any profile other than 0 the binding is looked up as "<name> [profile N]".
	const int profile = context->getOptimizationProfile();
	const std::string profileSuffix =
		(profile != 0) ? " [profile " + std::to_string(profile) + "]" : std::string();

	for (int k = 0; k < count; ++k){
		const std::string bindingName = tensorNames[k] + profileSuffix;
		const int bindingIndex = context->getEngine().getBindingIndex(bindingName.c_str());
		convert_dims_to_vect(context->getBindingDimensions(bindingIndex), explicit_shapes[k]);
	}
}

// Name of the network input whose dimensions are configured on every
// optimization profile and on every execution context in main().
std::string input_name = "fts_input_images:0";
// Shapes passed to the profile's kMIN, kOPT and kMAX selectors respectively;
// dims2 is also the shape set on each execution context before it runs.
Dims4 dims1(1,10,10,3);
Dims4 dims2(1,80,80,3);
Dims4 dims3(1,500,500,3);
/// Executes one inference on contexts[i] with freshly allocated device buffers.
///
/// main() uses this as a thread entry point, launching one call per context so
/// that the enqueueV2 calls of different contexts overlap.
///
/// @param contexts         execution contexts; only contexts[i] is used.
/// @param i                index of the context to run. It also determines how many
///                         leading null entries the bindings array gets.
/// @param explicit_shapes  one shape per tensor; each yields one device buffer of
///                         product(shape) * 4 (presumably bytes of 4-byte elements --
///                         confirm against common/buffers.h).
void run(std::vector<IExecutionContext*> contexts, int i, std::vector<std::vector<int>> explicit_shapes){
    // Refuse an index that would read outside `contexts` or dereference null.
    if (i < 0 || static_cast<size_t>(i) >= contexts.size() || contexts[i] == nullptr){
        std::cout<<"error: invalid execution context index\n";
        return;
    }

    std::vector<samplesCommon::DeviceBuffer> deviceBuffers;
    deviceBuffers.reserve(explicit_shapes.size());
    for (const auto& shape : explicit_shapes){
        // Multiply in size_t: an int accumulator can overflow for large shapes.
        const size_t elementCount = std::accumulate(
            shape.begin(), shape.end(), static_cast<size_t>(1),
            [](size_t acc, int extent){ return acc * static_cast<size_t>(extent); });
        deviceBuffers.emplace_back(elementCount * 4);
    }

    // The array starts with i * (number of buffers) null entries, followed by
    // this call's buffers, so the buffers land at the binding slots of profile i.
    std::vector<void*> mDeviceBindings(static_cast<size_t>(i)*deviceBuffers.size(), nullptr);
    mDeviceBindings.reserve(mDeviceBindings.size() + deviceBuffers.size());
    for (auto& buffer : deviceBuffers){
        mDeviceBindings.emplace_back(buffer.data());
    }

    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    if (!contexts[i]->enqueueV2(mDeviceBindings.data(), stream, nullptr)){
        std::cout<<"error when run graph TensorRT\n";
    }
    // Wait for the enqueued work before the device buffers go out of scope,
    // and report (rather than drop) a failed synchronization.
    if (cudaStreamSynchronize(stream) != cudaSuccess){
        std::cout<<"error when synchronize CUDA stream\n";
    }
    cudaStreamDestroy(stream);
}

int main(int argc, char** argv) {
  auto builder = createInferBuilder(gLogger);

  auto config = builder->createBuilderConfig();
  for (int i=0; i<2; ++i){
    auto profile = builder->createOptimizationProfile();
    profile->setDimensions(input_name.c_str(), OptProfileSelector::kMIN, dims1);
    profile->setDimensions(input_name.c_str(), OptProfileSelector::kOPT, dims2);
    profile->setDimensions(input_name.c_str(), OptProfileSelector::kMAX, dims3);
    config->addOptimizationProfile(profile);
  }

  auto network = builder->createNetworkV2(1U << static_cast<int>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
  auto parser = nvonnxparser::createParser(*network, gLogger);
  parser->parseFromFile(model_path.c_str(), 3);
  auto engine = builder->buildEngineWithConfig(*network,*config);

    std::vector<std::string> tensorNames;
    for (int i=0; i<engine->getNbBindings(); ++i){
        std::string name(engine->getBindingName(i));
        if (name.find("[profile")==-1){
            tensorNames.emplace_back(name);
        }
    }

  std::vector<IExecutionContext*> contexts;
  std::vector<std::vector<int>> explicit_shapes;
  for (int i=0; i<2; ++i){
    contexts.emplace_back(engine->createExecutionContext());
    auto context = contexts.back();
    context->setOptimizationProfile(i);
    std::cout<<"allInputDimensionsSpecified: "<<context->allInputDimensionsSpecified()<<"\n";
    int index;
    if (i==0)
        index = engine->getBindingIndex((input_name).c_str());
    else
        index = engine->getBindingIndex((input_name+" [profile "+std::to_string(i)+"]").c_str());
    context->setBindingDimensions(index, dims2);
    std::cout<<"allInputDimensionsSpecified must equal 1: "<<context->allInputDimensionsSpecified()<<"\n";

    explicit_shapes.clear();
    make_explicit_shapes(context, tensorNames, explicit_shapes);

    std::vector<samplesCommon::DeviceBuffer> deviceBuffers;
    std::vector<samplesCommon::HostBuffer> hostBuffers;
    for (int i=0; i<tensorNames.size(); ++i){
        size_t allocationSize = std::accumulate(explicit_shapes[i].begin(), explicit_shapes[i].end(), 1, std::multiplies<int>()) * 4;
        hostBuffers.emplace_back(allocationSize);
        // std::cout<<"allocationSize: "<<allocationSize<<"\n";
        deviceBuffers.emplace_back(allocationSize);
    }

    std::vector<void*> mDeviceBindings(i*deviceBuffers.size(), NULL);
    for (auto& buffer:(deviceBuffers)){
        std::cout<<buffer.data()<<" buffer\n";
        mDeviceBindings.emplace_back(buffer.data());
    }
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    if (!context->enqueueV2(mDeviceBindings.data(), stream, nullptr)){
        std::cout<<"error when run graph TensorRT\n";
    }
    cudaStreamSynchronize(stream);
  }

for (;;){
std::vector<std::thread> v_thread;
for (int i=0;i<2;++i){
v_thread.emplace_back(run, contexts, i, explicit_shapes);
std::cout<<i<<"\n";
}
for (auto p=v_thread.begin(); p!=v_thread.end(); p++)
p->join();
}

}

model: https://1drv.ms/u/s!AhFk3ICqlZI2irl-5pxC5LawRSixew?e=XfVdJb
I use cuda 10 + tensorrt7 + GTX960 + ubuntu

Hi,

If you need to run multiple builds simultaneously, you will need to create multiple builders.
The TensorRT runtime can be used by multiple threads simultaneously, so long as each object uses a different execution context.

Please refer below link:
https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/tensorrt-700/tensorrt-best-practices/index.html#thread-safety

Thanks

Hi @SunilJB
I run contexts in parallel, not builds in parallel. Did you not read my sample code?

Hi,

Sorry for the late reply.
size_t allocationSize = std::accumulate(explicit_shapes[i].begin(), explicit_shapes[i].end(), 1, std::multiplies<int>()) * 4;
This line assumes that the input data type is INT8, but there may be one binding whose DataType is FP16.

Can you try replacing size_t allocationSize = std::accumulate(explicit_shapes[i].begin(), explicit_shapes[i].end(), 1, std::multiplies<int>()) * 4;
with size_t allocationSize = std::accumulate(explicit_shapes[i].begin(), explicit_shapes[i].end(), 1, std::multiplies<int>()) * elementSize(engine->getBindingDataType(i)); if the tensor layout is LINEAR?

Thanks

For every binding of my model, “samplesCommon::getElementSize(engine->getBindingDataType(i))” returns 4, and 4 is FP32. You can print it out to check. Importantly, if I do not run in parallel, there is no error.

Hi,

Our engineering team is looking into the issue and will get back to you once we have any updates.

Meanwhile, it seems the issue might also be due to a memory leak. Could you please try valgrind/cuda-memcheck (https://docs.nvidia.com/cuda/cuda-memcheck/index.html) to find the leak in this case?

Thanks

To chime in on this, seeing a similar memory corruption when running multiple execution contexts in multiple threads. Placing a global mutex around the calls to enqueue() seems to fix the issue – but according to the docs individual execution context objects should be thread safe? Same code worked flawlessly in 6.0.1.5.

2 Likes

Thanks @alexm5m91 , your solution worked. But Nvidia must fix this issue

1 Like

@SunilJB
Any progress?
I also have this “double free or corruption (!prev): 0x000000003d2f4b80 ***” issue. Now I’m using mutex like @alexm5m91 said as a workaround.

But this issue should have an official fix.

By the way, the core dump is:

#0  0x00007fd45b420428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
#1  0x00007fd45b42202a in __GI_abort () at abort.c:89
#2  0x00007fd45b4627ea in __libc_message (do_abort=do_abort@entry=2, fmt=fmt@entry=0x7fd45b57bed8 "*** Error in `%s': %s: 0x%s ***\n") at ../sysdeps/posix/libc_fatal.c:175
#3  0x00007fd45b46b37a in malloc_printerr (ar_ptr=<optimized out>, ptr=<optimized out>, str=0x7fd45b57c008 "double free or corruption (!prev)", action=3) at malloc.c:5006
#4  _int_free (av=<optimized out>, p=<optimized out>, have_lock=0) at malloc.c:3867
#5  0x00007fd45b46f53c in __GI___libc_free (mem=<optimized out>) at malloc.c:2968
#6  0x00007fd431bfcb29 in ?? () from /usr/local/nvidia/lib64/libcuda.so.1
#7  0x00007fd431bfcc29 in ?? () from /usr/local/nvidia/lib64/libcuda.so.1
#8  0x00007fd431a31350 in ?? () from /usr/local/nvidia/lib64/libcuda.so.1
#9  0x00007fd431b6b9c2 in cuMemFree_v2 () from /usr/local/nvidia/lib64/libcuda.so.1
#10 0x00007fd4370ad950 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.0
#11 0x00007fd4370e2874 in cudaFree () from /usr/local/cuda/lib64/libcudart.so.9.0
#12 0x00007fd45144507e in ?? () from /ljay/workspace/tools/nvidia/tensorrt/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib/libnvinfer.so.7
#13 0x00007fd451445574 in ?? () from /ljay/workspace/tools/nvidia/tensorrt/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib/libnvinfer.so.7
#14 0x00007fd450bdf990 in nvinfer1::rt::cuda::WinogradConvActRunner::updateConvolution(dit::Convolution*, nvinfer1::rt::CommonContext const&, signed char const*, nvinfer1::utils::TensorLayout const&, nvinfer1::utils::TensorLayout const&) const () from /ljay/workspace/tools/nvidia/tensorrt/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib/libnvinfer.so.7
#15 0x00007fd450bdfc1d in nvinfer1::rt::cuda::WinogradConvActRunner::recomputeResources(nvinfer1::rt::CommonContext const&, nvinfer1::rt::ExecutionParameters*) ()
   from /ljay/workspace/tools/nvidia/tensorrt/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib/libnvinfer.so.7
#16 0x00007fd450b5f55c in nvinfer1::rt::ExecutionContext::recomputeResources(std::vector<std::unique_ptr<nvinfer1::rt::Runner, std::default_delete<nvinfer1::rt::Runner> >, std::allocator<std::unique_ptr<nvinfer1::rt::Runner, std::default_delete<nvinfer1::rt::Runner> > > > const&, int, int) () from /ljay/workspace/tools/nvidia/tensorrt/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib/libnvinfer.so.7
#17 0x00007fd450b60eb8 in nvinfer1::rt::ExecutionContext::enqueueInternal(CUevent_st**) () from /ljay/workspace/tools/nvidia/tensorrt/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib/libnvinfer.so.7
#18 0x00007fd450b63a10 in nvinfer1::rt::ExecutionContext::enqueue(int, void**, CUstream_st*, CUevent_st**) ()
   from /ljay/workspace/tools/nvidia/tensorrt/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib/libnvinfer.so.7

At the same place, “nvinfer1::rt::cuda::WinogradConvActRunner::updateConvolution”, I also found a memory leak. After some iterations, an OOM occurred. Details here: memory leak in TensorRT 6?

runtime environment:
GPU: Tesla P4
Ubuntu16.04
CUDA version: 9.0
CUDNN version: 7.5.1
TensorRT version: 7.0.0.11