vpiStreamSync() blocks when VPI remap is used for stereo rectification

We use VPI remap and OpenCV's cv::cuda::StereoSGM to perform stereo rectification and disparity computation for stereo cameras facing 6 directions. Each direction is handled by its own thread, so there are 6 threads in total.

However, we ran into a problem where the CUDA calls block. Thanks for your help!

stereo remap code:

 // Rectify both views: queue one remap per image (left, then right) on the same VPI stream.
        CHECK_STATUS(
            vpiSubmitRemap(stream_, backend_, remapl_, inLeft_, outLeft_, VPI_INTERP_LINEAR, VPI_BORDER_ZERO, 0));
        CHECK_STATUS(
            vpiSubmitRemap(stream_, backend_, remapr_, inRight_, outRight_, VPI_INTERP_LINEAR, VPI_BORDER_ZERO, 0));
        // Block until the remap operations submitted above have finished.
        CHECK_STATUS(vpiStreamSync(stream_));
        {
            VPIImageData data_l;
            // Lock the rectified left image for reading as a host pitch-linear buffer.
            CHECK_STATUS(vpiImageLockData(outLeft_, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &data_l));
            // Make an OpenCV matrix out of this image.
            CHECK_STATUS(vpiImageDataExportOpenCVMat(data_l, &cv_image_l_));
            // Copy the pixels out while the image is still locked, so the result stays usable after the unlock.
            left_img_rect = cv_image_l_.clone();  // the copy was added to fix "VPI_ERROR_INTERNAL: Can't perform shared mapping"
            // Done handling output, don't forget to unlock it.
            CHECK_STATUS(vpiImageUnlock(outLeft_));
        }
        {
            VPIImageData data_r;
            // Same lock / export / copy / unlock sequence for the rectified right image.
            CHECK_STATUS(vpiImageLockData(outRight_, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &data_r));
            CHECK_STATUS(vpiImageDataExportOpenCVMat(data_r, &cv_image_r_));
            right_img_rect = cv_image_r_.clone();
            CHECK_STATUS(vpiImageUnlock(outRight_));
        }

opencv stereoSGM code:

 // Device-side disparity output (16-bit signed), sized like the left input image.
 cv::cuda::GpuMat cudaDisparityMap(img_l.size(), CV_16S);
 cv::cuda::GpuMat cudaLeftFrame, cudaRightFrame;
 // Copy the left and right host images to GPU memory.
 cudaLeftFrame.upload(img_l);
 cudaRightFrame.upload(img_r);
 // Run SGM stereo matching on the GPU.
 // NOTE(review): no cv::cuda::Stream is passed to upload/compute/download, so these presumably
 // run on OpenCV's default stream rather than a per-thread stream; confirm.
 cuda_sgbm_->compute(cudaLeftFrame, cudaRightFrame, cudaDisparityMap);

 // Copy the disparity map back into a host matrix.
 disp = cv::Mat(img_l.size(), CV_16S);
 cudaDisparityMap.download(disp);

However, the CUDA calls end up blocked. The backtraces of the blocked threads are shown below.

#0  futex_wait_cancelable (private=0, expected=0, futex_word=0xaaab084a3bc8) at ../sysdeps/nptl/futex-internal.h:183
#1  __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0xaaab0831b840, cond=0xaaab084a3ba0) at pthread_cond_wait.c:508
#2  __pthread_cond_wait (cond=0xaaab084a3ba0, mutex=0xaaab0831b840) at pthread_cond_wait.c:647
#3  0x0000ffff7609b288 in  () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#4  0x0000ffff755865ac in  () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#5  0x0000ffff75571c80 in  () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#6  0x0000ffff75572080 in  () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#7  0x0000ffff75377d00 in vpiStreamSync () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#8  0x0000aaaad908e42c in soa::StereoRectifier::rectify(cv::Mat const&, cv::Mat const&, cv::Mat&, cv::Mat&) ()
#9  0x0000aaaad9086e08 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#10 0x0000aaaad904a28c in soa_process_func(void*) ()
#11 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#12 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78

#0  futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1  __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2  0x0000ffff623ae7c0 in  () at /lib/aarch64-linux-gnu/libcuda.so
#3  0x0000ffff6230943c in  () at /lib/aarch64-linux-gnu/libcuda.so
#4  0x0000ffff623c7104 in  () at /lib/aarch64-linux-gnu/libcuda.so
#5  0x0000ffff7ea306b0 in __cudart320 () at /usr/local/lib/libopencv_core.so.4.5
#6  0x0000ffff7ea853f4 in cudaEventRecord () at /usr/local/lib/libopencv_core.so.4.5
#7  0x0000ffff7e82ddec in cv::cuda::Event::record(cv::cuda::Stream&) () at /usr/local/lib/libopencv_core.so.4.5
#8  0x0000ffff7ed61e90 in void cv::cuda::device::stereosgm::path_aggregation::PathAggregation::operator()<64ul>(cv::cuda::GpuMat const&, cv::cuda::GpuMat const&, cv::cuda::GpuMat&, int, int, int, int, cv::cuda::Stream&) () at /usr/local/lib/libopencv_cudastereo.so.4.5
#9  0x0000ffff7ed3c530 in (anonymous namespace)::StereoSGMImpl::compute(cv::_InputArray const&, cv::_InputArray const&, cv::_OutputArray const&, cv::cuda::Stream&) ()
    at /usr/local/lib/libopencv_cudastereo.so.4.5
#10 0x0000aaaad908c8bc in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78

#0  futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1  __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2  0x0000ffff623ae7c0 in  () at /lib/aarch64-linux-gnu/libcuda.so
#3  0x0000ffff62305980 in  () at /lib/aarch64-linux-gnu/libcuda.so
#4  0x0000ffff623e2e94 in  () at /lib/aarch64-linux-gnu/libcuda.so
#5  0x0000ffff7ea61658 in __cudart175 () at /usr/local/lib/libopencv_core.so.4.5
#6  0x0000ffff7ea32290 in __cudart326 () at /usr/local/lib/libopencv_core.so.4.5
#7  0x0000ffff7ea6d7bc in cudaMallocPitch () at /usr/local/lib/libopencv_core.so.4.5
#8  0x0000ffff7ea17390 in (anonymous namespace)::DefaultAllocator::allocate(cv::cuda::GpuMat*, int, int, unsigned long) () at /usr/local/lib/libopencv_core.so.4.5
#9  0x0000ffff7ea29938 in cv::cuda::GpuMat::create(int, int, int) () at /usr/local/lib/libopencv_core.so.4.5
#10 0x0000aaaad908c990 in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78

#0  futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1  __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2  0x0000ffff623ae7c0 in  () at /lib/aarch64-linux-gnu/libcuda.so
#3  0x0000ffff6230943c in  () at /lib/aarch64-linux-gnu/libcuda.so
#4  0x0000ffff623c7104 in  () at /lib/aarch64-linux-gnu/libcuda.so
#5  0x0000ffff7ea306b0 in __cudart320 () at /usr/local/lib/libopencv_core.so.4.5
#6  0x0000ffff7ea853f4 in cudaEventRecord () at /usr/local/lib/libopencv_core.so.4.5
#7  0x0000ffff7e82ddec in cv::cuda::Event::record(cv::cuda::Stream&) () at /usr/local/lib/libopencv_core.so.4.5
#8  0x0000ffff7ed61e90 in void cv::cuda::device::stereosgm::path_aggregation::PathAggregation::operator()<64ul>(cv::cuda::GpuMat const&, cv::cuda::GpuMat const&, cv::cuda::GpuMat&, int, int, int, int, cv::cuda::Stream&) () at /usr/local/lib/libopencv_cudastereo.so.4.5
#9  0x0000ffff7ed3c530 in (anonymous namespace)::StereoSGMImpl::compute(cv::_InputArray const&, cv::_InputArray const&, cv::_OutputArray const&, cv::cuda::Stream&) ()
    at /usr/local/lib/libopencv_cudastereo.so.4.5
#10 0x0000aaaad908c8bc in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78

#0  futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1  __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2  0x0000ffff623ae7c0 in  () at /lib/aarch64-linux-gnu/libcuda.so
#3  0x0000ffff6230943c in  () at /lib/aarch64-linux-gnu/libcuda.so
#4  0x0000ffff623c7104 in  () at /lib/aarch64-linux-gnu/libcuda.so
#5  0x0000ffff7ea306b0 in __cudart320 () at /usr/local/lib/libopencv_core.so.4.5
#6  0x0000ffff7ea853f4 in cudaEventRecord () at /usr/local/lib/libopencv_core.so.4.5
#7  0x0000ffff7e82ddec in cv::cuda::Event::record(cv::cuda::Stream&) () at /usr/local/lib/libopencv_core.so.4.5
#8  0x0000ffff7ed61e90 in void cv::cuda::device::stereosgm::path_aggregation::PathAggregation::operator()<64ul>(cv::cuda::GpuMat const&, cv::cuda::GpuMat const&, cv::cuda::GpuMat&, int, int, int, int, cv::cuda::Stream&) () at /usr/local/lib/libopencv_cudastereo.so.4.5
#9  0x0000ffff7ed3c530 in (anonymous namespace)::StereoSGMImpl::compute(cv::_InputArray const&, cv::_InputArray const&, cv::_OutputArray const&, cv::cuda::Stream&) ()
    at /usr/local/lib/libopencv_cudastereo.so.4.5
#10 0x0000aaaad908c8bc in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78

Hi,

Do you create 6 CUDA streams, one for each camera?
Thanks.

Yes. I have created 6 threads, and each thread creates one CUDA stream. The problem occurs after the application has been running for a few hours (sometimes as long as 96 hours).

Hi,

Could you share a reproducible source so we can try it in our environment?

Does it require 6 cameras to reproduce?
Does the same issue occur with two cameras?

Also, can you reproduce this with our latest JetPack 5.1.2?
Thanks.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.