We use VPI remap and OpenCV's cv::cuda::StereoSGM to perform stereo rectification and stereo disparity computation for stereo cameras facing 6 directions. Each direction runs in its own thread, so there are 6 threads in total.
However, we ran into a problem where the CUDA calls block. Thanks for your help!
Stereo remap code:
// Remap the left and right input images on the VPI stream, then copy each
// remapped result into an OpenCV matrix (left_img_rect / right_img_rect).
// remap
CHECK_STATUS(
vpiSubmitRemap(stream_, backend_, remapl_, inLeft_, outLeft_, VPI_INTERP_LINEAR, VPI_BORDER_ZERO, 0));
CHECK_STATUS(
vpiSubmitRemap(stream_, backend_, remapr_, inRight_, outRight_, VPI_INTERP_LINEAR, VPI_BORDER_ZERO, 0));
// Wait until both remap operations submitted to stream_ have finished.
CHECK_STATUS(vpiStreamSync(stream_));
{
VPIImageData data_l;
// Lock the left output image for reading as a host pitch-linear buffer.
CHECK_STATUS(vpiImageLockData(outLeft_, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &data_l));
// Make an OpenCV matrix out of this image
CHECK_STATUS(vpiImageDataExportOpenCVMat(data_l, &cv_image_l_));
// Copy the exported matrix with clone() before unlocking. According to the
// original note, this works around
// "VPI_ERROR_INTERNAL: Can't perform shared mapping".
// NOTE(review): presumably the copy keeps left_img_rect from aliasing the
// locked VPI buffer -- confirm.
left_img_rect = cv_image_l_.clone(); // fix VPI_ERROR_INTERNAL: Can't perform shared mapping
// Done handling output, don't forget to unlock it.
CHECK_STATUS(vpiImageUnlock(outLeft_));
}
{
VPIImageData data_r;
// Same lock / export / clone / unlock sequence for the right output image.
CHECK_STATUS(vpiImageLockData(outRight_, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &data_r));
CHECK_STATUS(vpiImageDataExportOpenCVMat(data_r, &cv_image_r_));
right_img_rect = cv_image_r_.clone();
CHECK_STATUS(vpiImageUnlock(outRight_));
}
OpenCV StereoSGM code:
// Compute a disparity map on the GPU: upload both input images, run
// cuda_sgbm_ on them, and download the result into disp.
// NOTE(review): the three GpuMat objects below are constructed each time this
// code runs; if it runs once per frame, reusing them may avoid repeated
// device allocations -- confirm against the caller.
// Device-side output buffer, same size as img_l, type CV_16S.
cv::cuda::GpuMat cudaDisparityMap(img_l.size(), CV_16S);
cv::cuda::GpuMat cudaLeftFrame, cudaRightFrame;
// Copy the host images into the device matrices.
cudaLeftFrame.upload(img_l);
cudaRightFrame.upload(img_r);
// Run the matcher; the result is written to cudaDisparityMap.
cuda_sgbm_->compute(cudaLeftFrame, cudaRightFrame, cudaDisparityMap);
// Give disp a fresh host matrix of matching size/type, then copy the result back.
disp = cv::Mat(img_l.size(), CV_16S);
cudaDisparityMap.download(disp);
However, we ran into the CUDA blocking problem. The thread backtraces are shown below.
#0 futex_wait_cancelable (private=0, expected=0, futex_word=0xaaab084a3bc8) at ../sysdeps/nptl/futex-internal.h:183
#1 __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0xaaab0831b840, cond=0xaaab084a3ba0) at pthread_cond_wait.c:508
#2 __pthread_cond_wait (cond=0xaaab084a3ba0, mutex=0xaaab0831b840) at pthread_cond_wait.c:647
#3 0x0000ffff7609b288 in () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#4 0x0000ffff755865ac in () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#5 0x0000ffff75571c80 in () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#6 0x0000ffff75572080 in () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#7 0x0000ffff75377d00 in vpiStreamSync () at /opt/nvidia/vpi2/lib/aarch64-linux-gnu/libnvvpi.so.2
#8 0x0000aaaad908e42c in soa::StereoRectifier::rectify(cv::Mat const&, cv::Mat const&, cv::Mat&, cv::Mat&) ()
#9 0x0000aaaad9086e08 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#10 0x0000aaaad904a28c in soa_process_func(void*) ()
#11 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#12 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78
#0 futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1 __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2 0x0000ffff623ae7c0 in () at /lib/aarch64-linux-gnu/libcuda.so
#3 0x0000ffff6230943c in () at /lib/aarch64-linux-gnu/libcuda.so
#4 0x0000ffff623c7104 in () at /lib/aarch64-linux-gnu/libcuda.so
#5 0x0000ffff7ea306b0 in __cudart320 () at /usr/local/lib/libopencv_core.so.4.5
#6 0x0000ffff7ea853f4 in cudaEventRecord () at /usr/local/lib/libopencv_core.so.4.5
#7 0x0000ffff7e82ddec in cv::cuda::Event::record(cv::cuda::Stream&) () at /usr/local/lib/libopencv_core.so.4.5
#8 0x0000ffff7ed61e90 in void cv::cuda::device::stereosgm::path_aggregation::PathAggregation::operator()<64ul>(cv::cuda::GpuMat const&, cv::cuda::GpuMat const&, cv::cuda::GpuMat&, int, int, int, int, cv::cuda::Stream&) () at /usr/local/lib/libopencv_cudastereo.so.4.5
#9 0x0000ffff7ed3c530 in (anonymous namespace)::StereoSGMImpl::compute(cv::_InputArray const&, cv::_InputArray const&, cv::_OutputArray const&, cv::cuda::Stream&) ()
at /usr/local/lib/libopencv_cudastereo.so.4.5
#10 0x0000aaaad908c8bc in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78
#0 futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1 __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2 0x0000ffff623ae7c0 in () at /lib/aarch64-linux-gnu/libcuda.so
#3 0x0000ffff62305980 in () at /lib/aarch64-linux-gnu/libcuda.so
#4 0x0000ffff623e2e94 in () at /lib/aarch64-linux-gnu/libcuda.so
#5 0x0000ffff7ea61658 in __cudart175 () at /usr/local/lib/libopencv_core.so.4.5
#6 0x0000ffff7ea32290 in __cudart326 () at /usr/local/lib/libopencv_core.so.4.5
#7 0x0000ffff7ea6d7bc in cudaMallocPitch () at /usr/local/lib/libopencv_core.so.4.5
#8 0x0000ffff7ea17390 in (anonymous namespace)::DefaultAllocator::allocate(cv::cuda::GpuMat*, int, int, unsigned long) () at /usr/local/lib/libopencv_core.so.4.5
#9 0x0000ffff7ea29938 in cv::cuda::GpuMat::create(int, int, int) () at /usr/local/lib/libopencv_core.so.4.5
#10 0x0000aaaad908c990 in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78
#0 futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1 __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2 0x0000ffff623ae7c0 in () at /lib/aarch64-linux-gnu/libcuda.so
#3 0x0000ffff6230943c in () at /lib/aarch64-linux-gnu/libcuda.so
#4 0x0000ffff623c7104 in () at /lib/aarch64-linux-gnu/libcuda.so
#5 0x0000ffff7ea306b0 in __cudart320 () at /usr/local/lib/libopencv_core.so.4.5
#6 0x0000ffff7ea853f4 in cudaEventRecord () at /usr/local/lib/libopencv_core.so.4.5
#7 0x0000ffff7e82ddec in cv::cuda::Event::record(cv::cuda::Stream&) () at /usr/local/lib/libopencv_core.so.4.5
#8 0x0000ffff7ed61e90 in void cv::cuda::device::stereosgm::path_aggregation::PathAggregation::operator()<64ul>(cv::cuda::GpuMat const&, cv::cuda::GpuMat const&, cv::cuda::GpuMat&, int, int, int, int, cv::cuda::Stream&) () at /usr/local/lib/libopencv_cudastereo.so.4.5
#9 0x0000ffff7ed3c530 in (anonymous namespace)::StereoSGMImpl::compute(cv::_InputArray const&, cv::_InputArray const&, cv::_OutputArray const&, cv::cuda::Stream&) ()
at /usr/local/lib/libopencv_cudastereo.so.4.5
#10 0x0000aaaad908c8bc in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78
#0 futex_lock_pi (private=0, abstime=0x0, futex_word=0xaaab03136df0) at ../sysdeps/nptl/futex-internal.h:412
#1 __pthread_mutex_lock_full (mutex=0xaaab03136df0) at pthread_mutex_lock.c:419
#2 0x0000ffff623ae7c0 in () at /lib/aarch64-linux-gnu/libcuda.so
#3 0x0000ffff6230943c in () at /lib/aarch64-linux-gnu/libcuda.so
#4 0x0000ffff623c7104 in () at /lib/aarch64-linux-gnu/libcuda.so
#5 0x0000ffff7ea306b0 in __cudart320 () at /usr/local/lib/libopencv_core.so.4.5
#6 0x0000ffff7ea853f4 in cudaEventRecord () at /usr/local/lib/libopencv_core.so.4.5
#7 0x0000ffff7e82ddec in cv::cuda::Event::record(cv::cuda::Stream&) () at /usr/local/lib/libopencv_core.so.4.5
#8 0x0000ffff7ed61e90 in void cv::cuda::device::stereosgm::path_aggregation::PathAggregation::operator()<64ul>(cv::cuda::GpuMat const&, cv::cuda::GpuMat const&, cv::cuda::GpuMat&, int, int, int, int, cv::cuda::Stream&) () at /usr/local/lib/libopencv_cudastereo.so.4.5
#9 0x0000ffff7ed3c530 in (anonymous namespace)::StereoSGMImpl::compute(cv::_InputArray const&, cv::_InputArray const&, cv::_OutputArray const&, cv::cuda::Stream&) ()
at /usr/local/lib/libopencv_cudastereo.so.4.5
#10 0x0000aaaad908c8bc in soa::StereoDisparity::process_rectified_stereo_cv(cv::Mat const&, cv::Mat const&, cv::Mat&, int&) ()
#11 0x0000aaaad9086bb0 in soa::SOA::process_stereo_images(cv::Mat const&, cv::Mat const&, bool const&, int const&, Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, std::array<float, 6ul>&, cv::Mat&, cv::Mat&, cv::Mat&) ()
#12 0x0000aaaad904a28c in soa_process_func(void*) ()
#13 0x0000ffff7f416624 in start_thread (arg=0xaaaad9049d68 <soa_process_func(void*)>) at pthread_create.c:477
#14 0x0000ffff74c4049c in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78