CUDA cross-compilation for nvivafilter gst plugin

Jakow1ew · February 18, 2022, 1:09pm

I am trying to write a GStreamer video filter using nvivafilter. We use a remote x86_64 host with a cross-compilation toolchain, and after a while I ran into problems: CUDA cross-compilation is overcomplicated, and the documentation doesn’t say much. Both the host and the Jetson run Ubuntu (20.04 on the host and 18.04 on the Jetson).

To set up cross-compilation, I’ve installed these packages on the host along with cuda-toolkit 10.2.

Cross compilation works with naive vectorAdd example from official repo (both with Makefile and hand-written CMake). But nvsample_cudaprocess example from nvivafilter fails at runtime with

cuGraphicsEGLRegisterImage failed : 3
every frame.

Looks like cross-compilation works (naive example successfully executed), but there are problems with this specific library.

Here is my CMakeLists.txt

# Build script for the nvsample_cudaprocess shared library, produced from a
# single CUDA source file and linked against EGL, GLESv2 and CUDA 10.2 as
# resolved through pkg-config.
cmake_minimum_required(VERSION 3.16)

# Fall back to the devkit toolchain only when the caller did not choose one,
# so that -DCMAKE_TOOLCHAIN_FILE=... on the command line is honoured.
# This has to happen before project(), which is where the toolchain is read.
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
    set(CMAKE_TOOLCHAIN_FILE /opt/devkit_toolchain.cmake)
endif()

# Print the full compiler/linker command lines during the build.
set(CMAKE_VERBOSE_MAKEFILE ON)

project(nvsample_cudaprocess VERSION 0.1 LANGUAGES CXX CUDA)

find_package(PkgConfig REQUIRED)

# All four modules are bundled into one imported target, PkgConfig::PKG.
# NOTE(review): cudart-10.2 is requested here while CMake's CUDA language
# support may also put a CUDA runtime library on the link line by itself -
# confirm that only one CUDA runtime ends up in the resulting library.
pkg_check_modules(PKG REQUIRED IMPORTED_TARGET egl glesv2 cudart-10.2 cuda-10.2)

add_library(${PROJECT_NAME} SHARED
    nvsample_cudaprocess.cu
)

# CMAKE_SCRIPTS_ALL is not defined in this file; it is expected to be
# provided from outside (e.g. by the toolchain file).
include("${CMAKE_SCRIPTS_ALL}")

message(STATUS "PKG_INCLUDE_DIRS: ${PKG_INCLUDE_DIRS}")

target_include_directories(${PROJECT_NAME} PUBLIC
    ${PKG_INCLUDE_DIRS}
    ${CMAKE_SYSROOT}/usr/include
    ${CMAKE_SYSROOT}/usr/include/aarch64-linux-gnu/
    ${CMAKE_CURRENT_SOURCE_DIR}
)

# Use the keyword signature: the keyword-less form has legacy semantics.
target_link_libraries(${PROJECT_NAME}
    PRIVATE
        PkgConfig::PKG
)

and toolchain file

# CMake toolchain file for cross-compiling to an aarch64 Linux target with
# the Linaro GCC 7.3.1 cross toolchain and the CUDA 10.2 nvcc, using
# /nfsroot_devkit as the sysroot.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_SYSROOT /nfsroot_devkit)

# Resolve libraries, headers and packages only inside the sysroot, but never
# look there for programs (those must run on the build host).
# The former `set(ONLY_CMAKE_FIND_ROOT_PATH TRUE)` was dropped:
# ONLY_CMAKE_FIND_ROOT_PATH is a keyword of the find_*() commands, not a
# variable CMake reads, so that line had no effect. The MODE variables below
# are what actually enforce the restriction.
set(CMAKE_FIND_ROOT_PATH "${CMAKE_SYSROOT}")
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# Point pkg-config at the .pc files inside the sysroot and have it prefix the
# reported paths with the sysroot directory.
set(ENV{PKG_CONFIG_SYSROOT_DIR} "${CMAKE_SYSROOT}")
set(ENV{PKG_CONFIG_LIBDIR} "${CMAKE_SYSROOT}/usr/lib/pkgconfig:${CMAKE_SYSROOT}/usr/share/pkgconfig:${CMAKE_SYSROOT}/usr/lib/aarch64-linux-gnu/pkgconfig:${CMAKE_SYSROOT}/usr/local/lib/pkgconfig")

# Host-side installation directory of the aarch64 cross compiler.
set(tools /opt/jetson_toolchain/gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu)

set(CMAKE_C_COMPILER "${tools}/bin/aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "${tools}/bin/aarch64-linux-gnu-g++")

# nvcc comes from the host CUDA 10.2 install and uses the cross g++ as its
# host compiler.
set(CMAKE_CUDA_COMPILER /usr/local/cuda-10.2/bin/nvcc)
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")

# Location of the shared helper scripts that project files pull in through
# include(${CMAKE_SCRIPTS_ALL}).
set(CMAKE_SCRIPTS_PATH /opt/CMakeScripts)
set(CMAKE_SCRIPTS_ALL "${CMAKE_SCRIPTS_PATH}/CMakeScripts.cmake")

I run my code with the following gst pipeline

gst-launch-1.0 v4l2src device=/dev/video1 ! nvvidconv ! 'video/x-raw(memory:NVMM), format=NV12' ! nvivafilter cuda-process=true pre-process=true post-process=true customer-lib-name="nvsample_cudaprocess.so" ! 'video/x-raw(memory:NVMM), format=RGBA' ! nvvidconv ! nvv4l2h264enc ! h264parse ! rtph264pay pt=96 ! udpsink host=172.16.11.43 port=8554

And the output is

Setting pipeline to PAUSED ...
Opening in BLOCKING MODE 
Pipeline is live and does not need PREROLL ...
Setting pipeline to PLAYING ...
New clock: GstSystemClock
Redistribute latency...
NvMMLiteOpen : Block : BlockType = 4 
===== NVMEDIA: NVENC =====
NvMMLiteBlockCreate : Block : BlockType = 4 
cuGraphicsEGLRegisterImage failed : 3 
H264: Profile = 66, Level = 0 
NVMEDIA_ENC: bBlitMode is set to TRUE 
cuGraphicsEGLRegisterImage failed : 3 
cuGraphicsEGLRegisterImage failed : 3 
cuGraphicsEGLRegisterImage failed : 3 
cuGraphicsEGLRegisterImage failed : 3 
...

DaneLLL · February 19, 2022, 1:38am

Hi,
Building the sample on the target is the only verified case; cross-compiling is not tested. A possible solution is to clone the image from the Jetson platform and mount it on the host PC. Please check
Jetson Linux API Reference: Setting Up Cross-Platform Support

system · March 9, 2022, 4:24am

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.