I’m trying to get this simple app to cross-compile:
#include <stdio.h>
#include <arm_neon.h>
int main()
{
// Initialize an array with enough data for four 128-bit vectors (4 x 8 = 32 uint16_t elements)
uint16_t data[32] = {
0x0011, 0x0022, 0x0033, 0x0044, 0x0055, 0x0066, 0x0077, 0x0088,
0x0099, 0x00AA, 0x00BB, 0x00CC, 0x00DD, 0x00EE, 0x00FF, 0x0110,
0x0121, 0x0132, 0x0143, 0x0154, 0x0165, 0x0176, 0x0187, 0x0198,
0x01A9, 0x01BA, 0x01CB, 0x01DC, 0x01ED, 0x01FE, 0x020F, 0x0210
};
// Use the vld1q_u16_x4 intrinsic to load data into four NEON vectors
uint16x8x4_t vectors = vld1q_u16_x4(data);
// Example usage of the vectors, such as printing one element from each vector
printf("Values: %x %x %x %x\n", vgetq_lane_u16(vectors.val[0], 0),
vgetq_lane_u16(vectors.val[1], 0),
vgetq_lane_u16(vectors.val[2], 0),
vgetq_lane_u16(vectors.val[3], 0));
return 0;
}
using CMakeLists.txt and a toolchain attached below. For the setup, I downloaded Jetson toolchain r35.4.1 from here and placed it in a parent folder to the sample app.
When cross-compiling, I get this error:
sample-aarch64/main.cpp:22:28: error: ‘vld1q_u16_x4’ was not declared in this scope; did you mean ‘vld1q_u16_x2’?
However, same code compiles fine on the target platform using default gcc-9.4.1.
And indeed, when I check for the vld1q_u16_x4
in “arm_neon.h” on the device vs host, I see that it does not exist in the toolchain (only vld1q_u16_x2
does):
# xavier
~/sample$ find /usr -type f -name "arm_neon.h"
/usr/lib/gcc/aarch64-linux-gnu/8/include/arm_neon.h
/usr/lib/gcc/aarch64-linux-gnu/11/include/arm_neon.h
/usr/lib/gcc/aarch64-linux-gnu/9/include/arm_neon.h
/usr/lib/llvm-12/lib/clang/12.0.0/include/arm_neon.h
/usr/lib/llvm-10/lib/clang/10.0.0/include/arm_neon.h
~/sample$ grep -E "vld1q_u16_x4" /usr/lib/gcc/aarch64-linux-gnu/9/include/arm_neon.h
vld1q_u16_x4 (const uint16_t *__a)
# host
~/sample-aarch64$ find ../aarch64--glibc--stable-final/ -type f -name "arm_neon.h"
../aarch64--glibc--stable-final/lib/gcc/aarch64-buildroot-linux-gnu/9.3.0/include/arm_neon.h
~/sample-aarch64$ grep -R "vld1q_u16_x4" ../aarch64--glibc--stable-final/lib/gcc/aarch64-buildroot-linux-gnu/9.3.0/include/arm_neon.h
~/sample-aarch64$ grep -R "vld1q_u16_x2" ../aarch64--glibc--stable-final/lib/gcc/aarch64-buildroot-linux-gnu/9.3.0/include/arm_neon.h
vld1q_u16_x2 (const uint16_t *__a)
This is a bit confusing since, as I understand it, the toolchain provided should have same system libraries as the target has, shouldn’t it?
As a matter of fact, I also tried providing aarch64-buildroot-linux-gnu-gcc with include paths that gcc on the device has by sshfs-mounting device’s root fs (and adding -I
option to gcc using include_directories
cmake command).
Even though this bypassed the original error above, it eventually failed with numerous errors, like:
~/xavier-sysroot/usr/lib/gcc/aarch64-linux-gnu/9/include/arm_neon.h: In function ‘float64x2_t vrnd64xq_f64(float64x2_t)’:
~/xavier-sysroot/usr/lib/gcc/aarch64-linux-gnu/9/include/arm_neon.h:34799:10: error: ‘__builtin_aarch64_frint64xv2df’ was not declared in this scope; did you mean ‘__builtin_aarch64_frintnv2df’?
34799 | return __builtin_aarch64_frint64xv2df (__a);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| __builtin_aarch64_frintnv2df
cmake file and toolchain:
cmake_minimum_required(VERSION 3.10)
project(HelloWorld)
set(CMAKE_CXX_STANDARD 11)
add_executable(hello_world main.cpp)
# JetsonToolchain.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
# Specify the cross compiler
set(CMAKE_C_COMPILER ~/aarch64--glibc--stable-final/bin/aarch64-buildroot-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER ~/aarch64--glibc--stable-final/bin/aarch64-buildroot-linux-gnu-g++)
set(XAVIER_SYSROOT_DIR ~/xavier-sysroot)
set(CMAKE_CXX_FLAGS "-isystem=${XAVIER_SYSROOT_DIR}/usr/lib/gcc/aarch64-linux-gnu/9/include -isystem=${XAVIER_SYSROOT_DIR}/usr/include -isystem=${XAVIER_SYSROOT_DIR}/usr/include/aarch64-linux-gnu ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-isystem=${XAVIER_SYSROOT_DIR}/usr/lib/gcc/aarch64-linux-gnu/9/include -isystem=${XAVIER_SYSROOT_DIR}/usr/include -isystem=${XAVIER_SYSROOT_DIR}/usr/include/aarch64-linux-gnu ${CMAKE_C_FLAGS}")
include_directories(${XAVIER_SYSROOT_DIR}/usr/include/c++/9
${XAVIER_SYSROOT_DIR}/usr/include/aarch64-linux-gnu/c++/9
${XAVIER_SYSROOT_DIR}/usr/include/c++/9/backward
${XAVIER_SYSROOT_DIR}/usr/lib/gcc/aarch64-linux-gnu/9/include
${XAVIER_SYSROOT_DIR}/usr/local/include
${XAVIER_SYSROOT_DIR}/usr/include/aarch64-linux-gnu
${XAVIER_SYSROOT_DIR}/usr/include)
# Where is the target environment
set(CMAKE_FIND_ROOT_PATH ~/aarch64--glibc--stable-final/aarch64-buildroot-linux-gnu/sysroot)
# Search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
# For libraries and headers in the target directories
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
What would be the correct approach to cross-compile this sample app? Am I using an outdated toolchain? Any insights are appreciated!