I’m trying to cross-compile TensorFlow 2.9.1 for a Xavier NX running CUDA 10.2. One of the cross-compilation steps keeps failing, but the other CUDA files compile successfully.
My build machine ran this `nvcc` command:
nvcc -D_FORCE_INLINES -gencode=arch=compute_72,\"code=sm_72,compute_72\" --expt-relaxed-constexpr --ftz=true -DEIGEN_MPL2_ONLY -DEIGEN_MAX_ALIGN_BYTES=64 -DHAVE_SYS_UIO_H -DTF_USE_SNAPPY -DAUTOLOAD_DYNAMIC_KERNELS -DGOOGLE_CUDA=1 -DEIGEN_AVOID_STL_ARRAY -DGOOGLE_CUDA=1 -DTENSORFLOW_USE_NVCC=1 -DTENSORFLOW_USE_XLA=1 -DGOOGLE_TENSORRT=1 -DTENSORFLOW_MONOLITHIC_BUILD -std=c++14 --compiler-options " -isystem external/local_config_cuda/cuda -isystem bazel-out/aarch64-opt/bin/external/local_config_cuda/cuda -isystem external/local_config_cuda/cuda/cuda/include -isystem bazel-out/aarch64-opt/bin/external/local_config_cuda/cuda/cuda/include -isystem external/nsync/public -isystem bazel-out/aarch64-opt/bin/external/nsync/public -isystem external/eigen_archive -isystem bazel-out/aarch64-opt/bin/external/eigen_archive -isystem external/gif -isystem bazel-out/aarch64-opt/bin/external/gif -isystem external/com_google_protobuf/src -isystem bazel-out/aarch64-opt/bin/external/com_google_protobuf/src -isystem external/zlib -isystem bazel-out/aarch64-opt/bin/external/zlib -isystem external/farmhash_archive/src -isystem bazel-out/aarch64-opt/bin/external/farmhash_archive/src -isystem external/local_config_rocm/rocm -isystem bazel-out/aarch64-opt/bin/external/local_config_rocm/rocm -isystem external/local_config_rocm/rocm/rocm/include -isystem bazel-out/aarch64-opt/bin/external/local_config_rocm/rocm/rocm/include -isystem external/local_config_rocm/rocm/rocm/include/rocrand -isystem bazel-out/aarch64-opt/bin/external/local_config_rocm/rocm/rocm/include/rocrand -isystem external/local_config_rocm/rocm/rocm/include/roctracer -isystem bazel-out/aarch64-opt/bin/external/local_config_rocm/rocm/rocm/include/roctracer -iquote . 
-iquote bazel-out/aarch64-opt/bin -iquote external/cub_archive -iquote bazel-out/aarch64-opt/bin/external/cub_archive -iquote external/local_config_cuda -iquote bazel-out/aarch64-opt/bin/external/local_config_cuda -iquote external/com_google_absl -iquote bazel-out/aarch64-opt/bin/external/com_google_absl -iquote external/nsync -iquote bazel-out/aarch64-opt/bin/external/nsync -iquote external/eigen_archive -iquote bazel-out/aarch64-opt/bin/external/eigen_archive -iquote external/gif -iquote bazel-out/aarch64-opt/bin/external/gif -iquote external/libjpeg_turbo -iquote bazel-out/aarch64-opt/bin/external/libjpeg_turbo -iquote external/com_google_protobuf -iquote bazel-out/aarch64-opt/bin/external/com_google_protobuf -iquote external/zlib -iquote bazel-out/aarch64-opt/bin/external/zlib -iquote external/com_googlesource_code_re2 -iquote bazel-out/aarch64-opt/bin/external/com_googlesource_code_re2 -iquote external/farmhash_archive -iquote bazel-out/aarch64-opt/bin/external/farmhash_archive -iquote external/fft2d -iquote bazel-out/aarch64-opt/bin/external/fft2d -iquote external/highwayhash -iquote bazel-out/aarch64-opt/bin/external/highwayhash -iquote external/double_conversion -iquote bazel-out/aarch64-opt/bin/external/double_conversion -iquote external/snappy -iquote bazel-out/aarch64-opt/bin/external/snappy -iquote external/local_config_rocm -iquote bazel-out/aarch64-opt/bin/external/local_config_rocm -iquote external/local_config_tensorrt -iquote bazel-out/aarch64-opt/bin/external/local_config_tensorrt -iquote external/cudnn_frontend_archive -iquote bazel-out/aarch64-opt/bin/external/cudnn_frontend_archive -fPIC" --verbose --keep --compiler-bindir=/opt/toolchain/bin/aarch64-linux-gnu-gcc -I . 
-x cu -g -G -I bazel-out/aarch64-opt/bin/external/local_config_cuda/cuda/_virtual_includes/cuda_headers_virtual -I bazel-out/aarch64-opt/bin/external/local_config_tensorrt/_virtual_includes/tensorrt_headers -I bazel-out/aarch64-opt/bin/external/local_config_cuda/cuda/_virtual_includes/cudnn_header -I bazel-out/aarch64-opt/bin/external/cudnn_frontend_archive/_virtual_includes/cudnn_frontend -I external/gemmlowp -c tensorflow/core/kernels/histogram_op_gpu.cu.cc -o bazel-out/aarch64-opt/bin/tensorflow/core/kernels/_objs/histogram_op_gpu/histogram_op_gpu.cu.o
`nvcc` eventually calls `ptxas` like this:
ptxas --verbose --compile-only -arch=sm_72 -m64 -g --dont-merge-basicblocks --return-at-end "histogram_op_gpu.cu.ptx" -o "histogram_op_gpu.cu.sm_72.cubin"
The only output I get from ptxas is this: ptxas fatal : Unresolved extern function 'cudaGetErrorString'
Running `strace` on `ptxas` shows that the last failing call is `ioctl(3, TCGETS, 0x7ffe13b62410) = -1 ENOTTY (Inappropriate ioctl for device)`.
Does anyone know what is wrong, and how I can cross-compile this file?