Hi,
I already posted in the cuda forum some time ago. Here.
I’m facing a problem compiling a very simple piece of CUDA code. The code is:
#include <cstdint>
#include "cuda.h"
// Packs a separate Y plane and an interleaved UV plane (two bytes per pixel)
// into a single output buffer holding three bytes per pixel: the Y byte
// followed by the pixel's two UV bytes.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads whose
// coordinates fall outside width x height return immediately, so the grid may
// overshoot the image.
//
// yPlane / yPitch   - source Y plane and its row stride (in bytes).
// uvPlane / uvPitch - source UV plane and its row stride (in bytes).
// width, height     - image size in pixels.
// dst               - output buffer, written tightly packed with a row stride
//                     of width * 3 bytes.
__global__ void yuv444ToInterleaved(
    const uint8_t *yPlane, uint32_t yPitch, const uint8_t *uvPlane, uint32_t uvPitch, int width, int height,
    uint8_t *dst) {
    const int imageX = blockIdx.x * blockDim.x + threadIdx.x;
    const int imageY = blockIdx.y * blockDim.y + threadIdx.y;
    if (imageX >= width || imageY >= height) {
        return;
    }

    // Compute the flat offsets in size_t: the 32-bit products (row * pitch and
    // row * width * 3) can overflow for large frames or pitches.
    const size_t col = static_cast<size_t>(imageX);
    const size_t row = static_cast<size_t>(imageY);

    const size_t y = row * yPitch + col;
    const size_t uv = row * uvPitch + col * 2;
    const size_t k = (row * static_cast<size_t>(width) + col) * 3;

    dst[k] = yPlane[y];
    dst[k + 1] = uvPlane[uv];
    dst[k + 2] = uvPlane[uv + 1];
}
// Launches yuv444ToInterleaved over a width x height image on the default
// stream, attaches the destination buffer to the host and waits for the work
// to finish.
//
// yPlane / yPlanePitch   - device address and row stride of the Y plane.
// uvPlane / uvPlanePitch - device address and row stride of the UV plane.
// width, height          - image size in pixels.
// destination            - device address of the packed output buffer.
//                          NOTE(review): the host attach below presumably
//                          requires this to be a managed allocation - confirm
//                          against the caller.
//
// Returns 0 on success, otherwise the failing cudaError_t value as an int.
int YUVConvert(
    CUdeviceptr yPlane, int yPlanePitch, CUdeviceptr uvPlane, int uvPlanePitch, int width, int height,
    CUdeviceptr destination) {
    // An empty image would produce a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to convert, so report success.
    if (width <= 0 || height <= 0) {
        return 0;
    }

    const dim3 block(32, 32);
    const dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);

    yuv444ToInterleaved<<<grid, block>>>(
        reinterpret_cast<const uint8_t *>(yPlane), yPlanePitch,
        reinterpret_cast<const uint8_t *>(uvPlane), uvPlanePitch,
        width, height,
        reinterpret_cast<uint8_t *>(destination));

    // Kernel launches do not return a status; launch errors surface here.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        return static_cast<int>(status);
    }

    // Pass the device address held in `destination`, not the address of the
    // host-side parameter variable that stores it.
    status = cudaStreamAttachMemAsync(NULL, reinterpret_cast<void *>(destination), 0, cudaMemAttachHost);
    if (status != cudaSuccess) {
        return static_cast<int>(status);
    }

    // Block until the kernel and the attach have completed; asynchronous
    // execution errors are reported by this call.
    status = cudaStreamSynchronize(NULL);
    return static_cast<int>(status);
}
Usually in our build pipeline we’re using CMake, but for the sake of this example I tried to reproduce only this compilation with nvcc directly. With clang-11 as the host compiler, the compilation fails with a rather cryptic message:
$ /usr/local/cuda-11.4/bin/nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Sun_Oct_23_22:16:07_PDT_2022
Cuda compilation tools, release 11.4, V11.4.315
Build cuda_11.4.r11.4/compiler.31964100_0
$ /usr/local/cuda-11.4/bin/nvcc -std=c++17 -ccbin clang-11 -c yuv-kernel.cu
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(70): error: qualified name is not allowed
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(72): error: expected a "("
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(72): error: expected a type specifier
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(72): error: expected a ")"
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(76): error: expected a "("
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(76): error: expected a type specifier
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(77): error: expected a ";"
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(78): error: expected a "("
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(78): error: expected a ";"
/usr/bin/../lib/gcc/aarch64-linux-gnu/10/../../../../include/c++/10/ext/numeric_traits.h(88): error: class template "__gnu_cxx::__numeric_traits_integer<_Value>" has no member "__is_signed"
10 errors detected in the compilation of "yuv-kernel.cu".
When compiling with gcc as the host compiler, the compilation works as expected:
$ /usr/local/cuda-11.4/bin/nvcc -std=c++17 -ccbin gcc -c yuv-kernel.cu
Additionally, reducing the C++ standard to 14 also yields a successful compilation with clang-11. However, reducing the C++ standard is not really an option for the whole project, as we’re fixed on C++17.
/usr/local/cuda-11.4/bin/nvcc -std=c++14 -ccbin clang-11 -c yuv-kernel.cu
/usr/local/cuda-11.4/bin/nvcc -std=c++14 -ccbin clang-10 -c yuv-kernel.cu
For now, I worked around this by using gcc as the CUDA host compiler and clang for the rest of the project, but this is not a satisfying solution. The versions we use should be compatible as per this.
Are there any pointers as to why this happens, or what steps I could take to solve this? I’m working on Tegra Linux on a Jetson Orin NX.
JP version 5.1.2
CUDA version 11.4
clang version 11.0.0