CMakeDetermineCUDACompiler.cmake fails with "nvcc fatal : Unknown option '-Wl,--start-group'"

I’m trying to compile a hello world example on the following machine:

  • PopOS (~Ubuntu) 22.04 (Linux pop-os 6.6.6-76060606-generic)
  • nvcc 12.3 (V12.3.103)
  • gcc/g++ 12.3.0
  • CMake (tried 3.22.1, 3.27.x, and 3.28.1; the output below is from 3.22.1)
  • GPU: NVIDIA GeForce RTX 2060
  • NVIDIA driver version: 545.29.06 (CUDA Version: 12.3)

Here is my CMakeLists.txt:

cmake_minimum_required(VERSION 3.18)
project(hello_cuda CXX CUDA)

set(CMAKE_CXX_STANDARD 17)
set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 75)

add_executable(${PROJECT_NAME} hello_cuda.cu)

And here is hello_cuda.cu:

#include <iostream>

int main()
{
  std::cout << "Hello, World!" << std::endl;
}

The steps I follow to build:

mkdir build
cd build
cmake ..

And finally, the CMake output/error:

-- The CXX compiler identification is GNU 12.3.0
CMake Error at /usr/share/cmake-3.22/Modules/CMakeDetermineCompilerId.cmake:726 (message):
  Compiling the CUDA compiler identification source file
  "CMakeCUDACompilerId.cu" failed.

  Compiler: /usr/local/cuda-12.3/bin/nvcc

  Build flags:

  Id flags: --keep;--keep-dir;tmp;-ccbin=/usr/local/cuda-12.3/bin/nvcc -v

  

  The output was:

  1

  #$ _NVVM_BRANCH_=nvvm

  #$ _SPACE_=

  #$ _CUDART_=cudart

  #$ _HERE_=/usr/local/cuda-12.3/bin

  #$ _THERE_=/usr/local/cuda-12.3/bin

  #$ _TARGET_SIZE_=

  #$ _TARGET_DIR_=

  #$ _TARGET_DIR_=targets/x86_64-linux

  #$ TOP=/usr/local/cuda-12.3/bin/..

  #$ NVVMIR_LIBRARY_DIR=/usr/local/cuda-12.3/bin/../nvvm/libdevice

  #$
  LD_LIBRARY_PATH=/usr/local/cuda-12.3/bin/../lib:/usr/local/cuda-12.3/lib64:


  #$
  PATH=/usr/local/cuda-12.3/bin/../nvvm/bin:/usr/local/cuda-12.3/bin:/usr/local/cuda-12.3/bin:/home/current_user/.local/bin:/home/current_user/.cargo/bin:/home/current_user/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/usr/local/cuda-12.3/bin


  #$ INCLUDES="-I/usr/local/cuda-12.3/bin/../targets/x86_64-linux/include"

  #$ LIBRARIES=
  "-L/usr/local/cuda-12.3/bin/../targets/x86_64-linux/lib/stubs"
  "-L/usr/local/cuda-12.3/bin/../targets/x86_64-linux/lib"

  #$ CUDAFE_FLAGS=

  #$ PTXAS_FLAGS=

  #$ rm tmp/a_dlink.reg.c

  #$ "/usr/local/cuda-12.3/bin"/nvcc -D__CUDA_ARCH_LIST__=520 -E -x c++
  -D__CUDACC__ -D__NVCC__
  "-I/usr/local/cuda-12.3/bin/../targets/x86_64-linux/include"
  -D__CUDACC_VER_MAJOR__=12 -D__CUDACC_VER_MINOR__=3
  -D__CUDACC_VER_BUILD__=103 -D__CUDA_API_VER_MAJOR__=12
  -D__CUDA_API_VER_MINOR__=3 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include
  "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o
  "tmp/CMakeCUDACompilerId.cpp4.ii"

  #$ cudafe++ --c++17 --gnu_version=120300 --display_error_number
  --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name
  "/home/current_user/workspace/cuda_workspace/clion_cuda_00/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu"
  --allow_managed --m64 --parse_templates --gen_c_file_name
  "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name
  "CMakeCUDACompilerId.cudafe1.stub.c" --gen_module_id_file
  --module_id_file_name "tmp/CMakeCUDACompilerId.module_id"
  "tmp/CMakeCUDACompilerId.cpp4.ii"

  #$ "/usr/local/cuda-12.3/bin"/nvcc -D__CUDA_ARCH__=520
  -D__CUDA_ARCH_LIST__=520 -E -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS
  -D__CUDACC__ -D__NVCC__
  "-I/usr/local/cuda-12.3/bin/../targets/x86_64-linux/include"
  -D__CUDACC_VER_MAJOR__=12 -D__CUDACC_VER_MINOR__=3
  -D__CUDACC_VER_BUILD__=103 -D__CUDA_API_VER_MAJOR__=12
  -D__CUDA_API_VER_MINOR__=3 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include
  "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o
  "tmp/CMakeCUDACompilerId.cpp1.ii"

  #$ cicc --c++17 --gnu_version=120300 --display_error_number
  --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name
  "/home/current_user/workspace/cuda_workspace/clion_cuda_00/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu"
  --allow_managed -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1
  -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c"
  -tused --module_id_file_name "tmp/CMakeCUDACompilerId.module_id"
  --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name
  "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name
  "tmp/CMakeCUDACompilerId.cudafe1.gpu" "tmp/CMakeCUDACompilerId.cpp1.ii" -o
  "tmp/CMakeCUDACompilerId.ptx"

  #$ ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx" -o
  "tmp/CMakeCUDACompilerId.sm_52.cubin"

  #$ fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64
  --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 "
  "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin"
  "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx"
  --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c"

  #$ "/usr/local/cuda-12.3/bin"/nvcc -D__CUDA_ARCH__=520
  -D__CUDA_ARCH_LIST__=520 -c -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS
  "-I/usr/local/cuda-12.3/bin/../targets/x86_64-linux/include" -m64
  "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o"

  #$ nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"
  "-L/usr/local/cuda-12.3/bin/../targets/x86_64-linux/lib/stubs"
  "-L/usr/local/cuda-12.3/bin/../targets/x86_64-linux/lib" -cpu-arch=X86_64
  "tmp/CMakeCUDACompilerId.o" -lcudadevrt -o "tmp/a_dlink.sm_52.cubin"
  --host-ccbin "/usr/local/cuda-12.3/bin/nvcc"

  #$ fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0
  -prec_div=1 -prec_sqrt=1 -fmad=1 " -link
  "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin"
  --embedded-fatbin="tmp/a_dlink.fatbin.c"

  #$ "/usr/local/cuda-12.3/bin"/nvcc -D__CUDA_ARCH_LIST__=520 -c -x c++
  -DFATBINFILE="\"tmp/a_dlink.fatbin.c\""
  -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I.
  -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION=
  -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
  "-I/usr/local/cuda-12.3/bin/../targets/x86_64-linux/include"
  -D__CUDACC_VER_MAJOR__=12 -D__CUDACC_VER_MINOR__=3
  -D__CUDACC_VER_BUILD__=103 -D__CUDA_API_VER_MAJOR__=12
  -D__CUDA_API_VER_MINOR__=3 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64
  "/usr/local/cuda-12.3/bin/crt/link.stub" -o "tmp/a_dlink.o"

  #$ "/usr/local/cuda-12.3/bin"/nvcc -D__CUDA_ARCH_LIST__=520 -m64
  -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"
  "-L/usr/local/cuda-12.3/bin/../targets/x86_64-linux/lib/stubs"
  "-L/usr/local/cuda-12.3/bin/../targets/x86_64-linux/lib" -lcudadevrt
  -lcudart_static -lrt -lpthread -ldl -Wl,--end-group -o "a.out"

  nvcc fatal : Unknown option '-Wl,--start-group'

  # --error 0x1 --

  

  

Call Stack (most recent call first):
  /usr/share/cmake-3.22/Modules/CMakeDetermineCompilerId.cmake:6 (CMAKE_DETERMINE_COMPILER_ID_BUILD)
  /usr/share/cmake-3.22/Modules/CMakeDetermineCompilerId.cmake:48 (__determine_compiler_id_test)
  /usr/share/cmake-3.22/Modules/CMakeDetermineCUDACompiler.cmake:298 (CMAKE_DETERMINE_COMPILER_ID)
  CMakeLists.txt:2 (project)

Note that the offending CMake module, according to the call stack above, is /usr/share/cmake-3.22/Modules/CMakeDetermineCUDACompiler.cmake.

It appears that the CUDA CMake module (over which I have no control) passes GCC-style linker flags (-Wl,--start-group) to nvcc. Considering that I’m not doing anything other than trying to build the simplest CUDA project possible, this appears to be a bug.

Any help or suggestions to work around or solve the above problem would be greatly appreciated.