Compilation error when using Cuda HPC package + cmake

When I am compiling my Cmake project using Cuda HPC I am facing cuda files compilation error:

/opt/nvidia/hpc_sdk/Linux_x86_64/22.9//cuda/bin/nvcc -forward-unknown-to-host-compiler -DBOOST_ALL_NO_LIB -DBOOST_CHRONO_DYN_LINK -DBOOST_DATE_TIME_DYN_LINK -DBOOST_FILESYSTEM_DYN_LINK -DBOOST_REGEX_DYN_LINK -DBOOST_SYSTEM_DYN_LINK -DBOOST_THREAD_DYN_LINK -DBOOST_TIMER_DYN_LINK -DGIT_DATE="\"Tue Mar 14 13:03:23 2023\"" -DGIT_SHA1=\"98411eb\" -DGIT_TAG=\"\" -DUSE_CUTENSOR -DUSE_ENABLE_KD_BFLOAT=true -DUSE_GPU -DUSE_GPUG80 -DUSE_NVHPC -I/usr/local/include/opencv4 -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/KsKd -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/Freq2FreqTransfer -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/CsiFirstStage -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/CsiSecondStage -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/CsiRunner -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/CsiInitialization -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/common -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/BoundaryConditions -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/Antenna -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/DataReceiver -I/home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/DataExporter -I/home/Yehonatans/work/nlirRef/nlircloud/src/algorithms/SigProc/Volume -I/home/Yehonatans/work/nlirRef/nlircloud/src/utils/general -I/home/Yehonatans/work/nlirRef/nlircloud/src/utils/threads -I/home/Yehonatans/work/nlirRef/nlircloud/src/NVHPC::MATH -I/home/Yehonatans/work/nlirRef/nlircloud/src/NVHPC::CUDA -I/home/Yehonatans/work/nlirRef/nlircloud/src/NVHPC::NVRTC -I/home/Yehonatans/work/nlirRef/nlircloud/src/NVHPC::CUDART -I/home/Yehonatans/work/nlirRef/nlircloud/src/utils/hpc -isystem=/usr/local/include -isystem=/usr/local/include/eigen3 -isystem=/opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cmake/../math_libs/11.7/include -isystem=/opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cmake/../cuda/11.7/include -isystem=/opt/nvidia/hpc_sdk/Linux_x86_64/22.9/comm_libs/openmpi/openmpi-3.1.5/include -isystem=/opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cmake/../comm_libs/11.7/nccl/include -isystem=/usr/include --expt-extended-lambda --default-stream per-thread -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --generate-code=arch=compute_80,code=[compute_80,sm_80] -std=c++17 -MD -MT ImageRec/common/CMakeFiles/nlir_common.dir/commonKernel.cu.o -MF ImageRec/common/CMakeFiles/nlir_common.dir/commonKernel.cu.o.d -x cu -dc /home/Yehonatans/work/nlirRef/nlircloud/src/ImageRec/common/commonKernel.cu -o ImageRec/common/CMakeFiles/nlir_common.dir/commonKernel.cu.o
In file included from /opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cmake/../cuda/11.7/include/crt/math_functions.h:10545,
                 from /opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cmake/../cuda/11.7/include/crt/common_functions.h:303,
                 from /opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cmake/../cuda/11.7/include/cuda_runtime.h:115,
                 from <command-line>:

The root cause for the error is the command “-isystem=/usr/include” which is being added automatically when running the cmake file.
When I manually compiled the file without -isystem=/usr/include the compilation was Ok

An example for broken compilation line

cd /home/Yehonatans/work/Ref/cloud/src/build/ImageRec/common && /opt/nvidia/hpc_sdk/Linux_x86_64/23.3//cuda/bin/nvcc -forward-unknown-to-host-compiler -DBOOST_ALL_NO_LIB -DBOOST_CHRONO_DYN_LINK -DBOOST_DATE_TIME_DYN_LINK -DBOOST_FILESYSTEM_DYN_LINK -DBOOST_REGEX_DYN_LINK -DBOOST_SYSTEM_DYN_LINK -DBOOST_THREAD_DYN_LINK -DBOOST_TIMER_DYN_LINK -DGIT_DATE="\"Tue Mar 14 13:03:23 2023\"" -DGIT_SHA1=\"98411eb\" -DGIT_TAG=\"\" -DUSE_CUTENSOR -DUSE_ENABLE_KD_BFLOAT=true -DUSE_GPU -DUSE_GPUG80 -DUSE_NVHPC -I/usr/local/include/opencv4 -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/KsKd -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/Freq2FreqTransfer -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/CsiFirstStage -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/CsiSecondStage -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/CsiRunner -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/CsiInitialization -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/common -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/BoundaryConditions -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/Antenna -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/DataReceiver -I/home/Yehonatans/work/Ref/cloud/src/ImageRec/DataExporter -I/home/Yehonatans/work/Ref/cloud/src/algorithms/SigProc/Volume -I/home/Yehonatans/work/Ref/cloud/src/utils/general -I/home/Yehonatans/work/Ref/cloud/src/utils/threads -I/home/Yehonatans/work/Ref/cloud/src/NVHPC::MATH -I/home/Yehonatans/work/Ref/cloud/src/NVHPC::CUDA -I/home/Yehonatans/work/Ref/cloud/src/NVHPC::NVRTC -I/home/Yehonatans/work/Ref/cloud/src/NVHPC::CUDART -I/home/Yehonatans/work/Ref/cloud/src/utils/hpc -isystem=/usr/local/include -isystem=/usr/local/include/eigen3 -isystem=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cmake/../math_libs/11.8/include -isystem=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cmake/../cuda/11.8/include -isystem=/opt/nvidia/hpc_sdk/Linux_x86_64/23.3/cmake/../comm_libs/11.8/nccl/include -isystem=/usr/include --expt-extended-lambda --default-stream per-thread -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --generate-code=arch=compute_80,code=[compute_80,sm_80] -Xcompiler -pthread -std=c++17 -MD -MT ImageRec/common/CMakeFiles/_common.dir/commonKernel.cu.o -MF CMakeFiles/_common.dir/commonKernel.cu.o.d -x cu -dc /home/Yehonatans/work/Ref/cloud/src/ImageRec/common/commonKernel.cu -o CMakeFiles/_common.dir/commonKernel.cu.o

The main Cmake file is:

#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -wl,--stack,32000")
cmake_minimum_required(VERSION 3.17)

### Require out-of-source builds
file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH)
if(EXISTS "${LOC_PATH}")
 message(FATAL_ERROR "You cannot build in a source directory (or any directory with a CMakeLists.txt file). Please make a build subdirectory. Feel free to remove CMakeCache.txt and CMakeFiles.")
endif()

 message("Cmake version ${CMAKE_VERSION}")
IF(NVHPC_ENABLE)
 message("NVHPC_ENABLE is active")

 add_definitions("-DUSE_NVHPC")
ENDIF()

set(CMAKE_CUDA_ARCHITECTURES  80)
set(CMAKE_CUDA_COMPILER   "$ENV{NVHPC_PATH}//cuda/bin/nvcc")
set(CMAKE_CUDA_COMPILER_FORCED TRUE)
project(Proj C CXX CUDA)

set(CUTLASS_ENABLE "ON")
set(NVHPC_ENABLE "ON")
add_definitions("-DUSE_NVHPC")
add_definitions("-DUSE_GPU")
set(CUDA_ENABLE "ON")
set(ALGS_USE_EM "ON")
add_definitions("-DUSE_ENABLE_KD_BFLOAT=true")
add_definitions("-DUSE_GPUG80")


set(CMAKE_VERBOSE_MAKEFILE OFF)

set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "$ENV{NVHPC_PATH}/cmake" )
set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "$ENV{NVHPC_PATH}/cutlass/lib64/cmake" )
find_package(MPI  REQUIRED )
find_package(NVHPC REQUIRED COMPONENTS CUDA MATH  NCCL)

#find_package(NvidiaCutlass REQUIRED nvidia::cutlass::library)
ADD_LIBRARY(utils_cuda INTERFACE)
TARGET_INCLUDE_DIRECTORIES(utils_cuda INTERFACE NVHPC::MATH NVHPC::CUDA  NVHPC::NVRTC NVHPC::CUDART)
TARGET_LINK_DIRECTORIES(utils_cuda INTERFACE NVHPC::MATH NVHPC::CUDA NVHPC::NVRTC NVHPC::CUDART)
TARGET_LINK_LIBRARIES(utils_cuda INTERFACE NVHPC::MATH NVHPC::CUDA NVHPC::NVRTC NVHPC::CUDART)
IF(DBG MATCHES "ON")
 set(CMAKE_BUILD_TYPE Debug) # Release | Debug
ELSEIF(DBG MATCHES "OFF")
 set(CMAKE_BUILD_TYPE Release) # Release | Debug
ENDIF(DBG MATCHES "ON")



message("CUDA INCLUDE: ${CUDAToolkit_INCLUDE_DIRS}")
INCLUDE_DIRECTORIES(${CUDAToolkit_INCLUDE_DIRS})
set(CMAKE_CUDA_FLAGS "--expt-extended-lambda --default-stream per-thread -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored")
#set(CMAKE_CUDA_FLAGS "--expt-extended-lambda ")



#set(NvidiaCutlass_DIR "/usr/local/cuda/lib64/cmake")
#find_package(NvidiaCutlass)
message(STATUS "Cuda root dir: ${CUDAToolkit_LIBRARY_ROOT}")

set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

set(UTILS ${CMAKE_SOURCE_DIR}/utils)
message( "UTILS path  ${UTILS}")


#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -wl,--stack,32000")
cmake_minimum_required(VERSION 3.17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# Retrieve information about current git branch into environment var
find_package(Git)
if (Git_FOUND)
  message("Git found: ${GIT_EXECUTABLE}")

  message(STATUS "Resolving GIT Version")

  # the commit's SHA1, and whether the building workspace was dirty or not
  execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always #--abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)


  execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --tag #--abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_TAG
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

  # the date of the commit
  execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

  # the subject of the commit
  execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%s
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)


  message(STATUS "GIT_COMMIT_SUBJECT=" ${GIT_COMMIT_SUBJECT})
  message(STATUS "GIT_SHA1=" ${GIT_SHA1})
  message(STATUS "GIT_DATE=" ${GIT_DATE})
  message(STATUS "GIT_TAG=" ${GIT_TAG})

  #add_definitions("-DGIT_COMMIT_SUBJECT=\"${GIT_COMMIT_SUBJECT}\"")
  add_definitions("-DGIT_SHA1=\"${GIT_SHA1}\"")
  add_definitions("-DGIT_DATE=\"${GIT_DATE}\"")
  add_definitions("-DGIT_TAG=\"${GIT_TAG}\"")
else ()
  message(SEND_ERROR "Could not find git package")
endif ()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -pedantic -fopenmp")

#set(SANITIZE "ADDRESS_LEAK")
#set(SANITIZE  "THREADS")
#set(SANITIZE  "LEAK")
IF (CMAKE_BUILD_TYPE MATCHES Debug)
  IF (SANITIZE MATCHES "ADDRESS_LEAK")
    message("Sanitize address...")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize=leak -fno-omit-frame-pointer")
  ELSEIF (SANITIZE MATCHES "ADDRESS")
    message("Sanitize address...")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address  -fno-omit-frame-pointer")
  ELSEIF (SANITIZE MATCHES "THREADS")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread  -fno-omit-frame-pointer")
  ELSEIF (SANITIZE MATCHES "LEAK")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=leak  -fno-omit-frame-pointer")
  ENDIF ()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
ENDIF ()

message("-- CMAKE_BUILD_TYPE = " ${CMAKE_BUILD_TYPE})

#set(BOOST_ROOT /usr/local/)
#set(BOOST_INCLUDEDIR /usr/local/include/)
#set(BOOST_LIBRARYDIR /usr/local/lib64/)
#set(Boost_NO_SYSTEM_PATHS TRUE)

FIND_PACKAGE(Boost 1.72.0 COMPONENTS thread system filesystem date_time timer chrono regex REQUIRED)
INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIR})
LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
# OpenCV library
FIND_PACKAGE(OpenCV 4.1 REQUIRED)
INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS})
LINK_DIRECTORIES(${OpenCV_LIBS})
message(STATUS "OpenCV include=" ${OpenCV_INCLUDE_DIRS})
message(STATUS "OpenCV libs=" ${OpenCV_LIBS})
message(STATUS "CMAKE_DL_LIBS=" ${CMAKE_DL_LIBS})
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_LIBS ON)
set(Boost_USE_STATIC_RUNTIME ON)



find_package(GTest REQUIRED)

find_package(Eigen3 3.3 REQUIRED NO_MODULE)
include_directories(${EIGEN3_INCLUDE_DIR})

find_package(HDF5 REQUIRED)


# Set the path of the utils directory
set(UTILS ${PROJECT_SOURCE_DIR}/utils)
set(ALGORITHMS ${PROJECT_SOURCE_DIR}/algorithms)
set(INFRA ${PROJECT_SOURCE_DIR}/infra)

# Initiate compilation of submodules
# inside a hosting repository

message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}$")

ADD_SUBDIRECTORY(${UTILS})
ADD_SUBDIRECTORY(${ALGORITHMS})
ADD_SUBDIRECTORY(${INFRA})

System setup
OS Rocky 8.6/8.7
NVHPC versions 22.9,22.11,23.3 (I tried all this version using cuda 11.8 or cuda 11.7)
Cmake version 3.20 and 3.26
What is wrong with my camke and should I fix it?

Do you have the full output from the error or is this all that was printed?

Typically system include directories are implicitly included so it’s unclear why explicitly adding it via the -isystem flag would cause an error.

Hi
In addition to that the full error report was added.
When I compiled over another machine which has functional NVHPC package I couldn’t find at cmake artifacts any command -isystem=/usr/include or -isystem /usr/include

In addition to that When I am compile the code using Cuda which was install using standard install there was not any compilation issue

My theory would be that there’s an incompatibility between the system header files and the host compiler that nvcc is using (likely g++ in this case, but I’m not sure). For example with nvc++, we sometime need to provide our own modified version of a system header file to work around some issue, which in turn use “include_next” to then load the actual system include. If “-I/usr/include” is added before the our includes paths, this causes the system include to be loaded before ours, and hence the work around is not applied.

Though given the information you’ve provided is incomplete, I can’t be sure this is correct. Besides the command line and the “In file included …” headers, does the output show more information, in particular the error message as to why the compilation failed?

Which more information should I provide in order to help the analysis process?
And which modifications should I done at my cmake files to enable successful compilation?

The full text from the output from the failing compilation line.

And which modifications should I done at my cmake files to enable successful compilation?

I’m not an expert in using cmake, so don’t know but likely it’s being include from one of the INCLUDE_DIRECTORIES. Right now I’m just gathering information so I can pass it on to another engineer who may know more.

I am attach a log file of Cmake and build commands.
Please inform if there is need for further information.
CompilationFile.log (66.0 KB)

Thanks, here’s the error in your log:

/usr/include/c++/8/cmath:45:15: fatal error: math.h: No such file or directory
 #include_next <math.h>
               ^~~~~~~~
compilation terminated.

Which I can reproduce with this simple example:

% cat test.cpp
#include <cmath>
% g++ -c test.cpp -isystem /usr/include
In file included from test.cpp:1:
/usr/include/c++/8/cmath:45:15: fatal error: math.h: No such file or directory
 #include_next <math.h>
               ^~~~~~~~
compilation terminated.

Likely there’s an ordering problem when putting the “/usr/include” path before the path to the C++ headers.

I’m asking our cmake experts here what could be adding this flag.

Hi,
There is any update about this issue?

Yes, sorry. I asked our expert in using CMake who said:

That directory should be added by CMake to CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES which would mean that it is dropped from the compile line even when requested by the user.
E.g:
cmake_minimum_required(VERSION 3.12.1)
project(nvcc_reproducer LANGUAGES C CXX CUDA)

add_executable(verify main.cu)
target_include_directories(verify SYSTEM PRIVATE /usr/include)
Generates the compile line:
/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/compilers/bin/nvcc -x cu -c /host_pwd/temp/main.cu -o CMakeFiles/verify.dir/main.cu.o

He suggests running “cmake --trace” and grepping the full output to find which sub project is adding it to the compile line. He’s guessing that it may be coming from MPI.

He also requests the output from the “cmake --system-information”

Hello,
Please see the attached log files for further analysis
Regards
Yehonatan
CmakeInfo (54.9 KB)
CmakeOutput (258.2 KB)

Hello,
There are any new updates?

I pinged my colleague who sent the following:

A documented CMakeForceCompiler — CMake 3.26.4 Documentation the forcing of a compiler is deprecated and shouldn’t be used.

When the compiler detection is forced for CUDA, the compiler detection stops before the extraction of the implicit includes and link directories and therefore you get a broken CMake configuration.

You need to remove the force lines and you will need to use the nvcc that is located in /opt/nvidia/hpc_sdk/Linux_x86_64/22.9/compilers/bin/ and not the one in
/opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cuda/bin/ due to an issue with CMake not properly handling paths like /opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cuda/bin/../targets/x86_64-linux/lib where the relative bin/../ cross a symlink and therefore the naive collapsing to bin/ is invalid.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.