Starting with CUDA 12.4, nvcc can't deduce a template type in a template function under certain unusual conditions

The following code compiles with nvcc 12.3 and older

It does not compile under nvcc 12.4 or 12.5.

This issue affects the open-source project morousg/cvGPUSpeedup ("A faster implementation of OpenCV-CUDA that uses OpenCV objects, and more!").

Platforms: Windows 11 23H2, Ubuntu 22.04, Compiler Explorer

Applying any single one of the 4 changes proposed in the code example makes the code compile. None of them should be necessary.


// Why does this code not compile?
#include <type_traits>

// There are 4 ways of making it compile
// 1) Compile with nvcc 12.3 or older

// Variable template that exposes its boolean non-type template argument
// as a constexpr value: variable<B> evaluates to B.
template <bool results>
constexpr bool variable = results;

// Always returns true, regardless of the argument. The parameter is never
// read; it is what allows T to be deduced at call sites that do not spell
// the template argument explicitly.
template <typename T>
constexpr bool getValue(const T& value) {
    return true;
}

// Minimal aggregate holding a single int.
struct SomeType {
    int member;
};

// Builds a constexpr SomeType and uses the result of getValue(test), with T
// deduced from the argument, as the template argument of variable<>.
constexpr bool function1() {
    constexpr SomeType test{ 1 };

    // 2) Replace this return with:
    // return variable<getValue<SomeType>(test)>;
    // 3) Replace this return with:
    // return variable<getValue(5)>;
    return variable<getValue(test)>;
}

// Returns variable<true>; it does not call getValue at all.
constexpr bool function2() {
    // 4) Replace this return with:
    // return true;
    return variable<true>;
}

// Entry point; does nothing and returns 0.
int main() {
    return 0;
}
1 Like

More info. This is the error message from nvcc 12.4:

#$ _NVVM_BRANCH_=nvvm
#$ _SPACE_= 
#$ _CUDART_=cudart
#$ _HERE_=/opt/compiler-explorer/cuda/12.4.1/bin
#$ _THERE_=/opt/compiler-explorer/cuda/12.4.1/bin
#$ _TARGET_DIR_=targets/x86_64-linux
#$ TOP=/opt/compiler-explorer/cuda/12.4.1/bin/..
#$ NVVMIR_LIBRARY_DIR=/opt/compiler-explorer/cuda/12.4.1/bin/../nvvm/libdevice
#$ LD_LIBRARY_PATH=/opt/compiler-explorer/cuda/12.4.1/bin/../lib:/opt/compiler-explorer/cuda/12.4.1/lib:/opt/compiler-explorer/cuda/12.4.1/lib32:/opt/compiler-explorer/cuda/12.4.1/lib64
#$ PATH=/opt/compiler-explorer/cuda/12.4.1/bin/../nvvm/bin:/opt/compiler-explorer/cuda/12.4.1/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin
#$ INCLUDES="-I/opt/compiler-explorer/cuda/12.4.1/bin/../targets/x86_64-linux/include"  
#$ LIBRARIES=  "-L/opt/compiler-explorer/cuda/12.4.1/bin/../targets/x86_64-linux/lib/stubs" "-L/opt/compiler-explorer/cuda/12.4.1/bin/../targets/x86_64-linux/lib"
#$ "/opt/compiler-explorer/gcc-10.2.0/bin"/gcc -D__CUDA_ARCH_LIST__=890 -D__NV_LEGACY_LAUNCH -E -x c++ -D__CUDACC__ -D__NVCC__  -S "-I/opt/compiler-explorer/cuda/12.4.1/bin/../targets/x86_64-linux/include"    -D__CUDACC_VER_MAJOR__=12 -D__CUDACC_VER_MINOR__=4 -D__CUDACC_VER_BUILD__=131 -D__CUDA_API_VER_MAJOR__=12 -D__CUDA_API_VER_MINOR__=4 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 -g "<source>" -o "/app/example.cpp4.ii" 
#$ cudafe++ --c++14 --gnu_version=100200 --display_error_number --orig_src_file_name "<source>" --orig_src_path_name "<source>" --allow_managed --keep-device-functions  --m64 --parse_templates --gen_c_file_name "<source>dafe1.cpp" --stub_file_name "example.cudafe1.stub.c" --gen_module_id_file --module_id_file_name "/app/example.module_id" "/app/example.cpp4.ii" 
#$ "/opt/compiler-explorer/gcc-10.2.0/bin"/gcc -D__CUDA_ARCH__=890 -D__CUDA_ARCH_LIST__=890 -D__NV_LEGACY_LAUNCH -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__  -S "-I/opt/compiler-explorer/cuda/12.4.1/bin/../targets/x86_64-linux/include"    -D__CUDACC_VER_MAJOR__=12 -D__CUDACC_VER_MINOR__=4 -D__CUDACC_VER_BUILD__=131 -D__CUDA_API_VER_MAJOR__=12 -D__CUDA_API_VER_MINOR__=4 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 -g "<source>" -o "/app/example.cpp1.ii" 
#$ cicc --c++14 --gnu_version=100200 --display_error_number --orig_src_file_name "<source>" --orig_src_path_name "<source>" --allow_managed --keep-device-functions   -arch compute_89 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "example.fatbin.c" -generate-line-info -tused --module_id_file_name "/app/example.module_id" --gen_c_file_name "<source>dafe1.c" --stub_file_name "<source>dafe1.stub.c" --gen_device_file_name "<source>dafe1.gpu"  "/app/example.cpp1.ii" -o "/app/example.ptx"
#$ ptxas -arch=sm_89 -m64 -v  --generate-line-info "/app/example.ptx"  -o "<source>bin" 
ptxas info    : 0 bytes gmem
#$ fatbinary --create="/app/example.fatbin" -64 --ident="<source>" --cmdline="-v  --generate-line-info " --cicc-cmdline="-generate-line-info -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=89,file=<source>bin" --embedded-fatbin="/app/example.fatbin.c" 
#$ "/opt/compiler-explorer/gcc-10.2.0/bin"/gcc -D__CUDA_ARCH__=890 -D__CUDA_ARCH_LIST__=890 -D__NV_LEGACY_LAUNCH -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -S -Wno-psabi "-I/opt/compiler-explorer/cuda/12.4.1/bin/../targets/x86_64-linux/include"   -m64 -g "<source>dafe1.cpp" -o "/app/output.s" 
<source>: In function 'constexpr bool function2()':
<source>:34:30: error: no matching function for call to 'getValue(<brace-enclosed initializer list>)'
   34 |     return variable<true>;
      |                              ^
<source>:11:1: note: candidate: 'template<class T> constexpr bool getValue(const T&)'
   11 | constexpr bool getValue(const T& value) {
      | ^~~~~~~~
<source>:11:1: note:   template argument deduction/substitution failed:
<source>:34:30: note:   couldn't deduce template parameter 'T'
   34 |     return variable<true>;
      |                              ^
<source>:34:8: error: template argument 1 is invalid
   34 |     return variable<true>;
      |        ^~~~~~~~~~~~~~~~~~~     
# --error 0x1 --
Compiler returned: 1

I suggest filing a bug.

1 Like

I already did via partners portal. Bug ID: 4717351

Should I do it also via the link you provided?


No, your bug ID 4717351 should be sufficient.

Bug 4717351 has been resolved and verified for this case. The fix will be included in a future CUDA release.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.