nvcc (CUDA 11.6) compilation failed: __hmax undefined

basic info:
NVIDIA-SMI 510.73.08
Driver Version: 510.73.08
CUDA Version: 11.6
Compute Capability 750

cmd:
nvcc -o show_cuda_arch show_cuda_arch.cu -arch=compute_75

code:

#include <cstdio>
#include "cuda_fp16.h"
#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

// Device-side debug helper: prints the stringified __CUDA_ARCH__ and
// __CUDACC_VER_MINOR__ macros, then the result of __hmax() on two __half
// constants.
__device__ void print_arch(){
  // STR() expands its argument before stringifying it (via STR_HELPER), so
  // these arrays hold the macro values as text.
  const char my_compile_time_ver[] = STR(__CUDACC_VER_MINOR__);
  const char my_compile_time_arch[] = STR(__CUDA_ARCH__);
  printf("__CUDA_ARCH__: %s, __CUDACC_VER_MINOR__:%s\n", my_compile_time_arch, my_compile_time_ver);
  // The double literals are converted to __half on initialization.
  const __half a = 0.2;
  const __half b = 0.3;
  // c is deduced as __half.
  auto c = __hmax(a, b);
  // NOTE(review): c is a __half, which cannot be printed with %f directly;
  // convert it first, e.g. __half2float(c).
  printf("c:%f\n", c);
}
// Kernel entry point: every launched thread calls print_arch().
__global__ void example()
{
   print_arch();
}

// Host entry point: launches the example kernel with one block of one thread
// and waits for the device to finish before returning.
int main(){

  example<<<1,1>>>();
  // NOTE(review): neither the launch nor cudaDeviceSynchronize() is checked
  // for errors, so a failed launch (e.g. a binary built for an architecture
  // the device cannot run) yields no output and no diagnostic.
  cudaDeviceSynchronize();
}

It would be helpful to see the actual compile output. I don’t think there is any problem with your use of __hmax() itself. However this:

auto c = __hmax(a, b);

will create a variable c of type half. You cannot print a half variable this way:

printf("c:%f\n", c);

I would do this:

printf("c:%f\n", __half2float(c));

When I make that change, I don’t have any trouble compiling your code.

1 Like

I followed your suggestion, but the same compile error occurred:
error: identifier "__hmax" is undefined

It compiles successfully with "-arch=compute_80", but it runs without any output.

I’m not sure what is happening then. I don’t have any trouble with it:

# cat t14.cu
#include <cstdio>
#include "cuda_fp16.h"
#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

__device__ void print_arch(){  // Device-side debug helper: prints compile-time macros and an __hmax() result.
  const char my_compile_time_ver[] = STR(__CUDACC_VER_MINOR__);  // STR() stringifies the expanded macro value
  const char my_compile_time_arch[] = STR(__CUDA_ARCH__);
  printf("__CUDA_ARCH__: %s, __CUDACC_VER_MINOR__:%s\n", my_compile_time_arch, my_compile_time_ver);
  const __half a = 0.2;  // double literal converted to __half
  const __half b = 0.3;
  auto c = __hmax(a, b);  // c is deduced as __half
  printf("c:%f\n", __half2float(c));  // convert to float so that %f is a valid format for the argument
}
__global__ void example()  // Kernel entry point: every launched thread calls print_arch().
{
   print_arch();
}

int main(){  // Host entry point: launch the kernel once and wait for it.

  example<<<1,1>>>();  // one block containing one thread
  cudaDeviceSynchronize();  // wait for the kernel to finish; NOTE(review): return value is not checked
}
# nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Jul_11_02:20:44_PDT_2023
Cuda compilation tools, release 12.2, V12.2.128
Build cuda_12.2.r12.2/compiler.33053471_0
# nvcc -o t14 t14.cu -arch=compute_75
# ./t14
__CUDA_ARCH__: 750, __CUDACC_VER_MINOR__:2
c:0.300049
#

If you compile for compute_80 but attempt to run on a compute_75 device, it won’t run correctly. That is a basic CUDA principle. So I’m not surprised you don’t get output there. Regarding the compile issue, it’s possible you have a broken CUDA install of some sort.

My guess is that you are issuing your compile command incorrectly. My suggestion would be that you provide an exact copy-paste of your console output, duplicating exactly the method I have shown. This is the second time now I’ve asked to see the exact output. I won’t respond further without a complete test case, just as I have given you. Good luck.

1 Like

I’m sorry about that.

cat t14.cu
#include <cstdio>
#include "cuda_fp16.h"
#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

__device__ void print_arch(){  // Device-side debug helper: prints compile-time macros and an __hmax() result.
  const char my_compile_time_ver[] = STR(__CUDACC_VER_MINOR__);  // STR() stringifies the expanded macro value
  const char my_compile_time_arch[] = STR(__CUDA_ARCH__);
  printf("__CUDA_ARCH__: %s, __CUDACC_VER_MINOR__:%s\n", my_compile_time_arch, my_compile_time_ver);
  const __half a = 0.2;  // double literal converted to __half
  const __half b = 0.3;
  auto c = __hmax(a, b);  // line 12 of t14.cu: the identifier nvcc 11.6 reports as undefined
  printf("c:%f\n", __half2float(c));  // convert to float so that %f is a valid format for the argument
}
__global__ void example()  // Kernel entry point: every launched thread calls print_arch().
{
   print_arch();
}

int main(){  // Host entry point: launch the kernel once and wait for it.

  example<<<1,1>>>();  // one block containing one thread
  cudaDeviceSynchronize();  // wait for the kernel to finish; NOTE(review): return value is not checked
}

nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_Mar__8_18:18:20_PST_2022
Cuda compilation tools, release 11.6, V11.6.124
Build cuda_11.6.r11.6/compiler.31057947_0

nvcc -o t14 t14.cu -arch=compute_75
t14.cu(12): error: identifier "__hmax" is undefined

1 error detected in the compilation of "t14.cu".

What if you change this:

auto c = __hmax(a, b);

to this:

__half c = __hmax(a, b);

Is the problem the same?

Also, assuming you have a standard linux install of CUDA, could you run this command:

grep hmax /usr/local/cuda/include/cuda_fp16.h

and paste the output here.

OK, I have reproduced the issue on CUDA 11.6.

I suggest you move forward to CUDA 11.7 or newer.

ok. thanks

After I installed CUDA 11.7, it works correctly for __half c = __hmax(a, b);

But a new issue came up:

#nvidia-smi --query-gpu=compute_cap --format=csv 
Failed to initialize NVML: Driver/library version mismatch

I have fixed that. Thank you!

But __hmax_nan gets the same error in CUDA 11.7.1 and CUDA 11.8.0:

# cat t14.cu 
#include <cstdio>
#include "cuda_fp16.h"
#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

__device__ void print_arch(){  // Device-side debug helper: prints compile-time macros and an __hmax_nan() result.
  const char my_compile_time_ver[] = STR(__CUDACC_VER_MINOR__);  // STR() stringifies the expanded macro value
  const char my_compile_time_arch[] = STR(__CUDA_ARCH__);
  printf("__CUDA_ARCH__: %s, __CUDACC_VER_MINOR__:%s\n", my_compile_time_arch, my_compile_time_ver);
  const __half a = 0.2;  // double literal converted to __half
  const __half b = 0.3;
  __half c = __hmax_nan(a, b);  // line 12 of t14.cu: the identifier nvcc 11.8 reports as undefined
  printf("c:%f\n", __half2float(c));  // convert to float so that %f is a valid format for the argument
}
__global__ void example()  // Kernel entry point: every launched thread calls print_arch().
{
   print_arch();
}

int main(){  // Host entry point: launch the kernel once and wait for it.

  example<<<1,1>>>();  // one block containing one thread
  cudaDeviceSynchronize();  // wait for the kernel to finish; NOTE(review): return value is not checked
}
# nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
# nvcc -o t14 t14.cu 
t14.cu(12): error: identifier "__hmax_nan" is undefined

1 error detected in the compilation of "t14.cu".

Apparently __hmax_nan() support was introduced in a later version of CUDA. Try updating to the latest CUDA, 12.2.1.

1 Like

OK. Thank you again.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.