error: identifier "__hadd2" is undefined

Hi,

I’m trying to compile the following program on our DGX2 machine using the PGI compiler 19.10.
We want to use the half2 vector datatype, but it does not compile:

main.cu(16): error: identifier "__hadd2" is undefined

Is something wrong in our cluster/makefile setup?

Thank you for your help

main.cu

#include <iostream>
#include <cuda_fp16.h>
using namespace std;

/**
 * Element-wise half-precision accumulate: y[i] = x[i] + y[i] for i in [0, N).
 *
 * Elements are processed two at a time as half2 with __hadd2; when N is odd
 * the final element is added with the scalar __hadd so it is not skipped.
 * Each thread starts at its global index and advances by the total number of
 * launched threads, so any 1-D launch configuration (including <<<1,1>>>)
 * covers the whole range.
 *
 * Build note: the half arithmetic intrinsics are only declared when compiling
 * for a GPU architecture that supports them (compute capability 5.3 or
 * newer), e.g. `nvcc -arch=sm_70`. With an older target the compiler reports
 * `identifier "__hadd2" is undefined`.
 *
 * @param N  number of half elements in x and y (N <= 0 does nothing)
 * @param x  device pointer to the read-only input
 * @param y  device pointer to the in/out accumulator
 *
 * NOTE(review): the half2 reinterpretation assumes x and y are at least
 * 4-byte aligned -- confirm for pointers that are offsets into a buffer.
 */
__global__
void halfTest(int N, const half *x, half *y)
{
   const int start  = threadIdx.x + blockDim.x * blockIdx.x;
   const int stride = blockDim.x * gridDim.x;
   const int n2     = N / 2;   // number of complete half2 pairs

   // Keep x const while viewing both arrays as packed pairs.
   const half2 *x2 = reinterpret_cast<const half2*>(x);
   half2       *y2 = reinterpret_cast<half2*>(y);

   for (int i = start; i < n2; i += stride)
     y2[i] = __hadd2(x2[i], y2[i]);

   // Odd N: the last element has no partner; exactly one thread handles it.
   if ((N % 2 == 1) && start == 0)
     y[N - 1] = __hadd(x[N - 1], y[N - 1]);
}

/**
 * Test driver: fills two host arrays of N halves (x = 1, y = 0), lets the
 * OpenACC runtime mirror them on the device, and runs halfTest on the device
 * copies so that y = x + y.
 *
 * Returns 0 on success, 1 if the kernel launch or its execution reported a
 * CUDA error.
 */
int main()
{
    cout << "Programstart.\n";
    const int N = 1e6;
    half *x = new half[N];
    half *y = new half[N];
    for(int i=0;i<N;i++)
    {
      x[i] = 1.0;
      y[i] = 0.0;
    }

    int status = 0;

    // x is only read by the kernel (copyin); y is read and written (copy).
    // A variable may appear in only one data clause per directive.
    #pragma acc data copyin(x[:N]) copy(y[:N])
    {
     // The "acc" sentinel is required; without it the directive is ignored
     // and the kernel would be handed host pointers.
     #pragma acc host_data use_device(x,y)
     {
        halfTest<<<1,1>>>(N,x,y);

        // Launch-configuration errors show up in cudaGetLastError();
        // execution errors show up at the next synchronizing call. The
        // synchronize also ensures the kernel has finished before the data
        // region copies y back to the host.
        cudaError_t err = cudaGetLastError();
        if (err == cudaSuccess)
            err = cudaDeviceSynchronize();
        if (err != cudaSuccess)
        {
            cerr << "CUDA error: " << cudaGetErrorString(err) << "\n";
            status = 1;   // no early return: branching out of an acc region is not allowed
        }
     }
    }

    delete[] x;
    delete[] y;

    return status;
}

makefile:

# ARCH is passed to the host compiler only (directly for .c files and via
# -Xcompiler for .cu files); it does not set the architecture nvcc targets.
ARCH = -ta=tesla:cc70
CC   = mpicc

# nvcc needs its own architecture flag: its default target is an older
# device for which the half arithmetic intrinsics (__hadd2, ...) are not
# declared, which produces 'identifier "__hadd2" is undefined'.
NVARCH = -arch=sm_70
CCU  = nvcc -ccbin=mpic++ ${NVARCH}

RM   = /bin/rm
PROG = run

OBJS = main.o
OPTS =  ${ARCH} -acc -Minfo=accel -Minfo -Mcuda

.PHONY : all clean

# Recipe lines must begin with a TAB character.
%.o : %.c
	${CC}  ${OPTS} -c ${CFLAGS} $<
%.o : %.cu
	${CCU} -Xcompiler "${OPTS}" -c ${CUFLAGS} $<

all : ${PROG}
${PROG} : ${OBJS}
	mpic++ ${OPTS} -o $@ ${OBJS} ${LDFLAGS} ${CFPMODEL} ${libs}

clean :
	${RM} -f ${PROG} *.o *~

Hi Peter,

I believe nvcc defaults to targeting older devices that don’t support half precision. Try setting the gpu architecture to CC70.

% nvcc main.cu
main.cu(15): error: identifier "__hadd2" is undefined

1 error detected in the compilation of "/tmp/tmpxft_000020b0_00000000-8_main.cpp1.ii".
% nvcc main.cu --gpu-architecture=compute_70
%

Hope this helps,
Mat

Thank you for your answer. It worked.