Hi,
I’m trying to compile the following program on our DGX2 machine using the PGI compiler 19.10.
We want to use the half2 vector datatype, but it does not compile:
main.cu(16): error: identifier "__hadd2" is undefined
Is something wrong in our cluster/makefile setup?
Thank you for your help
main.cu:
#include <iostream>
#include <cuda_fp16.h>
using namespace std;
/**
 * Element-wise half-precision accumulate: y[i] = x[i] + y[i] for i in [0, N).
 *
 * Elements are processed two at a time as half2 pairs; when N is odd the
 * final unpaired element is added with the scalar __hadd.
 *
 * Launch layout: 1D grid of 1D blocks, any size. Every thread starts at its
 * global index and advances by the total number of launched threads, so
 * the kernel is correct for any launch configuration, including <<<1,1>>>.
 *
 * Preconditions:
 *  - requires SM53+ (__hadd2 / __hadd are only available when device code is
 *    compiled for compute capability 5.3 or newer, e.g. nvcc -arch=sm_70).
 *  - x and y must be device-accessible and aligned for half2 access
 *    (4 bytes), because they are reinterpreted as half2 arrays.
 *
 * @param N  number of half elements in x and y
 * @param x  input array, read only
 * @param y  input/output array, updated in place
 */
__global__
void halfTest(int N, const half *x, half *y)
{
    const int start  = threadIdx.x + blockDim.x * blockIdx.x;
    const int stride = blockDim.x * gridDim.x;
    const int n2     = N / 2;   // number of complete half2 pairs

    // Keep x const through the cast; only y is written.
    const half2 *x2 = reinterpret_cast<const half2 *>(x);
    half2       *y2 = reinterpret_cast<half2 *>(y);

    for (int i = start; i < n2; i += stride)
        y2[i] = __hadd2(x2[i], y2[i]);

    // Odd N leaves one element that is not part of any pair; exactly one
    // thread (global index 0, which always exists) handles it.
    if (start == 0 && N > 0 && (N % 2) != 0)
        y[N - 1] = __hadd(x[N - 1], y[N - 1]);
}
/**
 * Host driver: fills two half-precision arrays, lets OpenACC manage their
 * device copies, and runs halfTest on the device pointers.
 *
 * Returns 0 on success and 1 if the kernel launch or execution reported a
 * CUDA error.
 */
int main()
{
    cout << "Programstart.\n";

    const int N = 1000000;
    half *x = new half[N];
    half *y = new half[N];
    for (int i = 0; i < N; i++)
    {
        // Explicit float -> half conversion instead of relying on an
        // implicit conversion from a double literal.
        x[i] = __float2half(1.0f);
        y[i] = __float2half(0.0f);
    }

    // The kernel walks the data in half2 pairs, so size the grid by pairs.
    const int threads = 256;
    const int pairs   = N / 2;
    const int blocks  = pairs > 0 ? (pairs + threads - 1) / threads : 1;

    int status = 0;

    // x is only read by the kernel, so it is copied in only; y is read and
    // written, so it is copied in and back out. A variable may appear in
    // only one data clause per directive.
    #pragma acc data copyin(x[0:N]) copy(y[0:N])
    {
        // The "acc" sentinel is required: a bare "#pragma host_data" is an
        // unknown pragma and is ignored, which would pass the HOST pointers
        // to the kernel.
        #pragma acc host_data use_device(x, y)
        {
            halfTest<<<blocks, threads>>>(N, x, y);

            // Launch-configuration errors surface through cudaGetLastError;
            // execution errors surface at the next synchronizing call. The
            // synchronize also guarantees the kernel has finished before the
            // data region copies y back to the host.
            cudaError_t err = cudaGetLastError();
            if (err == cudaSuccess)
                err = cudaDeviceSynchronize();
            if (err != cudaSuccess)
            {
                cerr << "CUDA error: " << cudaGetErrorString(err) << "\n";
                status = 1;   // no early return: must not branch out of the data region
            }
        }
    }

    delete[] x;
    delete[] y;
    return status;
}
makefile:
# GPU target for the PGI host compiler / OpenACC (Volta, compute capability 7.0).
ARCH = -ta=tesla:cc70
# Matching GPU target for nvcc device code. ARCH above only reaches the host
# compiler through -Xcompiler; without an explicit -arch nvcc builds device
# code for its default architecture, for which cuda_fp16.h does not declare
# the half-precision arithmetic intrinsics (they need sm_53 or newer). That is
# what produces: error: identifier "__hadd2" is undefined.
CUARCH = -arch=sm_70
CC = mpicc
CCU = nvcc -ccbin=mpic++
RM = /bin/rm
PROG = run
OBJS = main.o
OPTS = ${ARCH} -acc -Minfo=accel -Minfo -Mcuda

# NOTE: every recipe line below must start with a TAB character.

# C sources: compiled directly by the MPI wrapper around the PGI compiler.
%.o : %.c
	${CC} ${OPTS} -c ${CFLAGS} $<

# CUDA sources: nvcc compiles the device code for CUARCH and forwards OPTS to
# the host compiler.
%.o : %.cu
	${CCU} ${CUARCH} -Xcompiler "${OPTS}" -c ${CUFLAGS} $<

# all and clean are commands, not files.
.PHONY : all clean

all : ${PROG}

${PROG} : ${OBJS}
	mpic++ ${OPTS} -o $@ ${OBJS} ${LDFLAGS} ${CFPMODEL} ${libs}

clean :
	${RM} -f ${PROG} *.o *~