Hello,
I’m trying to combine NPP, which is a CUDA library, with OpenACC to accelerate some operations on the GPU. I use NPP to allocate device memory and to perform some operations; then I have a for loop enclosed in a `#pragma acc data` directive with a `deviceptr` clause, to tell the compiler that these pointers (pSrc and pDst) already refer to device memory, as the following code shows:
#include <npp.h>
int main(int argc, char* argv[]){
NppiSize imageSize = {2048, 2048};
int stepSrc = 0;
int stepDst = 0;
Npp32f *pSrc = nppiMalloc_32f_C1(imageSize.width, imageSize.height, &stepSrc);
Npp32f *pDst = nppiMalloc_32f_C1(imageSize.width, imageSize.height, &stepDst);
nppiSet_32f_C1R((Npp32f) 1.0f, pSrc, stepSrc, imageSize);
float alpha = 5.0f;
float bias = 1.0f;
#pragma acc data deviceptr(pSrc, pDst)
{
#pragma acc parallel loop
for (int j = 0; j < imageSize.height; ++j)
{
for (int i = 0; i < imageSize.width; ++i)
{
pDst[j*stepSrc/sizeof(Npp32f) + i] = pSrc[j*stepDst/sizeof(Npp32f) + i] * alpha + bias;
}
}
}
nppiFree(pSrc);
nppiFree(pDst);
}
When I try to compile the code with
nvcc saxpy.cpp -ccbin=pgc++ -Xcompiler " -Minfo=accel -ta=tesla:cuda8.0 -Mcuda=8.0 " -I/opt/tools/cuda-8.0/include -o saxpy -L/opt/tools/cuda-8.0/lib64 -lnppc -lnppi
the link phase fails with the following error:
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
main:
19, Accelerator kernel generated
Generating Tesla code
21, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
23, #pragma acc loop seq
23, Complex loop carried dependence of pDst->,pSrc-> prevents parallelization
Loop carried reuse of pDst-> prevents parallelization
nvlink warning : SM Arch ('sm_20') not found in '/tmp/tmpxft_000012bb_00000000-4_saxpy.o'
/tmp/tmpxft_000012bb_00000000-6_saxpy_dlink.o: In function `__cudaRegisterLinkedBinary_9saxpy_cpp':
/tmp/tmpxft_000012bb_00000000-2_saxpy_dlink.reg.c:2: undefined reference to `__fatbinwrap_9saxpy_cpp'
pgacclnk: child process exit status 1: /bin/ld
Thanks for your help,
Manuel.