How do I link NPP when compiling with NVCC?

This is my first day working with NPP. When I compile my NPP code with NVCC, the following linker error appears. What should I do?

cuda Version : 11.1
GPU Arch : 2080 Ti (CC 7.5)
system : ubuntu 18.04

My NVCC command:

nvcc -L/usr/local/cuda-11.1/targets/x86_64-linux/lib/ npp_sum.cu -lnppc -lnppial -lnppidei -o npp_sum

error:

/tmp/tmpxft_00004fd8_00000000-11_npp_sum.o: In function `main':
tmpxft_00004fd8_00000000-6_npp_sum.cudafe1.cpp:(.text+0x65): undefined reference to `nppsSet_32f'
tmpxft_00004fd8_00000000-6_npp_sum.cudafe1.cpp:(.text+0x87): undefined reference to `nppsSumGetBufferSize_32f'
tmpxft_00004fd8_00000000-6_npp_sum.cudafe1.cpp:(.text+0xb3): undefined reference to `nppsSum_32f'
collect2: error: ld returned 1 exit status

my code:

#include <cuda_runtime.h>
#include <nppcore.h>
#include <npps_initialization.h>
#include <npps_statistics_functions.h>
#include <stdio.h>

int main()
{
    Npp32f *pSrc;
    Npp32f *pSum;
    Npp8u *pDeviceBuffer;
    int nLength = 1024;

    cudaMalloc((void **)(&pSrc), sizeof(Npp32f) * nLength);
    nppsSet_32f(1.0f, pSrc, nLength); //init function
    cudaMalloc((void **)(&pSum), sizeof(Npp32f) * 1);

    int nBufferSize;
    nppsSumGetBufferSize_32f(nLength, &nBufferSize);
    cudaMalloc((void **)(&pDeviceBuffer), nBufferSize);

    nppsSum_32f(pSrc, nLength, pSum, pDeviceBuffer);
    Npp32f nSumHost;
    cudaMemcpy(&nSumHost, pSum, sizeof(Npp32f) * 1, cudaMemcpyDeviceToHost);
    printf("sum = %f\n", nSumHost);

    cudaFree(pSrc);
    cudaFree(pSum);
    cudaFree(pDeviceBuffer);
    return 0;
}

Thank you very much.

How about
/usr/local/cuda-11.4/bin/nvcc -o out npp.cu -lnppc -lnppial -lnppidei -lnpps

Great, it worked! So the reason it failed before was the missing -lnpps?

Also, can I compile NPP code through CMake?