NPP FilterGauss

Hi,

I try to use CUDA with Qt to blur image.
I use NPP library, nppiFilterGauss_8u_C1R works great , but nppiFilterGaussAdvanced_8u_C1R corrupts images.
I make gaussian kernel with OpenCV.

void cuda_npp_gauss_filter_qt_advanced(uchar* pSourceData, uchar* pResultData, const int &ImageLineStep, const int &ImageWidth, const int &ImageHeight, const int &Radius)
 {
        NppiSize oSizeROI;

        oSizeROI.width = ImageWidth;
        oSizeROI.height = ImageHeight;

        Npp32s SourceStep = ImageLineStep;
        Npp32s DestinationStep = ImageLineStep;

        size_t AllocationSizeInBytes = ImageLineStep * ImageHeight;
        Npp8u *pSource, *pDestination;

        cudaMalloc<Npp8u>(&pSource,AllocationSizeInBytes);
        cudaMalloc<Npp8u>(&pDestination,AllocationSizeInBytes);

        //-------------------------------------------------------

        int nFilterTaps = 2*((int)((float)ceil(Radius) + 0.5F)) + 1;

        //-------------------------------------------------------

        //-------------------------------------------------------
        //-------------- Gaussian kernel ------------------------

        double sigma = 0.3*((nFilterTaps-1)*0.5 - 1) + 0.8;

        cv::Mat GaussianKernel = cv::getGaussianKernel(nFilterTaps, sigma, CV_32F);

        //-------------------------------------------------------
        //-------------------------------------------------------

        cudaMemcpy(pSource, pSourceData, AllocationSizeInBytes, cudaMemcpyHostToDevice);

        nppiFilterGaussAdvanced_8u_C1R (pSource, SourceStep, pDestination, DestinationStep, oSizeROI, nFilterTaps, (Npp32f*)GaussianKernel.data);

        cudaMemcpy(pResultData, pDestination, AllocationSizeInBytes, cudaMemcpyDeviceToHost);
}

GPU GT710
SM 3.5

Xubuntu 16.04.1
Qt 5.7-1
CUDA 8.044
OpenCV 2.4.9
Thanks

There are at least 2 issues in your code as mentioned in the comments on your cross-posting:

http://stackoverflow.com/questions/40950433/cuda-npp-gaussfilter-corrupts-images

  1. You must transfer the filter kernel coefficients to device memory, and pass a device pointer.
  2. You must adjust your ROI size as well as offset your image pointers, so that the gaussian kernel can be applied to the whole ROI, without having any part of the radius go outside the defined image space.

Thanks for help. It works now.

//-------------------------------------------------------
//-------------- Gaussian kernel ------------------------

 double sigma = 0.3*((nFilterTaps-1)*0.5 - 1) + 0.8;

 cv::Mat GaussianKernel = cv::getGaussianKernel(nFilterTaps, sigma, CV_32F);

 Npp32f* pGaussianKernel;

 size_t GaussianKernelBytes = GaussianKernel.step * GaussianKernel.rows;

 cudaMalloc<Npp32f>(&pGaussianKernel, GaussianKernelBytes);

 cudaMemcpy(pGaussianKernel, GaussianKernel.data, GaussianKernelBytes, cudaMemcpyHostToDevice);

//-------------------------------------------------------
//-------------------------------------------------------

cudaMemcpy(pSource, pSourceData, AllocationSizeInBytes, cudaMemcpyHostToDevice);

nppiFilterGaussAdvanced_8u_C1R (pSource, SourceStep, pDestination, DestinationStep, oSizeROI, nFilterTaps, pGaussianKernel);

cudaMemcpy(pResultData, pDestination, AllocationSizeInBytes, cudaMemcpyDeviceToHost);