Hello everyone,
I am trying to use the nppiFilterMedian_32f_C1R from NPP. My code seems to work but when I launch cuda-memcheck on it, there are countless invalid memory access.
I reproduced this behavior on the following sample. Am I doing something wrong ?
EDIT : I am using Cuda 10.1 on Ubuntu 18.04
// includes
#include <npp.h>
#include
#include <cuda.h>
#include <cuda_runtime.h>
// defines
#define WIDTH 1920
#define HEIGHT 1080
#define MEDIAN_SIZE 5
int main()
{
// input data
float* inData;
// output data
float* outData;
size_t pitch;
// Alloc input
cudaError_t err = cudaMallocPitch((void**)&inData, &pitch , WIDTH*sizeof(float) , HEIGHT );
if( err != cudaSuccess)
{
std::cerr << "error allocating input \n";
std::cerr << cudaGetErrorName(err) << std::endl;
return -1;
}
// alloc output
err = cudaMallocPitch((void**)&outData, &pitch, WIDTH*sizeof(float) , HEIGHT);
if( err != cudaSuccess)
{
std::cerr << "error allocating output \n";
std::cerr << cudaGetErrorName(err) << std::endl;
return -1;
}
// Npp ROI
int halfKernelSize = MEDIAN_SIZE / 2;
int left = halfKernelSize;
int right = WIDTH - 1 - halfKernelSize;
int top = halfKernelSize;
int bottom = HEIGHT -1 - halfKernelSize;
NppiSize sizeROI;
sizeROI.width = bottom - top;
sizeROI.height = right - left;
NppiSize size;
size.width = WIDTH;
size.height = HEIGHT;
// npp parameters - anchor
NppiPoint anchor;
anchor.x = halfKernelSize;
anchor.y = halfKernelSize;
// npp parameters - mask
NppiSize maskSize;
maskSize.width = MEDIAN_SIZE;
maskSize.height = MEDIAN_SIZE;
// alloc scratch Memory
Npp8u* scratchMemory;
uint32_t scratchMemorySize;
NppStatus errNpp = nppiFilterMedianGetBufferSize_32f_C1R(size, maskSize, &scratchMemorySize);
if(errNpp != NPP_SUCCESS)
{
std::cerr << "NPP error Median filter BufferSize : " << errNpp << std::endl;
return -1;
}
err = cudaMalloc( (void**)&scratchMemory, scratchMemorySize);
if (err != cudaSuccess) {
std::cerr << "error allocating scratch \n";
std::cerr << cudaGetErrorName(err) << std::endl;
return -1;
}
// NPP launch
// offset because borders are not processed
int offset = top * pitch + left * sizeof(float);
errNpp = nppiFilterMedian_32f_C1R( (Npp32f*) (inData + offset), pitch,
(Npp32f*)( outData + offset), pitch,
sizeROI, maskSize, anchor, scratchMemory );
cudaDeviceSynchronize();
if(errNpp != NPP_SUCCESS)
{
std::cerr << "NPP error Median filter : " << errNpp << std::endl;
return -1;
}
return 0;
}
I will be grateful for the help.