Problem about nppiFilterMin and nppiFilterMax MaskSize

// Fragment of a host function that runs an NPP min filter over an 8-bit image
// with 1 or 3 interleaved channels. iHeight, iWidth, iChannel, iMaskSize,
// pSrcData and pDstData are declared outside this fragment.
NppStatus t_NppStatus;
// Source and destination hold the same number of bytes: one Npp8u per
// channel per pixel, with no row padding.
int srcElements = iHeight * iWidth * iChannel;
int dstElements = iHeight * iWidth * iChannel;

// Destination image buffer on the device (return value of cudaMalloc is not checked).
unsigned char* dstDevData;
cudaMalloc((void**)&dstDevData, sizeof(Npp8u) * dstElements);
// Source image buffer on the device, filled from the host buffer pSrcData.
unsigned char* srcDevData;
cudaMalloc((void**)&srcDevData, sizeof(Npp8u) * srcElements);
cudaMemcpy(srcDevData, pSrcData, sizeof(Npp8u) * srcElements, cudaMemcpyHostToDevice);

// Row step in bytes; the same value is passed for both source and destination.
int iSrcStep = iWidth * iChannel;
// The ROI spans the entire image.
NppiSize oSizeROI;
oSizeROI.width  = iWidth;
oSizeROI.height = iHeight;

// Square iMaskSize x iMaskSize filter window.
NppiSize oMaskSize;
oMaskSize.width  = iMaskSize;
oMaskSize.height = iMaskSize;

// Anchor at the centre of the mask (integer division, so even sizes round down).
NppiPoint oAnchor;
oAnchor.x = iMaskSize / 2;
oAnchor.y = iMaskSize / 2;
// NOTE(review): the source pointer is the very first pixel of the allocation
// and the ROI covers the whole image, so with a centred anchor the mask
// reaches pixels before/after the allocated source at the image borders.
// Per the reply in this thread, the output ROI must be smaller than the input
// image (or the input padded) and the source pointer offset accordingly.
if (iChannel == 1)
{
    t_NppStatus = nppiFilterMin_8u_C1R(srcDevData, iSrcStep, dstDevData, iSrcStep, oSizeROI, oMaskSize, oAnchor);
}
else
{
    // Any channel count other than 1 is routed to the 3-channel variant.
    t_NppStatus = nppiFilterMin_8u_C3R(srcDevData, iSrcStep, dstDevData, iSrcStep, oSizeROI, oMaskSize, oAnchor);
}

// Copy the filtered image back into the host buffer pDstData.
cudaMemcpy(pDstData, dstDevData, sizeof(Npp8u) * dstElements, cudaMemcpyDeviceToHost);

// Release both device buffers and hand the NPP status back to the caller.
cudaFree(srcDevData);
cudaFree(dstDevData);
return t_NppStatus;

When the image height is 14650, the width is 8192, and the channel count is 3:

When the mask size is 8, 10, 13, 22, 23, 26, 28, 31, 32, 39, or 40, the image returned by nppiFilterMin_8u_C3R and nppiFilterMax_8u_C3R is black;

When the mask size is 3, 4, 5, 6, 7, 9, 11, 12, 15, or 30, the returned image is normal;

In addition, the anchor point is the center point of the mask;
Where is the problem?
I didn’t find any restrictions on the size of the mask in the document;

Request help

Thanks!

You haven’t properly offset things for the mask. The code must be able to apply the mask on valid/defined pixels. When the anchorpoint is at the center of the mask, this means pixels before the pixel must be defined, and pixels after the pixel must be defined. This is covered in various forum questions here. You’ll need to choose an output ROI smaller than the input image, and offset pointers correctly.

There is still an issue with mask size of 8, however, and that image size. You may wish to file a bug.

First of all, thank you;

I have filed a detailed bug report.

In addition, I randomly tested two smaller images, with image sizes of [H_958, W_1278] and [H_1216, W_1400], respectively. When the mask size was from 3 to 31, the returned images were all normal;

Yes, I tested 1024x1024 mask size 8, and it worked. So there is something specific to a certain range of settings here. I don’t know what it is. Thanks for filing a bug.

FWIW, here is the test case I used. It does not work for mask size 8, so there is still an issue, but it demonstrates the offsetting I referred to.

Without the offsetting, if you run the code (e.g. your code) under compute-sanitizer with mask size 5, it will still report issues, even though you may think it “works” for mask size 5.

# cat t172.cu
#include <npp.h>
#include <iostream>

// Reproducer for the nppiFilterMin_8u_C1R / nppiFilterMin_8u_C3R mask-size issue.
//
// Builds a source image filled with the constant 63 that is iMaskSize pixels
// wider and taller than the output ROI, runs the min filter with the source
// pointer offset by iMaskSize/2 rows and iMaskSize/2 pixels (so the mask only
// ever touches allocated pixels), and checks that every output byte is 63.
//
// Returns the status of the NPP filter call. If a CUDA runtime call fails
// before or after the filter, the matching NPP error code is returned instead
// (NPP_MEMORY_ALLOCATION_ERR, NPP_MEMSET_ERROR or NPP_MEMCPY_ERROR) and the
// CUDA error string is written to std::cerr. All host and device buffers are
// released on every path.
NppStatus test(){
    int iHeight = 14650; // works with 1024,1024
    int iWidth = 8192;
    int iChannel = 3;
    int iMaskSize = 8; // works with 5,6,7,9
    NppStatus t_NppStatus = NPP_SUCCESS;

    // The source rows carry iMaskSize extra pixels and the source has
    // iMaskSize extra rows, giving the filter a defined border to read from.
    int iSrcStep = (iWidth + iMaskSize) * iChannel;
    int iDstStep = iWidth * iChannel;
    // Element counts are computed in size_t so larger test images cannot
    // overflow a 32-bit int.
    size_t srcElements = static_cast<size_t>(iHeight + iMaskSize) * iSrcStep;
    size_t dstElements = static_cast<size_t>(iHeight) * iDstStep;

    Npp8u *pSrcData = new Npp8u[srcElements];
    Npp8u *pDstData = new Npp8u[dstElements];
    for (size_t i = 0; i < srcElements; i++) pSrcData[i] = 63;

    // Device buffers start as null so the cleanup below is safe even when an
    // allocation fails part-way through.
    unsigned char* dstDevData = nullptr;
    unsigned char* srcDevData = nullptr;

    // Reports a failed CUDA runtime call; returns true when `err` is an error.
    auto cudaFailed = [](cudaError_t err, const char* what) -> bool {
        if (err == cudaSuccess) return false;
        std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
        return true;
    };

    if (cudaFailed(cudaMalloc((void**)&dstDevData, sizeof(Npp8u) * dstElements), "cudaMalloc(dst)"))
    {
        t_NppStatus = NPP_MEMORY_ALLOCATION_ERR;
    }
    // Zero the destination so untouched output is distinguishable from 63.
    else if (cudaFailed(cudaMemset(dstDevData, 0, sizeof(Npp8u) * dstElements), "cudaMemset(dst)"))
    {
        t_NppStatus = NPP_MEMSET_ERROR;
    }
    else if (cudaFailed(cudaMalloc((void**)&srcDevData, sizeof(Npp8u) * srcElements), "cudaMalloc(src)"))
    {
        t_NppStatus = NPP_MEMORY_ALLOCATION_ERR;
    }
    else if (cudaFailed(cudaMemcpy(srcDevData, pSrcData, sizeof(Npp8u) * srcElements, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)"))
    {
        t_NppStatus = NPP_MEMCPY_ERROR;
    }
    else
    {
        NppiSize oSizeROI;
        oSizeROI.width  = iWidth;
        oSizeROI.height = iHeight;

        NppiSize oMaskSize;
        oMaskSize.width  = iMaskSize;
        oMaskSize.height = iMaskSize;

        NppiPoint oAnchor;
        oAnchor.x = iMaskSize / 2;
        oAnchor.y = iMaskSize / 2;

        // Skip iMaskSize/2 rows (iSrcStep bytes each) and iMaskSize/2 pixels
        // (iChannel bytes each) so the ROI sits inside the padded source.
        const Npp8u* pSrcROI = srcDevData + (iMaskSize / 2) * (iSrcStep + iChannel);

        if (iChannel == 1)
        {
            t_NppStatus = nppiFilterMin_8u_C1R(pSrcROI, iSrcStep, dstDevData, iDstStep, oSizeROI, oMaskSize, oAnchor);
        }
        else
        {
            t_NppStatus = nppiFilterMin_8u_C3R(pSrcROI, iSrcStep, dstDevData, iDstStep, oSizeROI, oMaskSize, oAnchor);
        }

        if (cudaFailed(cudaMemcpy(pDstData, dstDevData, sizeof(Npp8u) * dstElements, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)"))
        {
            // Keep an NPP error if there already is one; otherwise report the copy failure.
            if (t_NppStatus >= NPP_SUCCESS) t_NppStatus = NPP_MEMCPY_ERROR;
        }
        else
        {
            // The min of a constant image is that constant; report only the first mismatch.
            for (size_t i = 0; i < dstElements; i++)
            {
                if (pDstData[i] != 63)
                {
                    std::cout << "mismatch at " << i << " was: " << (int)pDstData[i] << " should be 63." << std::endl;
                    break;
                }
            }
        }
    }

    // Single cleanup point: cudaFree(nullptr) is a no-op, so this is safe on every path.
    cudaFree(srcDevData);
    cudaFree(dstDevData);
    delete[] pSrcData;
    delete[] pDstData;
    return t_NppStatus;
}

// Entry point: runs the reproducer once and prints the numeric NPP status.
int main(){
    const NppStatus status = test();
    std::cout << "npp status = " << static_cast<int>(status) << std::endl;
    return 0;
}
# nvcc -o t172 t172.cu -lnppif
# compute-sanitizer ./t172
========= COMPUTE-SANITIZER
mismatch at 0 was: 0 should be 63.
npp status = 0
========= ERROR SUMMARY: 0 errors
#

Thank you

The internal NVBUG 4595791 (the bug ID is sometimes listed in the CUDA release notes for the corresponding library, so users can search for it as a handle) has been closed, and the fix has been verified. The fix is targeted for the next CUDA release.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.