nppiFilterGaussBorder_8u_C1R oSrcSize and oSrcOffset parameters

Based on your SO posting and my own testing, you will get the expected output if you pass the actual dimensions of the original image for oSrcSize. Here is my example:

$ cat t18.cu
#include <npp.h>
#include <nppi.h>
#include <iostream>
#include <vector>
// Side length, in pixels, of the square 8-bit test image; main() also passes
// it as the source and destination pitch.
const int sz = 32;
int main() {
    Npp8u* src_img = new Npp8u[sz*sz];
    for (int i = 0; i < sz*sz; i++) src_img[i] = (i&1)?80:40;
    Npp8u* cudaMem = nppsMalloc_8u(sz * sz);
    Npp8u* cudaMemDst = nppsMalloc_8u(sz * sz);

    if(cudaMem == nullptr)
    {
        throw std::runtime_error("Error malloc");
    }
    cudaMemcpy(cudaMem, src_img, sz*sz, cudaMemcpyHostToDevice);
    if(cudaMemDst == nullptr)
    {
        throw std::runtime_error("Error malloc dst");
    }
    cudaMemset(cudaMemDst, 0, sz*sz);
    NppiPoint const blurTopLeft{16, 16};       //inclusive
    NppiPoint const blurBottomRight{sz, sz}; //exclusive

    //Source image width and height in pixels relative to pSrc.
//    NppiSize const oSrcSize{sz - blurTopLeft.x, sz - blurTopLeft.y};
    NppiSize const oSrcSize{sz, sz};
    NppiSize const oSizeROI{blurBottomRight.x - blurTopLeft.x, blurBottomRight.y - blurTopLeft.y};

    auto const nppiError{nppiFilterGaussBorder_8u_C1R(
        cudaMem + blurTopLeft.y * sz + blurTopLeft.x,
        sz,            // pitch
        oSrcSize,       // source image width and height in pixels relative to pSrc.
        blurTopLeft,    // aka. oSrcOffset: The pixel offset that pSrc points to relative to the origin of the source image.
        cudaMemDst + blurTopLeft.y * sz + blurTopLeft.x,
        sz,            // dst pitch
        oSizeROI,
        NPP_MASK_SIZE_3_X_3,
        NPP_BORDER_REPLICATE
    )};

    if(nppiError != 0)
    {
        // We get NPP_OUT_OFF_RANGE_ERROR
        std::cerr << "nppiFilterGaussBorder_8u_C1R failed: " << nppiError << std::endl;
    }
    else
    {
        cudaMemcpy(src_img, cudaMemDst, sz*sz, cudaMemcpyDeviceToHost);
        for (int i = 0; i< sz; i++){
          for (int j = 0; j < sz; j++)
            std::cout << (int)(src_img[i*sz+j]) << " ";
          std::cout << std::endl;}
    }
    nppsFree(cudaMem);
    nppsFree(cudaMemDst);
}
$ nvcc -o t18 t18.cu -lnpps -lnppif
$ compute-sanitizer ./t18
========= COMPUTE-SANITIZER
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 58 61 58 61 58 61 58 61 58 61 58 61 58 61 69
========= ERROR SUMMARY: 0 errors
$

No, I don’t have answers for all the questions that this raises. I have found NPP parameter handling to be inscrutable at times. You’re welcome to file a bug to request clarification of the documentation.

1 Like