NPPI LabelMarkersUF returns incorrect results in CUDA 11.4

Hi there.
There seems to be a bug in the NPPI library function nppiLabelMarkersUF_16u32u_C1R.
The following example demonstrates the bug.

I’m using nppiLabelMarkersUF_16u32u_C1R (4-connectivity, i.e. nppiNormL1) and nppiCompressMarkerLabelsUF_32u_C1IR to label the following matrix:

0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0
0 1 1 1 1 1 0 0
0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0

The calculations return incorrect results:

0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0
0 2 1 1 1 1 0 0
0 2 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0

There should be only one marker for the initial matrix, but there are two.

Can someone confirm whether this is a known issue, and whether there is a workaround for it?
Thank you in advance.

#include <stdio.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <npp.h>
#define WIDTH 8
#define HEIGHT 6
// Prints a row-major w x h image of unsigned 16-bit values to stdout,
// one image row per line, each value right-aligned in a 3-character field.
//   data: host pointer to at least w * h elements (rows are tightly packed).
//   w, h: image width and height in elements.
void my_print_16u(Npp16u* data, int w, int h) {
    for (int i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++)
        {
            // %hu, not %hd: the elements are unsigned, so values >= 32768
            // must not be printed as negative numbers.
            printf("%3hu", data[i * w + j]);
        }
        printf("\n");
    }

}
// Prints a row-major w x h image of unsigned 32-bit values to stdout,
// one image row per line, each value right-aligned in a 3-character field.
//   data: host pointer to at least w * h elements (rows are tightly packed).
//   w, h: image width and height in elements.
void my_print_32u(Npp32u* data, int w, int h) {
    for (int i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++)
        {
            // %u, not %hd: %hd would truncate each 32-bit label to a signed
            // short, so distinct large labels could print as the same value.
            printf("%3u", (unsigned int)data[i * w + j]);
        }
        printf("\n");
    }

}

int main() {
    Npp16u host_src[WIDTH * HEIGHT] =
      {0,0,0,0,0,0,0,0,
       0,0,0,0,0,1,0,0,
       0,1,1,1,1,1,0,0,
       0,1,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0
       };

    Npp16u* device_src;
    
    cudaMalloc((void**)&device_src, sizeof(Npp16u) * WIDTH * HEIGHT);
    cudaMemcpy(device_src, host_src, sizeof(Npp16u) * WIDTH * HEIGHT, cudaMemcpyHostToDevice);

    int buffer_size;
    NppiSize source_roi = { WIDTH, HEIGHT };
    NppStatus e = nppiLabelMarkersUFGetBufferSize_32u_C1R(source_roi, &buffer_size);
    assert(e == NPP_NO_ERROR);
    Npp8u* buffer;
    cudaMalloc((void**)&buffer, buffer_size);

    Npp32u* Label_Markers;
    cudaMalloc((void**)&Label_Markers, sizeof(Npp32u) * WIDTH * HEIGHT);

    e = nppiLabelMarkersUF_16u32u_C1R(device_src, sizeof(Npp16u) * WIDTH, Label_Markers, sizeof(Npp32u) * WIDTH, source_roi, nppiNormL1, buffer);
    assert(e == NPP_NO_ERROR);

    int bs;
    int StartingNumber = WIDTH * HEIGHT;
    int NewNumber =0;
    e = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(StartingNumber, &bs);
    assert(e == NPP_NO_ERROR);
    if (bs > buffer_size) {
        buffer_size = bs;
        cudaFree(buffer);
        cudaMalloc(&buffer, buffer_size);
    }
    e = nppiCompressMarkerLabelsUF_32u_C1IR(Label_Markers, sizeof(Npp32u) * WIDTH, source_roi, StartingNumber, &NewNumber, buffer);
    assert(e == NPP_NO_ERROR);

    Npp32u* dst = new Npp32u[WIDTH * HEIGHT];
    cudaMemcpy(dst, Label_Markers, sizeof(Npp32u) * WIDTH * HEIGHT, cudaMemcpyDeviceToHost);
    printf("******INPUT************\n");
    my_print_16u(host_src, WIDTH, HEIGHT);
    printf("******OUTPUT************\n");
    my_print_32u(dst, WIDTH, HEIGHT);

}

LabelMarkersAndLabelCompressionNPP.cpp (2.2 KB)
Out_Log.log (361 Bytes)

1 Like

Hi,

I observed the same problem as you did: Resulting labels of function nppiLabelMarkersUF_8u32u_C1R_Ctx seperate connected component?

Have you already reported the bug? How to report a bug