NPPI LabelMarkersUF returns incorrect results in CUDA 11.4

Hi there.
There seems to be a bug in the NPPI library's nppiLabelMarkersUF_16u32u_C1R function.
The following example demonstrates the bug.

I’m using the nppiLabelMarkersUF_16u32u_C1R (4-connectivity, nppiNormL1) and nppiCompressMarkerLabelsUF_32u_C1IR functions to label the following matrix:

0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0
0 1 1 1 1 1 0 0
0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0

The calculations return incorrect results:

0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0
0 2 1 1 1 1 0 0
0 2 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0

There should be only one marker for the initial matrix, but there are two.

Can someone confirm whether this is a known issue, and whether there is a workaround for it?
Thank you in advance.

#include <stdio.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <npp.h>
#define WIDTH 8
#define HEIGHT 6
// Prints a w-by-h grid of 16-bit unsigned values to stdout, one grid row per
// output line, each value right-aligned in a field of width 3.
//   data: host pointer to w * h tightly packed elements (element (i, j) is
//         read from data[i * w + j]).
//   w:    number of values per row.
//   h:    number of rows.
void my_print_16u(Npp16u* data, int w, int h) {
    for (int i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++)
        {
            // Npp16u is an unsigned 16-bit type, so print it with %hu; the
            // signed %hd conversion shows values above 32767 as negative.
            printf("%3hu", data[i * w + j]);
        }
        printf("\n");
    }
}
// Prints a w-by-h grid of 32-bit unsigned values to stdout, one grid row per
// output line, each value right-aligned in a field of width 3 (wider values
// simply take more columns).
//   data: host pointer to w * h tightly packed elements (element (i, j) is
//         read from data[i * w + j]).
//   w:    number of values per row.
//   h:    number of rows.
void my_print_32u(Npp32u* data, int w, int h) {
    for (int i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++)
        {
            // Npp32u is an unsigned 32-bit type, so print it with %u; the
            // former %hd conversion truncated each label to a signed short,
            // misprinting any label of 32768 or more.
            printf("%3u", data[i * w + j]);
        }
        printf("\n");
    }
}

int main() {
    Npp16u host_src[WIDTH * HEIGHT] =
      {0,0,0,0,0,0,0,0,
       0,0,0,0,0,1,0,0,
       0,1,1,1,1,1,0,0,
       0,1,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0
       };

    Npp16u* device_src;
    
    cudaMalloc((void**)&device_src, sizeof(Npp16u) * WIDTH * HEIGHT);
    cudaMemcpy(device_src, host_src, sizeof(Npp16u) * WIDTH * HEIGHT, cudaMemcpyHostToDevice);

    int buffer_size;
    NppiSize source_roi = { WIDTH, HEIGHT };
    NppStatus e = nppiLabelMarkersUFGetBufferSize_32u_C1R(source_roi, &buffer_size);
    assert(e == NPP_NO_ERROR);
    Npp8u* buffer;
    cudaMalloc((void**)&buffer, buffer_size);

    Npp32u* Label_Markers;
    cudaMalloc((void**)&Label_Markers, sizeof(Npp32u) * WIDTH * HEIGHT);

    e = nppiLabelMarkersUF_16u32u_C1R(device_src, sizeof(Npp16u) * WIDTH, Label_Markers, sizeof(Npp32u) * WIDTH, source_roi, nppiNormL1, buffer);
    assert(e == NPP_NO_ERROR);

    int bs;
    int StartingNumber = WIDTH * HEIGHT;
    int NewNumber =0;
    e = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(StartingNumber, &bs);
    assert(e == NPP_NO_ERROR);
    if (bs > buffer_size) {
        buffer_size = bs;
        cudaFree(buffer);
        cudaMalloc(&buffer, buffer_size);
    }
    e = nppiCompressMarkerLabelsUF_32u_C1IR(Label_Markers, sizeof(Npp32u) * WIDTH, source_roi, StartingNumber, &NewNumber, buffer);
    assert(e == NPP_NO_ERROR);

    Npp32u* dst = new Npp32u[WIDTH * HEIGHT];
    cudaMemcpy(dst, Label_Markers, sizeof(Npp32u) * WIDTH * HEIGHT, cudaMemcpyDeviceToHost);
    printf("******INPUT************\n");
    my_print_16u(host_src, WIDTH, HEIGHT);
    printf("******OUTPUT************\n");
    my_print_32u(dst, WIDTH, HEIGHT);

}

LabelMarkersAndLabelCompressionNPP.cpp (2.2 KB)
Out_Log.log (361 Bytes)

2 Likes

Hi,

I observed the same problem as you did; see the thread "Resulting labels of function nppiLabelMarkersUF_8u32u_C1R_Ctx seperate connected component?"

Have you already reported the bug? How to report a bug

I’m having a similar problem.
When I run the batchedLabelMarkersAndCompressionNPP CUDA sample several times, I get different connected-component labeling results.
After investigation, it was found that the problem was caused by the nppiLabelMarkersUFBatch function.

I got the correct result after making the following change.
Line 74: nppiNormL1 -> nppiNormInf
e = nppiLabelMarkersUF_16u32u_C1R(device_src, sizeof(Npp16u) * WIDTH, Label_Markers, sizeof(Npp32u) * WIDTH, source_roi, nppiNormInf, buffer);

1 Like