Hi there.
There seems to be a bug in the NPP library's nppiLabelMarkersUF_16u32u_C1R function.
The following example demonstrates the bug.
I’m using nppiLabelMarkersUF_16u32u_C1R (with 4-way connectivity, nppiNormL1) and nppiCompressMarkerLabelsUF_32u_C1IR to label the following matrix:
0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0
0 1 1 1 1 1 0 0
0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
The calculations return incorrect results:
0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0
0 2 1 1 1 1 0 0
0 2 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
There should be only one marker for the initial matrix, but there are two.
Can someone confirm whether this is a known issue, and whether there is a workaround for it?
Thank you in advance.
#include <stdio.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <npp.h>
#define WIDTH 8
#define HEIGHT 6
/**
 * Prints a row-major image of Npp16u elements to stdout, one image row per
 * output line, each element right-aligned in a field of width 3.
 *
 * @param data  Pointer to at least w * h elements, tightly packed (element
 *              (i, j) is read from data[i * w + j]). It is dereferenced on the
 *              host, so it must be host-accessible memory.
 * @param w     Number of elements per row.
 * @param h     Number of rows.
 */
void my_print_16u(Npp16u* data, int w, int h) {
    for (int i = 0; i < h; i++) {
        for (int j = 0; j < w; j++) {
            // %hu rather than %hd: the elements are unsigned 16-bit, so a
            // signed conversion would print values above 32767 as negative.
            printf("%3hu", data[i * w + j]);
        }
        printf("\n");
    }
}
/**
 * Prints a row-major image of Npp32u elements to stdout, one image row per
 * output line, each element right-aligned in a field of width 3 (wider values
 * are printed in full and simply push the columns out).
 *
 * @param data  Pointer to at least w * h elements, tightly packed (element
 *              (i, j) is read from data[i * w + j]). It is dereferenced on the
 *              host, so it must be host-accessible memory.
 * @param w     Number of elements per row.
 * @param h     Number of rows.
 */
void my_print_32u(Npp32u* data, int w, int h) {
    for (int i = 0; i < h; i++) {
        for (int j = 0; j < w; j++) {
            // %u rather than %hd: the elements are unsigned 32-bit, and %hd
            // would truncate them to a signed short, misprinting any label
            // value of 32768 or more.
            printf("%3u", data[i * w + j]);
        }
        printf("\n");
    }
}
int main() {
Npp16u host_src[WIDTH * HEIGHT] =
{0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,
0,1,1,1,1,1,0,0,
0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0
};
Npp16u* device_src;
cudaMalloc((void**)&device_src, sizeof(Npp16u) * WIDTH * HEIGHT);
cudaMemcpy(device_src, host_src, sizeof(Npp16u) * WIDTH * HEIGHT, cudaMemcpyHostToDevice);
int buffer_size;
NppiSize source_roi = { WIDTH, HEIGHT };
NppStatus e = nppiLabelMarkersUFGetBufferSize_32u_C1R(source_roi, &buffer_size);
assert(e == NPP_NO_ERROR);
Npp8u* buffer;
cudaMalloc((void**)&buffer, buffer_size);
Npp32u* Label_Markers;
cudaMalloc((void**)&Label_Markers, sizeof(Npp32u) * WIDTH * HEIGHT);
e = nppiLabelMarkersUF_16u32u_C1R(device_src, sizeof(Npp16u) * WIDTH, Label_Markers, sizeof(Npp32u) * WIDTH, source_roi, nppiNormL1, buffer);
assert(e == NPP_NO_ERROR);
int bs;
int StartingNumber = WIDTH * HEIGHT;
int NewNumber =0;
e = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(StartingNumber, &bs);
assert(e == NPP_NO_ERROR);
if (bs > buffer_size) {
buffer_size = bs;
cudaFree(buffer);
cudaMalloc(&buffer, buffer_size);
}
e = nppiCompressMarkerLabelsUF_32u_C1IR(Label_Markers, sizeof(Npp32u) * WIDTH, source_roi, StartingNumber, &NewNumber, buffer);
assert(e == NPP_NO_ERROR);
Npp32u* dst = new Npp32u[WIDTH * HEIGHT];
cudaMemcpy(dst, Label_Markers, sizeof(Npp32u) * WIDTH * HEIGHT, cudaMemcpyDeviceToHost);
printf("******INPUT************\n");
my_print_16u(host_src, WIDTH, HEIGHT);
printf("******OUTPUT************\n");
my_print_32u(dst, WIDTH, HEIGHT);
}
LabelMarkersAndLabelCompressionNPP.cpp (2.2 KB)
Out_Log.log (361 Bytes)