Why do nppiRotate (NPP, CUDA) and ippiRotate (IPP, Intel) produce different output data for the same rotation?

nnTai · November 14, 2016, 4:48am

hello everyone, please help me!
I have a problem converting the ippiRotate_32f_C1R function (using the IPP library of Intel) to the nppiRotate_32f_C1R function (using the NPP library of CUDA).
The two functions read the same input data, but the output data differ in the first row.
With config:
int angle = -90;
double xShift = lSrcYLength - 1;
double yShift = 0;
Input data:
pflSrc[6*4] = {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16,
17, 18, 19, 20,
21, 22, 23, 24
}
Output data:ippiRotate_32f_C1R
pflRes[6*4] = {
19, 13, 7, 1,
20, 14, 8, 2,
21, 15, 9, 3,
22, 16, 10, 4,
23, 17, 11, 5,
24, 18, 12, 6
}
Output data: nppiRotate_32f_C1R
pflResCuda[6*4] = {
0, 0, 0, 1,
20, 14, 8, 2,
21, 15, 9, 3,
22, 16, 10, 4,
23, 17, 11, 5,
24, 18, 12, 6
}
I have tested other configurations and they give the same output data in both libraries; only angle = -90 gives wrong output data.
I use CUDA version 6.0 and Intel Parallel Studio 2011.
Thanks for help,
TaiNN

Refer to the source code below:

file main.cpp

#include "stdio.h"
#include "stdlib.h"
#include <cufft.h>
#include <cuda.h>
#include <string.h>

#include "C:\Program Files (x86)\Intel\Parallel Studio 2011\Composer\ipp\include\ipp.h" 
#include "C:\Program Files (x86)\Intel\Parallel Studio 2011\Composer\ipp\include\ippi.h" 
#include "C:\Program Files (x86)\Intel\Parallel Studio 2011\Composer\ipp\include\ippm.h" 

#include "headerFile.h"

// Forward declaration of the Intel IPP rotation helper defined after main().
// pflSrc is the input pixel buffer and pflRes the output pixel buffer;
// lSrcXLength / lSrcYLength are passed to IPP as the first / second member of
// the source IppiSize, and lSrcXLength also determines the source row step.
int  Rotate_1Slice(
		float*        pflSrc,
		float*	      pflRes, 
		long          lSrcXLength,
		long          lSrcYLength
		);

/*
 * Test driver: rotates the same 24-element test image once with Intel IPP
 * (Rotate_1Slice) and once with NVIDIA NPP (Rotate_1SliceCuda), prints both
 * results and reports whether they are element-wise identical.
 *
 * Returns 0 in all cases (the comparison outcome is only printed).
 */
int main()
{
	// Both rotate routines use lSrcXLength as the source row length (it sets
	// the source step) and lSrcYLength as the number of source rows.
	long          lSrcXLength = 6;
	long          lSrcYLength = 4;
	int size = lSrcXLength * lSrcYLength;

	//init input data (laid out as lSrcYLength rows of lSrcXLength values,
	//which is how the rotate routines address the buffer)
	float		  pflSrc[6*4] = {
					     1, 2, 3, 4, 5, 6,
					     7, 8, 9, 10, 11, 12,
					     13, 14, 15, 16, 17, 18,
					     19, 20, 21, 22, 23, 24
					};

	float		  pflRes[6*4] = {0};
	
	float		  h_pflSrcCuda[6*4] = {0};
	float		  h_pflResCuda[6*4] = {0};
	
	// The NPP path gets its own copy of the input so both paths start from
	// identical data.
	memcpy(h_pflSrcCuda, pflSrc, size*sizeof(float));

	printf("-----Input Matrix: pflSrc-----\n");
	// The source has lSrcYLength rows of lSrcXLength columns; printing it with
	// the two swapped (as the original did) shows a layout that does not match
	// what the rotate routines actually read.
	show(pflSrc, lSrcYLength, lSrcXLength);
	
	//--------------------Ipp of intel-----------------------
	Rotate_1Slice
		(
		pflSrc,
		pflRes, 
		lSrcXLength,
		lSrcYLength
		);
	printf("-----Result ippiRotate_32f_C1R function(Ipp of intel)-----\n");
	// The rotated image has lSrcXLength rows of lSrcYLength columns.
	show(pflRes, lSrcXLength, lSrcYLength);

	//--------------------Cuda--------------------	
	Rotate_1SliceCuda(
		h_pflSrcCuda,
		h_pflResCuda, 
		lSrcXLength,
		lSrcYLength
		);

	printf("-----Result nppiRotate_32f_C1R function(Npp of Cuda)-----\n");
	show(h_pflResCuda, lSrcXLength, lSrcYLength);

	printf("-----Compare data-------\n");
	// CompareMatrix returns true when at least one element differs.
	if(CompareMatrix(pflRes, h_pflResCuda,size))
	{
		printf("Difference data!\n");
	}
	else
	{
		printf("Same data!\n");
	}

	return 0;
}

 /*
  * Rotates one image slice by -90 degrees with Intel IPP
  * (ippiRotate_32f_C1R, IPPI_INTER_NN) and shifts the result by
  * (lSrcYLength - 1, 0).
  *
  * pflSrc       input buffer, addressed with a row step of lSrcXLength floats
  * pflRes       output buffer, addressed with a row step of lSrcYLength floats
  * lSrcXLength  first member of the source size passed to IPP
  * lSrcYLength  second member of the source size passed to IPP
  *
  * Returns 0 on success, -1 if an argument is invalid or IPP reports a
  * status other than ippStsNoErr.
  */
 int  Rotate_1Slice(
		float*	      pflSrc,
		float*	      pflRes, 
		long          lSrcXLength,
		long          lSrcYLength
		)
{
	// Reject null buffers and empty/negative extents before touching IPP.
	if(pflSrc == NULL || pflRes == NULL || lSrcXLength <= 0 || lSrcYLength <= 0)
	{
		printf("Error! invalid argument\n");
		return -1;
	}

	// The IPP structures hold int members; convert explicitly so the brace
	// initializers below are not narrowing conversions where long is wider
	// than int.
	const int srcWidth  = static_cast<int>(lSrcXLength);
	const int srcHeight = static_cast<int>(lSrcYLength);

	const int    angle  = -90;
	const double xShift = srcHeight - 1;
	const double yShift = 0;

	// The destination ROI swaps the two source extents.
	IppiRect srcROI = { 0 , 0 , srcWidth , srcHeight };
	IppiRect resROI = { 0 , 0 , srcHeight , srcWidth };

	IppiSize size = { srcWidth , srcHeight };

	// Row steps in bytes for the tightly packed source and destination.
	const int srcStep = static_cast<int>(srcWidth  * sizeof(float));
	const int resStep = static_cast<int>(srcHeight * sizeof(float));

	IppStatus ippStatus = ippiRotate_32f_C1R( pflSrc , 
									size , 
									srcStep ,
									srcROI ,
									pflRes,
									resStep ,
									resROI ,
									angle,
									xShift , yShift ,
									IPPI_INTER_NN );

	if(ippStatus != ippStsNoErr)
	{
		printf("Error! call ipp function\n");
		return -1;
	}

	return 0;
}

 //Utilities function
 // Compares the first `size` elements of two float buffers with exact
 // equality. Every differing pair is printed; the return value is true when
 // at least one pair differs and false when all pairs are equal.
 bool CompareMatrix(float *ori, float *mod, int size)
{
	int mismatchCount = 0;
	int idx = 0;

	while(idx < size)
	{
		const float expected = ori[idx];
		const float actual   = mod[idx];

		if(!(expected == actual))
		{
			printf("Difference pixel %0.1f <-> %0.1f\n", expected, actual );
			++mismatchCount;
		}
		++idx;
	}

	return mismatchCount != 0;
}

 // Prints `data` as `row` lines of `col` values each. Every value is
 // formatted with one decimal and followed by a tab; each line ends with a
 // newline. Elements are read consecutively from the start of the buffer.
 void show(float *data, int row, int col)
{
	// Running position in the buffer; it advances once per printed element,
	// so it always equals (line * col + column).
	int offset = 0;

	for(int line = 0; line < row; ++line)
	{
		int column = 0;
		while(column < col)
		{
			printf("%0.1f	",data[offset] );
			++offset;
			++column;
		}
		printf("\n");
	}
}

file Rotate_1SliceCuda.cu

#include <cuda_runtime_api.h>
#include <cuda.h>
#include <nppi.h>
#include<stdio.h>

#include "headerFile.h"

int Rotate_1SliceCuda
(
		float*		  pflSrcCuda,
		float*		  pflResCuda, 
		long          lSrcXLength,
		long          lSrcYLength
)
{
	NppStatus status;
	
	int sizeByte = lSrcXLength * lSrcYLength * sizeof(float);

	float *dev_pflSrc = NULL;
	float *dev_pflRes = NULL;

	cudaMalloc((void**)&dev_pflSrc, sizeByte);
	cudaMalloc((void**)&dev_pflRes, sizeByte);

	/* Transfer data from host to drive */
	cudaMemcpy(dev_pflSrc, pflSrcCuda, sizeByte, cudaMemcpyHostToDevice);
	cudaMemcpy(dev_pflRes,  pflResCuda, sizeByte, cudaMemcpyHostToDevice);

	int    angle  = -90;
	double xShift = lSrcYLength - 1;
	double yShift = 0;

	NppiRect srcROI = { 0 , 0 , lSrcXLength , lSrcYLength };
	NppiRect resROI = { 0 , 0 , lSrcYLength , lSrcXLength };
	
	NppiSize size = {lSrcXLength,lSrcYLength};

	status = nppiRotate_32f_C1R( dev_pflSrc , 
									size , 
									lSrcXLength * sizeof(float) ,
									srcROI ,
									dev_pflRes ,
									lSrcYLength * sizeof(float) ,
									resROI ,
									angle,
									xShift , yShift ,
									NPPI_INTER_NN );
	if(status != 0)
	{
		printf("Error! call nppi function\n");
	}
	
	/* Transfer data to host*/
	cudaMemcpy(pflResCuda, dev_pflRes, sizeByte, cudaMemcpyDeviceToHost);

	cudaFree(dev_pflSrc);
	cudaFree(dev_pflRes);

	return 0;
}

file HeaderFile.h

// NPP (CUDA) rotation helper; defined in Rotate_1SliceCuda.cu.
// pflSrcCuda / pflResCuda are host buffers for the input and output pixels;
// lSrcXLength / lSrcYLength are the two source extents.
int Rotate_1SliceCuda
(
		float*		  pflSrcCuda,
		float*		  pflResCuda, 
		long          lSrcXLength,
		long          lSrcYLength
);

// Utility functions
// NOTE(review): no definition of testRoate() is visible in this listing, and
// the name looks like a typo of "testRotate" - confirm before relying on it.
void testRoate();
// Prints `data` as `row` lines of `col` values.
void show(float *data, int row, int col);
// Returns true when the first `size` elements of the two buffers differ
// anywhere, false when they are all equal.
bool CompareMatrix(float *ori, float *mod, int size);

nnTai · November 15, 2016, 2:54am

hello everyone,
I have described the problem in a picture, using an image of size 4 x 4. See this link: [url]http://www.upsieutoc.com/image/3UyZQ[/url]
The result of the nppiRotate_32f_C1R function isn’t correct.
I think the problem may be a bug in the NPP library. Please help me.

Thanks,

njuffa · November 15, 2016, 8:07am

I am not familiar with this NPP functionality. I believe in general it is a correct assumption that NPP functions are supposed to be equivalent to the IPP functions of the same (except for the prefix) name. If you are using CUDA 8.0 and are quite certain that the problem is with NPP rather than your code (a common mistake when using NPP is an incorrect specification of the ROI), you should feel free to file a bug with NVIDIA. The bug reporting form is linked from the CUDA registered developer website.

TamNT27 · November 15, 2016, 2:00pm

@njuffa: Your suggestion seems to be good :D.

However, I’ve tried to produce output with CUDA 8.0. The result is same as CUDA6.0.
[url]https://goo.gl/photos/Dxd2F3PfLfjipGsi8[/url]

njuffa · November 15, 2016, 4:43pm

The reason I referenced CUDA 8.0 is because bug reports are only of interest if they are against the latest version. I did not mean to imply that the behavior is different between CUDA versions. IF you use CUDA 8.0 AND you experience broken code THEN file a bug report.

nnTai · November 16, 2016, 4:39am

Thanks njuffa, and thanks TamNT27 for the support.
I am waiting for an answer that corrects this problem.
Is there an NVIDIA support person here? Please help me.

HannesF99 · November 17, 2016, 11:05am

I would not expect the NPP and IPP rotation routines to give exactly the same result (especially on the border). The same applies to resize routines and warping routines (in either NPP or IPP). The reasons?
Minor things will change the results slightly: the border mode used; where the people who implemented the routine put the origin of the coordinate system (is it the center of the upper-left pixel or the upper-left corner of the upper-left pixel?); the rounding mode (up/down/nearest). For bicubic interpolation (which you do not use), there is another factor, as there are different flavours of bicubic interpolation (Catmull-Rom, Mitchell-Netravali, …).

Actually, even when using only one library such as IPP, we noticed slight differences in the result of the ‘resize’ routine from one IPP version (6.1) to a newer one (8.2).

So for all kinds of geometric operations you shouldn’t expect a 100% identical result.

Furthermore, I would recommend using bilinear instead of nearest-neighbor interpolation.
And if you are always doing a 90 degree rotation - that is, I think, a transposition and can be done in a more direct way.

nnTai · November 18, 2016, 9:12am

Thanks HannesF99 for your suggestion.
Here only the positions of the pixels change, not their values, so I think both libraries should give exactly the same result.
Example: take an image of size 4 x 4,
rotate it around (0, 0) by the specified angle (-90) and shift it by (3, 0).
input=
{
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16
}

According to the theory of rotate + shift:
output =
{
13, 9, 5, 1,
14, 10, 6, 2,
15, 11, 7, 3,
16, 12, 8, 4
}
Using the ippiRotate_32f_C1R function (IPP library of Intel), the output data is correct and matches the theory.
Using the nppiRotate_32f_C1R function (NPP library of CUDA), the output data isn’t correct:
output =
{
aa, aa, aa, 1,
14, 10, 6, 2,
15, 11, 7, 3,
16, 12, 8, 4
}
The positions marked aa hold wrong values.
I think the rotation logic of the NPP library is wrong.
Thank,

Topic		Replies	Views
NPP library functions nppiResize_8U_C3R and nppiBGRToLab_8u_C3R differ from cv::resize() output General	10	4759	October 12, 2021
Issues with nppiMean_StdDev_32f from the NPP library GPU-Accelerated Libraries	15	3339	October 31, 2017
Nvidia Primitive functions for rotating an image Jetson TX2 cuda	4	1071	October 18, 2021
Using multiple streams in npp GPU-Accelerated Libraries npp	0	1011	January 25, 2022
Very poor performance with NPP CrossCorrValid GPU-Accelerated Libraries npp	8	3274	May 25, 2022
nppiRotate_8u_C1R and NPP_STEP_ERROR CUDA Programming and Performance	10	5595	March 8, 2012
Problem when using NPP libirary, nppiMinIndx_32f_C1R() GPU-Accelerated Libraries	8	1451	July 31, 2018
Using nppiResizeBatch_8u_C3R causes exception wrap illegal address GPU-Accelerated Libraries npp	3	800	August 24, 2022
[closed]nppiRemap_8u_C3R function GPU-Accelerated Libraries	5	1891	June 11, 2016
nppiGetPerspectiveTransform() bug Problem with nppiGetPerspectiveTransform() library function CUDA Programming and Performance	9	2192	May 23, 2012

Why do nppiRotate (NPP, CUDA) and ippiRotate (IPP, Intel) produce different output data for the same rotation?

Related topics