Hello everyone, please help me!
I have a problem converting a call to the ippiRotate_32f_C1R function (from Intel's IPP library) into a call to the nppiRotate_32f_C1R function (from CUDA's NPP library).
The two functions read the same input data, but their output data differ in the first row.
With config:
int angle = -90;
double xShift = lSrcYLength - 1;
double yShift = 0;
Input data:
pflSrc[6*4] = {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 11, 12,
13, 14, 15, 16,
17, 18, 19, 20,
21, 22, 23, 24
}
Output data: ippiRotate_32f_C1R
pflRes[6*4] = {
19, 13, 7, 1,
20, 14, 8, 2,
21, 15, 9, 3,
22, 16, 10, 4,
23, 17, 11, 5,
24, 18, 12, 6
}
Output data: nppiRotate_32f_C1R
pflResCuda[6*4] = {
0, 0, 0, 1,
20, 14, 8, 2,
21, 15, 9, 3,
22, 16, 10, 4,
23, 17, 11, 5,
24, 18, 12, 6
}
I have tested other configurations and the two functions give the same output data; only angle = -90 produces the wrong output data.
I use CUDA version 6.0 and Intel Parallel Studio 2011.
Thanks for help,
TaiNN
Please refer to the source code below:
- file main.cpp
#include "stdio.h"
#include "stdlib.h"
#include <cufft.h>
#include <cuda.h>
#include <string.h>
#include "C:\Program Files (x86)\Intel\Parallel Studio 2011\Composer\ipp\include\ipp.h"
#include "C:\Program Files (x86)\Intel\Parallel Studio 2011\Composer\ipp\include\ippi.h"
#include "C:\Program Files (x86)\Intel\Parallel Studio 2011\Composer\ipp\include\ippm.h"
#include "headerFile.h"
// Rotates one float image on the CPU through ippiRotate_32f_C1R (defined
// later in this file).
//   pflSrc      - source pixels; passed to IPP as lSrcXLength wide and
//                 lSrcYLength high
//   pflRes      - destination pixels; passed to IPP as lSrcYLength wide and
//                 lSrcXLength high
//   lSrcXLength - source width in pixels
//   lSrcYLength - source height in pixels
int Rotate_1Slice(
float* pflSrc,
float* pflRes,
long lSrcXLength,
long lSrcYLength
);
// Test driver: rotates the same 24-element image with the IPP wrapper and the
// NPP wrapper, prints both results and reports whether they are identical.
// Returns 0 when the program ran to completion, 1 when a wrapper reported
// failure.
int main()
{
    // lSrcXLength is the image WIDTH (pixels per row) and lSrcYLength the
    // HEIGHT (number of rows): both rotate wrappers pass them to the library
    // as {width, height} with a row step of lSrcXLength floats.
    long lSrcXLength = 6;
    long lSrcYLength = 4;
    int size = lSrcXLength * lSrcYLength;

    //init input data (laid out as the libraries see it: 4 rows of 6 pixels)
    float pflSrc[6*4] = {
         1,  2,  3,  4,  5,  6,
         7,  8,  9, 10, 11, 12,
        13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24
    };
    float pflRes[6*4] = {0};
    float h_pflSrcCuda[6*4] = {0};
    float h_pflResCuda[6*4] = {0};
    memcpy(h_pflSrcCuda, pflSrc, size*sizeof(float));

    printf("-----Input Matrix: pflSrc-----\n");
    // Source has lSrcYLength rows and lSrcXLength columns. Printing it the
    // other way round (as before) showed a layout the libraries never see.
    show(pflSrc, lSrcYLength, lSrcXLength);

    //--------------------Ipp of intel-----------------------
    if (Rotate_1Slice(pflSrc, pflRes, lSrcXLength, lSrcYLength) != 0)
    {
        printf("Error! Rotate_1Slice failed\n");
        return 1;
    }
    printf("-----Result ippiRotate_32f_C1R function(Ipp of intel)-----\n");
    // The rotated image is lSrcXLength rows by lSrcYLength columns.
    show(pflRes, lSrcXLength, lSrcYLength);

    //--------------------Cuda--------------------
    if (Rotate_1SliceCuda(h_pflSrcCuda, h_pflResCuda, lSrcXLength, lSrcYLength) != 0)
    {
        printf("Error! Rotate_1SliceCuda failed\n");
        return 1;
    }
    printf("-----Result nppiRotate_32f_C1R function(Npp of Cuda)-----\n");
    show(h_pflResCuda, lSrcXLength, lSrcYLength);

    printf("-----Compare data-------\n");
    // CompareMatrix returns true when at least one element differs.
    if(CompareMatrix(pflRes, h_pflResCuda,size))
    {
        printf("Difference data!\n");
    }
    else
    {
        printf("Same data!\n");
    }
    return 0;
}
// Rotates one float image by -90 degrees on the CPU with ippiRotate_32f_C1R
// using nearest-neighbour interpolation (IPPI_INTER_NN).
//   pflSrc      - source pixels, lSrcXLength wide and lSrcYLength high,
//                 rows packed (step = lSrcXLength * sizeof(float))
//   pflRes      - destination pixels, lSrcYLength wide and lSrcXLength high,
//                 rows packed (step = lSrcYLength * sizeof(float))
//   lSrcXLength - source width in pixels, must be > 0
//   lSrcYLength - source height in pixels, must be > 0
// Returns 0 on success, -1 on invalid arguments or when IPP reports any
// status other than ippStsNoErr.
int Rotate_1Slice(
float* pflSrc,
float* pflRes,
long lSrcXLength,
long lSrcYLength
)
{
    if (pflSrc == NULL || pflRes == NULL || lSrcXLength <= 0 || lSrcYLength <= 0)
    {
        printf("Error! invalid argument\n");
        return -1;
    }

    // The IPP structures hold int fields; convert once, explicitly.
    const int width = (int)lSrcXLength;
    const int height = (int)lSrcYLength;

    const double angle = -90;
    // Shift applied after the rotation; value kept exactly as in the original
    // configuration (source height minus one, no vertical shift).
    const double xShift = height - 1;
    const double yShift = 0;

    IppiRect srcROI = { 0, 0, width, height };
    IppiRect resROI = { 0, 0, height, width };   // width/height swapped for the rotated image
    IppiSize size = { width, height };

    IppStatus ippStatus = ippiRotate_32f_C1R( pflSrc,
                                              size,
                                              (int)(width * sizeof(float)),
                                              srcROI,
                                              pflRes,
                                              (int)(height * sizeof(float)),
                                              resROI,
                                              angle,
                                              xShift, yShift,
                                              IPPI_INTER_NN );
    if (ippStatus != ippStsNoErr)
    {
        printf("Error! call ipp function\n");
        return -1;   // previously the failure was printed but 0 was returned
    }
    return 0;
}
//Utilities function
// Element-wise exact comparison of two float buffers of `size` elements.
// Every differing pair is printed as "Difference pixel <ori> <-> <mod>".
// Returns true when at least one element differs, false when all are equal
// (including when size <= 0, because nothing is compared).
bool CompareMatrix(float *ori, float *mod, int size)
{
    int mismatchCount = 0;
    for (int idx = 0; idx < size; ++idx)
    {
        const float expected = ori[idx];
        const float actual = mod[idx];
        if (expected == actual)
        {
            continue;
        }
        printf("Difference pixel %0.1f <-> %0.1f\n", expected, actual);
        ++mismatchCount;
    }
    return mismatchCount > 0;
}
// Prints `data` as `row` lines of `col` values each, reading the buffer
// sequentially (element (i, j) is data[j + i*col]). Each value is printed
// with one decimal followed by a space; each line ends with a newline.
void show(float *data, int row, int col)
{
    const float *cursor = data;
    for (int r = 0; r < row; ++r)
    {
        for (int c = 0; c < col; ++c)
        {
            printf("%0.1f ", *cursor);
            ++cursor;
        }
        printf("\n");
    }
}
- file Rotate_1SliceCuda.cu
#include <cuda_runtime_api.h>
#include <cuda.h>
#include <nppi.h>
#include<stdio.h>
#include "headerFile.h"
// Rotates one float image by -90 degrees on the GPU with nppiRotate_32f_C1R
// using nearest-neighbour interpolation (NPPI_INTER_NN).
//   pflSrcCuda  - HOST source pixels, lSrcXLength wide and lSrcYLength high,
//                 rows packed
//   pflResCuda  - HOST destination pixels, lSrcYLength wide and lSrcXLength
//                 high, rows packed. Its current content is uploaded first,
//                 so pixels NPP does not write keep their previous value.
//   lSrcXLength - source width in pixels, must be > 0
//   lSrcYLength - source height in pixels, must be > 0
// Returns 0 on success, -1 on invalid arguments, on any CUDA runtime failure,
// or when NPP reports a status other than NPP_SUCCESS.
// NOTE(review): the accompanying report says the first destination row
// differs from the IPP result at angle = -90; the cause cannot be determined
// from this code, so the rotation parameters are left exactly as they were.
int Rotate_1SliceCuda
(
float* pflSrcCuda,
float* pflResCuda,
long lSrcXLength,
long lSrcYLength
)
{
    if (pflSrcCuda == NULL || pflResCuda == NULL || lSrcXLength <= 0 || lSrcYLength <= 0)
    {
        printf("Error! invalid argument\n");
        return -1;
    }

    // The NPP structures hold int fields; convert once, explicitly.
    const int width = (int)lSrcXLength;
    const int height = (int)lSrcYLength;
    const size_t sizeByte = (size_t)width * (size_t)height * sizeof(float);

    float *dev_pflSrc = NULL;
    float *dev_pflRes = NULL;
    int result = -1;

    /* Allocate device buffers and transfer data from host to device.
       Each step runs only if every previous one succeeded. */
    cudaError_t cudaStatus = cudaMalloc((void**)&dev_pflSrc, sizeByte);
    if (cudaStatus == cudaSuccess)
    {
        cudaStatus = cudaMalloc((void**)&dev_pflRes, sizeByte);
    }
    if (cudaStatus == cudaSuccess)
    {
        cudaStatus = cudaMemcpy(dev_pflSrc, pflSrcCuda, sizeByte, cudaMemcpyHostToDevice);
    }
    if (cudaStatus == cudaSuccess)
    {
        cudaStatus = cudaMemcpy(dev_pflRes, pflResCuda, sizeByte, cudaMemcpyHostToDevice);
    }

    if (cudaStatus != cudaSuccess)
    {
        printf("Error! cuda runtime: %s\n", cudaGetErrorString(cudaStatus));
    }
    else
    {
        const double angle = -90;
        const double xShift = height - 1;
        const double yShift = 0;
        NppiRect srcROI = { 0, 0, width, height };
        NppiRect resROI = { 0, 0, height, width };   // width/height swapped for the rotated image
        NppiSize size = { width, height };

        NppStatus status = nppiRotate_32f_C1R( dev_pflSrc,
                                               size,
                                               (int)(width * sizeof(float)),
                                               srcROI,
                                               dev_pflRes,
                                               (int)(height * sizeof(float)),
                                               resROI,
                                               angle,
                                               xShift, yShift,
                                               NPPI_INTER_NN );
        if (status != NPP_SUCCESS)
        {
            printf("Error! call nppi function\n");
        }

        /* Transfer data to host (done even after an NPP failure, as before) */
        cudaStatus = cudaMemcpy(pflResCuda, dev_pflRes, sizeByte, cudaMemcpyDeviceToHost);
        if (cudaStatus != cudaSuccess)
        {
            printf("Error! cuda runtime: %s\n", cudaGetErrorString(cudaStatus));
        }
        else if (status == NPP_SUCCESS)
        {
            result = 0;
        }
    }

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(dev_pflSrc);
    cudaFree(dev_pflRes);
    return result;
}
- file headerFile.h
// Rotates one float image on the GPU through nppiRotate_32f_C1R
// (implemented in Rotate_1SliceCuda.cu).
//   pflSrcCuda  - host buffer with the source pixels
//   pflResCuda  - host buffer that receives the rotated pixels
//   lSrcXLength - source width in pixels
//   lSrcYLength - source height in pixels
int Rotate_1SliceCuda
(
float* pflSrcCuda,
float* pflResCuda,
long lSrcXLength,
long lSrcYLength
);
// Utility functions
// NOTE(review): no definition of testRoate is visible in the posted sources;
// the name looks like a misspelling of "testRotate" - confirm before use.
void testRoate();
// Prints `data` as `row` lines of `col` values each.
void show(float *data, int row, int col);
// Returns true when at least one of the first `size` elements of `ori` and
// `mod` differs, false otherwise.
bool CompareMatrix(float *ori, float *mod, int size);