Hi,
I have written code that downsizes an RGB24 image using nppiResizeSqrPixel_8u_C3R, as shown below.
I run the code below to resize an RGB24 image (1280x720) to half its size (scale factor == 0.5f).
The output image is what I expect, but it takes far longer to produce than the ffmpeg scaler does.
NPP resizer : 248.52 ms (excluding the time spent in cudaMemcpy and cudaMalloc)
ffmpeg scaler : 4.25 ms
How can I improve the performance of the NPP resize function?
/////////////////////////////////////////////////////////
cudaMalloc( (void**)&devSrc,nSrcSize);
cudaMalloc( (void**)&devDst,nDstSize);
QueryPerformanceCounter(&swStart);
cudaMemcpy((void*)devSrc,(void*)dc.GetImageData(),nSrcSize,cudaMemcpyHostToDevice);
NppiSize oSrcSize; oSrcSize.width = nSrcW; oSrcSize.height = nSrcH;
NppiRect oSrcROI = {0,0,nSrcW,nSrcH};
NppiRect oDstROI = {0,0,nDstW,nDstH};
nppiResizeSqrPixel_8u_C3R(devSrc, //RGB24 image data
oSrcSize,
nSrcW3, // stride
oSrcROI,
devDst,
nDstW3,
oDstROI,
nScaleFactor, // nXFactor
nScaleFactor, // nYFactor
0, // nXShift
0, // nYShift
NPPI_INTER_LINEAR
);
cudaMemcpy((void*)hostDst,(void*)devDst,nDstSize,cudaMemcpyDeviceToHost);
QueryPerformanceCounter(&swEnd);
fTimeElapsed = ((swEnd.QuadPart-swStart.QuadPart)/(float)swFreq.QuadPart)*1000;
printf(“image Npp scaling completed!! elapsed time = %f ms \n”,fTimeElapsed);
cudaFree(devSrc);
cudaFree(devDst);
/////////////////////////////////////////////////