I am having a problem with the nppiResizeBatch function. When I run my code (see below), I get the exception “./UtilNPP/ImageAllocatorsNPP.h:114: cudaSuccess == eResult assertion faild!”. When I run the non-batch version instead (currently commented out in the code below), everything works fine. I am using CUDA 10.2 on Ubuntu 16.04. Thank you very much for your help :-)
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
# define WINDOWS_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
# pragma warning(disable:4819)
#endif
#include <ImagesCPU.h>
#include <ImagesNPP.h>
#include <ImageIO.h>
#include <Exceptions.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <cuda_runtime.h>
#include <npp.h>
#include <helper_string.h>
#include <helper_cuda.h>
int main(int argc, char *argv[])
{
printf("%s Starting...\n\n", argv[0]);
try
{
std::string sFilename, sResultFilename;
sFilename = "Lena.pgm";
sResultFilename = "Lena_resizeTest.pgm";
// declare a host image object for an 8-bit grayscale image
npp::ImageCPU_8u_C1 oHostSrc1, oHostSrc2;
// load gray-scale image from disk
npp::loadImage(sFilename, oHostSrc1);
npp::loadImage(sFilename, oHostSrc2);
// declare a device image and copy construct from the host image,
// i.e. upload host to device
npp::ImageNPP_8u_C1 oDeviceSrc1(oHostSrc1);
npp::ImageNPP_8u_C1 oDeviceSrc2(oHostSrc2);
NppiSize oSrcSize = {(int)oDeviceSrc1.width(), (int)oDeviceSrc1.height()};
NppiRect oSrcROI = {0, 0, int(oDeviceSrc1.width()), int(oDeviceSrc1.height())};
// allocate device image of appropriately reduced size
npp::ImageNPP_8u_C1 oDeviceDst1(int(oSrcROI.width/2), int(oSrcROI.height/2));
npp::ImageNPP_8u_C1 oDeviceDst2(int(oSrcROI.width/2), int(oSrcROI.height/2));
NppiSize oDstSize = {int(oDeviceDst1.width()), int(oDeviceDst1.height())};
NppiRect oDstROI = {0, 0, int(oDeviceDst1.width()), int(oDeviceDst1.height())};
//const void *pSrc1, *pSrc2;
const Npp8u *pSrc1, *pSrc2;
pSrc1 = oDeviceSrc1.data();
pSrc2 = oDeviceSrc2.data();
int nSrcStep = oDeviceSrc1.pitch();
//void *pDst1, *pDst2;
Npp8u *pDst1, *pDst2;
pDst1 = oDeviceDst1.data();
pDst2 = oDeviceDst2.data();
int nDstStep = oDeviceDst1.pitch();
//NppiResizeBatchCXR *pBatchList = new(NppiResizeBatchCXR[2]);
NppiResizeBatchCXR pBatchList[2];
pBatchList[0].pSrc = pSrc1;
pBatchList[0].nSrcStep = nSrcStep;
pBatchList[0].pDst = pDst1;
pBatchList[0].nDstStep = nDstStep;
pBatchList[1].pSrc = pSrc2;
pBatchList[1].nSrcStep = nSrcStep;
pBatchList[1].pDst = pDst2;
pBatchList[1].nDstStep = nDstStep;
cudaDeviceSynchronize();
NPP_CHECK_NPP(nppiResizeBatch_8u_C1R(oSrcSize, //NppiSize oSmallestSrcSize,
oSrcROI, //NppiRect oSrcRectROI,
oDstSize, //NppiSize oSmallestDstSize,
oDstROI, //NppiRect oDstRectROI,
NPPI_INTER_NN, //int eInterpolation,
pBatchList, //NppiResizeBatchCXR * pBatchList,
2 //unsigned int nBatchSize
));
cudaDeviceSynchronize();
/* NPP_CHECK_NPP(nppiResize_8u_C1R(pSrc1, //const Npp8u* pSrc,
nSrcStep, //int nSrcStep,
oSrcSize, //NppiSize oSrcSize,
oSrcROI, //NppiRect oSrcRectROI,
pDst1, //Npp8u * pDst,
nDstStep, //int nDstStep,
oDstSize, //NppiSize oDstSize,
oDstROI, //NppiRect oDstRectROI,
NPPI_INTER_NN //int eInterpolation
));*/
// declare a host image for the result
npp::ImageCPU_8u_C1 oHostDst1(oDeviceDst1.size());
// and copy the device result data into it
oDeviceDst1.copyTo(oHostDst1.data(), oHostDst1.pitch());
saveImage(sResultFilename, oHostDst1);
std::cout << "Saved image: " << sResultFilename << std::endl;
nppiFree(oDeviceSrc1.data());
nppiFree(oDeviceSrc2.data());
nppiFree(oDeviceDst1.data());
nppiFree(oDeviceDst2.data());
exit(EXIT_SUCCESS);
}
catch (npp::Exception &rException)
{
std::cerr << "Program error! The following exception occurred: \n";
std::cerr << rException << std::endl;
std::cerr << "Aborting." << std::endl;
exit(EXIT_FAILURE);
}
catch (...)
{
std::cerr << "Program error! An unknow type of exception occurred. \n";
std::cerr << "Aborting." << std::endl;
exit(EXIT_FAILURE);
return -1;
}
return 0;
}