Image batch resize with npp

markusbrn · April 2, 2020, 12:43pm

I am having a problem with the nppiResizeBatch function. When I run my code (see below), I get the exception “./UtilNPP/ImageAllocatorsNPP.h:114: cudaSuccess == eResult assertion faild!”. When I run the non-batch version instead (currently commented out in the code below), everything works fine. I am using CUDA 10.2 on Ubuntu 16.04. Thank you very much for your help :-)

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#  define WINDOWS_LEAN_AND_MEAN
#  define NOMINMAX
#  include <windows.h>
#  pragma warning(disable:4819)
#endif

#include <ImagesCPU.h>
#include <ImagesNPP.h>
#include <ImageIO.h>
#include <Exceptions.h>

#include <string.h>
#include <fstream>
#include <iostream>

#include <cuda_runtime.h>
#include <npp.h>
#include <helper_string.h>
#include <helper_cuda.h>

int main(int argc, char *argv[])
{
    printf("%s Starting...\n\n", argv[0]);

    try
    {
        std::string sFilename, sResultFilename;

        sFilename = "Lena.pgm";
        sResultFilename = "Lena_resizeTest.pgm";

        // declare a host image object for an 8-bit grayscale image
        npp::ImageCPU_8u_C1 oHostSrc1, oHostSrc2;
        // load gray-scale image from disk
        npp::loadImage(sFilename, oHostSrc1);
        npp::loadImage(sFilename, oHostSrc2);
        // declare a device image and copy construct from the host image,
        // i.e. upload host to device
        npp::ImageNPP_8u_C1 oDeviceSrc1(oHostSrc1);
        npp::ImageNPP_8u_C1 oDeviceSrc2(oHostSrc2);
        NppiSize oSrcSize = {(int)oDeviceSrc1.width(), (int)oDeviceSrc1.height()};
        NppiRect oSrcROI = {0, 0, int(oDeviceSrc1.width()), int(oDeviceSrc1.height())};

        // allocate device image of appropriately reduced size
        npp::ImageNPP_8u_C1 oDeviceDst1(int(oSrcROI.width/2), int(oSrcROI.height/2));
        npp::ImageNPP_8u_C1 oDeviceDst2(int(oSrcROI.width/2), int(oSrcROI.height/2));
        NppiSize oDstSize = {int(oDeviceDst1.width()), int(oDeviceDst1.height())};
        NppiRect oDstROI = {0, 0, int(oDeviceDst1.width()), int(oDeviceDst1.height())};

        //const void *pSrc1, *pSrc2;
        const Npp8u *pSrc1, *pSrc2;
        pSrc1 = oDeviceSrc1.data();
        pSrc2 = oDeviceSrc2.data();

        int nSrcStep = oDeviceSrc1.pitch();

        //void *pDst1, *pDst2;
        Npp8u *pDst1, *pDst2;
        pDst1 = oDeviceDst1.data();
        pDst2 = oDeviceDst2.data();

        int nDstStep = oDeviceDst1.pitch();

        //NppiResizeBatchCXR *pBatchList = new(NppiResizeBatchCXR[2]);
        NppiResizeBatchCXR pBatchList[2];
        pBatchList[0].pSrc = pSrc1;
        pBatchList[0].nSrcStep = nSrcStep;
        pBatchList[0].pDst = pDst1;
        pBatchList[0].nDstStep = nDstStep;
        pBatchList[1].pSrc = pSrc2;
        pBatchList[1].nSrcStep = nSrcStep;
        pBatchList[1].pDst = pDst2;
        pBatchList[1].nDstStep = nDstStep;

        cudaDeviceSynchronize();

        NPP_CHECK_NPP(nppiResizeBatch_8u_C1R(oSrcSize,				//NppiSize 	oSmallestSrcSize,
                                             oSrcROI,				//NppiRect 	oSrcRectROI,
                                             oDstSize,				//NppiSize 	oSmallestDstSize,
                                             oDstROI,				//NppiRect 	oDstRectROI,
                                             NPPI_INTER_NN,			//int	 	eInterpolation,
                                             pBatchList,			//NppiResizeBatchCXR * 	pBatchList,
                                             2					//unsigned int 	nBatchSize 
                                             ));

        cudaDeviceSynchronize();


   /*	NPP_CHECK_NPP(nppiResize_8u_C1R(pSrc1,					//const Npp8u*	pSrc,
                                        nSrcStep,				//int		nSrcStep,
                                        oSrcSize,				//NppiSize	oSrcSize,
                                        oSrcROI,				//NppiRect 	oSrcRectROI,
                                        pDst1,					//Npp8u * 	pDst,
                                        nDstStep,				//int	 	nDstStep,
                                        oDstSize,				//NppiSize 	oDstSize,
                                        oDstROI,				//NppiRect 	oDstRectROI,
                                        NPPI_INTER_NN				//int 	eInterpolation 
                                        ));*/


        // declare a host image for the result
        npp::ImageCPU_8u_C1 oHostDst1(oDeviceDst1.size());
        // and copy the device result data into it
        oDeviceDst1.copyTo(oHostDst1.data(), oHostDst1.pitch());

        saveImage(sResultFilename, oHostDst1);
        std::cout << "Saved image: " << sResultFilename << std::endl;

        nppiFree(oDeviceSrc1.data());
        nppiFree(oDeviceSrc2.data());
        nppiFree(oDeviceDst1.data());
        nppiFree(oDeviceDst2.data());

        exit(EXIT_SUCCESS);
    }
    catch (npp::Exception &rException)
    {
        std::cerr << "Program error! The following exception occurred: \n";
        std::cerr << rException << std::endl;
        std::cerr << "Aborting." << std::endl;

        exit(EXIT_FAILURE);
    }
    catch (...)
    {
        std::cerr << "Program error! An unknow type of exception occurred. \n";
        std::cerr << "Aborting." << std::endl;

        exit(EXIT_FAILURE);
        return -1;
    }

    return 0;
}

czero69 · March 24, 2021, 3:33pm

Quoting the documentation:

The NppiResizeBatchCXR or NppiImageDescriptor and NppiResizeBatchROI_Advanced arrays must be in device memory.

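So in your case you need to copy the pBatchList array to device memory (allocate a device buffer with cudaMalloc, fill it with cudaMemcpy) and pass that device pointer to nppiResizeBatch_8u_C1R.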

For example, when calling nppiResizeBatch_8u_C3R_Advanced_Ctx you need to copy these three arrays to the device:

array of input images pointers
array of output crops pointers
array of rois

Also, you can check the batched example call in the NPP library.

Copying these arrays to the device will solve your problem.

Topic		Replies	Views
Using nppiResizeBatch_8u_C3R causes exception wrap illegal address GPU-Accelerated Libraries npp	3	856	August 24, 2022
npp nppiResize_8u_C1R gives unexpected result GPU-Accelerated Libraries	4	1240	January 1, 2020
problem with NPP image resize function --nppiResize_8u_C1R CUDA Programming and Performance	3	2798	March 16, 2012
nppiResize_8u_C3R function of cuda 10.1 outputs a wrong result GPU-Accelerated Libraries	0	972	August 22, 2019
Nppi resize doesn't work with 1x1px GPU-Accelerated Libraries npp	5	1296	October 12, 2021
NVPP nppiResize return error? NVPP CUDA Programming and Performance	0	5069	May 27, 2010
Using Nvidia NPP to resize image CUDA Programming and Performance	2	2795	March 22, 2018
NVPP nppiResize return error? about calling nvpp. CUDA Programming and Performance	0	2331	May 27, 2010
NPP library functions nppiResize_8U_C3R and nppiBGRToLab_8u_C3R differ from cv::resize() output General	10	5101	October 12, 2021
The NPPIRESIZE function does not output a value in different cases GPU-Accelerated Libraries npp	3	716	August 5, 2023

Image batch resize with npp

Related topics