[closed]nppiRemap_8u_C3R function

vsw · June 5, 2016, 10:13am

Hi,

I want to remap an image; here is my code:

int nImageWidth{ 640 };
    int nImageHeight{ 480 };
     int radius{ 240 };

    uint8_t * pOut{ new uint8_t[ nImageWidth * nImageHeight * 3 ] };
    YUV2RGB( pIn, pOut );
    //testsave pOut to bmp image OK


    int dstStepCUDA;
    Npp8u * pDstImgCUDA{ nppiMalloc_8u_C3( 4 * radius, 4 * radius, & dstStepCUDA) };
    NPP_ASSERT_NOT_NULL( pDstImgCUDA );    
    NppiSize dstRectCUDA{ 4 * radius, 4 * radius };

    //input image params from pOut
    NppiSize srcSize{ nImageWidth, nImageHeight };
    //square rect inside 4:3 image
    NppiRect srcRect{ ( nImageWidth - nImageHeight ) / 2, 0, nImageHeight, nImageHeight };

    //allocate host x, y maps
    Npp32f * pMapX{ new Npp32f[ 4 * radius * 4 * radius ] };
    Npp32f * pMapY{ new Npp32f[ 4 * radius * 4 * radius ] };
    //fill maps OK
    polar2LinearMaps( radius, radius, radius, 0.0, pMapX, pMapY );

    //allocate device x map
    Npp32f * pMapXCUDA = nppiMalloc_32f_C1(4 * radius, 4 * radius, & nMapPitchCUDA);
    NPP_ASSERT_NOT_NULL( pMapXCUDA );
    //upload cuda x map to device
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pMapXCUDA, nMapPitchCUDA, pMapX, 4 * radius * sizeof(Npp32f), 4 * radius, 4 * radius, cudaMemcpyHostToDevice );
    );

    //allocate device y map
    Npp32f * pMapYCUDA = nppiMalloc_32f_C1(4 * radius, 4 * radius, & nMapPitchCUDA);
    NPP_ASSERT_NOT_NULL( pMapYCUDA );
    //upload cuda y map to device
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pMapYCUDA, nMapPitchCUDA, pMapY, 4 * radius * sizeof(Npp32f), 4 * radius, 4 * radius, cudaMemcpyHostToDevice );
    );

Output is OK:

cudaSetDevice GPU 0 = GK20A
NPP Library Version 6.5.34
CUDA Driver Version: 6.5
CUDA Runtime Version: 6.5
Device 0: < GK20A >, Compute SM 3.2 detected
0 3314.55 ms

But if I apply the remap function, Ubuntu freezes:

try{

    // nppiRemap_8u_C3R expects its source image in device memory, but pOut is
    // a host buffer allocated with new[]. Upload it to a pitched device image
    // first (cudaMemcpy2D widths are in bytes: 3 bytes per RGB pixel).
    int srcStepCUDA;
    Npp8u * pSrcImgCUDA{ nppiMalloc_8u_C3( nImageWidth, nImageHeight, & srcStepCUDA ) };
    NPP_ASSERT_NOT_NULL( pSrcImgCUDA );
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pSrcImgCUDA, srcStepCUDA, pOut, nImageWidth * 3, nImageWidth * 3, nImageHeight, cudaMemcpyHostToDevice )
    );

    // Print the NppStatus returned by the remap.
    qDebug() << nppiRemap_8u_C3R(
        pSrcImgCUDA, srcSize, srcStepCUDA, srcRect,
        pMapXCUDA, nMapPitchCUDA,
        pMapYCUDA, nMapPitchCUDA,
        pDstImgCUDA, dstStepCUDA, dstRectCUDA,
        NPPI_INTER_NN
    );

    nppiFree( pSrcImgCUDA );
    } catch( int a ){
        qDebug() <<  "Caught exception number:  " << a;
        return;

    }

Please help to understand my issue.

Best regards Viktor.

Robert_Crovella · June 5, 2016, 3:13pm

I’m pretty sure in nppiRemap_xxxxxx the source image pointer is supposed to point to device memory. But your source image pointer (pOut) points to host memory:

uint8_t * pOut{ new uint8_t[ nImageWidth * nImageHeight * 3 ] };

    qDebug() << nppiRemap_8u_C3R(
        pOut...

vsw · June 5, 2016, 7:25pm

Thank you very much! Yes, it was my mistake. Now there are no crashes and the status of the remap function = 0.

But now black bitmaps are always returned.

I have tried different map sizes and output CUDA arrays, but the pixels are always black.

Do you have any ideas?

Robert_Crovella · June 5, 2016, 7:49pm

Read the documentation.
The remap function will return black pixels under some circumstances.

vsw · June 6, 2016, 6:57pm

Please point me to the proper manual for the NPP API.
I read NPP_Library.pdf about the remap function, and it contains only function definitions and a few words about the rect.
I have read the OpenCV and Intel IPP remap docs and tried their examples, and they work fine.

Here is my code:

static void processImage( const void * pIn, int size ) {

	//convert input YUV buffer to RGB buffer
    Npp8u * pOut{ new Npp8u[ nImageWidth * nImageHeight * 3 ] };
    YUV2RGB( pIn, pOut );
    QImage i( pOut, nImageWidth, nImageHeight, QImage::Format_RGB888 );
    //i.save( "/home/ubuntu/test/testx" + QString::number( count ) + ".bmp" ); <-- full image OK

	//create source device buffer and fill it with RGB buffer 
    int srcStepCUDA;
    Npp8u * pSrcImgCUDA{ nppiMalloc_8u_C3( nImageWidth, nImageHeight, & srcStepCUDA) };
    NPP_ASSERT_NOT_NULL( pSrcImgCUDA );
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pSrcImgCUDA, srcStepCUDA, pOut, nImageWidth * 3, nImageWidth, nImageHeight, cudaMemcpyHostToDevice );
    );

	//create destination device buffer
    int w{ 2 * nImageHeight };
    int h{ nImageHeight / 2 };		
    int dstStepCUDA;
    Npp8u * pDstImgCUDA{ nppiMalloc_8u_C3( w, h, & dstStepCUDA) };
    NPP_ASSERT_NOT_NULL( pDstImgCUDA );
    NppiSize dstRectCUDA{ w, h };

    NppiSize srcSize{ nImageWidth, nImageHeight };
    NppiRect srcRect{ ( nImageWidth - nImageHeight ) / 2, 0, nImageHeight, nImageHeight };
    //NppiRect srcRect{ 0, 0, nImageWidth, nImageHeight };

    // start timer
    struct timeval t1, t2;
    double elapsedTime;
    gettimeofday(&t1, NULL);

    NppStatus res = nppiRemap_8u_C3R(
        pSrcImgCUDA, srcSize, srcStepCUDA, srcRect,
        pMapXTopCUDA, nMapPitchCUDA,
        pMapYTopCUDA, nMapPitchCUDA,
        pDstImgCUDA, dstStepCUDA, dstRectCUDA,
        NPPI_INTER_LINEAR
    );

    qDebug() << "Status " << res;

    // stop timer
    gettimeofday(&t2, NULL);
    elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;      // sec to ms
    elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0;   // us to ms
    qDebug() << "remap " << elapsedTime << " ms";

	//create RGB buffer and fill it from device destination buffer
    Npp8u * p{ new Npp8u[ w * h * 3 ] };
    NPP_CHECK_CUDA(
        cudaMemcpy2D( p, w * 3, pDstImgCUDA, dstStepCUDA, w, h, cudaMemcpyDeviceToHost )
    );
    QImage outImage( p, w, h, w * 3, QImage::Format_RGB888 );
    outImage.save( "/home/ubuntu/test/test" + QString::number( count ) + ".bmp" ); <-- 1/3 part of width is color and 2/3 part of width is black

    nppiFree( pDstImgCUDA );
    nppiFree( pSrcImgCUDA );
    delete [] pOut;
    delete [] p;
}

Now if I don’t apply the remap function, I get an image in which only 1/3 of the pixels are in color.
If I use remap, I get a completely black image.

Here are the 4 similar maps:

Npp32f * pMapXTop{ new Npp32f[ w * h ] };
    Npp32f * pMapYTop{ new Npp32f[ w * h ] };
    Npp32f * pMapXBottom{ new Npp32f[ w * h ] };
    Npp32f * pMapYBottom{ new Npp32f[ w * h ] };

    polar2LinearMaps( radius, xT, yT, 0.0, pMapXTop, pMapYTop );
    polar2LinearMaps( radius, xB, yB, 0.0, pMapXBottom, pMapYBottom );

    pMapXTopCUDA = nppiMalloc_32f_C1( w, h, & nMapPitchCUDA );
    NPP_ASSERT_NOT_NULL( pMapXTopCUDA );
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pMapXTopCUDA, nMapPitchCUDA, pMapXTop, w * sizeof(Npp32f), w, h, cudaMemcpyHostToDevice );
    );

    pMapYTopCUDA = nppiMalloc_32f_C1( w, h, & nMapPitchCUDA );
    NPP_ASSERT_NOT_NULL( pMapYTopCUDA );
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pMapYTopCUDA, nMapPitchCUDA, pMapYTop, w * sizeof(Npp32f), w, h, cudaMemcpyHostToDevice );
    );

    pMapXBottomCUDA = nppiMalloc_32f_C1( w, h, & nMapPitchCUDA );
    NPP_ASSERT_NOT_NULL( pMapXBottomCUDA );
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pMapXBottomCUDA, nMapPitchCUDA, pMapXBottom, w * sizeof(Npp32f), w, h, cudaMemcpyHostToDevice );
    );

    pMapYBottomCUDA = nppiMalloc_32f_C1( w, h, & nMapPitchCUDA );
    NPP_ASSERT_NOT_NULL( pMapYBottomCUDA );
    NPP_CHECK_CUDA(
        cudaMemcpy2D( pMapYBottomCUDA, nMapPitchCUDA, pMapYBottom, w * sizeof(Npp32f), w, h, cudaMemcpyHostToDevice );
    );

    delete [] pMapXTop;
    delete [] pMapYTop;
    delete [] pMapXBottom;
    delete [] pMapYBottom;

Could you please advise me where my mistake is?

Also, could it be that my Tegra 3 board has corrupted RAM or a bad driver?

Best regards Viktor.

vsw · June 11, 2016, 10:06am

Solved. The width argument of cudaMemcpy2D must be given in bytes, not in pixels or elements. Thanks.

Topic		Replies	Views
Using nppiResizeBatch_8u_C3R causes exception wrap illegal address GPU-Accelerated Libraries npp	3	815	August 24, 2022
npp nppiResize_8u_C1R gives unexpected result GPU-Accelerated Libraries	4	1166	January 1, 2020
NPP - nppiFilter_8u_C1R returns KERNEL_EXECUTION Debug options? CUDA Programming and Performance	6	6763	April 25, 2010
Very poor performance with NPP CrossCorrValid GPU-Accelerated Libraries npp	8	3321	May 25, 2022
nppiResize_8u_C3R function of cuda 10.1 outputs a wrong result GPU-Accelerated Libraries	0	940	August 22, 2019
NPP function nppiCrossCorrFull_NormLevel_8u32f_C1R too slow??? CUDA Programming and Performance	8	1538	March 7, 2015
Issues with nppiMean_StdDev_32f from the NPP library GPU-Accelerated Libraries	15	3377	October 31, 2017
A critical problem with nppiFilter CUDA Programming and Performance	6	7466	February 21, 2013
NPP library functions nppiResize_8U_C3R and nppiBGRToLab_8u_C3R differ from cv::resize() output General	10	4896	October 12, 2021
Problem with nppi morphological operation GPU-Accelerated Libraries	3	1100	August 7, 2018

[closed]nppiRemap_8u_C3R function

Related topics