About speed of image_resize?

I have written an implementation of image resize. Here is the code:

#include <cuda.h>
#include <cuda_runtime.h>
#include "time.h"

// Bilinear image-resize kernel: one thread per (x, y) output pixel of a
// single channel-interleaved (HWC) image.
//
// Expected launch: 2D grid/block covering dstWidth x dstHeight; the z
// dimension is unused. Preconditions: srcWidth >= 2 and srcHeight >= 2
// (the clamp below reads rows sy and sy+1, columns sx and sx+1).
//
// src: srcHeight x srcWidth x depth input, HWC layout
// dst: dstHeight x dstWidth x depth output, HWC layout
//
// Fixes vs. the original posting:
//  - cbufy[2] was read out of bounds on a short[2] (typo for cbufy[0]).
//  - The off-diagonal taps had their y/x weights cross-paired: the
//    (sy+1, sx) sample was weighted by cbufy[0]*cbufx[1] and (sy, sx+1)
//    by cbufy[1]*cbufx[0], i.e. swapped.
//  - The 11-bit fixed-point scheme ((int)(...) >> 22) is an 8-bit-pixel
//    trick (as in OpenCV); applied to float pixels it truncates nearly all
//    precision. Plain float weights are both correct and faster here.
//  - (int)f truncates toward zero, so a small negative source coordinate
//    kept sy == 0 with a negative fractional weight; floorf fixes that.
//  - The template parameter T was declared but unused; it now types the
//    pixel data (resizeKernel<float> keeps the original instantiation).
template <typename T>
__global__ void resizeKernel(const T* src, int srcHeight, int srcWidth, int dstHeight,
                             int dstWidth, int depth, T* dst)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= dstWidth || y >= dstHeight) return;

    const float scale_x = (float)srcWidth / dstWidth;
    const float scale_y = (float)srcHeight / dstHeight;

    // Source coordinate with half-pixel-center alignment, then split into
    // integer sample index and fractional blend weight.
    float fy = (y + 0.5f) * scale_y - 0.5f;
    int sy = (int)floorf(fy);
    fy -= sy;
    if (sy < 0)              { fy = 0.f; sy = 0; }
    if (sy >= srcHeight - 1) { fy = 0.f; sy = srcHeight - 2; }

    float fx = (x + 0.5f) * scale_x - 0.5f;
    int sx = (int)floorf(fx);
    fx -= sx;
    if (sx < 0)             { fx = 0.f; sx = 0; }
    if (sx >= srcWidth - 1) { fx = 0.f; sx = srcWidth - 2; }

    // Bilinear weights for the 2x2 neighborhood (row, col).
    const float w00 = (1.f - fy) * (1.f - fx);
    const float w01 = (1.f - fy) * fx;
    const float w10 = fy * (1.f - fx);
    const float w11 = fy * fx;

    const T* row0 = src + ((size_t)sy * srcWidth + sx) * depth;  // (sy,   sx)
    const T* row1 = row0 + (size_t)srcWidth * depth;             // (sy+1, sx)
    T* out = dst + ((size_t)y * dstWidth + x) * depth;

    for (int k = 0; k < depth; ++k) {
        out[k] = (T)(w00 * row0[k] + w01 * row0[depth + k] +
                     w10 * row1[k] + w11 * row1[depth + k]);
    }
}

// Host-side launcher: bilinearly resizes a batch of HWC float images on the
// given stream. Returns 0 (cudaSuccess) on success, a nonzero cudaError_t
// value if the launch configuration was rejected.
//
// Fixes vs. the original posting:
//  - `block` was declared twice, and `dim3 block(uint,uint);` is not valid
//    C++ — the function did not compile.
//  - batch_size was folded into a flat 1D grid, but the kernel indexes in
//    2D (x, y) and never consumes a batch index, so the launch geometry did
//    not match the kernel; each image is now launched separately with a
//    matching 2D grid.
//  - The launch result is now surfaced via cudaGetLastError() instead of
//    being silently dropped (still 0 on success, preserving the original
//    "return 0" contract for callers that check == 0).
int ResizeInference(cudaStream_t stream, const void* image,
                    int batch_size, int input_height, int input_width, int resize_height,
                    int resize_width, int depth, void* output)
{
    // 32x8 = 256 threads, x-major so consecutive threads of a warp touch
    // consecutive pixels along a row (coalesced global accesses).
    const dim3 block(32, 8);
    const dim3 grid((resize_width + block.x - 1) / block.x,
                    (resize_height + block.y - 1) / block.y);

    const float* src = static_cast<const float*>(image);
    float* dst = static_cast<float*>(output);
    const size_t src_image_size = (size_t)input_height * input_width * depth;
    const size_t dst_image_size = (size_t)resize_height * resize_width * depth;

    // One launch per image; all launches are asynchronous on `stream`, so
    // they queue back-to-back without host-side synchronization.
    for (int b = 0; b < batch_size; ++b) {
        resizeKernel<float><<<grid, block, 0, stream>>>(
            src + b * src_image_size, input_height, input_width,
            resize_height, resize_width, depth,
            dst + b * dst_image_size);
    }

    return static_cast<int>(cudaGetLastError());
}

I use the TensorRT enqueue function to call it. The running time of the resize is 6 ms. Is there some way to improve the speed? The requirement is under 1 ms.

You might consider the batched resize functions in the NVIDIA NPP library, which can resize every image in the batch with a single call:

https://docs.nvidia.com/cuda/npp/group__image__resize__batch.html