I have a memory buffer allocated with cudaMallocManaged; this buffer is used as input to the AI model as a binding. I do the image preprocessing with OpenCV as follows: I create an OpenCV matrix pointing to that buffer on the GPU and preprocess the images in place. Like:
// Allocate unified-memory (cudaMallocManaged) buffers that will serve as the
// model's input/output bindings, then wrap the input binding in a GpuMat.
cv::Size inputSize = _modelConfig.inputSize;
int numBindings = engine->getNbIOTensors();
void *buffer_in, *buffer_out; // buffers for input and output of the model
// Dense HWC float32 image: height * width * 3 channels * sizeof(float) bytes.
int size_in = inputSize.height * inputSize.width * 3 * sizeof(float);
cudaMallocManaged(&buffer_in, size_in); // NOTE(review): return status unchecked
int size_out = 1 * sizeof(float); // output is just a single float number
cudaMallocManaged(&buffer_out, size_out);
// create cv::cuda::GpuMat outputMat pointing to bindings[0], which is buffer_in
// (non-owning view; this constructor assumes a dense row step of width * 3 * sizeof(float))
cv::cuda::GpuMat outputMat = cv::cuda::GpuMat(inputSize.height, inputSize.width, CV_32FC3, bindings[0]);
preprocess outputMat with the OpenCV…
Now I want to do the same with the VPI library. But it cannot point to the managed buffer bindings[0]. It requires either an NvBufSurface or a cudaArray. Is it possible to create, for example, an NvBufSurface from the managed memory? What would you advise in order to make a VPIImage point to bindings[0]?
what should I use instead of “?”
for buffertype there are options: VPI_IMAGE_BUFFER_CUDA_PITCH_LINEAR, VPI_IMAGE_BUFFER_CUDA_ARRAY, VPI_IMAGE_BUFFER_EGLIMAGE, VPI_IMAGE_BUFFER_NVBUFFER
int main()
{
// Driver-API init; the CUDA runtime calls below would lazily create a context anyway.
cuInit(0);
int img_width_output = 224;
int img_height_output = 224;
// Dense HWC float32 buffer: width * height * 3 channels * sizeof(float) bytes.
int size_output = img_width_output * img_height_output * 3 * sizeof(float);
void* output_buff; // a preprocessed image will be put here; this buffer will be used as a model input (binding)
cudaMalloc(&output_buff, size_output); // NOTE(review): return status unchecked
// how I do it with OpenCV GPU version
cv::Mat img_input;
cv::cuda::GpuMat img_gpu_in, img_gpu_processed;
std::string path = "304194-17086108.jpg"; // here use your image
img_input = cv::imread(path); // NOTE(review): not checked with empty() for load failure
img_gpu_in = cv::cuda::GpuMat(img_input); // build OpenCV GPU image based on the CPU version
// wrap an OpenCV GPU image around previously allocated output_buff
// (non-owning view; assumes a dense row step of width * 3 * sizeof(float))
img_gpu_processed = cv::cuda::GpuMat(img_height_output, img_width_output, CV_32FC3, output_buff);
// NOTE(review): cv::Size takes (width, height) — the arguments look swapped,
// which is harmless here only because both dimensions are 224; confirm.
cv::cuda::resize(img_gpu_in, img_gpu_processed, cv::Size(img_height_output, img_width_output), 0, 0, cv::INTER_NEAREST);
// the resulting resized image is stored in output_buff and I can use it as model input in future
// now I try to do the same with VPIimage instead of OpenCV GPU version
VPIImage img_vpi;
// Zero-initialized: every field not explicitly set below stays 0 — including
// buffer.pitch.format and buffer.pitch.planes[0].pitchBytes.
VPIImageData img_data = {};
img_data.bufferType = VPI_IMAGE_BUFFER_CUDA_PITCH_LINEAR;
img_data.buffer.pitch.numPlanes = 1;
img_data.buffer.pitch.planes[0].width = img_width_output;
img_data.buffer.pitch.planes[0].height = img_height_output;
img_data.buffer.pitch.planes[0].pixelType = VPI_PIXEL_TYPE_3F32;
img_data.buffer.pitch.planes[0].data = output_buff;
// NOTE(review): planes[0].pitchBytes is never assigned (stays 0); the follow-up
// later in this thread confirms the missing pitch value caused the error below.
//VPIStatus err_vpi = vpiImageCreateWrapper(&img_data, nullptr, VPI_BACKEND_CUDA, &img_vpi);
VPIStatus err_vpi = vpiImageCreateWrapper(&img_data, nullptr, 0, &img_vpi);
std::cout << vpiStatusGetName(err_vpi) << std::endl; // Output: VPI_ERROR_INVALID_ARGUMENT
img_gpu_in.release();
img_gpu_processed.release();
cudaFree(output_buff); // frees the buffer; img_vpi (if created) would now wrap freed memory
return 0;
int main()
{
// Driver-API init; the CUDA runtime calls below would lazily create a context anyway.
cuInit(0);
int img_width_output = 224;
int img_height_output = 224;
// Dense HWC float32 buffer: width * height * 3 channels * sizeof(float) bytes.
int size_output = img_width_output * img_height_output * 3 * sizeof(float);
void* output_buff; // a preprocessed image will be put here; this buffer will be used as a model input (binding)
cudaMalloc(&output_buff, size_output); // NOTE(review): return status unchecked
// how I do it with OpenCV GPU version
cv::Mat img_input;
cv::cuda::GpuMat img_gpu_in, img_gpu_processed;
std::string path = "frame0001.png"; // here use your image
img_input = cv::imread(path); // NOTE(review): not checked with empty() for load failure
img_gpu_in = cv::cuda::GpuMat(img_input); // build OpenCV GPU image based on the CPU version
// wrap an OpenCV GPU image around previously allocated output_buff
// (non-owning view; assumes a dense row step of width * 3 * sizeof(float))
img_gpu_processed = cv::cuda::GpuMat(img_height_output, img_width_output, CV_32FC3, output_buff);
// NOTE(review): cv::Size takes (width, height) — the arguments look swapped,
// which is harmless here only because both dimensions are 224; confirm.
cv::cuda::resize(img_gpu_in, img_gpu_processed, cv::Size(img_height_output, img_width_output), 0, 0, cv::INTER_NEAREST);
// the resulting resized image is stored in output_buff and I can use it as model input in future
// now I try to do the same with VPIimage instead of OpenCV GPU version
VPIImage img_vpi;
// NOTE(review): unlike the first attempt, `data` is NOT zero-initialized
// (no `= {}`), so unset fields such as buffer.pitch.numPlanes and
// planes[0].pitchBytes hold indeterminate values.
VPIImageData data;
data.bufferType = VPI_IMAGE_BUFFER_CUDA_PITCH_LINEAR;
// NOTE(review): Y8_ER denotes a single-plane 8-bit grayscale format, but the
// wrapped buffer was written as CV_32FC3 (3-channel float) — format and data
// disagree; confirm the intended image format.
data.buffer.pitch.format = VPI_IMAGE_FORMAT_Y8_ER;
data.buffer.pitch.planes[0].data = output_buff;
data.buffer.pitch.planes[0].width = img_width_output;
data.buffer.pitch.planes[0].height = img_height_output;
data.buffer.pitch.planes[0].pixelType = VPI_PIXEL_TYPE_DEFAULT;
// NOTE(review): planes[0].pitchBytes is still never assigned; the follow-up
// later in this thread confirms the missing pitch value caused the error below.
//VPIStatus err_vpi = vpiImageCreateWrapper(&data, nullptr, VPI_BACKEND_CUDA, &img_vpi);
VPIStatus err_vpi = vpiImageCreateWrapper(&data, nullptr, 0, &img_vpi);
std::cout << vpiStatusGetName(err_vpi) << std::endl; // Output: VPI_ERROR_INVALID_ARGUMENT
img_gpu_in.release();
img_gpu_processed.release();
cudaFree(output_buff); // frees the buffer; img_vpi (if created) would now wrap freed memory
return 0;
Thank you very much! The problem was that I didn’t set the pitch value. Could you please explain the idea behind this formula?
int pitch = (int((width-1)/256)+1)*256;
I understand that it has something to do with the memory layout. But what is its exact meaning, and where does the 256 value come from? (It appears to round the row width up to the next multiple of 256, i.e., align each row's start address.)