NvBufSurface Color Format conversion

JetPack 6.0 Orin64
Release 5.15.136-tegra
CTI Rogue-Orin + Orin AGX
L4T 36.3.0
CUDA 12.2.140
cuDNN 8.9.4.25
TensorRT 10.3.0.30
VPI 3.2.5

I'm trying to convert BGR to NV12 between two NvBufSurface buffers using NvBufSurfTransform.

1st question, is this even possible to do with NvBufSurface?

2nd:
Here is my code:

#include <iostream>
#include "nvbufsurface.h"
#include "nvbufsurftransform.h"

int main() {
    NvBufSurface *src_surf = nullptr;
    NvBufSurface *dst_surf = nullptr;

    // Define the source and destination surface parameters
    NvBufSurfaceCreateParams src_create_params = {0};
    NvBufSurfaceCreateParams dst_create_params = {0};

    int width = 10;
    int height = 20;

    // Source surface parameters - assume it's in BGR format
    src_create_params.gpuId = 0;
    src_create_params.width = width;
    src_create_params.height = height;
    src_create_params.size = 0;
    src_create_params.colorFormat = NVBUF_COLOR_FORMAT_BGR;
    src_create_params.layout = NVBUF_LAYOUT_PITCH;
    src_create_params.memType =  NVBUF_MEM_DEFAULT ; //NVBUF_MEM_SURFACE_ARRAY; //NVBUF_MEM_DEFAULT;

    // Destination surface parameters - NV12 format
    dst_create_params.gpuId = 0;
    dst_create_params.width = width;
    dst_create_params.height = height;
    dst_create_params.size = 0;
    dst_create_params.colorFormat = NVBUF_COLOR_FORMAT_NV12;
    dst_create_params.layout = NVBUF_LAYOUT_PITCH;
    dst_create_params.memType =  NVBUF_MEM_DEFAULT; //NVBUF_MEM_DEFAULT;

    // Create the source NvBufSurface
    if (NvBufSurfaceCreate(&src_surf, 1, &src_create_params) != 0) {
        std::cerr << "Failed to create source NvBufSurface" << std::endl;
        return -1;
    }
    src_surf->numFilled = 1; // suggested by nvidia
    if (NvBufSurfaceMemSet(src_surf, 0, 0, 0) != 0) {
        std::cerr << "Failed to initialize source surface memory." << std::endl;
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }

    // Create the destination NvBufSurface
    if (NvBufSurfaceCreate(&dst_surf, 1, &dst_create_params) != 0) {
        std::cerr << "Failed to create destination NvBufSurface" << std::endl;
        NvBufSurfaceDestroy(src_surf);
        return -1;
    }
    dst_surf->numFilled = 1;
    if (NvBufSurfaceMemSet(dst_surf, 0, 0, 0) != 0) {
        std::cerr << "Failed to initialize destination surface memory." << std::endl;
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }
    std::cout << "Created src and dest surfaces buffers" << std::endl;

   
    if (NvBufSurfaceMap(src_surf, 0, 0, NVBUF_MAP_READ_WRITE) != 0) {
        std::cerr << "Failed to map source NvBufSurface" << std::endl;
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }


    // if (NvBufSurfaceMap(dst_surf, 0, 0, NVBUF_MAP_READ_WRITE) != 0) {
    //     std::cerr << "Failed to map source NvBufSurface" << std::endl;
    //     NvBufSurfaceDestroy(src_surf);
    //     NvBufSurfaceDestroy(dst_surf);
    //     return -1;
    // }

    if (NvBufSurfaceSyncForCpu(src_surf, 0, 0) != 0) {
        std::cerr << "Failed to sync source NvBufSurface for CPU access" << std::endl;
        NvBufSurfaceUnMap(src_surf, 0, 0);
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }

    // Fill the source buffer with dummy BGR data
    std::cout << "Filling source buffer with dummy BGR data" << std::endl;
    
    uint8_t *src_ptr = static_cast<uint8_t *>(src_surf->surfaceList[0].dataPtr);
    if (src_ptr == nullptr) {
        std::cerr << "Source buffer data pointer is null." << std::endl;
        NvBufSurfaceUnMap(src_surf, 0, 0);
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }
    
    std::cout << "width: " << src_create_params.width << std::endl;
    std::cout << "height: " << src_create_params.height << std::endl;
    for (uint32_t y = 0; y < src_create_params.height; ++y) {
        std::cout << "y: " << y << std::endl; 
        // for (uint32_t x = 0; x < src_create_params.width * 3; ++x) {
        //     std::cout  << x << " " ;
        //     src_ptr[y * src_surf->surfaceList[0].pitch + x] = static_cast<uint8_t>((x + y) % 256);
        // }

        for (uint32_t x = 0; x < src_create_params.width * 3; x += 3) {
            std::cout  << x << " " ;
            // Safely write BGR values, ensuring you stay within bounds for each row.
            if (x + 2 < src_surf->surfaceList[0].pitch) {  // Avoid out-of-bounds on the last row
                src_ptr[y * src_surf->surfaceList[0].pitch + x + 0] = static_cast<uint8_t>(x % 256);  // B
                src_ptr[y * src_surf->surfaceList[0].pitch + x + 1] = static_cast<uint8_t>(y % 256);  // G
                src_ptr[y * src_surf->surfaceList[0].pitch + x + 2] = static_cast<uint8_t>((x + y) % 256);  // R
            }
        }

        std::cout << std::endl;
    }
    
    std::cout << "Success: Source buffer filled with dummy BGR data" << std::endl;

    // Sync for device access
    if (NvBufSurfaceSyncForDevice(src_surf, -1, -1) != 0) {
        std::cerr << "Failed to sync source NvBufSurface for device access" << std::endl;
        NvBufSurfaceUnMap(src_surf, 0, 0);
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }
    std::cout << "Success: Source buffer synced for device access" << std::endl;

    NvBufSurfaceUnMap(src_surf, -1, -1); //release from cpu to GPU
    std::cout << "Success: Source buffer unmapped" << std::endl;


    // This is not needed as we are not using dst_surf with CPU ? 
    // if (NvBufSurfaceSyncForDevice(dst_surf, -1, -1) != 0) {
    //     std::cerr << "Failed to sync destination NvBufSurface for device access" << std::endl;
    //     NvBufSurfaceUnMap(dst_surf, 0, 0);
    //     NvBufSurfaceDestroy(src_surf);
    //     NvBufSurfaceDestroy(dst_surf);
    //     return -1;
    // }
    // std::cout << "Success: Destination buffer synced for device access" << std::endl;

    // Set up the NvBufSurfTransform parameters for the color conversion
    NvBufSurfTransformConfigParams transform_config_params;
    NvBufSurfTransform_Error err;

    // Set default transform configuration
    transform_config_params.compute_mode = NvBufSurfTransformCompute_GPU; // NvBufSurfTransformCompute_Default => VIC = Jetson does not support RGB/BGR; //
    transform_config_params.gpu_id = 0;

    if ((err = NvBufSurfTransformSetSessionParams(&transform_config_params)) != NvBufSurfTransformError_Success) {
        std::cerr << "Failed to set NvBufSurfTransform session parameters" << std::endl;
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }

    // Define transformation parameters
    NvBufSurfTransformParams transform_params;
    transform_params.src_rect = NULL; // Full frame transform
    transform_params.dst_rect = NULL; // Full frame transform
    transform_params.transform_flag = NVBUFSURF_TRANSFORM_FILTER; // Use a default filter
    transform_params.transform_filter = NvBufSurfTransformInter_Default;

    // Perform the transformation (color conversion)
    std::cout << "Performing the transformation (color conversion)" << std::endl;
    if ((err = NvBufSurfTransform(src_surf, dst_surf, &transform_params)) != NvBufSurfTransformError_Success) {
        std::cerr << "Failed to transform surface: " << err << std::endl;
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }
    std::cout << "Success: Transformation (color conversion) completed" << std::endl;
    // Sync destination buffer for CPU read if needed
    if (NvBufSurfaceMap(dst_surf, 0, 0, NVBUF_MAP_READ) != 0) {
        std::cerr << "Failed to map destination NvBufSurface" << std::endl;
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }

    if (NvBufSurfaceSyncForCpu(dst_surf, 0, 0) != 0) {
        std::cerr << "Failed to sync destination NvBufSurface for CPU access" << std::endl;
        NvBufSurfaceUnMap(dst_surf, 0, 0);
        NvBufSurfaceDestroy(src_surf);
        NvBufSurfaceDestroy(dst_surf);
        return -1;
    }

    // Print or further process the NV12 data...
    // Note: For demonstration purposes, we won't access it directly here.

    // Unmap the destination surface
    NvBufSurfaceUnMap(dst_surf, 0, 0);

    // Clean up and destroy surfaces
    NvBufSurfaceDestroy(src_surf);
    NvBufSurfaceDestroy(dst_surf);

    std::cout << "Color conversion from BGR to NV12 completed successfully!" << std::endl;

    return 0;
}

When running it segfaults:
Success: Source buffer filled with dummy BGR data
Success: Source buffer synced for device access
Success: Source buffer unmapped
Performing the transformation (color conversion)
Segmentation fault (core dumped)

I'm not confident that this setup is correct, so I would love some feedback as well as an answer to question 1.

Some other related posts state that Jetson does not support the BGR format and say to check the documentation. However, the Jetson Linux API has listed BGR as NVBUF_COLOR_FORMAT_BGR since at least 35.2.1.

Hi,
24-bit BGR is not supported. Please use 32-bit RGBA or BGRx.

Thanks for the response.
Is there a reason that various 3-channel color formats are part of nvbufsurface.h but are not supported on Jetson?

I understand the VIC may have limitations, but is that still the case when using the GPU with NVBUF_MEM_CUDA_* memory types?

Hi,
VIC does not support 24-bit BGR. So you may do like:

  1. Allocate CUDA buffer in BGR to capture your frame data
  2. Copy the CUDA to RGBA NvBufSurface
  3. Call NvBufSurfTransform() to convert RGBA NvBufSurface to another NV12 NvBufSurface

You could also use VPI for the conversion, according to the table below BGR8 to NV12_ER is supported using CUDA backend.

Could you point me in the correct direction for this operation.
Are the following order of operations correct?

//set NvBufSurfTransformSetSessionParams()
//create NvBufSurface *surface with type RGBA / PITCH
//create NvBufSurface *dst_surf with type NV12 / PITCH

// Map the source for writing: the CPU fills it below, so a read-only
// mapping is not sufficient.
NvBufSurfaceMap(surface, 0, 0, NVBUF_MAP_WRITE);
//Fill *surface with data

NvBufSurfaceSyncForDevice(surface, 0, 0);
NvBufSurfaceUnMap(surface, 0, 0);

// Map the destination (not the source a second time) so the sync/unmap that
// follow act on a surface that is actually mapped.
NvBufSurfaceMap(dst_surf, 0, 0, NVBUF_MAP_READ_WRITE); // assume we will do something with NV12 like push into appsrc.

NvBufSurfaceSyncForDevice(dst_surf, 0, 0);
NvBufSurfaceUnMap(dst_surf, 0, 0);

// Perform RGBA to NV12 conversion
// Value-initialize so every field starts at zero; the original OR-ed a flag
// into an uninitialized transform_flag.
NvBufSurfTransformParams transform_params = {};
NvBufSurfTransformRect src_rect = {0}, dst_rect = {0};
src_rect.top = 0;
src_rect.left = 0;
src_rect.width = w;
src_rect.height = h;
dst_rect.top = 0;
dst_rect.left = 0;
dst_rect.width = w;
dst_rect.height = h;
transform_params.transform_flip = NvBufSurfTransform_None; // No flip / rotation
transform_params.src_rect = &src_rect; // Full frame transform
transform_params.dst_rect = &dst_rect; // Full frame transform
transform_params.transform_flag = NVBUFSURF_TRANSFORM_FILTER; // Use a default filter
transform_params.transform_filter = NvBufSurfTransformInter_Default;
NvBufSurfTransform_Error err;
if ((err = NvBufSurfTransform(surface, dst_surf, &transform_params)) != NvBufSurfTransformError_Success) {
    std::cerr << "Failed to transform surface: " << err << std::endl;
    NvBufSurfaceDestroy(surface);
    NvBufSurfaceDestroy(dst_surf);
    return -1;
}

// Do something with dst_surface with type NV12

Now I have an NV12 buffer with data? When following this structure, the resulting NV12 image is just green - not correct.

Prior to adding the code to do the conversion from RGBA to NV12 using NvBufSurfTransform, I was successful in pushing NvBufSurface(RGBA) from cv::Mat(RGBA) to appsrc and then using nvvidconv to convert to NV12.

Thanks, I will check this out and see if it helps with performance.

I was able to get the NvBufSurfTransform() to work.

My issue was that I needed to set

transform_config_params.compute_mode = NvBufSurfTransformCompute_Default;

Using NvBufSurfTransformCompute_GPU did not work. I also had to use NVBUF_LAYOUT_PITCH for NV12; using BLOCK_LINEAR resulted in errors.

Does NvBufSurfTransform() only support VIC on Jetson hardware, and is NvBufSurfTransformCompute_GPU not supported?

Hi,
Please check this:
Using multiple Backends for nvbufsurfacetransform on Jetson - #5 by DaneLLL

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.