Encoding NV12 by nvJPEG

Hi all!

Could anyone suggest the best way to encode NV12 images with the following parameters:
width=1920, height=1080, pitch=2048
to JPEG?

I have been trying to do this with nvJPEG, and after four days I still have no success: I keep getting a distorted image:


and no errors are reported during the conversion.

The code is as follows:

#include <fstream>
#include <filesystem>
#include <nvjpeg.h>
#include "nvidia-support/screenshoter.h"

namespace nvt {
namespace nvidia {

    /**
     * Builds a screenshoter from the transcoding configuration.
     *
     * When screenshots are disabled nothing else is initialised. Otherwise the
     * output directory is prepared (if saving is requested), the GPU properties
     * are logged and the nvJPEG handle, JPEG state, encoder state and encoder
     * parameters are created.
     *
     * If any of the nvJPEG objects cannot be created, the ones created so far are
     * released and the screenshoter marks itself as disabled, so that neither
     * encode() nor the destructor touches an invalid handle.
     *
     * @param config transcoding configuration; its `screenshots` section and `gpu`
     *               index are read, and the whole object is copied into `_config`.
     */
    screenshoter::screenshoter(transcoding_config& config){

        NVTLOGINFO("screenshoter::screenshoter():config: enabled={}, save={}, directory={}, width={}, quality={}, frequency={}", config.screenshots.enabled,
                                                                                                                                 config.screenshots.save,
                                                                                                                                 config.screenshots.directory,
                                                                                                                                 config.screenshots.width,
                                                                                                                                 config.screenshots.quality,
                                                                                                                                 config.screenshots.frequency);
        // DEBUG !
        _config = config;

        enebled = config.screenshots.enabled;
        if(!enebled)
            return;

        index = 0;

        save = config.screenshots.save;
        if(save)
        {
            directory = config.screenshots.directory;
            // An empty directory means "current working directory": there is nothing
            // to create, and calling back() on an empty string is undefined behaviour.
            if(!directory.empty())
            {
                std::filesystem::create_directories(directory);
                if(directory.back() != '/')
                    directory += "/";
            }
        }

        screenshot_width = config.screenshots.width;
        quality = config.screenshots.quality;
        frequency = config.screenshots.frequency;

        // Only log the device description when the query succeeded; otherwise
        // `props` may hold indeterminate data.
        const cudaError_t props_error = cudaGetDeviceProperties(&props, config.gpu);
        if(props_error == cudaSuccess)
        {
            NVTLOGINFO("screenshoter::screenshoter(): Using GPU {} ({}, {} SMs, {} th/SM max, CC {}.{}, ECC {})",
                        config.gpu, props.name, props.multiProcessorCount,
                        props.maxThreadsPerMultiProcessor, props.major, props.minor,
                        props.ECCEnabled ? "on" : "off");
        }
        else
        {
            NVTLOGINFO("screenshoter::screenshoter(): cudaGetDeviceProperties({})", props_error);
        }

        // Number of nvJPEG objects successfully created so far; used to unwind
        // exactly those objects if a later creation step fails.
        int created = 0;
        auto abort_init = [&]() {
            if(created > 3) nvjpegEncoderParamsDestroy(encode_params);
            if(created > 2) nvjpegEncoderStateDestroy(encoder_state);
            if(created > 1) nvjpegJpegStateDestroy(jpeg_state);
            if(created > 0) nvjpegDestroy(nvjpeg_handle);
            // The destructor and encode() key off this flag.
            enebled = false;
        };

        status = nvjpegCreateSimple(&nvjpeg_handle);
        NVTLOGINFO("screenshoter::screenshoter():nvjpegCreate({})", status);
        if(status != NVJPEG_STATUS_SUCCESS)
        {
            abort_init();
            return;
        }
        created = 1;

        status = nvjpegJpegStateCreate(nvjpeg_handle, &jpeg_state);
        NVTLOGINFO("screenshoter::screenshoter():nvjpegJpegStateCreate({})", status);
        if(status != NVJPEG_STATUS_SUCCESS)
        {
            abort_init();
            return;
        }
        created = 2;

        status = nvjpegEncoderStateCreate(nvjpeg_handle, &encoder_state, NULL);
        NVTLOGINFO("screenshoter::screenshoter():nvjpegEncoderStateCreate({})", status);
        if(status != NVJPEG_STATUS_SUCCESS)
        {
            abort_init();
            return;
        }
        created = 3;

        status = nvjpegEncoderParamsCreate(nvjpeg_handle, &encode_params, NULL);
        NVTLOGINFO("screenshoter::screenshoter():nvjpegEncoderParamsCreate({})", status);
        if(status != NVJPEG_STATUS_SUCCESS)
        {
            abort_init();
            return;
        }
        created = 4;

        // A rejected quality or sampling setting is not fatal: the status is logged
        // and the encoder parameters keep whatever value they had.
        status = nvjpegEncoderParamsSetQuality(encode_params, quality, NULL);
        NVTLOGINFO("screenshoter::screenshoter():nvjpegEncoderParamsSetQuality({})", status);

        //status = nvjpegEncoderParamsSetOptimizedHuffman(encode_params, 0, NULL);
        //NVTLOGINFO("screenshoter::screenshoter():nvjpegEncoderParamsSetOptimizedHuffman({})", status);

        status = nvjpegEncoderParamsSetSamplingFactors(encode_params, NVJPEG_CSS_444, NULL);
        NVTLOGINFO("screenshoter::screenshoter():nvjpegEncoderParamsSetSamplingFactors({})", status);
    }

    // Releases the nvJPEG objects owned by this instance. When screenshots are
    // disabled the constructor returns before creating any of them, so there is
    // nothing to destroy in that case.
    screenshoter::~screenshoter(){
        NVTLOGINFO("screenshoter::~screenshoter()");

        if(enebled)
        {
            // Destroy dependents first, the library handle last.
            nvjpegEncoderParamsDestroy(encode_params);
            nvjpegEncoderStateDestroy(encoder_state);
            nvjpegJpegStateDestroy(jpeg_state);
            nvjpegDestroy(nvjpeg_handle);
        }
    }

    bool screenshoter::encode(unsigned char* cuda_buffer,
                              const size_t frame_width,
                              const size_t frame_height,
                              const size_t frame_pitch){

        NVTLOGINFO("screenshoter::encode(): frame_width={}, frame_height={}, frame_pitch={}", frame_width, frame_height, frame_pitch);

        screenshot_height = (double)screenshot_width * (double)frame_height / (double)frame_width;

        cudaDeviceSynchronize();

        size_t buffer_size = frame_width * frame_height * NVJPEG_MAX_COMPONENT;
        unsigned char* rgb_buffer;
        cudaError_t cuda_error = cudaMalloc((void**)&rgb_buffer, buffer_size);
        NVTLOGINFO("screenshoter::encode(): cudaMalloc({})", cuda_error);

        const Npp8u *const 	pSrc[2] = {cuda_buffer, cuda_buffer + frame_width * frame_height};
        NppiSize oSizeROI;
        oSizeROI.width = frame_width;
        oSizeROI.height = frame_height;
        NppStatus npp_status = nppiNV12ToRGB_8u_P2C3R (pSrc, frame_pitch, (Npp8u*)rgb_buffer, frame_width * 3, oSizeROI);

        NVTLOGINFO("screenshoter::encode(): nppiNV12ToRGB_8u_P2C3R({})", npp_status);

        nvjpegImage_t frame =
        {
            {
                rgb_buffer,
                rgb_buffer + frame_width * frame_height,
                rgb_buffer + frame_width * frame_height * 2,
                rgb_buffer + frame_width * frame_height * 3
            },

            {
                (unsigned int)frame_width,
                (unsigned int)frame_width,
                (unsigned int)frame_width,
                (unsigned int)frame_width
            }
        };

        nvjpegStatus_t status = nvjpegEncodeImage(nvjpeg_handle,
                                                  encoder_state,
                                                  encode_params,
                                                  &frame,
                                                  NVJPEG_INPUT_RGB,
                                                  screenshot_width,
                                                  screenshot_height,
                                                  NULL);
        NVTLOGINFO("screenshoter::encode(): nvjpegEncodeImage({})", status);

        std::vector<unsigned char> screenshot;
        size_t length = 0;
        status = nvjpegEncodeRetrieveBitstream(nvjpeg_handle,
                                               encoder_state,
                                               NULL,
                                               &length,
                                               NULL);

        NVTLOGINFO("screenshoter::encode(): nvjpegEncodeRetrieveBitstream({}):length={}", status, length);

        screenshot.resize(length);

        status = nvjpegEncodeRetrieveBitstream(nvjpeg_handle,
                                               encoder_state,
                                               screenshot.data(),
                                               &length,
                                               NULL);

        NVTLOGINFO("screenshoter::encode(): nvjpegEncodeRetrieveBitstream({})", status);
        
        cudaDeviceSynchronize();

        if(save)
        {
            std::string file_name("screenshot_");
            file_name += std::to_string(++index);
            file_name += ".jpeg";
            file_name = directory + file_name;

            std::ofstream output_file(file_name, std::ios::out | std::ios::binary);
            output_file.write((const char*)screenshot.data(), length);
            output_file.close();
        }
        
        cudaFree(rgb_buffer);
        return true;
    }

} /* namespace nvidia */
} /* namespace nvt */

I also tried to pack the NV12 frame into an nvjpegImage_t structure directly and use

// Attempted direct path: hands `frame` to nvjpegEncodeYUV, declared as 4:2:0
// chroma-subsampled data, instead of converting to RGB first.
// NOTE(review): NV12 keeps U and V interleaved in a single plane; presumably
// nvjpegEncodeYUV expects separate Y, U and V planes in `frame` — confirm
// against the nvJPEG documentation before relying on this call for NV12 input.
nvjpegStatus_t status = nvjpegEncodeYUV(nvjpeg_handle,
                                                encoder_state,
                                                encode_params,
                                                &frame,
                                                NVJPEG_CSS_420, // For NV12 encoded frame
                                                screenshot_width,
                                                screenshot_height,
                                                NULL);

but in this case the result is the following:

and I have no idea how to pack it into nvjpegImage_t correctly. I suppose the reason is that pitch=2048 while width=1920, but I don't know how to solve this problem.

Any help would be appreciated!

Solved by using frame_pitch instead of frame_width in all places and converting to RGB, but I still have a question: is it possible to pass NV12 directly to nvjpegEncodeYUV? I was not able to pack the NV12 frame into an nvjpegImage_t structure; I always get color distortion.