Grayscale lossless JPEG encoding with nvJPEG

Hi,
I am experimenting with lossless compression with nvJPEG for grayscale buffers. Here is the code with lossy encoding and quality 100 that works:

#include <nvjpeg.h>

#include <cuda_runtime.h>

#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Aborts the program with a file:line diagnostic when a CUDA runtime call
// does not return cudaSuccess.
//
// The argument is evaluated exactly once and stored in a local, so a failing
// call is not re-executed while the error message is being built. The
// do { ... } while (0) wrapper makes the macro behave like a single statement
// (safe inside un-braced if/else).
#define CHECK_CUDA(err)                                                        \
    do {                                                                       \
        const cudaError_t check_cuda_status_ = (err);                          \
        if (check_cuda_status_ != cudaSuccess) {                               \
            std::cerr << __FILE__ << ":" << __LINE__ << " - CUDA Error: "      \
                      << cudaGetErrorString(check_cuda_status_) << '\n';       \
            std::exit(1);                                                      \
        }                                                                      \
    } while (0)
// Aborts the program with a file:line diagnostic when an nvJPEG call does not
// return NVJPEG_STATUS_SUCCESS. The numeric status code is printed as-is.
//
// The argument is evaluated exactly once (a failing call is not re-executed
// while printing), and the do { ... } while (0) wrapper makes the macro behave
// like a single statement.
#define CHECK_NVJPEG(err)                                                      \
    do {                                                                       \
        const nvjpegStatus_t check_nvjpeg_status_ = (err);                     \
        if (check_nvjpeg_status_ != NVJPEG_STATUS_SUCCESS) {                   \
            std::cerr << __FILE__ << ":" << __LINE__                           \
                      << " - nvJPEG Error: " << check_nvjpeg_status_ << '\n';  \
            std::exit(1);                                                      \
        }                                                                      \
    } while (0)

// Reads the whole file `filename` into memory as raw bytes.
//
// Returns the file contents, or an empty vector when the file cannot be
// opened, its size cannot be determined, or fewer bytes than expected could be
// read. A diagnostic is printed to std::cerr in each failure case. Note that a
// genuinely empty file also yields an empty vector.
std::vector<uint8_t> readBinaryFile(const std::string& filename) {
    // Open the file in binary mode
    std::ifstream file(filename, std::ios::binary);

    // Check if the file was opened successfully
    if (!file) {
        std::cerr << "Error opening file: " << filename << '\n';
        return {};
    }

    // Get the size of the file by seeking to its end
    file.seekg(0, std::ios::end);
    const std::streamoff file_size = file.tellg();
    file.seekg(0, std::ios::beg);

    // tellg() reports -1 on failure; converting that to an unsigned size would
    // request an enormous allocation, so bail out instead.
    if (!file || file_size < 0) {
        std::cerr << "Error determining size of file: " << filename << '\n';
        return {};
    }

    // Create a vector to hold the binary data
    std::vector<uint8_t> buffer(static_cast<size_t>(file_size));

    if (!buffer.empty()) {
        // Read the file into the vector and make sure every byte arrived;
        // otherwise the tail of the buffer would silently stay zero-filled.
        file.read(reinterpret_cast<char*>(buffer.data()), file_size);
        if (file.gcount() != file_size) {
            std::cerr << "Error reading file: " << filename << '\n';
            return {};
        }
    }

    // The stream is closed by its destructor.
    return buffer;
}

// Writes `buffer` to the file `filename` as raw bytes, replacing any existing
// content.
//
// Failures to open, write, or close the file are reported on std::cerr; the
// function returns normally in every case.
void writeBinaryFile(const std::string& filename,
                     const std::vector<uint8_t>& buffer) {
    // Open the file in binary mode
    std::ofstream file(filename, std::ios::binary);

    // Check if the file was opened successfully
    if (!file) {
        std::cerr << "Error opening file: " << filename << '\n';
        return;
    }

    // Write the file
    file.write(reinterpret_cast<const char*>(buffer.data()),
               static_cast<std::streamsize>(buffer.size()));

    // Close explicitly so that flush errors (e.g. disk full) are visible in
    // the stream state before it is checked.
    file.close();

    if (!file) {
        std::cerr << "Error writing file: " << filename << '\n';
    }
}

int devMalloc(void*  /*ctx*/, void** p, size_t size, cudaStream_t stream) {
    return (int)cudaMallocAsync(p, size, stream);
}

int devFree(void* /*ctx*/, void* p, size_t /*size*/, cudaStream_t stream) {
    return (int)cudaFreeAsync(p, stream);
}

int pinnedMalloc(void* /*ctx*/, void** p, size_t size,
                 cudaStream_t /*stream*/) {
    return (int)cudaMallocHost(p, size);
}

int pinnedFree(void* /*ctx*/, void* p, size_t /*size*/,
               cudaStream_t /*stream*/) {
    return (int)cudaFreeHost(p);
}

// Encodes a raw 8-bit single-channel (grayscale) image to JPEG with nvJPEG
// and reports the GPU time spent in the encode call.
//
// Usage: <program> input_file width height output_file
//   input_file  : raw buffer of exactly width * height bytes
//   width/height: positive image dimensions in pixels
//   output_file : destination for the encoded JPEG bitstream
//
// Returns EXIT_FAILURE on bad arguments or unreadable/mis-sized input; any
// CUDA or nvJPEG error terminates the process through CHECK_CUDA /
// CHECK_NVJPEG.
int main(int argc, char* argv[]) {
    if (argc != 5) {
        std::cout << argv[0] << " input_file width height output_file\n";
        return EXIT_FAILURE;
    }

    std::string input_file = argv[1];
    std::string output_file = argv[4];

    // nvjpegEncodeYUV takes int dimensions, so parse as int and reject
    // non-positive values before widening to size_t (a negative value would
    // otherwise wrap around to a huge unsigned number).
    const int image_width = std::atoi(argv[2]);
    const int image_height = std::atoi(argv[3]);
    if (image_width <= 0 || image_height <= 0) {
        std::cerr << "width and height must be positive integers\n";
        return EXIT_FAILURE;
    }
    const size_t width = static_cast<size_t>(image_width);
    const size_t height = static_cast<size_t>(image_height);

    auto input = readBinaryFile(input_file);

    if (input.empty()) {
        return EXIT_FAILURE;
    }

    if (input.size() != height * width) {
        std::cerr << "input.size(): " << input.size() << " != height * width ("
                  << height * width << ")\n";
        return EXIT_FAILURE;
    }

    // All work is issued on the default (null) stream.
    cudaStream_t stream = nullptr;

    cudaEvent_t start_ev = nullptr;
    CHECK_CUDA(cudaEventCreateWithFlags(&start_ev, cudaEventDefault));

    cudaEvent_t end_ev = nullptr;
    CHECK_CUDA(cudaEventCreateWithFlags(&end_ev, cudaEventDefault));

    unsigned char* d_input = nullptr;

    // allocate GPU buffer for input image
    CHECK_CUDA(
        cudaMallocAsync(&d_input, input.size() * sizeof(uint8_t), stream));

    // copy input to GPU
    CHECK_CUDA(cudaMemcpyAsync(d_input, input.data(),
                               input.size() * sizeof(uint8_t),
                               cudaMemcpyHostToDevice, stream));

    nvjpegDevAllocatorV2_t dev_allocator = {&devMalloc, &devFree, nullptr};
    nvjpegPinnedAllocatorV2_t pinned_allocator = {&pinnedMalloc, &pinnedFree,
                                                  nullptr};

    // NOTE(review): NVJPEG_BACKEND_LOSSLESS_JPEG appears to be a decoder
    // backend selection; it is not established here that it enables lossless
    // *encoding* -- confirm against the nvJPEG documentation.
    nvjpegHandle_t nv_handle = nullptr;
    CHECK_NVJPEG(nvjpegCreateExV2(NVJPEG_BACKEND_LOSSLESS_JPEG, &dev_allocator,
                                  &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
                                  &nv_handle));

    nvjpegEncoderState_t nv_enc_state = nullptr;
    CHECK_NVJPEG(nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream));

    nvjpegEncoderParams_t nv_enc_params = nullptr;
    CHECK_NVJPEG(nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream));

    // Single plane, tightly packed: the row pitch equals the image width.
    nvjpegImage_t imgdesc = {{d_input, nullptr}, {width, 0}};

    CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(
        nv_enc_params, NVJPEG_CSS_GRAY, stream));

    int optimal = 0;  // no huffman optimization
    // int optimal = 1;  // huffman optimization little bit smaller but way slower!

    CHECK_NVJPEG(
        nvjpegEncoderParamsSetOptimizedHuffman(nv_enc_params, optimal, stream));

    int quality = 100;
    CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(nv_enc_params, quality, stream));

    // --------------------- Set encoding -----------------------------
    // "OK"/"KO" record whether nvjpegEncoderParamsSetEncoding accepted the
    // value in the author's tests.
    // NOTE(review): the rejected values may simply be unsupported by the
    // nvJPEG encoder -- confirm the supported list in the nvJPEG docs.

    // OK faster than NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN and smaller files
    CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
        nv_enc_params, NVJPEG_ENCODING_BASELINE_DCT, stream));

    // KO
    // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    //     nv_enc_params, NVJPEG_ENCODING_EXTENDED_SEQUENTIAL_DCT_HUFFMAN,
    //     stream));

    // OK
    // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    //     nv_enc_params, NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, stream));

    // KO
    // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    //     nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));

    // -----------------------------------------------------------------

    CHECK_CUDA(cudaEventRecord(start_ev, stream));

    // Compress image
    CHECK_NVJPEG(nvjpegEncodeYUV(nv_handle, nv_enc_state, nv_enc_params,
                                 &imgdesc, NVJPEG_CSS_GRAY, image_width,
                                 image_height, stream));

    CHECK_CUDA(cudaEventRecord(end_ev, stream));

    // Ask the encoder for the compressed size first (null data pointer).
    // The bitstream is NOT guaranteed to fit in the input buffer: at quality
    // 100 a JPEG can be larger than the raw image, so reusing d_input with
    // input.size() as capacity could make the retrieval fail.
    size_t encoded_length = 0;
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstreamDevice(
        nv_handle, nv_enc_state, nullptr, &encoded_length, stream));

    // Make sure the encode has finished before the size is relied upon.
    CHECK_CUDA(cudaStreamSynchronize(stream));

    // Dedicated device buffer of exactly the reported size.
    unsigned char* d_output = nullptr;
    CHECK_CUDA(cudaMallocAsync(&d_output, encoded_length * sizeof(uint8_t),
                               stream));

    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstreamDevice(
        nv_handle, nv_enc_state, d_output, &encoded_length, stream));

    std::vector<uint8_t> output(encoded_length);

    CHECK_CUDA(cudaMemcpyAsync(output.data(), d_output,
                               encoded_length * sizeof(uint8_t),
                               cudaMemcpyDeviceToHost, stream));

    CHECK_CUDA(cudaFreeAsync(d_output, stream));
    CHECK_CUDA(cudaFreeAsync(d_input, stream));

    // The host copy of the bitstream and both events are complete after this.
    CHECK_CUDA(cudaStreamSynchronize(stream));

    float duration_ms = 0;

    CHECK_CUDA(cudaEventElapsedTime(&duration_ms, start_ev, end_ev));

    std::cout << "JPEG encode duration: " << duration_ms << " ms / channel\n";

    // cleanup
    CHECK_NVJPEG(nvjpegEncoderParamsDestroy(nv_enc_params));
    CHECK_NVJPEG(nvjpegEncoderStateDestroy(nv_enc_state));
    CHECK_NVJPEG(nvjpegDestroy(nv_handle));

    CHECK_CUDA(cudaEventDestroy(end_ev));
    CHECK_CUDA(cudaEventDestroy(start_ev));

    // write file to disk
    writeBinaryFile(output_file, output);

    return EXIT_SUCCESS;
}

If I instead enable lossless compression with:

CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));

I get an error 2 (invalid parameter) on that line.

Any idea on how to activate lossless compression please?

Tests were run on an A4500 Ada Generation GPU with CUDA 12.8.

Hi @guillaume9,

Just a quick question to better understand the issue: do you get the same error with other encoding types, or only with lossless compression?

Regards,
Francis Guindon

Embedded SW Engineer at RidgeRun
Contact us: support@ridgerun.com
Developers wiki: https://developer.ridgerun.com/
Website: http://www.ridgerun.com/

Hi,
Thanks for your interest.

I get an error when I call

CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));

or

CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
     nv_enc_params, NVJPEG_ENCODING_EXTENDED_SEQUENTIAL_DCT_HUFFMAN,
     stream));

but not the others.