Grayscale lossless JPEG encoding with nvJPEG

guillaume9 · May 5, 2025, 9:57am

Hi,
I am experimenting with lossless compression with nvJPEG for grayscale buffers. Here is the code with lossy encoding and quality 100 that works:

#include <nvjpeg.h>

#include <cuda_runtime.h>

#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Checks the result of a CUDA runtime call. On failure, prints the source
// location and the CUDA error string to stderr and terminates with status 1.
//
// The argument is evaluated exactly once: it is usually a function call, and
// expanding it a second time for the error message would re-run the failing
// call. The do/while(0) wrapper makes the macro behave like a single statement
// (safe in an unbraced if/else).
#define CHECK_CUDA(err)                                                        \
    do {                                                                       \
        const auto check_cuda_status_ = (err);                                 \
        if (check_cuda_status_ != cudaSuccess) {                               \
            std::cerr << __FILE__ << ":" << __LINE__                           \
                      << " - CUDA Error: "                                     \
                      << cudaGetErrorString(check_cuda_status_) << '\n';       \
            std::exit(1);                                                      \
        }                                                                      \
    } while (0)
// Checks the result of an nvJPEG call. On failure, prints the source location
// and the numeric status code to stderr and terminates with status 1.
//
// The argument is evaluated exactly once so that a failing nvJPEG call is not
// invoked a second time while building the error message. The do/while(0)
// wrapper makes the macro behave like a single statement.
#define CHECK_NVJPEG(err)                                                 \
    do {                                                                  \
        const auto check_nvjpeg_status_ = (err);                          \
        if (check_nvjpeg_status_ != NVJPEG_STATUS_SUCCESS) {              \
            std::cerr << __FILE__ << ":" << __LINE__                      \
                      << " - nvJPEG Error: " << check_nvjpeg_status_      \
                      << '\n';                                            \
            std::exit(1);                                                 \
        }                                                                 \
    } while (0)

/**
 * Reads an entire file into memory as raw bytes.
 *
 * @param filename Path of the file to read.
 * @return The file contents. An empty vector is returned (after printing a
 *         message to stderr) if the file cannot be opened, its size cannot be
 *         determined, or it cannot be read completely. An empty file also
 *         yields an empty vector.
 */
std::vector<uint8_t> readBinaryFile(const std::string& filename) {
    // Open the file in binary mode
    std::ifstream file(filename, std::ios::binary);

    // Check if the file was opened successfully
    if (!file) {
        std::cerr << "Error opening file: " << filename << '\n';
        return {};
    }

    // Get the size of the file
    file.seekg(0, std::ios::end);
    const std::streamoff file_size = file.tellg();
    file.seekg(0, std::ios::beg);

    // tellg() yields -1 on failure; converting that to an unsigned size would
    // request an enormous allocation, so bail out instead.
    if (!file || file_size < 0) {
        std::cerr << "Error determining size of file: " << filename << '\n';
        return {};
    }

    // Create a vector to hold the binary data
    std::vector<uint8_t> buffer(static_cast<std::size_t>(file_size));

    // Read the file into the vector and make sure every byte arrived; a
    // partially filled buffer must not be handed back as if it were complete.
    file.read(reinterpret_cast<char*>(buffer.data()),
              static_cast<std::streamsize>(file_size));
    if (file.gcount() != static_cast<std::streamsize>(file_size)) {
        std::cerr << "Error reading file: " << filename << '\n';
        return {};
    }

    // The stream is closed by its destructor.
    return buffer;
}

/**
 * Writes a byte buffer to a file in binary mode.
 *
 * @param filename Path of the file to create.
 * @param buffer   Bytes to store in the file.
 *
 * If the file cannot be opened, a message is printed to stderr and nothing
 * is written.
 */
void writeBinaryFile(const std::string& filename,
                     const std::vector<uint8_t>& buffer) {
    std::ofstream out(filename, std::ios::binary);

    // Bail out early when the stream could not be opened.
    if (out.fail()) {
        std::cerr << "Error opening file: " << filename << '\n';
        return;
    }

    // The stream API works on char, so view the bytes through a char pointer.
    const char* bytes = reinterpret_cast<const char*>(buffer.data());
    out.write(bytes, buffer.size());

    // The stream flushes and closes itself when it goes out of scope.
}

// Device-memory allocation callback installed in the nvjpegDevAllocatorV2_t
// passed to nvjpegCreateExV2. Allocates `size` bytes with cudaMallocAsync on
// `stream`, stores the pointer in `*p`, and returns the CUDA status as an int.
// The user context argument is not used.
int devMalloc(void* /*ctx*/, void** p, size_t size, cudaStream_t stream) {
    return static_cast<int>(cudaMallocAsync(p, size, stream));
}

// Device-memory release callback installed in the nvjpegDevAllocatorV2_t
// passed to nvjpegCreateExV2. Frees `p` with cudaFreeAsync on `stream` and
// returns the CUDA status as an int. The context and size arguments are not
// used.
int devFree(void* /*ctx*/, void* p, size_t /*size*/, cudaStream_t stream) {
    return static_cast<int>(cudaFreeAsync(p, stream));
}

// Pinned host-memory allocation callback installed in the
// nvjpegPinnedAllocatorV2_t passed to nvjpegCreateExV2. Allocates `size`
// bytes with cudaMallocHost, stores the pointer in `*p`, and returns the CUDA
// status as an int. The context and stream arguments are not used.
int pinnedMalloc(void* /*ctx*/, void** p, size_t size,
                 cudaStream_t /*stream*/) {
    return static_cast<int>(cudaMallocHost(p, size));
}

// Pinned host-memory release callback installed in the
// nvjpegPinnedAllocatorV2_t passed to nvjpegCreateExV2. Frees `p` with
// cudaFreeHost and returns the CUDA status as an int. The context, size and
// stream arguments are not used.
int pinnedFree(void* /*ctx*/, void* p, size_t /*size*/,
               cudaStream_t /*stream*/) {
    return static_cast<int>(cudaFreeHost(p));
}

/**
 * Encodes a raw 8-bit grayscale image to JPEG with nvJPEG and writes the
 * resulting bitstream to disk.
 *
 * Usage: <program> input_file width height output_file
 *
 * input_file must contain exactly width * height bytes (one byte per pixel,
 * rows tightly packed). The time spent in nvjpegEncodeYUV is measured with
 * CUDA events and printed to stdout.
 *
 * @return EXIT_SUCCESS on success, EXIT_FAILURE on bad arguments or unreadable
 *         input. CUDA / nvJPEG failures terminate the process through the
 *         CHECK_* macros.
 */
int main(int argc, char* argv[]) {
    if (argc != 5) {
        std::cout << argv[0] << " input_file width height output_file\n";
        return EXIT_FAILURE;
    }

    const std::string input_file = argv[1];
    const std::string output_file = argv[4];

    // Parse the dimensions as int (the type nvjpegEncodeYUV receives them
    // as) and reject zero / negative / non-numeric values up front: storing
    // a negative atoi() result in a size_t would wrap to a huge value.
    const int width_arg = std::atoi(argv[2]);
    const int height_arg = std::atoi(argv[3]);
    if (width_arg <= 0 || height_arg <= 0) {
        std::cerr << "width and height must be positive integers\n";
        return EXIT_FAILURE;
    }
    const size_t width = static_cast<size_t>(width_arg);
    const size_t height = static_cast<size_t>(height_arg);

    auto input = readBinaryFile(input_file);

    if (input.empty()) {
        return EXIT_FAILURE;
    }

    if (input.size() != height * width) {
        std::cerr << "input.size(): " << input.size() << " != height * width ("
                  << height * width << ")\n";
        return EXIT_FAILURE;
    }

    // All work below is issued on this stream (the null stream).
    cudaStream_t stream = nullptr;

    cudaEvent_t start_ev = nullptr;
    CHECK_CUDA(cudaEventCreateWithFlags(&start_ev, cudaEventDefault));

    cudaEvent_t end_ev = nullptr;
    CHECK_CUDA(cudaEventCreateWithFlags(&end_ev, cudaEventDefault));

    unsigned char* d_input = nullptr;

    // allocate GPU buffer for input image
    CHECK_CUDA(
        cudaMallocAsync(&d_input, input.size() * sizeof(uint8_t), stream));

    // copy input to GPU
    CHECK_CUDA(cudaMemcpyAsync(d_input, input.data(),
                               input.size() * sizeof(uint8_t),
                               cudaMemcpyHostToDevice, stream));

    nvjpegDevAllocatorV2_t dev_allocator = {&devMalloc, &devFree, nullptr};
    nvjpegPinnedAllocatorV2_t pinned_allocator = {&pinnedMalloc, &pinnedFree,
                                                  nullptr};

    // NOTE(review): NVJPEG_BACKEND_LOSSLESS_JPEG appears to be documented as a
    // decoder backend -- confirm it is the intended backend for an
    // encode-only program.
    nvjpegHandle_t nv_handle = nullptr;
    CHECK_NVJPEG(nvjpegCreateExV2(NVJPEG_BACKEND_LOSSLESS_JPEG, &dev_allocator,
                                  &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
                                  &nv_handle));

    nvjpegEncoderState_t nv_enc_state = nullptr;
    CHECK_NVJPEG(nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream));

    nvjpegEncoderParams_t nv_enc_params = nullptr;
    CHECK_NVJPEG(nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream));

    // Single luminance plane; the pitch equals the width because the rows of
    // the input buffer are tightly packed.
    nvjpegImage_t imgdesc = {{d_input, nullptr}, {width, 0}};

    CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(
        nv_enc_params, NVJPEG_CSS_GRAY, stream));

    int optimal = 0;  // no huffman optimization
    // int optimal = 1;  // huffman optimization little bit smaller but way slower!

    CHECK_NVJPEG(
        nvjpegEncoderParamsSetOptimizedHuffman(nv_enc_params, optimal, stream));

    int quality = 100;
    CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(nv_enc_params, quality, stream));

    // --------------------- Set encoding -----------------------------

    // OK faster than NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN and smaller files
    CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
        nv_enc_params, NVJPEG_ENCODING_BASELINE_DCT, stream));

    // KO
    // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    //     nv_enc_params, NVJPEG_ENCODING_EXTENDED_SEQUENTIAL_DCT_HUFFMAN,
    //     stream));

    // OK
    // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    //     nv_enc_params, NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, stream));

    // KO
    // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    //     nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));

    // -----------------------------------------------------------------

    // The two events bracket only the encode call on the stream.
    CHECK_CUDA(cudaEventRecord(start_ev, stream));

    // Compress image
    CHECK_NVJPEG(nvjpegEncodeYUV(nv_handle, nv_enc_state, nv_enc_params,
                                 &imgdesc, NVJPEG_CSS_GRAY, width_arg,
                                 height_arg, stream));

    CHECK_CUDA(cudaEventRecord(end_ev, stream));

    // Ask the encoder how large the bitstream is (data == nullptr) instead of
    // assuming it fits in the input buffer: with the JPEG headers, or with
    // noisy data at quality 100, the stream can be larger than the raw image.
    size_t encoded_length = 0;
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstreamDevice(
        nv_handle, nv_enc_state, nullptr, &encoded_length, stream));

    // Make sure the encode has finished before the reported size is used on
    // the host to size the buffers.
    CHECK_CUDA(cudaStreamSynchronize(stream));

    // Dedicated device buffer for the compressed stream.
    unsigned char* d_output = nullptr;
    CHECK_CUDA(cudaMallocAsync(&d_output, encoded_length, stream));

    // On input encoded_length is the buffer capacity; on return it holds the
    // actual size of the compressed stream.
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstreamDevice(
        nv_handle, nv_enc_state, d_output, &encoded_length, stream));

    CHECK_CUDA(cudaStreamSynchronize(stream));

    std::vector<uint8_t> output(encoded_length);

    CHECK_CUDA(cudaMemcpyAsync(output.data(), d_output,
                               encoded_length * sizeof(uint8_t),
                               cudaMemcpyDeviceToHost, stream));

    CHECK_CUDA(cudaFreeAsync(d_output, stream));
    CHECK_CUDA(cudaFreeAsync(d_input, stream));

    // Wait for the copy back to the host (and both events) to complete.
    CHECK_CUDA(cudaStreamSynchronize(stream));

    float duration_ms = 0;

    CHECK_CUDA(cudaEventElapsedTime(&duration_ms, start_ev, end_ev));

    std::cout << "JPEG encode duration: " << duration_ms << " ms / channel\n";

    // cleanup
    CHECK_NVJPEG(nvjpegEncoderParamsDestroy(nv_enc_params));
    CHECK_NVJPEG(nvjpegEncoderStateDestroy(nv_enc_state));
    CHECK_NVJPEG(nvjpegDestroy(nv_handle));

    CHECK_CUDA(cudaEventDestroy(end_ev));
    CHECK_CUDA(cudaEventDestroy(start_ev));

    // write file to disk
    writeBinaryFile(output_file, output);

    return EXIT_SUCCESS;
}

If instead, I enable lossless compression with :

CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));

I get an error 2 (invalid parameter) on that line.

Any idea on how to activate lossless compression please?

The test was run on an A4500 Ada Generation GPU with CUDA 12.8.

francis.guindon · June 23, 2025, 10:26pm

Hi @guillaume9,

Just a quick question to better understand the issue: do you get the same error with other encoding types, or only with lossless compression?

Regards,
Francis Guindon

Embedded SW Engineer at RidgeRun
Contact us: support@ridgerun.com
Developers wiki: https://developer.ridgerun.com/
Website: http://www.ridgerun.com/

guillaume9 · June 24, 2025, 10:48am

Hi,
Thanks for your interest.

I get an error when I call

CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
    nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));

or

CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
     nv_enc_params, NVJPEG_ENCODING_EXTENDED_SEQUENTIAL_DCT_HUFFMAN,
     stream));

but not the others.

Topic		Replies	Views
Grayscale lossless JPEG encoding with nvJPEG General Topics and Other SDKs	1	69	April 10, 2025
Nvjpeg Encoding method can't set to others General Topics and Other SDKs decoder , encoder , image-processing	1	111	April 10, 2025
nvjpegEncodeImage returns error GPU-Accelerated Libraries	1	943	April 20, 2020
nvJPEG encoding issue GPU-Accelerated Libraries nvjpeg	1	1404	July 31, 2022
NVJPEG_STATUS_EXECUTION_FAILED on a simple JPEG compression example GPU-Accelerated Libraries nvjpeg	5	2079	October 19, 2021
Enable lossless Feature Video Processing & Optical Flow	4	1713	February 15, 2018
nvjpegEncodeImage() in nvJPEG Error#2 GPU-Accelerated Libraries	2	2193	August 29, 2019
Dose TX2 support lossless H.264 encoding? Jetson TX2	4	862	October 18, 2021
Lossless encoding Jetson Xavier NX gstreamer , encoder	2	1371	May 18, 2022
NVJPEG throwing NVJPEG_STATUS_INVALID_PARAMETER Error CUDA Programming and Performance	1	1201	November 11, 2019

Grayscale lossless JPEG encoding with nvJPEG

Related topics