Hi,
I am experimenting with lossless compression with nvJPEG for grayscale buffers. Here is the code with lossy encoding and quality 100 that works:
#include <nvjpeg.h>
#include <cuda_runtime.h>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <vector>
// Checks the result of a CUDA runtime call.
//
// The argument is evaluated exactly once and its status is stored in a local,
// so a failing call is never re-issued while the error message is built. On
// failure the file, line and CUDA error string are printed to stderr and the
// process exits with status 1. The do/while(0) wrapper makes the macro behave
// as a single statement (safe inside un-braced if/else).
#define CHECK_CUDA(err)                                               \
  do {                                                                \
    const cudaError_t check_cuda_status_ = (err);                     \
    if (check_cuda_status_ != cudaSuccess) {                          \
      std::cerr << __FILE__ << ":" << __LINE__                        \
                << " - CUDA Error: "                                  \
                << cudaGetErrorString(check_cuda_status_) << '\n';    \
      exit(1);                                                        \
    }                                                                 \
  } while (0)
// Checks the result of an nvJPEG call.
//
// The argument is evaluated exactly once and its status is stored in a local,
// so a failing call is never re-issued while the error message is built. On
// failure the file, line and numeric nvJPEG status are printed to stderr and
// the process exits with status 1. The do/while(0) wrapper makes the macro
// behave as a single statement (safe inside un-braced if/else).
#define CHECK_NVJPEG(err)                                             \
  do {                                                                \
    const nvjpegStatus_t check_nvjpeg_status_ = (err);                \
    if (check_nvjpeg_status_ != NVJPEG_STATUS_SUCCESS) {              \
      std::cerr << __FILE__ << ":" << __LINE__                        \
                << " - nvJPEG Error: " << check_nvjpeg_status_        \
                << '\n';                                              \
      exit(1);                                                        \
    }                                                                 \
  } while (0)
// Reads the entire contents of `filename` as raw bytes.
//
// Returns the bytes read. An empty vector is returned (after printing a
// message to stderr) when the file cannot be opened, when its size cannot be
// determined, or when fewer bytes than expected could be read. An empty file
// also yields an empty vector.
std::vector<uint8_t> readBinaryFile(const std::string& filename) {
  std::ifstream file(filename, std::ios::binary);
  if (!file) {
    std::cerr << "Error opening file: " << filename << '\n';
    return {};
  }

  // Seek to the end to learn the size. tellg() reports -1 on failure, which
  // must not be converted into a (huge) unsigned vector length.
  file.seekg(0, std::ios::end);
  const std::streamoff file_size = file.tellg();
  if (!file || file_size < 0) {
    std::cerr << "Error determining size of file: " << filename << '\n';
    return {};
  }
  file.seekg(0, std::ios::beg);

  std::vector<uint8_t> buffer(
      static_cast<std::vector<uint8_t>::size_type>(file_size));
  if (!buffer.empty()) {
    const std::streamsize wanted = static_cast<std::streamsize>(buffer.size());
    file.read(reinterpret_cast<char*>(buffer.data()), wanted);
    // A short read would otherwise leave zero-filled bytes at the tail.
    if (file.gcount() != wanted) {
      std::cerr << "Error reading file: " << filename << '\n';
      return {};
    }
  }
  // The stream is closed by its destructor.
  return buffer;
}
// Writes `buffer` to `filename` as raw bytes, replacing any existing file.
//
// Nothing is returned; failures are reported on stderr. A message is printed
// when the file cannot be opened and when writing or flushing the data fails.
void writeBinaryFile(const std::string& filename,
                     const std::vector<uint8_t>& buffer) {
  std::ofstream file(filename, std::ios::binary);
  if (!file) {
    std::cerr << "Error opening file: " << filename << '\n';
    return;
  }

  file.write(reinterpret_cast<const char*>(buffer.data()),
             static_cast<std::streamsize>(buffer.size()));
  // Close explicitly so that a failure while flushing buffered data shows up
  // in the stream state instead of being lost in the destructor.
  file.close();
  if (!file) {
    std::cerr << "Error writing file: " << filename << '\n';
  }
}
int devMalloc(void* /*ctx*/, void** p, size_t size, cudaStream_t stream) {
return (int)cudaMallocAsync(p, size, stream);
}
int devFree(void* /*ctx*/, void* p, size_t /*size*/, cudaStream_t stream) {
return (int)cudaFreeAsync(p, stream);
}
int pinnedMalloc(void* /*ctx*/, void** p, size_t size,
cudaStream_t /*stream*/) {
return (int)cudaMallocHost(p, size);
}
int pinnedFree(void* /*ctx*/, void* p, size_t /*size*/,
cudaStream_t /*stream*/) {
return (int)cudaFreeHost(p);
}
// Encodes a raw 8-bit grayscale buffer to JPEG on the GPU with nvJPEG and
// writes the resulting bitstream to disk.
//
// Usage: <program> input_file width height output_file
//   input_file   raw file that must contain exactly width * height bytes
//   width/height image dimensions in pixels (positive integers)
//   output_file  destination for the encoded JPEG bitstream
//
// Returns EXIT_FAILURE for bad arguments or unreadable / wrongly sized input.
// Any CUDA or nvJPEG failure terminates the process through CHECK_CUDA /
// CHECK_NVJPEG.
int main(int argc, char* argv[]) {
  if (argc != 5) {
    std::cout << argv[0] << " input_file width height output_file\n";
    return EXIT_FAILURE;
  }
  const std::string input_file = argv[1];
  const std::string output_file = argv[4];

  // std::atoi yields 0 for non-numeric text and can yield negative values;
  // reject both before they are converted to unsigned sizes.
  const int width_arg = std::atoi(argv[2]);
  const int height_arg = std::atoi(argv[3]);
  if (width_arg <= 0 || height_arg <= 0) {
    std::cerr << "width and height must be positive integers\n";
    return EXIT_FAILURE;
  }
  const size_t width = static_cast<size_t>(width_arg);
  const size_t height = static_cast<size_t>(height_arg);

  const std::vector<uint8_t> input = readBinaryFile(input_file);
  if (input.empty()) {
    return EXIT_FAILURE;
  }
  if (input.size() != height * width) {
    std::cerr << "input.size(): " << input.size() << " != height * width ("
              << height * width << ")\n";
    return EXIT_FAILURE;
  }

  // A null stream handle is used throughout; no dedicated stream is created.
  cudaStream_t stream = nullptr;

  // Events bracketing the encode call, used for timing.
  cudaEvent_t start_ev = nullptr;
  CHECK_CUDA(cudaEventCreateWithFlags(&start_ev, cudaEventDefault));
  cudaEvent_t end_ev = nullptr;
  CHECK_CUDA(cudaEventCreateWithFlags(&end_ev, cudaEventDefault));

  // Allocate the GPU buffer for the input image and upload it.
  unsigned char* d_input = nullptr;
  CHECK_CUDA(
      cudaMallocAsync(&d_input, input.size() * sizeof(uint8_t), stream));
  CHECK_CUDA(cudaMemcpyAsync(d_input, input.data(),
                             input.size() * sizeof(uint8_t),
                             cudaMemcpyHostToDevice, stream));

  // nvJPEG handle using the custom device / pinned allocators.
  // NOTE(review): NVJPEG_BACKEND_LOSSLESS_JPEG appears to be documented as a
  // decoder backend; NVJPEG_BACKEND_DEFAULT may be the intended choice for an
  // encode-only program -- confirm against the nvJPEG documentation.
  nvjpegDevAllocatorV2_t dev_allocator = {&devMalloc, &devFree, nullptr};
  nvjpegPinnedAllocatorV2_t pinned_allocator = {&pinnedMalloc, &pinnedFree,
                                                nullptr};
  nvjpegHandle_t nv_handle = nullptr;
  CHECK_NVJPEG(nvjpegCreateExV2(NVJPEG_BACKEND_LOSSLESS_JPEG, &dev_allocator,
                                &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
                                &nv_handle));
  nvjpegEncoderState_t nv_enc_state = nullptr;
  CHECK_NVJPEG(nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream));
  nvjpegEncoderParams_t nv_enc_params = nullptr;
  CHECK_NVJPEG(nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream));

  // Single plane: channel 0 points at the uploaded image, pitch == width.
  nvjpegImage_t imgdesc = {{d_input, nullptr}, {width, 0}};
  CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(
      nv_enc_params, NVJPEG_CSS_GRAY, stream));

  int optimal = 0;  // no huffman optimization
  // int optimal = 1; // huffman optimization little bit smaller but way slower!
  CHECK_NVJPEG(
      nvjpegEncoderParamsSetOptimizedHuffman(nv_enc_params, optimal, stream));
  int quality = 100;
  CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(nv_enc_params, quality, stream));

  // --------------------- Set encoding -----------------------------
  // OK faster than NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN and smaller files
  CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
      nv_enc_params, NVJPEG_ENCODING_BASELINE_DCT, stream));
  // KO
  // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
  // nv_enc_params, NVJPEG_ENCODING_EXTENDED_SEQUENTIAL_DCT_HUFFMAN,
  // stream));
  // OK
  // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
  // nv_enc_params, NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, stream));
  // KO
  // CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
  // nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));
  // -----------------------------------------------------------------

  // Compress the image, timing only the encode call.
  CHECK_CUDA(cudaEventRecord(start_ev, stream));
  CHECK_NVJPEG(nvjpegEncodeYUV(nv_handle, nv_enc_state, nv_enc_params,
                               &imgdesc, NVJPEG_CSS_GRAY, width_arg,
                               height_arg, stream));
  CHECK_CUDA(cudaEventRecord(end_ev, stream));

  // Query the bitstream size first (null data pointer) instead of assuming it
  // fits in the raw input buffer: a quality-100 JPEG of noisy data can be
  // larger than the uncompressed image.
  size_t encoded_length = 0;
  CHECK_NVJPEG(nvjpegEncodeRetrieveBitstreamDevice(
      nv_handle, nv_enc_state, nullptr, &encoded_length, stream));
  // Make sure encoded_length is final before it is used to size buffers.
  CHECK_CUDA(cudaStreamSynchronize(stream));

  // Retrieve the bitstream into a device buffer of exactly the required size.
  unsigned char* d_output = nullptr;
  CHECK_CUDA(cudaMallocAsync(&d_output, encoded_length, stream));
  CHECK_NVJPEG(nvjpegEncodeRetrieveBitstreamDevice(
      nv_handle, nv_enc_state, d_output, &encoded_length, stream));
  CHECK_CUDA(cudaStreamSynchronize(stream));

  // Download the bitstream and release the device buffers.
  std::vector<uint8_t> output(encoded_length);
  CHECK_CUDA(cudaMemcpyAsync(output.data(), d_output,
                             encoded_length * sizeof(uint8_t),
                             cudaMemcpyDeviceToHost, stream));
  CHECK_CUDA(cudaFreeAsync(d_output, stream));
  CHECK_CUDA(cudaFreeAsync(d_input, stream));
  CHECK_CUDA(cudaStreamSynchronize(stream));

  float duration_ms = 0;
  CHECK_CUDA(cudaEventElapsedTime(&duration_ms, start_ev, end_ev));
  std::cout << "JPEG encode duration: " << duration_ms << " ms / channel\n";

  // cleanup
  CHECK_NVJPEG(nvjpegEncoderParamsDestroy(nv_enc_params));
  CHECK_NVJPEG(nvjpegEncoderStateDestroy(nv_enc_state));
  CHECK_NVJPEG(nvjpegDestroy(nv_handle));
  CHECK_CUDA(cudaEventDestroy(end_ev));
  CHECK_CUDA(cudaEventDestroy(start_ev));

  // write file to disk
  writeBinaryFile(output_file, output);
  return EXIT_SUCCESS;
}
If I instead enable lossless compression with:
CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(
nv_enc_params, NVJPEG_ENCODING_LOSSLESS_HUFFMAN, stream));
I get error 2 (NVJPEG_STATUS_INVALID_PARAMETER) on that line.
Any idea how to enable lossless compression, please?