JPEG Compression using NPP Proof of Concept

PapaSmurf007 · January 28, 2011, 10:17pm

I had some trouble getting the NPP JPEG-compression functions to work because of bugs and vague documentation, so here is some proof-of-concept code that may save you some time:

/*

 The MIT License

Copyright (c) 2010 Stephen Rhein

Permission is hereby granted, free of charge, to any person obtaining a copy

 of this software and associated documentation files (the "Software"), to deal

 in the Software without restriction, including without limitation the rights

 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

 copies of the Software, and to permit persons to whom the Software is

 furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in

 all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

 THE SOFTWARE.

 */

/*

 * This example proof of concept has been derived from a comparable example

 * from the Intel Performance Primitives documentation at the following link:

 * http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/ippxe/ipp_manual_lnx/IPPI/ippi_ch15/functn_DCTQuantFwd8x8LS_JPEG.htm#ex15-4

 */

#include <npp.h>

#include <cuda_runtime.h>

#include <Exceptions.h>

#include <stdio.h>

#include <stdlib.h>

/*
 * Input of the forward DCT: one 8x8 block of 8-bit samples, stored row by
 * row. The values form concentric rings, 4 on the border down to 1 in the
 * centre.
 */
const Npp8u src[64] = {
	4, 4, 4, 4, 4, 4, 4, 4,
	4, 3, 3, 3, 3, 3, 3, 4,
	4, 3, 2, 2, 2, 2, 3, 4,
	4, 3, 2, 1, 1, 2, 3, 4,
	4, 3, 2, 1, 1, 2, 3, 4,
	4, 3, 2, 2, 2, 2, 3, 4,
	4, 3, 3, 3, 3, 3, 3, 4,
	4, 4, 4, 4, 4, 4, 4, 4
};

/*
 * Raw quantization table (64 entries). main() hands it to
 * nppiQuantFwdRawTableInit_JPEG_8u together with the quality factor, so it
 * is deliberately left non-const. main() then reads entry i as the i'th
 * element in zigzag order when it builds the forward and inverse tables.
 */
Npp8u pQuantRawTable[64] = {
	 16,  11,  12,  14,  12,  10,  16,  14,
	 13,  14,  18,  17,  16,  19,  24,  40,
	 26,  24,  22,  22,  24,  49,  35,  37,
	 29,  40,  58,  51,  61,  60,  57,  51,
	 56,  55,  64,  72,  92,  78,  64,  68,
	 87,  69,  55,  56,  80, 109,  81,  87,
	 95,  98, 103, 104, 103,  62,  77, 113,
	121, 112, 100, 120,  92, 101, 103,  99
};

/* Quality factor applied to the raw quantization table. */
const int quality = 75;

/*
 * Fixed-point scale (2^15). Each forward quantization entry is stored as
 * scale / q, rounded to nearest, so that quantization can avoid a division.
 */
const int scale = 1 << 15;

/*
 * Zigzag-to-natural index map: entry i holds the natural-order (row-major)
 * position of the i'th element in zigzag order.
 */
const int convert_zigzag2natural[64] = {
	 0,  1,  8, 16,  9,  2,  3, 10,
	17, 24, 32, 25, 18, 11,  4,  5,
	12, 19, 26, 33, 40, 48, 41, 34,
	27, 20, 13,  6,  7, 14, 21, 28,
	35, 42, 49, 56, 57, 50, 43, 36,
	29, 22, 15, 23, 30, 37, 44, 51,
	58, 59, 52, 45, 38, 31, 39, 46,
	53, 60, 61, 54, 47, 55, 62, 63
};

int main() {

	try {

		Npp16u pQuantFwdTable[64];

		Npp16u pQuantInvTable[64];

		Npp16s dstDCT[64];

		Npp8u dstIDCT[64];

		NppiSize invDCTroi;

		NppiSize fwdDCTroi;

		Npp8u *devSrc;

		Npp16s *devDstDCT;

		Npp8u *devDstIDCT;

		Npp16u *devPQuantFwdTable;

		Npp16u* devPQuantInvTable;

		//Allocate device memory and initialize variables

		NPP_CHECK_CUDA(cudaMalloc(&devSrc,64 * sizeof(Npp8u)));

		NPP_CHECK_CUDA(cudaMalloc(&devDstIDCT,64 * sizeof(Npp8u)));

		NPP_CHECK_CUDA(cudaMalloc(&devDstDCT,64 * sizeof(Npp16s)));

		NPP_CHECK_CUDA(cudaMalloc(&devPQuantFwdTable,64 * sizeof(Npp16u)));

		NPP_CHECK_CUDA(cudaMalloc(&devPQuantInvTable,64 * sizeof(Npp16u)));

		//Forward DCT regions of interest are pixel based

		fwdDCTroi.height = 8;

		fwdDCTroi.width = 8;

		//Inverse DCT regions of interest are coefficient based

		invDCTroi.height = 1;

		invDCTroi.width = 64;

		//Transform raw quantization table according to quality factor

		NPP_CHECK_NPP(nppiQuantFwdRawTableInit_JPEG_8u(pQuantRawTable, quality));

		//The function below has a bug, but the loop below achieves the correct result.

		//NPP_CHECK_NPP(nppiQuantFwdTableInit_JPEG_8u16u(pQuantRawTable, pQuantFwdTable));

		for (int i = 0; i < 64; ++i) {

			pQuantFwdTable[convert_zigzag2natural[i]] = (scale

					/ (double) pQuantRawTable[i]) + 0.5;

		}

		NPP_CHECK_CUDA(cudaMemcpy(devPQuantFwdTable, pQuantFwdTable,64 * sizeof(Npp16u),cudaMemcpyHostToDevice));

		//The function below has a bug, but the loop below achieves the correct result.

		//NPP_CHECK_NPP(nppiQuantInvTableInit_JPEG_8u16u(pQuantRawTable, pQuantInvTable));

		for (int i = 0; i < 64; ++i) {

			pQuantInvTable[convert_zigzag2natural[i]] = pQuantRawTable[i];

		}

		NPP_CHECK_CUDA(cudaMemcpy(devPQuantInvTable, pQuantInvTable,64 * sizeof(Npp16u),cudaMemcpyHostToDevice));

		//Copy src to device and begin transformations

		NPP_CHECK_CUDA(cudaMemcpy(devSrc,src,64 * sizeof(Npp8u),cudaMemcpyHostToDevice));

		//Perform forward DCT

		NPP_CHECK_NPP(nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R(devSrc, 8

						* sizeof(Npp8u), devDstDCT, 64 * sizeof(Npp16s), devPQuantFwdTable,

						fwdDCTroi));

		NPP_CHECK_CUDA(cudaMemcpy(dstDCT,devDstDCT,64 * sizeof(Npp16s),cudaMemcpyDeviceToHost));

		//Perform inverse DCT

		NPP_CHECK_NPP(nppiDCTQuantInv8x8LS_JPEG_16s8u_C1R(devDstDCT, 64

						* sizeof(Npp16s), devDstIDCT, 8 * sizeof(Npp8u), devPQuantInvTable,

						invDCTroi));

		NPP_CHECK_CUDA(cudaMemcpy(dstIDCT,devDstIDCT,64 * sizeof(Npp8u),cudaMemcpyDeviceToHost));

		//PRINT RESULTS

		puts("src");

		for (int i = 0; i < 8; ++i) {

			for (int j = 0; j < 8; ++j) {

				printf("%4u", src[i * 8 + j]);

			}

			puts("");

		}

		puts("");

		puts("pQuantRawTable (with quality)");

		for (int i = 0; i < 8; ++i) {

			for (int j = 0; j < 8; ++j) {

				printf("%4u", pQuantRawTable[i * 8 + j]);

			}

			puts("");

		}

		puts("");

		puts("pQuantFwdTable");

		for (int i = 0; i < 8; ++i) {

			for (int j = 0; j < 8; ++j) {

				printf("%6u", pQuantFwdTable[i * 8 + j]);

			}

			puts("");

		}

		puts("");

		puts("pQuantInvTable");

		for (int i = 0; i < 8; ++i) {

			for (int j = 0; j < 8; ++j) {

				printf("%6u", pQuantInvTable[i * 8 + j]);

			}

			puts("");

		}

		puts("");

		puts("dstDCT");

		for (int i = 0; i < 8; ++i) {

			for (int j = 0; j < 8; ++j) {

				printf("%7d", dstDCT[i * 8 + j]);

			}

			puts("");

		}

		puts("");

		puts("dstIDCT");

		for (int i = 0; i < 8; ++i) {

			for (int j = 0; j < 8; ++j) {

				printf("%4d", dstIDCT[i * 8 + j]);

			}

			puts("");

		}

		puts("Lossyness");

		for (int i = 0; i < 8; ++i) {

			for (int j = 0; j < 8; ++j) {

				printf("%5d", src[i * 8 + j] - dstIDCT[i * 8 + j]);

			}

			puts("");

		}

	} catch (npp::Exception e) {

		printf("%s\n", e.toString().c_str());

		return EXIT_FAILURE;

	}

	return EXIT_SUCCESS;

}

gearoid.p.murphy · September 25, 2018, 7:54pm

Thanks so much for documenting your work! :)

PapaSmurf007 · November 8, 2018, 4:14am

My pleasure, I’m glad it was still useful after 7.5 years :)

malikanhar · July 10, 2019, 6:45am

Hi PapaSmurf007, is it possible for me to do JPEG encoding in C? I have already converted an RGB buffer to a YUV buffer, and now I want to encode the YUV buffer as JPEG so I can save the image. Thanks in advance.

Topic		Replies	Views
JPEG Compression in CUDA CUDA Programming and Performance	2	2110	December 29, 2009
How to use NPP library for JPEG encoding? CUDA Programming and Performance	0	8854	December 22, 2010
How to use NPP? CUDA Programming and Performance	0	3631	December 8, 2010
lossless JPEG fast decompression on CUDA CUDA Programming and Performance	7	17015	May 3, 2012
nppi to encode a jpeg file CUDA Programming and Performance	1	869	March 9, 2018
NPP 3.2 JPEG Forward Quantization Problem Inconsistencies between demo code on IPP and NPP CUDA Programming and Performance	5	5448	July 14, 2011
NPP JPEG Compression problem nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R parameters CUDA Programming and Performance	1	8391	April 6, 2011
Error! Jpeg Compression using NPP CUDA Programming and Performance	4	10218	January 13, 2010
How to use NPP? use NPP library to save file as jpep GPU-Accelerated Libraries	1	850	July 10, 2019
NPP support encode JPEG progressive? GPU-Accelerated Libraries	0	450	April 9, 2018

JPEG Compression using NPP Proof of Concept

Related topics