/* * jcdctmgr.c * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB * Copyright (C) 2011, 2014-2015, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * * This file contains the forward-DCT management logic. * This code selects a particular DCT implementation to be used, * and it performs related housekeeping chores including coefficient * quantization. */ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" #include "jdct.h" /* Private declarations for DCT subsystem */ #include "jsimddct.h" /* Private subobject for this module */ forward_DCT_method_ptr; float_DCT_method_ptr; convsamp_method_ptr; float_convsamp_method_ptr; quantize_method_ptr; float_quantize_method_ptr; METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *); my_fdct_controller; my_fdct_ptr; #if BITS_IN_JSAMPLE == 8 /* * Find the highest bit in an integer through binary search. */ LOCAL(int) flss(UINT16 val) { … } /* * Compute values to do a division using reciprocal. * * This implementation is based on an algorithm described in * "How to optimize for the Pentium family of microprocessors" * (http://www.agner.org/assem/). * More information about the basic algorithm can be found in * the paper "Integer Division Using Reciprocals" by Robert Alverson. * * The basic idea is to replace x/d by x * d^-1. In order to store * d^-1 with enough precision we shift it left a few places. 
It turns * out that this algorithm gives just enough precision, and also fits * into DCTELEM: * * b = (the number of significant bits in divisor) - 1 * r = (word size) + b * f = 2^r / divisor * * f will not be an integer for most cases, so we need to compensate * for the rounding error introduced: * * no fractional part: * * result = input >> r * * fractional part of f < 0.5: * * round f down to nearest integer * result = ((input + 1) * f) >> r * * fractional part of f > 0.5: * * round f up to nearest integer * result = (input * f) >> r * * This is the original algorithm that gives truncated results. But we * want properly rounded results, so we replace "input" with * "input + divisor/2". * * In order to allow SIMD implementations we also tweak the values to * allow the same calculation to be made at all times: * * dctbl[0] = f rounded to nearest integer * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5) * dctbl[2] = 1 << ((word size) * 2 - r) * dctbl[3] = r - (word size) * * dctbl[2] is for stupid instruction sets where the shift operation * isn't member-wise (e.g. MMX). * * The reason dctbl[2] and dctbl[3] reduce the shift with (word size) * is that most SIMD implementations have a "multiply and store top * half" operation. * * Lastly, we store each of the values in their own table instead * of in a consecutive manner, yet again in order to allow SIMD * routines. */ LOCAL(int) compute_reciprocal(UINT16 divisor, DCTELEM *dtbl) { … } #endif /* * Initialize for a processing pass. * Verify that all referenced Q-tables are present, and set up * the divisor table for each one. * In the current implementation, DCT of all components is done during * the first pass, even if only some components will be output in the * first scan. Hence all components should be examined here. */ METHODDEF(void) start_pass_fdctmgr(j_compress_ptr cinfo) { … } /* * Load data into workspace, applying unsigned->signed conversion. 
*/ METHODDEF(void) convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) { … } /* * Quantize/descale the coefficients, and store into coef_blocks[]. */ METHODDEF(void) quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { … } /* * Perform forward DCT on one or more blocks of a component. * * The input samples are taken from the sample_data[] array starting at * position start_row/start_col, and moving to the right for any additional * blocks. The quantized coefficients are returned in coef_blocks[]. */ METHODDEF(void) forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks) /* This version is used for integer DCT implementations. */ { … } #ifdef DCT_FLOAT_SUPPORTED METHODDEF(void) convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace) { … } METHODDEF(void) quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace) { … } METHODDEF(void) forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks) /* This version is used for floating-point DCT implementations. */ { … } #endif /* DCT_FLOAT_SUPPORTED */ /* * Initialize FDCT manager. */ GLOBAL(void) jinit_forward_dct(j_compress_ptr cinfo) { … }