/* * jchuff.c * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright (C) 2009-2011, 2014-2016, 2018-2022, D. R. Commander. * Copyright (C) 2015, Matthieu Darbois. * Copyright (C) 2018, Matthias Räncker. * Copyright (C) 2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * * This file contains Huffman entropy encoding routines. * * Much of the complexity here has to do with supporting output suspension. * If the data destination module demands suspension, we want to be able to * back up to the start of the current MCU. To do this, we copy state * variables into local working storage, and update them back to the * permanent JPEG objects only upon successful completion of an MCU. * * NOTE: All referenced figures are from * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. */ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" #include "jsimd.h" #include <limits.h> /* * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be * used for bit counting rather than the lookup table. This will reduce the * memory footprint by 64k, which is important for some mobile applications * that create many isolated instances of libjpeg-turbo (web browsers, for * instance.) This may improve performance on some mobile platforms as well. * This feature is enabled by default only on Arm processors, because some x86 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be * shown to have a significant performance impact even on the x86 chips that * have a fast implementation of it. When building for Armv6, you can * explicitly disable the use of clz/bsr by adding -mthumb to the compiler * flags (this defines __thumb__). */ /* NOTE: Both GCC and Clang define __GNUC__ */ #if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \ defined(_M_ARM) || defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif #endif #ifdef USE_CLZ_INTRINSIC #if defined(_MSC_VER) && !defined(__clang__) #define JPEG_NBITS_NONZERO … #else #define JPEG_NBITS_NONZERO … #endif #define JPEG_NBITS … #else #include "jpeg_nbits_table.h" #define JPEG_NBITS(x) … #define JPEG_NBITS_NONZERO(x) … #endif /* Expanded entropy encoder object for Huffman encoding. * * The savable_state subrecord contains fields that change within an MCU, * but must not be updated permanently until we complete the MCU. */ #if defined(__x86_64__) && defined(__ILP32__) typedef unsigned long long bit_buf_type; #else bit_buf_type; #endif /* NOTE: The more optimal Huffman encoding algorithm is only used by the * intrinsics implementation of the Arm Neon SIMD extensions, which is why we * retain the old Huffman encoder behavior when using the GAS implementation. */ #if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \ defined(_M_ARM) || defined(_M_ARM64)) simd_bit_buf_type; #else typedef bit_buf_type simd_bit_buf_type; #endif #if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \ (defined(__x86_64__) && defined(__ILP32__)) #define BIT_BUF_SIZE … #elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32) #define BIT_BUF_SIZE … #else #error Cannot determine word size #endif #define SIMD_BIT_BUF_SIZE … savable_state; huff_entropy_encoder; huff_entropy_ptr; /* Working state while writing an MCU. * This struct contains all the fields that are needed by subroutines. */ working_state; /* Forward declarations */ METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data); METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo); #ifdef ENTROPY_OPT_SUPPORTED METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data); METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo); #endif /* * Initialize for a Huffman-compressed scan. * If gather_statistics is TRUE, we do not output anything during the scan, * just count the Huffman symbols used and generate Huffman code tables. */ METHODDEF(void) start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics) { … } /* * Compute the derived values for a Huffman table. * This routine also performs some validation checks on the table. * * Note this is also used by jcphuff.c. */ GLOBAL(void) jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno, c_derived_tbl **pdtbl) { … } /* Outputting bytes to the file */ /* Emit a byte, taking 'action' if must suspend. */ #define emit_byte(state, val, action) … LOCAL(boolean) dump_buffer(working_state *state) /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ { … } /* Outputting bits to the file */ /* Output byte b and, speculatively, an additional 0 byte. 0xFF must be * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the * byte is 0xFF. Otherwise, the output buffer pointer is advanced by 1, and * the speculative 0 byte will be overwritten by the next byte. */ #define EMIT_BYTE(b) … /* Output the entire bit buffer. If there are no 0xFF bytes in it, then write * directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to * encode 0xFF as 0xFF 0x00. */ #if BIT_BUF_SIZE == 64 #define FLUSH() … #else #define FLUSH … #endif /* Fill the bit buffer to capacity with the leading bits from code, then output * the bit buffer and put the remaining bits from code into the bit buffer. */ #define PUT_AND_FLUSH(code, size) … /* Insert code into the bit buffer and output the bit buffer if needed. * NOTE: We can't flush with free_bits == 0, since the left shift in * PUT_AND_FLUSH() would have undefined behavior. */ #define PUT_BITS(code, size) … #define PUT_CODE(code, size) … /* Although it is exceedingly rare, it is possible for a Huffman-encoded * coefficient block to be larger than the 128-byte unencoded block. For each * of the 64 coefficients, PUT_BITS is invoked twice, and each invocation can * theoretically store 16 bits (for a maximum of 2048 bits or 256 bytes per * encoded block.) If, for instance, one artificially sets the AC * coefficients to alternating values of 32767 and -32768 (using the JPEG * scanning order-- 1, 8, 16, etc.), then this will produce an encoded block * larger than 200 bytes. */ #define BUFSIZE … #define LOAD_BUFFER() … #define STORE_BUFFER() … LOCAL(boolean) flush_bits(working_state *state) { … } /* Encode a single block's worth of coefficients */ LOCAL(boolean) encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val, c_derived_tbl *dctbl, c_derived_tbl *actbl) { … } LOCAL(boolean) encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val, c_derived_tbl *dctbl, c_derived_tbl *actbl) { … } /* * Emit a restart marker & resynchronize predictions. */ LOCAL(boolean) emit_restart(working_state *state, int restart_num) { … } /* * Encode and output one MCU's worth of Huffman-compressed coefficients. */ METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { … } /* * Finish up at the end of a Huffman-compressed scan. */ METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo) { … } /* * Huffman coding optimization. * * We first scan the supplied data and count the number of uses of each symbol * that is to be Huffman-coded. (This process MUST agree with the code above.) * Then we build a Huffman coding tree for the observed counts. * Symbols which are not needed at all for the particular image are not * assigned any code, which saves space in the DHT marker as well as in * the compressed data. */ #ifdef ENTROPY_OPT_SUPPORTED /* Process a single block's worth of coefficients */ LOCAL(void) htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, long dc_counts[], long ac_counts[]) { … } /* * Trial-encode one MCU's worth of Huffman-compressed coefficients. * No data is actually output, so no suspension return is possible. */ METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { … } /* * Generate the best Huffman code table for the given counts, fill htbl. * Note this is also used by jcphuff.c. * * The JPEG standard requires that no symbol be assigned a codeword of all * one bits (so that padding bits added at the end of a compressed segment * can't look like a valid code). Because of the canonical ordering of * codewords, this just means that there must be an unused slot in the * longest codeword length category. Annex K (Clause K.2) of * Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 suggests reserving such a slot * by pretending that symbol 256 is a valid symbol with count 1. In theory * that's not optimal; giving it count zero but including it in the symbol set * anyway should give a better Huffman code. But the theoretically better code * actually seems to come out worse in practice, because it produces more * all-ones bytes (which incur stuffed zero bytes in the final file). In any * case the difference is tiny. * * The JPEG standard requires Huffman codes to be no more than 16 bits long. * If some symbols have a very small but nonzero probability, the Huffman tree * must be adjusted to meet the code length restriction. We currently use * the adjustment method suggested in JPEG section K.2. This method is *not* * optimal; it may not choose the best possible limited-length code. But * typically only very-low-frequency symbols will be given less-than-optimal * lengths, so the code is almost optimal. Experimental comparisons against * an optimal limited-length-code algorithm indicate that the difference is * microscopic --- usually less than a hundredth of a percent of total size. * So the extra complexity of an optimal algorithm doesn't seem worthwhile. */ GLOBAL(void) jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) { … } /* * Finish up a statistics-gathering pass and create the new Huffman tables. */ METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo) { … } #endif /* ENTROPY_OPT_SUPPORTED */ /* * Module initialization routine for Huffman entropy encoding. */ GLOBAL(void) jinit_huff_encoder(j_compress_ptr cinfo) { … }