/* trees.c -- output deflated data using Huffman coding * Copyright (C) 1995-2024 Jean-loup Gailly * detect_data_type() function provided freely by Cosmin Truta, 2006 * For conditions of distribution and use, see copyright notice in zlib.h */ /* * ALGORITHM * * The "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * * Each code tree is stored in a compressed form which is itself * a Huffman encoding of the lengths of all the code strings (in * ascending order by source values). The actual code strings are * reconstructed from the lengths in the inflate process, as described * in the deflate specification. * * REFERENCES * * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc * * Storer, James A. * Data Compression: Methods and Theory, pp. 49-50. * Computer Science Press, 1988. ISBN 0-7167-8156-5. * * Sedgewick, R. * Algorithms, p290. * Addison-Wesley, 1983. ISBN 0-201-06672-6. */ /* @(#) $Id$ */ /* #define GEN_TREES_H */ #include "deflate.h" #ifdef ZLIB_DEBUG # include <ctype.h> #endif /* =========================================================================== * Constants */ #define MAX_BL_BITS … /* Bit length codes must not exceed MAX_BL_BITS bits */ #define END_BLOCK … /* end of block literal code */ #define REP_3_6 … /* repeat previous bit length 3-6 times (2 bits of repeat count) */ #define REPZ_3_10 … /* repeat a zero length 3-10 times (3 bits of repeat count) */ #define REPZ_11_138 … /* repeat a zero length 11-138 times (7 bits of repeat count) */ local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = …; local const int extra_dbits[D_CODES] /* extra bits for each distance code */ = …; local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ = …; local const uch bl_order[BL_CODES] = …; /* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */ /* =========================================================================== * Local data. These are initialized only once. */ #define DIST_CODE_LEN … #if defined(GEN_TREES_H) || !defined(STDC) /* non ANSI compilers may not accept trees.h */ local ct_data static_ltree[L_CODES+2]; /* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see _tr_init * below). */ local ct_data static_dtree[D_CODES]; /* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */ uch _dist_code[DIST_CODE_LEN]; /* Distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */ uch _length_code[MAX_MATCH-MIN_MATCH+1]; /* length code for each normalized match length (0 == MIN_MATCH) */ local int base_length[LENGTH_CODES]; /* First normalized length for each code (0 = MIN_MATCH) */ local int base_dist[D_CODES]; /* First normalized distance for each code (0 = distance of 1) */ #else # include "trees.h" #endif /* GEN_TREES_H */ struct static_tree_desc_s { … }; #ifdef NO_INIT_GLOBAL_POINTERS #define TCONST #else #define TCONST … #endif local TCONST static_tree_desc static_l_desc = …; local TCONST static_tree_desc static_d_desc = …; local TCONST static_tree_desc static_bl_desc = …; /* =========================================================================== * Output a short LSB first on the stream. * IN assertion: there is enough room in pendingBuf. */ #define put_short(s, w) … /* =========================================================================== * Reverse the first len bits of a code, using straightforward code (a faster * method would use a table) * IN assertion: 1 <= len <= 15 */ local unsigned bi_reverse(unsigned code, int len) { … } /* =========================================================================== * Flush the bit buffer, keeping at most 7 bits in it. */ local void bi_flush(deflate_state *s) { … } /* =========================================================================== * Flush the bit buffer and align the output on a byte boundary */ local void bi_windup(deflate_state *s) { … } /* =========================================================================== * Generate the codes for a given tree and bit counts (which need not be * optimal). * IN assertion: the array bl_count contains the bit length statistics for * the given tree and the field len is set for all tree elements. * OUT assertion: the field code is set for all tree elements of non * zero code length. */ local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) { … } #ifdef GEN_TREES_H local void gen_trees_header(void); #endif #ifndef ZLIB_DEBUG #define send_code(s, c, tree) … /* Send a code of the given tree. c and tree must not have side effects */ #else /* !ZLIB_DEBUG */ #define send_code … #endif /* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */ #ifdef ZLIB_DEBUG local void send_bits(deflate_state *s, int value, int length) { Tracevv((stderr," l %2d v %4x ", length, value)); Assert(length > 0 && length <= 15, "invalid length"); s->bits_sent += (ulg)length; /* If not enough room in bi_buf, use (valid) bits from bi_buf and * (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid)) * unused bits in value. */ if (s->bi_valid > (int)Buf_size - length) { s->bi_buf |= (ush)value << s->bi_valid; put_short(s, s->bi_buf); s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); s->bi_valid += length - Buf_size; } else { s->bi_buf |= (ush)value << s->bi_valid; s->bi_valid += length; } } #else /* !ZLIB_DEBUG */ #define send_bits(s, value, length) … #endif /* ZLIB_DEBUG */ /* the arguments must not have side effects */ /* =========================================================================== * Initialize the various 'constant' tables. */ local void tr_static_init(void) { … } /* =========================================================================== * Generate the file trees.h describing the static trees. */ #ifdef GEN_TREES_H # ifndef ZLIB_DEBUG # include <stdio.h> # endif #define SEPARATOR … void gen_trees_header(void) { FILE *header = fopen("trees.h", "w"); int i; Assert (header != NULL, "Can't open trees.h"); fprintf(header, "/* header created automatically with -DGEN_TREES_H */\n\n"); fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); for (i = 0; i < L_CODES+2; i++) { fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); } fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); for (i = 0; i < D_CODES; i++) { fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); } fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); for (i = 0; i < DIST_CODE_LEN; i++) { fprintf(header, "%2u%s", _dist_code[i], SEPARATOR(i, DIST_CODE_LEN-1, 20)); } fprintf(header, "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { fprintf(header, "%2u%s", _length_code[i], SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); } fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); for (i = 0; i < LENGTH_CODES; i++) { fprintf(header, "%1u%s", base_length[i], SEPARATOR(i, LENGTH_CODES-1, 20)); } fprintf(header, "local const int base_dist[D_CODES] = {\n"); for (i = 0; i < D_CODES; i++) { fprintf(header, "%5u%s", base_dist[i], SEPARATOR(i, D_CODES-1, 10)); } fclose(header); } #endif /* GEN_TREES_H */ /* =========================================================================== * Initialize a new block. */ local void init_block(deflate_state *s) { … } /* =========================================================================== * Initialize the tree data structures for a new zlib stream. */ void ZLIB_INTERNAL _tr_init(deflate_state *s) { … } #define SMALLEST … /* Index within the heap array of least frequent node in the Huffman tree */ /* =========================================================================== * Remove the smallest element from the heap and recreate the heap with * one less element. Updates heap and heap_len. */ #define pqremove(s, tree, top) … /* =========================================================================== * Compares to subtrees, using the tree depth as tie breaker when * the subtrees have equal frequency. This minimizes the worst case length. */ #define smaller(tree, n, m, depth) … /* =========================================================================== * Restore the heap property by moving down the tree starting at node k, * exchanging a node with the smallest of its two sons if necessary, stopping * when the heap property is re-established (each father smaller than its * two sons). */ local void pqdownheap(deflate_state *s, ct_data *tree, int k) { … } /* =========================================================================== * Compute the optimal bit lengths for a tree and update the total bit length * for the current block. * IN assertion: the fields freq and dad are set, heap[heap_max] and * above are the tree nodes sorted by increasing frequency. * OUT assertions: the field len is set to the optimal bit length, the * array bl_count contains the frequencies for each bit length. * The length opt_len is updated; static_len is also updated if stree is * not null. */ local void gen_bitlen(deflate_state *s, tree_desc *desc) { … } #ifdef DUMP_BL_TREE # include <stdio.h> #endif /* =========================================================================== * Construct one Huffman tree and assigns the code bit strings and lengths. * Update the total bit length for the current block. * IN assertion: the field freq is set for all tree elements. * OUT assertions: the fields len and code are set to the optimal bit length * and corresponding code. The length opt_len is updated; static_len is * also updated if stree is not null. The field max_code is set. */ local void build_tree(deflate_state *s, tree_desc *desc) { … } /* =========================================================================== * Scan a literal or distance tree to determine the frequencies of the codes * in the bit length tree. */ local void scan_tree(deflate_state *s, ct_data *tree, int max_code) { … } /* =========================================================================== * Send a literal or distance tree in compressed form, using the codes in * bl_tree. */ local void send_tree(deflate_state *s, ct_data *tree, int max_code) { … } /* =========================================================================== * Construct the Huffman tree for the bit lengths and return the index in * bl_order of the last bit length code to send. */ local int build_bl_tree(deflate_state *s) { … } /* =========================================================================== * Send the header for a block using dynamic Huffman trees: the counts, the * lengths of the bit length codes, the literal tree and the distance tree. * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. */ local void send_all_trees(deflate_state *s, int lcodes, int dcodes, int blcodes) { … } /* =========================================================================== * Send a stored block */ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, ulg stored_len, int last) { … } /* =========================================================================== * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) */ void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) { … } /* =========================================================================== * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. */ void ZLIB_INTERNAL _tr_align(deflate_state *s) { … } /* =========================================================================== * Send the block data compressed using the given Huffman trees */ local void compress_block(deflate_state *s, const ct_data *ltree, const ct_data *dtree) { … } /* =========================================================================== * Check if the data type is TEXT or BINARY, using the following algorithm: * - TEXT if the two conditions below are satisfied: * a) There are no non-portable control characters belonging to the * "block list" (0..6, 14..25, 28..31). * b) There is at least one printable character belonging to the * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). * - BINARY otherwise. * - The following partially-portable control characters form a * "gray list" that is ignored in this detection algorithm: * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). * IN assertion: the fields Freq of dyn_ltree are set. */ local int detect_data_type(deflate_state *s) { … } /* =========================================================================== * Determine the best encoding for the current block: dynamic trees, static * trees or store, and write out the encoded block. */ void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, ulg stored_len, int last) { … } /* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */ int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { … }