/* +++ trees.c */ /* trees.c -- output deflated data using Huffman coding * Copyright (C) 1995-1996 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ /* * ALGORITHM * * The "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * * Each code tree is stored in a compressed form which is itself * a Huffman encoding of the lengths of all the code strings (in * ascending order by source values). The actual code strings are * reconstructed from the lengths in the inflate process, as described * in the deflate specification. * * REFERENCES * * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc * * Storer, James A. * Data Compression: Methods and Theory, pp. 49-50. * Computer Science Press, 1988. ISBN 0-7167-8156-5. * * Sedgewick, R. * Algorithms, p290. * Addison-Wesley, 1983. ISBN 0-201-06672-6. */ /* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */ /* #include "deflate.h" */ #include <linux/zutil.h> #include <linux/bitrev.h> #include "defutil.h" #ifdef DEBUG_ZLIB # include <ctype.h> #endif /* =========================================================================== * Constants */ #define MAX_BL_BITS … /* Bit length codes must not exceed MAX_BL_BITS bits */ #define END_BLOCK … /* end of block literal code */ #define REP_3_6 … /* repeat previous bit length 3-6 times (2 bits of repeat count) */ #define REPZ_3_10 … /* repeat a zero length 3-10 times (3 bits of repeat count) */ #define REPZ_11_138 … /* repeat a zero length 11-138 times (7 bits of repeat count) */ static const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = …; static const int extra_dbits[D_CODES] /* extra bits for each distance code */ = …; static const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ = …; static const uch bl_order[BL_CODES] = …; /* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */ /* =========================================================================== * Local data. These are initialized only once. */ static ct_data static_ltree[L_CODES+2]; /* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see zlib_tr_init * below). */ static ct_data static_dtree[D_CODES]; /* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */ static uch dist_code[512]; /* distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */ static uch length_code[MAX_MATCH-MIN_MATCH+1]; /* length code for each normalized match length (0 == MIN_MATCH) */ static int base_length[LENGTH_CODES]; /* First normalized length for each code (0 = MIN_MATCH) */ static int base_dist[D_CODES]; /* First normalized distance for each code (0 = distance of 1) */ struct static_tree_desc_s { … }; static static_tree_desc static_l_desc = …; static static_tree_desc static_d_desc = …; static static_tree_desc static_bl_desc = …; /* =========================================================================== * Local (static) routines in this file. */ static void tr_static_init (void); static void init_block (deflate_state *s); static void pqdownheap (deflate_state *s, ct_data *tree, int k); static void gen_bitlen (deflate_state *s, tree_desc *desc); static void gen_codes (ct_data *tree, int max_code, ush *bl_count); static void build_tree (deflate_state *s, tree_desc *desc); static void scan_tree (deflate_state *s, ct_data *tree, int max_code); static void send_tree (deflate_state *s, ct_data *tree, int max_code); static int build_bl_tree (deflate_state *s); static void send_all_trees (deflate_state *s, int lcodes, int dcodes, int blcodes); static void compress_block (deflate_state *s, ct_data *ltree, ct_data *dtree); static void set_data_type (deflate_state *s); static void bi_flush (deflate_state *s); static void copy_block (deflate_state *s, char *buf, unsigned len, int header); #ifndef DEBUG_ZLIB #define send_code(s, c, tree) … /* Send a code of the given tree. c and tree must not have side effects */ #else /* DEBUG_ZLIB */ #define send_code … #endif #define d_code(dist) … /* Mapping from a distance to a distance code. dist is the distance - 1 and * must not have side effects. dist_code[256] and dist_code[257] are never * used. */ /* =========================================================================== * Initialize the various 'constant' tables. In a multi-threaded environment, * this function may be called by two threads concurrently, but this is * harmless since both invocations do exactly the same thing. */ static void tr_static_init(void) { … } /* =========================================================================== * Initialize the tree data structures for a new zlib stream. */ void zlib_tr_init( deflate_state *s ) { … } /* =========================================================================== * Initialize a new block. */ static void init_block( deflate_state *s ) { … } #define SMALLEST … /* Index within the heap array of least frequent node in the Huffman tree */ /* =========================================================================== * Remove the smallest element from the heap and recreate the heap with * one less element. Updates heap and heap_len. */ #define pqremove(s, tree, top) … /* =========================================================================== * Compares to subtrees, using the tree depth as tie breaker when * the subtrees have equal frequency. This minimizes the worst case length. */ #define smaller(tree, n, m, depth) … /* =========================================================================== * Restore the heap property by moving down the tree starting at node k, * exchanging a node with the smallest of its two sons if necessary, stopping * when the heap property is re-established (each father smaller than its * two sons). */ static void pqdownheap( deflate_state *s, ct_data *tree, /* the tree to restore */ int k /* node to move down */ ) { … } /* =========================================================================== * Compute the optimal bit lengths for a tree and update the total bit length * for the current block. * IN assertion: the fields freq and dad are set, heap[heap_max] and * above are the tree nodes sorted by increasing frequency. * OUT assertions: the field len is set to the optimal bit length, the * array bl_count contains the frequencies for each bit length. * The length opt_len is updated; static_len is also updated if stree is * not null. */ static void gen_bitlen( deflate_state *s, tree_desc *desc /* the tree descriptor */ ) { … } /* =========================================================================== * Generate the codes for a given tree and bit counts (which need not be * optimal). * IN assertion: the array bl_count contains the bit length statistics for * the given tree and the field len is set for all tree elements. * OUT assertion: the field code is set for all tree elements of non * zero code length. */ static void gen_codes( ct_data *tree, /* the tree to decorate */ int max_code, /* largest code with non zero frequency */ ush *bl_count /* number of codes at each bit length */ ) { … } /* =========================================================================== * Construct one Huffman tree and assigns the code bit strings and lengths. * Update the total bit length for the current block. * IN assertion: the field freq is set for all tree elements. * OUT assertions: the fields len and code are set to the optimal bit length * and corresponding code. The length opt_len is updated; static_len is * also updated if stree is not null. The field max_code is set. */ static void build_tree( deflate_state *s, tree_desc *desc /* the tree descriptor */ ) { … } /* =========================================================================== * Scan a literal or distance tree to determine the frequencies of the codes * in the bit length tree. */ static void scan_tree( deflate_state *s, ct_data *tree, /* the tree to be scanned */ int max_code /* and its largest code of non zero frequency */ ) { … } /* =========================================================================== * Send a literal or distance tree in compressed form, using the codes in * bl_tree. */ static void send_tree( deflate_state *s, ct_data *tree, /* the tree to be scanned */ int max_code /* and its largest code of non zero frequency */ ) { … } /* =========================================================================== * Construct the Huffman tree for the bit lengths and return the index in * bl_order of the last bit length code to send. */ static int build_bl_tree( deflate_state *s ) { … } /* =========================================================================== * Send the header for a block using dynamic Huffman trees: the counts, the * lengths of the bit length codes, the literal tree and the distance tree. * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. */ static void send_all_trees( deflate_state *s, int lcodes, /* number of codes for each tree */ int dcodes, /* number of codes for each tree */ int blcodes /* number of codes for each tree */ ) { … } /* =========================================================================== * Send a stored block */ void zlib_tr_stored_block( deflate_state *s, char *buf, /* input block */ ulg stored_len, /* length of input block */ int eof /* true if this is the last block for a file */ ) { … } /* Send just the `stored block' type code without any length bytes or data. */ void zlib_tr_stored_type_only( deflate_state *s ) { … } /* =========================================================================== * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. * The current inflate code requires 9 bits of lookahead. If the * last two codes for the previous block (real code plus EOB) were coded * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode * the last real code. In this case we send two empty static blocks instead * of one. (There are no problems if the previous block is stored or fixed.) * To simplify the code, we assume the worst case of last real code encoded * on one bit only. */ void zlib_tr_align( deflate_state *s ) { … } /* =========================================================================== * Determine the best encoding for the current block: dynamic trees, static * trees or store, and output the encoded block to the zip file. This function * returns the total compressed length for the file so far. */ ulg zlib_tr_flush_block( deflate_state *s, char *buf, /* input block, or NULL if too old */ ulg stored_len, /* length of input block */ int eof /* true if this is the last block for a file */ ) { … } /* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */ int zlib_tr_tally( deflate_state *s, unsigned dist, /* distance of matched string */ unsigned lc /* match length-MIN_MATCH or unmatched char (if dist==0) */ ) { … } /* =========================================================================== * Send the block data compressed using the given Huffman trees */ static void compress_block( deflate_state *s, ct_data *ltree, /* literal tree */ ct_data *dtree /* distance tree */ ) { … } /* =========================================================================== * Set the data type to ASCII or BINARY, using a crude approximation: * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. * IN assertion: the fields freq of dyn_ltree are set and the total of all * frequencies does not exceed 64K (to fit in an int on 16 bit machines). */ static void set_data_type( deflate_state *s ) { … } /* =========================================================================== * Copy a stored block, storing first the length and its * one's complement if requested. */ static void copy_block( deflate_state *s, char *buf, /* the input data */ unsigned len, /* its length */ int header /* true if block header must be written */ ) { … }