/* * LZMA2 decoder * * Authors: Lasse Collin <[email protected]> * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. */ #include "xz_private.h" #include "xz_lzma2.h" /* * Range decoder initialization eats the first five bytes of each LZMA chunk. */ #define RC_INIT_BYTES … /* * Minimum number of usable input buffer to safely decode one LZMA symbol. * The worst case is that we decode 22 bits using probabilities and 26 * direct bits. This may decode at maximum of 20 bytes of input. However, * lzma_main() does an extra normalization before returning, thus we * need to put 21 here. */ #define LZMA_IN_REQUIRED … /* * Dictionary (history buffer) * * These are always true: * start <= pos <= full <= end * pos <= limit <= end * * In multi-call mode, also these are true: * end == size * size <= size_max * allocated <= size * * Most of these variables are size_t to support single-call mode, * in which the dictionary variables address the actual output * buffer directly. */ struct dictionary { … }; /* Range decoder */ struct rc_dec { … }; /* Probabilities for a length decoder. */ struct lzma_len_dec { … }; struct lzma_dec { … }; struct lzma2_dec { … }; struct xz_dec_lzma2 { … }; /************** * Dictionary * **************/ /* * Reset the dictionary state. When in single-call mode, set up the beginning * of the dictionary to point to the actual output buffer. */ static void dict_reset(struct dictionary *dict, struct xz_buf *b) { … } /* Set dictionary write limit */ static void dict_limit(struct dictionary *dict, size_t out_max) { … } /* Return true if at least one byte can be written into the dictionary. */ static inline bool dict_has_space(const struct dictionary *dict) { … } /* * Get a byte from the dictionary at the given distance. The distance is * assumed to valid, or as a special case, zero when the dictionary is * still empty. This special case is needed for single-call decoding to * avoid writing a '\0' to the end of the destination buffer. */ static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist) { … } /* * Put one byte into the dictionary. It is assumed that there is space for it. */ static inline void dict_put(struct dictionary *dict, uint8_t byte) { … } /* * Repeat given number of bytes from the given distance. If the distance is * invalid, false is returned. On success, true is returned and *len is * updated to indicate how many bytes were left to be repeated. */ static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist) { … } /* Copy uncompressed data as is from input to dictionary and output buffers. */ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, uint32_t *left) { … } #ifdef XZ_DEC_MICROLZMA #define DICT_FLUSH_SUPPORTS_SKIPPING … #else #define DICT_FLUSH_SUPPORTS_SKIPPING … #endif /* * Flush pending data from dictionary to b->out. It is assumed that there is * enough space in b->out. This is guaranteed because caller uses dict_limit() * before decoding data into the dictionary. */ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) { … } /***************** * Range decoder * *****************/ /* Reset the range decoder. */ static void rc_reset(struct rc_dec *rc) { … } /* * Read the first five initial bytes into rc->code if they haven't been * read already. (Yes, the first byte gets completely ignored.) */ static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b) { … } /* Return true if there may not be enough input for the next decoding loop. */ static inline bool rc_limit_exceeded(const struct rc_dec *rc) { … } /* * Return true if it is possible (from point of view of range decoder) that * we have reached the end of the LZMA chunk. */ static inline bool rc_is_finished(const struct rc_dec *rc) { … } /* Read the next input byte if needed. */ static __always_inline void rc_normalize(struct rc_dec *rc) { … } /* * Decode one bit. In some versions, this function has been split in three * functions so that the compiler is supposed to be able to more easily avoid * an extra branch. In this particular version of the LZMA decoder, this * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 * on x86). Using a non-split version results in nicer looking code too. * * NOTE: This must return an int. Do not make it return a bool or the speed * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) */ static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob) { … } /* Decode a bittree starting from the most significant bit. */ static __always_inline uint32_t rc_bittree(struct rc_dec *rc, uint16_t *probs, uint32_t limit) { … } /* Decode a bittree starting from the least significant bit. */ static __always_inline void rc_bittree_reverse(struct rc_dec *rc, uint16_t *probs, uint32_t *dest, uint32_t limit) { … } /* Decode direct bits (fixed fifty-fifty probability) */ static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit) { … } /******** * LZMA * ********/ /* Get pointer to literal coder probability array. */ static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s) { … } /* Decode a literal (one 8-bit byte) */ static void lzma_literal(struct xz_dec_lzma2 *s) { … } /* Decode the length of the match into s->lzma.len. */ static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, uint32_t pos_state) { … } /* Decode a match. The distance will be stored in s->lzma.rep0. */ static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) { … } /* * Decode a repeated match. The distance is one of the four most recently * seen matches. The distance will be stored in s->lzma.rep0. */ static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) { … } /* LZMA decoder core */ static bool lzma_main(struct xz_dec_lzma2 *s) { … } /* * Reset the LZMA decoder and range decoder state. Dictionary is not reset * here, because LZMA state may be reset without resetting the dictionary. */ static void lzma_reset(struct xz_dec_lzma2 *s) { … } /* * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks * from the decoded lp and pb values. On success, the LZMA decoder state is * reset and true is returned. */ static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props) { … } /********* * LZMA2 * *********/ /* * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This * wrapper function takes care of making the LZMA decoder's assumption safe. * * As long as there is plenty of input left to be decoded in the current LZMA * chunk, we decode directly from the caller-supplied input buffer until * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into * s->temp.buf, which (hopefully) gets filled on the next call to this * function. We decode a few bytes from the temporary buffer so that we can * continue decoding from the caller-supplied input buffer again. */ static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) { … } /* * Take care of the LZMA2 control layer, and forward the job of actual LZMA * decoding or copying of uncompressed chunks to other functions. */ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b) { … } XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, uint32_t dict_max) { … } XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) { … } XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) { … } #ifdef XZ_DEC_MICROLZMA /* This is a wrapper struct to have a nice struct name in the public API. */ struct xz_dec_microlzma { … }; enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr, struct xz_buf *b) { … } struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, uint32_t dict_size) { … } void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, uint32_t uncomp_size, int uncomp_size_is_exact) { … } void xz_dec_microlzma_end(struct xz_dec_microlzma *s) { … } #endif