xz_dec_lzma2.c | Explore in Territory

/*
 * LZMA2 decoder
 *
 * Authors: Lasse Collin <[email protected]>
 *          Igor Pavlov <https://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"
#include "xz_lzma2.h"

/*
 * Range decoder initialization eats the first five bytes of each LZMA chunk.
 */
#define RC_INIT_BYTES …

/*
 * Minimum number of usable input buffer to safely decode one LZMA symbol.
 * The worst case is that we decode 22 bits using probabilities and 26
 * direct bits. This may decode at maximum of 20 bytes of input. However,
 * lzma_main() does an extra normalization before returning, thus we
 * need to put 21 here.
 */
#define LZMA_IN_REQUIRED …

/*
 * Dictionary (history buffer)
 *
 * These are always true:
 *    start <= pos <= full <= end
 *    pos <= limit <= end
 *
 * In multi-call mode, also these are true:
 *    end == size
 *    size <= size_max
 *    allocated <= size
 *
 * Most of these variables are size_t to support single-call mode,
 * in which the dictionary variables address the actual output
 * buffer directly.
 */
struct dictionary { … };

/* Range decoder */
struct rc_dec { … };

/* Probabilities for a length decoder. */
struct lzma_len_dec { … };

struct lzma_dec { … };

struct lzma2_dec { … };

struct xz_dec_lzma2 { … };

/**************
 * Dictionary *
 **************/

/*
 * Reset the dictionary state. When in single-call mode, set up the beginning
 * of the dictionary to point to the actual output buffer.
 */
static void dict_reset(struct dictionary *dict, struct xz_buf *b)
{ … }

/* Set dictionary write limit */
static void dict_limit(struct dictionary *dict, size_t out_max)
{ … }

/* Return true if at least one byte can be written into the dictionary. */
static inline bool dict_has_space(const struct dictionary *dict)
{ … }

/*
 * Get a byte from the dictionary at the given distance. The distance is
 * assumed to valid, or as a special case, zero when the dictionary is
 * still empty. This special case is needed for single-call decoding to
 * avoid writing a '\0' to the end of the destination buffer.
 */
static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
{ … }

/*
 * Put one byte into the dictionary. It is assumed that there is space for it.
 */
static inline void dict_put(struct dictionary *dict, uint8_t byte)
{ … }

/*
 * Repeat given number of bytes from the given distance. If the distance is
 * invalid, false is returned. On success, true is returned and *len is
 * updated to indicate how many bytes were left to be repeated.
 */
static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
{ … }

/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
			      uint32_t *left)
{ … }

#ifdef XZ_DEC_MICROLZMA
#define DICT_FLUSH_SUPPORTS_SKIPPING …
#else
#define DICT_FLUSH_SUPPORTS_SKIPPING …
#endif

/*
 * Flush pending data from dictionary to b->out. It is assumed that there is
 * enough space in b->out. This is guaranteed because caller uses dict_limit()
 * before decoding data into the dictionary.
 */
static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
{ … }

/*****************
 * Range decoder *
 *****************/

/* Reset the range decoder. */
static void rc_reset(struct rc_dec *rc)
{ … }

/*
 * Read the first five initial bytes into rc->code if they haven't been
 * read already. (Yes, the first byte gets completely ignored.)
 */
static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
{ … }

/* Return true if there may not be enough input for the next decoding loop. */
static inline bool rc_limit_exceeded(const struct rc_dec *rc)
{ … }

/*
 * Return true if it is possible (from point of view of range decoder) that
 * we have reached the end of the LZMA chunk.
 */
static inline bool rc_is_finished(const struct rc_dec *rc)
{ … }

/* Read the next input byte if needed. */
static __always_inline void rc_normalize(struct rc_dec *rc)
{ … }

/*
 * Decode one bit. In some versions, this function has been split in three
 * functions so that the compiler is supposed to be able to more easily avoid
 * an extra branch. In this particular version of the LZMA decoder, this
 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 * on x86). Using a non-split version results in nicer looking code too.
 *
 * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 */
static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
{ … }

/* Decode a bittree starting from the most significant bit. */
static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
					   uint16_t *probs, uint32_t limit)
{ … }

/* Decode a bittree starting from the least significant bit. */
static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
					       uint16_t *probs,
					       uint32_t *dest, uint32_t limit)
{ … }

/* Decode direct bits (fixed fifty-fifty probability) */
static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{ … }

/********
 * LZMA *
 ********/

/* Get pointer to literal coder probability array. */
static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
{ … }

/* Decode a literal (one 8-bit byte) */
static void lzma_literal(struct xz_dec_lzma2 *s)
{ … }

/* Decode the length of the match into s->lzma.len. */
static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
		     uint32_t pos_state)
{ … }

/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{ … }

/*
 * Decode a repeated match. The distance is one of the four most recently
 * seen matches. The distance will be stored in s->lzma.rep0.
 */
static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{ … }

/* LZMA decoder core */
static bool lzma_main(struct xz_dec_lzma2 *s)
{ … }

/*
 * Reset the LZMA decoder and range decoder state. Dictionary is not reset
 * here, because LZMA state may be reset without resetting the dictionary.
 */
static void lzma_reset(struct xz_dec_lzma2 *s)
{ … }

/*
 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 * from the decoded lp and pb values. On success, the LZMA decoder state is
 * reset and true is returned.
 */
static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
{ … }

/*********
 * LZMA2 *
 *********/

/*
 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 * wrapper function takes care of making the LZMA decoder's assumption safe.
 *
 * As long as there is plenty of input left to be decoded in the current LZMA
 * chunk, we decode directly from the caller-supplied input buffer until
 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
 * s->temp.buf, which (hopefully) gets filled on the next call to this
 * function. We decode a few bytes from the temporary buffer so that we can
 * continue decoding from the caller-supplied input buffer again.
 */
static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
{ … }

/*
 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
 * decoding or copying of uncompressed chunks to other functions.
 */
XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
				       struct xz_buf *b)
{ … }

XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
						   uint32_t dict_max)
{ … }

XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
{ … }

XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
{ … }

#ifdef XZ_DEC_MICROLZMA
/* This is a wrapper struct to have a nice struct name in the public API. */
struct xz_dec_microlzma { … };

enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
				 struct xz_buf *b)
{ … }

struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
						uint32_t dict_size)
{ … }

void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
			    uint32_t uncomp_size, int uncomp_size_is_exact)
{ … }

void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
{ … }
#endif
linux/lib/xz/xz_dec_lzma2.c