// SPDX-License-Identifier: GPL-2.0-or-later /* * lzx_decompress.c - A decompressor for the LZX compression format, which can * be used in "System Compressed" files. This is based on the code from wimlib. * This code only supports a window size (dictionary size) of 32768 bytes, since * this is the only size used in System Compression. * * Copyright (C) 2015 Eric Biggers */ #include "decompress_common.h" #include "lib.h" /* Number of literal byte values */ #define LZX_NUM_CHARS … /* The smallest and largest allowed match lengths */ #define LZX_MIN_MATCH_LEN … #define LZX_MAX_MATCH_LEN … /* Number of distinct match lengths that can be represented */ #define LZX_NUM_LENS … /* Number of match lengths for which no length symbol is required */ #define LZX_NUM_PRIMARY_LENS … #define LZX_NUM_LEN_HEADERS … /* Valid values of the 3-bit block type field */ #define LZX_BLOCKTYPE_VERBATIM … #define LZX_BLOCKTYPE_ALIGNED … #define LZX_BLOCKTYPE_UNCOMPRESSED … /* Number of offset slots for a window size of 32768 */ #define LZX_NUM_OFFSET_SLOTS … /* Number of symbols in the main code for a window size of 32768 */ #define LZX_MAINCODE_NUM_SYMBOLS … /* Number of symbols in the length code */ #define LZX_LENCODE_NUM_SYMBOLS … /* Number of symbols in the precode */ #define LZX_PRECODE_NUM_SYMBOLS … /* Number of bits in which each precode codeword length is represented */ #define LZX_PRECODE_ELEMENT_SIZE … /* Number of low-order bits of each match offset that are entropy-encoded in * aligned offset blocks */ #define LZX_NUM_ALIGNED_OFFSET_BITS … /* Number of symbols in the aligned offset code */ #define LZX_ALIGNEDCODE_NUM_SYMBOLS … /* Mask for the match offset bits that are entropy-encoded in aligned offset * blocks */ #define LZX_ALIGNED_OFFSET_BITMASK … /* Number of bits in which each aligned offset codeword length is represented */ #define LZX_ALIGNEDCODE_ELEMENT_SIZE … /* Maximum lengths (in bits) of the codewords in each Huffman code */ #define LZX_MAX_MAIN_CODEWORD_LEN … #define 
LZX_MAX_LEN_CODEWORD_LEN … #define LZX_MAX_PRE_CODEWORD_LEN … #define LZX_MAX_ALIGNED_CODEWORD_LEN … /* The default "filesize" value used in pre/post-processing. In the LZX format * used in cabinet files this value must be given to the decompressor, whereas * in the LZX format used in WIM files and system-compressed files this value is * fixed at 12000000. */ #define LZX_DEFAULT_FILESIZE … /* Assumed block size when the encoded block size begins with a 0 bit. */ #define LZX_DEFAULT_BLOCK_SIZE … /* Number of offsets in the recent (or "repeat") offsets queue. */ #define LZX_NUM_RECENT_OFFSETS … /* These values are chosen for fast decompression. */ #define LZX_MAINCODE_TABLEBITS … #define LZX_LENCODE_TABLEBITS … #define LZX_PRECODE_TABLEBITS … #define LZX_ALIGNEDCODE_TABLEBITS … #define LZX_READ_LENS_MAX_OVERRUN … /* Mapping: offset slot => first match offset that uses that offset slot. */ static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = …; /* Mapping: offset slot => how many extra bits must be read and added to the * corresponding offset slot base to decode the match offset. */ static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = …; /* Reusable heap-allocated memory for LZX decompression */ struct lzx_decompressor { … }; /* * NOTE(review): judging by its name and by the comment on lzx_postprocess(), * this presumably reverses the 'E8' translation for the value stored at * @target, with @input_pos being the position of that value in the data -- * confirm against the function body. */ static void undo_e8_translation(void *target, s32 input_pos) { … } /* * Undo the 'E8' preprocessing used in LZX. Before compression, the * uncompressed data was preprocessed by changing the targets of suspected x86 * CALL instructions from relative offsets to absolute offsets. After * match/literal decoding, the decompressor must undo the translation. */ static void lzx_postprocess(u8 *data, u32 size) { … } /* Read a Huffman-encoded symbol using the precode. */ static forceinline u32 read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) { … } /* Read a Huffman-encoded symbol using the main code. 
*/ static forceinline u32 read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) { … } /* Read a Huffman-encoded symbol using the length code. */ static forceinline u32 read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) { … } /* Read a Huffman-encoded symbol using the aligned offset code. */ static forceinline u32 read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is) { … } /* * Read the precode from the compressed input bitstream, then use it to decode * @num_lens codeword length values. * * @d: The LZX decompressor (struct lzx_decompressor) to operate on. * * @is: The input bitstream. * * @lens: An array that contains the length values from the previous time * the codeword lengths for this Huffman code were read, or all 0's * if this is the first time. This array must have at least * (@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries. * * @num_lens: Number of length values to decode. * * Returns 0 on success, or -1 if the data was invalid. */ static int lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is, u8 *lens, u32 num_lens) { … } /* * Read the header of an LZX block from the input bitstream @is and save the * block type and (uncompressed) size in *block_type_ret and *block_size_ret, * respectively. * * If the block is compressed, also update the Huffman decode tables with the * new Huffman codes (NOTE(review): the tables are presumably the ones held in * @d -- confirm). If the block is uncompressed, also update the recent match * offsets queue @recent_offsets with the new match offsets. * * Return 0 on success, or -1 if the data was invalid. */ static int lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is, int *block_type_ret, u32 *block_size_ret, u32 recent_offsets[]) { … } /* Decompress a block of LZX-compressed data. 
*/ static int lzx_decompress_block(const struct lzx_decompressor *d, struct input_bitstream *is, int block_type, u32 block_size, u8 * const out_begin, u8 *out_next, u32 recent_offsets[]) { … } /* * lzx_allocate_decompressor - Allocate an LZX decompressor * * Return the pointer to the decompressor on success, or return NULL and set * errno on failure. */ struct lzx_decompressor *lzx_allocate_decompressor(void) { … } /* * lzx_decompress - Decompress a buffer of LZX-compressed data * * @decompressor: A decompressor allocated with lzx_allocate_decompressor() * @compressed_data: The buffer of data to decompress * @compressed_size: Number of bytes of compressed data * @uncompressed_data: The buffer in which to store the decompressed data * @uncompressed_size: The number of bytes the data decompresses into * * Return 0 on success, or return -1 and set errno on failure. */ int lzx_decompress(struct lzx_decompressor *decompressor, const void *compressed_data, size_t compressed_size, void *uncompressed_data, size_t uncompressed_size) { … } /* * lzx_free_decompressor - Free an LZX decompressor * * @decompressor: A decompressor that was allocated with * lzx_allocate_decompressor(), or NULL. */ void lzx_free_decompressor(struct lzx_decompressor *decompressor) { … }