/* inffast_chunk.c -- fast decoding
 * Copyright (C) 1995-2017 Mark Adler
 * Copyright 2023 The Chromium Authors
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
#include "contrib/optimizations/inffast_chunk.h"
#include "contrib/optimizations/chunkcopy.h"

#ifdef ASMINF
#  pragma message("Assembler code may have bugs -- use at your own risk")
#else

/*
   Decode literal, length, and distance codes and write out the resulting
   literal and match bytes until either not enough input or output is
   available, an end-of-block is encountered, or a data error is encountered.
   When large enough input and output buffers are supplied to inflate(), for
   example, a 16K input buffer and a 64K output buffer, more than 95% of the
   inflate() execution time is spent in this routine.

   Parameters:

        strm  -- the stream to decode; the entry assumptions below constrain
                 its next_in/avail_in, next_out/avail_out and state->window.
        start -- must satisfy start >= strm->avail_out on entry.
                 NOTE(review): presumably the value strm->avail_out had when
                 inflate() started -- confirm against the caller.

   Entry assumptions:

        state->mode == LEN
        strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes + 7 bytes)
        strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes + 2 bytes)
        start >= strm->avail_out
        state->bits < 8
        (state->hold >> state->bits) == 0
        strm->next_out[0..strm->avail_out] does not overlap with
              strm->next_in[0..strm->avail_in]
        strm->state->window is allocated with an additional
              CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize

   On return, state->mode is one of:

        LEN  -- not enough output space or not enough available input remains
                to continue
        TYPE -- reached end of block code, inflate() to interpret next block
        BAD  -- error in block data

   Notes:

    INFLATE_FAST_MIN_INPUT: 6 or 8 bytes + 7 bytes

    - The maximum input bits used by a length/distance pair is 15 bits for the
      length code, 5 bits for the length extra, 15 bits for the distance code,
      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
      Therefore if strm->avail_in >= 6, then there is enough input to avoid
      checking for available input while decoding.

    - The wide input data reading option reads 64 input bits at a time.
      Thus, if strm->avail_in >= 8, then there is enough input to avoid
      checking for available input while decoding.  Reading consumes the
      input with:

          hold |= read64le(in) << bits;
          in += 6;
          bits += 48;

      reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded
      up, worst case) and 6 bytes is enough to decode as noted above.  At
      exit, hold &= (1U << bits) - 1 drops excess input to keep the invariant:

          (state->hold >> state->bits) == 0

    INFLATE_FAST_MIN_OUTPUT: 258 bytes + 2 bytes for literals = 260 bytes

    - The maximum bytes that a single length/distance pair can output is 258
      bytes, which is the maximum length that can be coded.  inflate_fast()
      requires strm->avail_out >= 260 for each loop to avoid checking for
      available output space while decoding.
      NOTE(review): "inflate_fast()" here presumably refers to this routine,
      inflate_fast_chunk_() -- confirm.
 */
void ZLIB_INTERNAL inflate_fast_chunk_(z_streamp strm, unsigned start) { … }

/*
   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
   - Using bit fields for code structure
   - Different op definition to avoid & for extra bits (do & for table bits)
   - Three separate decoding do-loops for direct, window, and wnext == 0
   - Special case for distance > 1 copies to do overlapped load and store copy
   - Explicit branch predictions (based on measured branch probabilities)
   - Deferring match copy and interspersing it with decoding subsequent codes
   - Swapping literal/length else
   - Swapping window/direct else
   - Larger unrolled copy loops (three is about right)
   - Moving len -= 3 statement into middle of loop
 */

#endif /* !ASMINF */