chromium/third_party/zlib/contrib/optimizations/inffast_chunk.c

/* inffast_chunk.c -- fast decoding
 * Copyright (C) 1995-2017 Mark Adler
 * Copyright 2023 The Chromium Authors
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
#include "contrib/optimizations/inffast_chunk.h"
#include "contrib/optimizations/chunkcopy.h"

#ifdef ASMINF
#  pragma message("Assembler code may have bugs -- use at your own risk")
#else

/*
   inflate_fast_chunk_(strm, start)

   Decode literal, length, and distance codes and write out the resulting
   literal and match bytes until either not enough input or output is
   available, an end-of-block is encountered, or a data error is encountered.
   When large enough input and output buffers are supplied to inflate(), for
   example, a 16K input buffer and a 64K output buffer, more than 95% of the
   inflate() execution time is spent in this routine.

   Parameters:

        strm  -- stream whose next_in, avail_in, next_out, avail_out and
                 state fields are subject to the entry assumptions below
        start -- must satisfy start >= strm->avail_out on entry (presumably
                 the value strm->avail_out had when inflate() started, as in
                 stock zlib's inflate_fast() -- TODO confirm against caller)

   The function returns no value; the outcome is reported through
   state->mode as listed under "On return" below.

   NOTE(review): the definition that follows this comment has an empty body
   in this copy of the file, so as written a call performs none of the
   decoding described here and leaves strm and its state untouched.  Confirm
   whether the implementation was elided before relying on this contract.

   Entry assumptions:

        state->mode == LEN
        strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes + 7 bytes)
        strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes + 2 bytes)
        start >= strm->avail_out
        state->bits < 8
        (state->hold >> state->bits) == 0
        strm->next_out[0..strm->avail_out] does not overlap with
              strm->next_in[0..strm->avail_in]
        strm->state->window is allocated with an additional
              CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize

   On return, state->mode is one of:

        LEN -- ran out of enough output space or enough available input
        TYPE -- reached end of block code, inflate() to interpret next block
        BAD -- error in block data

   Notes:

    INFLATE_FAST_MIN_INPUT: 6 or 8 bytes + 7 bytes

    - The maximum input bits used by a length/distance pair is 15 bits for the
      length code, 5 bits for the length extra, 15 bits for the distance code,
      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
      Therefore if strm->avail_in >= 6, then there is enough input to avoid
      checking for available input while decoding.

    - The wide input data reading option reads 64 input bits at a time. Thus,
      if strm->avail_in >= 8, then there is enough input to avoid checking for
      available input while decoding. Reading consumes the input with:

          hold |= read64le(in) << bits;
          in += 6;
          bits += 48;

      reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded
      up, worst case) and 6 bytes is enough to decode as noted above. At exit,
      hold &= (1U << bits) - 1 drops excess input to keep the invariant:

          (state->hold >> state->bits) == 0

    INFLATE_FAST_MIN_OUTPUT: 258 bytes + 2 bytes for literals = 260 bytes

    - The maximum bytes that a single length/distance pair can output is 258
      bytes, which is the maximum length that can be coded.  inflate_fast()
      requires strm->avail_out >= 260 for each loop to avoid checking for
      available output space while decoding.
 */
void ZLIB_INTERNAL inflate_fast_chunk_(z_streamp strm, unsigned start) {}

/*
   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
   - Using bit fields for code structure
   - Different op definition to avoid & for extra bits (do & for table bits)
   - Three separate decoding do-loops for direct, window, and wnext == 0
   - Special case for distance > 1 copies to do overlapped load and store copy
   - Explicit branch predictions (based on measured branch probabilities)
   - Deferring match copy and interspersing it with decoding subsequent codes
   - Swapping literal/length else
   - Swapping window/direct else
   - Larger unrolled copy loops (three is about right)
   - Moving len -= 3 statement into middle of loop
 */

#endif /* !ASMINF */