linux/lib/zstd/compress/zstd_compress_internal.h

/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"


/*-*************************************
*  Constants
***************************************/
#define kSearchStrength
#define HASH_READ_SIZE
#define ZSTD_DUBT_UNSORTED_MARK


/*-*************************************
*  Context memory management
***************************************/
ZSTD_compressionStage_e;
ZSTD_cStreamStage;

ZSTD_prefixDict;

ZSTD_localDict;

ZSTD_hufCTables_t;

ZSTD_fseCTables_t;

ZSTD_entropyCTables_t;

/* *********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/* ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
ZSTD_hufCTablesMetadata_t;

/* ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
ZSTD_fseCTablesMetadata_t;

ZSTD_entropyCTablesMetadata_t;

/* ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize);

/* *******************************
*  Compression internals structs *
*********************************/

ZSTD_match_t;

rawSeq;

rawSeqStore_t;

UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore =;

ZSTD_optimal_t;

ZSTD_OptPrice_e;

optState_t;

ZSTD_compressedBlockState_t;

ZSTD_window_t;

#define ZSTD_WINDOW_START_INDEX

ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE

struct ZSTD_matchState_t {};

ZSTD_blockState_t;

ldmEntry_t;

ldmMatchCandidate_t;

#define LDM_BATCH_SIZE

ldmState_t;

ldmParams_t;

SeqCollector;

struct ZSTD_CCtx_params_s {};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE
#define ENTROPY_WORKSPACE_SIZE

/*
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
ZSTD_buffered_policy_e;

/*
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS
ZSTD_blockSplitCtx;

struct ZSTD_CCtx_s {};

ZSTD_dictTableLoadMethod_e;

ZSTD_dictMode_e;

ZSTD_cParamMode_e;

ZSTD_blockCompressor;
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 *        because it's the format it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{}

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{}

/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{}

MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{}


/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compress block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{}

MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{}

#define ZSTD_REP_MOVE
#define STORE_REPCODE_1
#define STORE_REPCODE_2
#define STORE_REPCODE_3
#define STORE_REPCODE(r)
#define STORE_OFFSET(o)
#define STORED_IS_OFFSET(o)
#define STORED_IS_REPCODE(o)
#define STORED_OFFSET(o)
#define STORED_REPCODE(o)
#define STORED_TO_OFFBASE(o)
#define OFFBASE_TO_STORED(o)

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
 *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
 *  @matchLength : must be >= MINMATCH
 *  Allowed to overread literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
              size_t litLength, const BYTE* literals, const BYTE* litLimit,
              U32 offBase_minus1,
              size_t matchLength)
{}

/* ZSTD_updateRep() :
 * updates in-place @rep (array of repeat offsets)
 * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
 */
MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{}

repcodes_t;

MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{}


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{}


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{}

/* ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continue starting from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{}


/*-*************************************
 *  Hashes
 ***************************************/
static const U32 prime3bytes =;
static U32    ZSTD_hash3(U32 u, U32 h) {}
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) {} /* only in zstd_opt.h */

static const U32 prime4bytes =;
static U32    ZSTD_hash4(U32 u, U32 h) {}
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) {}

static const U64 prime5bytes =;
static size_t ZSTD_hash5(U64 u, U32 h) {}
static size_t ZSTD_hash5Ptr(const void* p, U32 h) {}

static const U64 prime6bytes =;
static size_t ZSTD_hash6(U64 u, U32 h) {}
static size_t ZSTD_hash6Ptr(const void* p, U32 h) {}

static const U64 prime7bytes =;
static size_t ZSTD_hash7(U64 u, U32 h) {}
static size_t ZSTD_hash7Ptr(const void* p, U32 h) {}

static const U64 prime8bytes =;
static size_t ZSTD_hash8(U64 u, U32 h) {}
static size_t ZSTD_hash8Ptr(const void* p, U32 h) {}

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{}

/* ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{}

#define ZSTD_ROLL_HASH_CHAR_OFFSET

/* ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{}

/* ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{}

/* ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{}

/* ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{}

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX

/*
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{}

/*
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{}

/*
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  else
#define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  endif
#endif

/*
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{}

/*
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  U32 cycleLog,
                                                  U32 maxDist,
                                                  U32 loadedDictEnd,
                                                  void const* src,
                                                  void const* srcEnd)
{}

/*
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{}

/*
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {}

/*
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{}

/*
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{}

/*
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{}



/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif



/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number supposed already checked
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict,size_t dictSize,
                                 const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);

/* ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/* ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

#endif /* ZSTD_COMPRESS_H */