/* * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ /* This header contains definitions * that shall **only** be used by modules within lib/compress. */ #ifndef ZSTD_COMPRESS_H #define ZSTD_COMPRESS_H /*-************************************* * Dependencies ***************************************/ #include "../common/zstd_internal.h" #include "zstd_cwksp.h" /*-************************************* * Constants ***************************************/ #define kSearchStrength … #define HASH_READ_SIZE … #define ZSTD_DUBT_UNSORTED_MARK … /*-************************************* * Context memory management ***************************************/ ZSTD_compressionStage_e; ZSTD_cStreamStage; ZSTD_prefixDict; ZSTD_localDict; ZSTD_hufCTables_t; ZSTD_fseCTables_t; ZSTD_entropyCTables_t; /* ********************************************* * Entropy buffer statistics structs and funcs * ***********************************************/ /* ZSTD_hufCTablesMetadata_t : * Stores Literals Block Type for a super-block in hType, and * huffman tree description in hufDesBuffer. * hufDesSize refers to the size of huffman tree description in bytes. * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ ZSTD_hufCTablesMetadata_t; /* ZSTD_fseCTablesMetadata_t : * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and * fse tables in fseTablesBuffer. * fseTablesSize refers to the size of fse tables in bytes. * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ ZSTD_fseCTablesMetadata_t; ZSTD_entropyCTablesMetadata_t; /* ZSTD_buildBlockEntropyStats() : * Builds entropy for the block. * @return : 0 on success or error code */ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, ZSTD_entropyCTablesMetadata_t* entropyMetadata, void* workspace, size_t wkspSize); /* ******************************* * Compression internals structs * *********************************/ ZSTD_match_t; rawSeq; rawSeqStore_t; UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = …; ZSTD_optimal_t; ZSTD_OptPrice_e; optState_t; ZSTD_compressedBlockState_t; ZSTD_window_t; #define ZSTD_WINDOW_START_INDEX … ZSTD_matchState_t; #define ZSTD_ROW_HASH_CACHE_SIZE … struct ZSTD_matchState_t { … }; ZSTD_blockState_t; ldmEntry_t; ldmMatchCandidate_t; #define LDM_BATCH_SIZE … ldmState_t; ldmParams_t; SeqCollector; struct ZSTD_CCtx_params_s { … }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE … #define ENTROPY_WORKSPACE_SIZE … /* * Indicates whether this compression proceeds directly from user-provided * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or * whether the context needs to buffer the input/output (ZSTDb_buffered). */ ZSTD_buffered_policy_e; /* * Struct that contains all elements of block splitter that should be allocated * in a wksp. */ #define ZSTD_MAX_NB_BLOCK_SPLITS … ZSTD_blockSplitCtx; struct ZSTD_CCtx_s { … }; ZSTD_dictTableLoadMethod_e; ZSTD_dictMode_e; ZSTD_cParamMode_e; ZSTD_blockCompressor; ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) { … } /* ZSTD_MLcode() : * note : mlBase = matchLength - MINMATCH; * because it's the format it's stored in seqStore->sequences */ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) { … } /* ZSTD_cParam_withinBounds: * @return 1 if value is within cParam bounds, * 0 otherwise */ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) { … } /* ZSTD_noCompressBlock() : * Writes uncompressed block to dst buffer from given src. * Returns the size of the block */ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) { … } MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) { … } /* ZSTD_minGain() : * minimum compression required * to generate a compress block or a compressed literals section. * note : use same formula for both situations */ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) { … } MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams) { … } /*! ZSTD_safecopyLiterals() : * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single * large copies. */ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { … } #define ZSTD_REP_MOVE … #define STORE_REPCODE_1 … #define STORE_REPCODE_2 … #define STORE_REPCODE_3 … #define STORE_REPCODE(r) … #define STORE_OFFSET(o) … #define STORED_IS_OFFSET(o) … #define STORED_IS_REPCODE(o) … #define STORED_OFFSET(o) … #define STORED_REPCODE(o) … #define STORED_TO_OFFBASE(o) … #define OFFBASE_TO_STORED(o) … /*! ZSTD_storeSeq() : * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). * @matchLength : must be >= MINMATCH * Allowed to overread literals up to litLimit. */ HINT_INLINE UNUSED_ATTR void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offBase_minus1, size_t matchLength) { … } /* ZSTD_updateRep() : * updates in-place @rep (array of repeat offsets) * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() */ MEM_STATIC void ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) { … } repcodes_t; MEM_STATIC repcodes_t ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) { … } /*-************************************* * Match length counter ***************************************/ static unsigned ZSTD_NbCommonBytes (size_t val) { … } MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) { … } /* ZSTD_count_2segments() : * can count match length with `ip` & `match` in 2 different segments. * convention : on reaching mEnd, match count continue starting from iStart */ MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) { … } /*-************************************* * Hashes ***************************************/ static const U32 prime3bytes = …; static U32 ZSTD_hash3(U32 u, U32 h) { … } MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { … } /* only in zstd_opt.h */ static const U32 prime4bytes = …; static U32 ZSTD_hash4(U32 u, U32 h) { … } static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { … } static const U64 prime5bytes = …; static size_t ZSTD_hash5(U64 u, U32 h) { … } static size_t ZSTD_hash5Ptr(const void* p, U32 h) { … } static const U64 prime6bytes = …; static size_t ZSTD_hash6(U64 u, U32 h) { … } static size_t ZSTD_hash6Ptr(const void* p, U32 h) { … } static const U64 prime7bytes = …; static size_t ZSTD_hash7(U64 u, U32 h) { … } static size_t ZSTD_hash7Ptr(const void* p, U32 h) { … } static const U64 prime8bytes = …; static size_t ZSTD_hash8(U64 u, U32 h) { … } static size_t ZSTD_hash8Ptr(const void* p, U32 h) { … } MEM_STATIC FORCE_INLINE_ATTR size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) { … } /* ZSTD_ipow() : * Return base^exponent. */ static U64 ZSTD_ipow(U64 base, U64 exponent) { … } #define ZSTD_ROLL_HASH_CHAR_OFFSET … /* ZSTD_rollingHash_append() : * Add the buffer to the hash value. */ static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) { … } /* ZSTD_rollingHash_compute() : * Compute the rolling hash value of the buffer. */ MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) { … } /* ZSTD_rollingHash_primePower() : * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash * over a window of length bytes. */ MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) { … } /* ZSTD_rollingHash_rotate() : * Rotate the rolling hash by one byte. */ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) { … } /*-************************************* * Round buffer management ***************************************/ #if (ZSTD_WINDOWLOG_MAX_64 > 31) # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" #endif /* Max current allowed */ #define ZSTD_CURRENT_MAX … /* Maximum chunk size before overflow correction needs to be called again */ #define ZSTD_CHUNKSIZE_MAX … /* * ZSTD_window_clear(): * Clears the window containing the history by simply setting it to empty. */ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) { … } MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) { … } /* * ZSTD_window_hasExtDict(): * Returns non-zero if the window has a non-empty extDict. */ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) { … } /* * ZSTD_matchState_dictMode(): * Inspects the provided matchState and figures out what dictMode should be * passed to the compressor. */ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) { … } /* Defining this macro to non-zero tells zstd to run the overflow correction * code much more frequently. This is very inefficient, and should only be * used for tests and fuzzers. */ #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION #define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY … # else #define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY … # endif #endif /* * ZSTD_window_canOverflowCorrect(): * Returns non-zero if the indices are large enough for overflow correction * to work correctly without impacting compression ratio. */ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, U32 cycleLog, U32 maxDist, U32 loadedDictEnd, void const* src) { … } /* * ZSTD_window_needOverflowCorrection(): * Returns non-zero if the indices are getting too large and need overflow * protection. */ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, U32 cycleLog, U32 maxDist, U32 loadedDictEnd, void const* src, void const* srcEnd) { … } /* * ZSTD_window_correctOverflow(): * Reduces the indices to protect from index overflow. * Returns the correction made to the indices, which must be applied to every * stored index. * * The least significant cycleLog bits of the indices must remain the same, * which may be 0. Every index up to maxDist in the past must be valid. */ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, U32 maxDist, void const* src) { … } /* * ZSTD_window_enforceMaxDist(): * Updates lowLimit so that: * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd * * It ensures index is valid as long as index >= lowLimit. * This must be called before a block compression call. * * loadedDictEnd is only defined if a dictionary is in use for current compression. * As the name implies, loadedDictEnd represents the index at end of dictionary. * The value lies within context's referential, it can be directly compared to blockEndIdx. * * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. * This is because dictionaries are allowed to be referenced fully * as long as the last byte of the dictionary is in the window. * Once input has progressed beyond window size, dictionary cannot be referenced anymore. * * In normal dict mode, the dictionary lies between lowLimit and dictLimit. * In dictMatchState mode, lowLimit and dictLimit are the same, * and the dictionary is below them. * forceWindow and dictMatchState are therefore incompatible. */ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, const void* blockEnd, U32 maxDist, U32* loadedDictEndPtr, const ZSTD_matchState_t** dictMatchStatePtr) { … } /* Similar to ZSTD_window_enforceMaxDist(), * but only invalidates dictionary * when input progresses beyond window size. * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) * loadedDictEnd uses same referential as window->base * maxDist is the window size */ MEM_STATIC void ZSTD_checkDictValidity(const ZSTD_window_t* window, const void* blockEnd, U32 maxDist, U32* loadedDictEndPtr, const ZSTD_matchState_t** dictMatchStatePtr) { … } MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { … } /* * ZSTD_window_update(): * Updates the window by appending [src, src + srcSize) to the window. * If it is not contiguous, the current prefix becomes the extDict, and we * forget about the extDict. Handles overlap of the prefix and extDict. * Returns non-zero if the segment is contiguous. */ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, void const* src, size_t srcSize, int forceNonContiguous) { … } /* * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. */ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) { … } /* * Returns the lowest allowed match index in the prefix. */ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) { … } /* debug functions */ #if (DEBUGLEVEL>=2) MEM_STATIC double ZSTD_fWeight(U32 rawStat) { U32 const fp_accuracy = 8; U32 const fp_multiplier = (1 << fp_accuracy); U32 const newStat = rawStat + 1; U32 const hb = ZSTD_highbit32(newStat); U32 const BWeight = hb * fp_multiplier; U32 const FWeight = (newStat << fp_accuracy) >> hb; U32 const weight = BWeight + FWeight; assert(hb + fp_accuracy < 31); return (double)weight / fp_multiplier; } /* display a table content, * listing each element, its frequency, and its predicted bit cost */ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) { unsigned u, sum; for (u=0, sum=0; u<=max; u++) sum += table[u]; DEBUGLOG(2, "total nb elts: %u", sum); for (u=0; u<=max; u++) { DEBUGLOG(2, "%2u: %5u (%.2f)", u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); } } #endif /* =============================================================== * Shared internal declarations * These prototypes may be called from sources not in lib/compress * =============================================================== */ /* ZSTD_loadCEntropy() : * dict : must point at beginning of a valid zstd dictionary. * return : size of dictionary header (size of magic number + dict ID + entropy tables) * assumptions : magic number supposed already checked * and dictSize >= 8 */ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, const void* const dict, size_t dictSize); void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); /* ============================================================== * Private declarations * These prototypes shall only be called from within lib/compress * ============================================================== */ /* ZSTD_getCParamsFromCCtxParams() : * cParams are built depending on compressionLevel, src size hints, * LDM and manually set compression parameters. * Note: srcSizeHint == 0 means 0! */ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); /*! ZSTD_initCStream_internal() : * Private use only. Init streaming operation. * expects params to be valid. * must receive dict, or cdict, or none, but not both. * @return : 0, or an error code */ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, const void* dict, size_t dictSize, const ZSTD_CDict* cdict, const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); void ZSTD_resetSeqStore(seqStore_t* ssPtr); /*! ZSTD_getCParamsFromCDict() : * as the name implies */ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); /* ZSTD_compressBegin_advanced_internal() : * Private use only. To be called from zstdmt_compress.c. */ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); /* ZSTD_compress_advanced_internal() : * Private use only. To be called from zstdmt_compress.c. */ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, const ZSTD_CCtx_params* params); /* ZSTD_writeLastEmptyBlock() : * output an empty Block with end-of-frame mark to complete a frame * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) */ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); /* ZSTD_referenceExternalSequences() : * Must be called before starting a compression operation. * seqs must parse a prefix of the source. * This cannot be used when long range matching is enabled. * Zstd will use these sequences, and pass the literals to a secondary block * compressor. * @return : An error code on failure. * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory * access and data corruption. */ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); /* ZSTD_cycleLog() : * condition for correct operation : hashLog > 1 */ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); /* ZSTD_CCtx_trace() : * Trace the end of a compression call. */ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); #endif /* ZSTD_COMPRESS_H */