/* ****************************************************************** * Huffman encoder, part of New Generation Entropy library * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy * - Public forum : https://groups.google.com/forum/#!forum/lz4c * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** * Compiler specifics ****************************************************************/ #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif /* ************************************************************** * Includes ****************************************************************/ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ #include "../common/compiler.h" #include "../common/bitstream.h" #include "hist.h" #define FSE_STATIC_LINKING_ONLY … #include "../common/fse.h" /* header compression */ #include "../common/huf.h" #include "../common/error_private.h" #include "../common/bits.h" /* ZSTD_highbit32 */ /* ************************************************************** * Error Management ****************************************************************/ #define HUF_isError … #define HUF_STATIC_ASSERT(c) … /* ************************************************************** * Required declarations ****************************************************************/ nodeElt; /* ************************************************************** * Debug Traces ****************************************************************/ #if DEBUGLEVEL >= 2 static size_t showU32(const U32* arr, size_t size) { size_t u; for (u=0; u<size; u++) { RAWLOG(6, " %u", arr[u]); (void)arr; } RAWLOG(6, " \n"); return size; } static size_t HUF_getNbBits(HUF_CElt elt); static size_t showCTableBits(const HUF_CElt* ctable, size_t size) { size_t u; for (u=0; u<size; u++) { RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable; } RAWLOG(6, " \n"); return size; } static size_t showHNodeSymbols(const nodeElt* hnode, size_t size) { size_t u; for (u=0; u<size; u++) { RAWLOG(6, " %u", hnode[u].byte); (void)hnode; } RAWLOG(6, " \n"); return size; } static size_t showHNodeBits(const nodeElt* hnode, size_t size) { size_t u; for (u=0; u<size; u++) { RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode; } RAWLOG(6, " \n"); return size; } #endif /* ******************************************************* * HUF : Huffman block compression *********************************************************/ #define HUF_WORKSPACE_MAX_ALIGNMENT … static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align) { … } /* HUF_compressWeights() : * Same as FSE_compress(), but dedicated to huff0's weights compression. * The use case needs much less stack memory. * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. */ #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER … HUF_CompressWeightsWksp; static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) { … } static size_t HUF_getNbBits(HUF_CElt elt) { … } static size_t HUF_getNbBitsFast(HUF_CElt elt) { … } static size_t HUF_getValue(HUF_CElt elt) { … } static size_t HUF_getValueFast(HUF_CElt elt) { … } static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits) { … } static void HUF_setValue(HUF_CElt* elt, size_t value) { … } HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable) { … } static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue) { … } HUF_WriteCTableWksp; size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize) { … } size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) { … } U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue) { … } /** * HUF_setMaxHeight(): * Try to enforce @targetNbBits on the Huffman tree described in @huffNode. * * It attempts to convert all nodes with nbBits > @targetNbBits * to employ @targetNbBits instead. Then it adjusts the tree * so that it remains a valid canonical Huffman tree. * * @pre The sum of the ranks of each symbol == 2^largestBits, * where largestBits == huffNode[lastNonNull].nbBits. * @post The sum of the ranks of each symbol == 2^largestBits, * where largestBits is the return value (expected <= targetNbBits). * * @param huffNode The Huffman tree modified in place to enforce targetNbBits. * It's presumed sorted, from most frequent to rarest symbol. * @param lastNonNull The symbol with the lowest count in the Huffman tree. * @param targetNbBits The allowed number of bits, which the Huffman tree * may not respect. After this function the Huffman tree will * respect targetNbBits. * @return The maximum number of bits of the Huffman tree after adjustment. */ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits) { … } rankPos; huffNodeTable; /* Number of buckets available for HUF_sort() */ #define RANK_POSITION_TABLE_SIZE … HUF_buildCTable_wksp_tables; /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing. * Strategy is to use as many buckets as possible for representing distinct * counts while using the remainder to represent all "large" counts. * * To satisfy this requirement for 192 buckets, we can do the following: * Let buckets 0-166 represent distinct counts of [0, 166] * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. */ #define RANK_POSITION_MAX_COUNT_LOG … #define RANK_POSITION_LOG_BUCKETS_BEGIN … #define RANK_POSITION_DISTINCT_COUNT_CUTOFF … /* Return the appropriate bucket index for a given count. See definition of * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. */ static U32 HUF_getIndex(U32 const count) { … } /* Helper swap function for HUF_quickSortPartition() */ static void HUF_swapNodes(nodeElt* a, nodeElt* b) { … } /* Returns 0 if the huffNode array is not sorted by descending count */ MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) { … } /* Insertion sort by descending order */ HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) { … } /* Pivot helper function for quicksort. */ static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) { … } /* Classic quicksort by descending with partially iterative calls * to reduce worst case callstack size. */ static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) { … } /** * HUF_sort(): * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket. * * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. * Must have (maxSymbolValue + 1) entries. * @param[in] count Histogram of the symbols. * @param[in] maxSymbolValue Maximum symbol value. * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. */ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) { … } /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). */ #define STARTNODE … /* HUF_buildTree(): * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. * * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. * @param maxSymbolValue The maximum symbol value. * @return The smallest node in the Huffman tree (by count). */ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) { … } /** * HUF_buildCTableFromTree(): * Build the CTable given the Huffman tree in huffNode. * * @param[out] CTable The output Huffman CTable. * @param huffNode The Huffman tree. * @param nonNullRank The last and smallest node in the Huffman tree. * @param maxSymbolValue The maximum symbol value. * @param maxNbBits The exact maximum number of bits used in the Huffman tree. */ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits) { … } size_t HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) { … } size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { … } int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { … } size_t HUF_compressBound(size_t size) { … } /** HUF_CStream_t: * Huffman uses its own BIT_CStream_t implementation. * There are three major differences from BIT_CStream_t: * 1. HUF_addBits() takes a HUF_CElt (size_t) which is * the pair (nbBits, value) in the format: * format: * - Bits [0, 4) = nbBits * - Bits [4, 64 - nbBits) = 0 * - Bits [64 - nbBits, 64) = value * 2. The bitContainer is built from the upper bits and * right shifted. E.g. to add a new value of N bits * you right shift the bitContainer by N, then or in * the new value into the N upper bits. * 3. The bitstream has two bit containers. You can add * bits to the second container and merge them into * the first container. */ #define HUF_BITS_IN_CONTAINER … HUF_CStream_t; /**! HUF_initCStream(): * Initializes the bitstream. * @returns 0 or an error code. */ static size_t HUF_initCStream(HUF_CStream_t* bitC, void* startPtr, size_t dstCapacity) { … } /*! HUF_addBits(): * Adds the symbol stored in HUF_CElt elt to the bitstream. * * @param elt The element we're adding. This is a (nbBits, value) pair. * See the HUF_CStream_t docs for the format. * @param idx Insert into the bitstream at this idx. * @param kFast This is a template parameter. If the bitstream is guaranteed * to have at least 4 unused bits after this call it may be 1, * otherwise it must be 0. HUF_addBits() is faster when fast is set. */ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast) { … } FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC) { … } /*! HUF_mergeIndex1() : * Merges the bit container @ index 1 into the bit container @ index 0 * and zeros the bit container @ index 1. */ FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC) { … } /*! HUF_flushBits() : * Flushes the bits in the bit container @ index 0. * * @post bitPos will be < 8. * @param kFast If kFast is set then we must know a-priori that * the bit container will not overflow. */ FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast) { … } /*! HUF_endMark() * @returns The Huffman stream end mark: A 1-bit value = 1. */ static HUF_CElt HUF_endMark(void) { … } /*! HUF_closeCStream() : * @return Size of CStream, in bytes, * or 0 if it could not fit into dstBuffer */ static size_t HUF_closeCStream(HUF_CStream_t* bitC) { … } FORCE_INLINE_TEMPLATE void HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast) { … } FORCE_INLINE_TEMPLATE void HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC, const BYTE* ip, size_t srcSize, const HUF_CElt* ct, int kUnroll, int kFastFlush, int kLastFast) { … } /** * Returns a tight upper bound on the output space needed by Huffman * with 8 bytes buffer to handle over-writes. If the output is at least * this large we don't need to do bounds checks during Huffman encoding. */ static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog) { … } FORCE_INLINE_TEMPLATE size_t HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { … } #if DYNAMIC_BMI2 static BMI2_TARGET_ATTRIBUTE size_t HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { … } static size_t HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { … } static size_t HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, const int flags) { … } #else static size_t HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, const int flags) { (void)flags; return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); } #endif size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) { … } static size_t HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) { … } size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) { … } HUF_nbStreams_e; static size_t HUF_compressCTable_internal( BYTE* const ostart, BYTE* op, BYTE* const oend, const void* src, size_t srcSize, HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags) { … } HUF_compress_tables_t; #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE … #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO … unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue) { … } unsigned HUF_minTableLog(unsigned symbolCardinality) { … } unsigned HUF_optimalTableLog( unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags) { … } /* HUF_compress_internal() : * `workSpace_align4` must be aligned on 4-bytes boundaries, * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ static size_t HUF_compress_internal (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, HUF_nbStreams_e nbStreams, void* workSpace, size_t wkspSize, HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags) { … } size_t HUF_compress1X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int flags) { … } /* HUF_compress4X_repeat(): * compress input using 4 streams. * consider skipping quickly * reuse an existing huffman compression table */ size_t HUF_compress4X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int flags) { … }