huf_compress.c | Explore in Territory

/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#endif


/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY …
#include "../common/fse.h"        /* header compression */
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/bits.h"       /* ZSTD_highbit32 */


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError …
#define HUF_STATIC_ASSERT(c) …


/* **************************************************************
*  Required declarations
****************************************************************/
nodeElt;


/* **************************************************************
*  Debug Traces
****************************************************************/

#if DEBUGLEVEL >= 2

static size_t showU32(const U32* arr, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", arr[u]); (void)arr;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t HUF_getNbBits(HUF_CElt elt);

static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
    }
    RAWLOG(6, " \n");
    return size;

}

static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

#endif


/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT …

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{ … }


/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER …

HUF_CompressWeightsWksp;

static size_t
HUF_compressWeights(void* dst, size_t dstSize,
              const void* weightTable, size_t wtSize,
                    void* workspace, size_t workspaceSize)
{ … }

static size_t HUF_getNbBits(HUF_CElt elt)
{ … }

static size_t HUF_getNbBitsFast(HUF_CElt elt)
{ … }

static size_t HUF_getValue(HUF_CElt elt)
{ … }

static size_t HUF_getValueFast(HUF_CElt elt)
{ … }

static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{ … }

static void HUF_setValue(HUF_CElt* elt, size_t value)
{ … }

HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
{ … }

static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
{ … }

HUF_WriteCTableWksp;

size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{ … }


size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{ … }

U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{ … }


/**
 * HUF_setMaxHeight():
 * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
 *
 * It attempts to convert all nodes with nbBits > @targetNbBits
 * to employ @targetNbBits instead. Then it adjusts the tree
 * so that it remains a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value (expected <= targetNbBits).
 *
 * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
 *                    It's presumed sorted, from most frequent to rarest symbol.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param targetNbBits  The allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect targetNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{ … }

rankPos;

huffNodeTable;

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE …

HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG …
#define RANK_POSITION_LOG_BUCKETS_BEGIN …
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF …

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) { … }

/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) { … }

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) { … }

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) { … }

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) { … }

/* Classic quicksort by descending with partially iterative calls
 * to reduce worst case callstack size.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) { … }

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) { … }


/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE …

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue  The maximum symbol value.
 * @return                The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{ … }

/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{ … }

size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
                     void* workSpace, size_t wkspSize)
{ … }

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{ … }

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { … }

size_t HUF_compressBound(size_t size) { … }

/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *      format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */

#define HUF_BITS_IN_CONTAINER …

HUF_CStream_t;

/**! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                                  void* startPtr, size_t dstCapacity)
{ … }

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{ … }

FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{ … }

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0
 * and zeros the bit container @ index 1.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{ … }

/*! HUF_flushBits() :
* Flushes the bits in the bit container @ index 0.
*
* @post bitPos will be < 8.
* @param kFast If kFast is set then we must know a-priori that
*              the bit container will not overflow.
*/
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{ … }

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{ … }

/*! HUF_closeCStream() :
 *  @return Size of CStream, in bytes,
 *          or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{ … }

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{ … }

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{ … }

/**
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{ … }


FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{ … }

#if DYNAMIC_BMI2

static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{ … }

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{ … }

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int flags)
{ … }

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int flags)
{
    (void)flags;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{ … }

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int flags)
{ … }

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{ … }

HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{ … }

HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE …
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO …

unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{ … }

unsigned HUF_minTableLog(unsigned symbolCardinality)
{ … }

unsigned HUF_optimalTableLog(
            unsigned maxTableLog,
            size_t srcSize,
            unsigned maxSymbolValue,
            void* workSpace, size_t wkspSize,
            HUF_CElt* table,
      const unsigned* count,
            int flags)
{ … }

/* HUF_compress_internal() :
 * `workSpace_align4` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       HUF_nbStreams_e nbStreams,
                       void* workSpace, size_t wkspSize,
                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
{ … }

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{ … }

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * consider skipping quickly
 * reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{ … }
godot/thirdparty/zstd/compress/huf_compress.c