// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * * Copyright (C) 2001-2012, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: utrie.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2001oct20 * created by: Markus W. Scherer * * This is a common implementation of a "folded" trie. * It is a kind of compressed, serializable table of 16- or 32-bit values associated with * Unicode code points (0..0x10ffff). */ #ifdef UTRIE_DEBUG # include <stdio.h> #endif #include "unicode/utypes.h" #include "cmemory.h" #include "utrie.h" /* miscellaneous ------------------------------------------------------------ */ #undef ABS #define ABS(x) … static inline UBool equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { … } /* Building a trie ----------------------------------------------------------*/ U_CAPI UNewTrie * U_EXPORT2 utrie_open(UNewTrie *fillIn, uint32_t *aliasData, int32_t maxDataLength, uint32_t initialValue, uint32_t leadUnitValue, UBool latin1Linear) { … } U_CAPI UNewTrie * U_EXPORT2 utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) { … } U_CAPI void U_EXPORT2 utrie_close(UNewTrie *trie) { … } U_CAPI uint32_t * U_EXPORT2 utrie_getData(UNewTrie *trie, int32_t *pLength) { … } static int32_t utrie_allocDataBlock(UNewTrie *trie) { … } /** * No error checking for illegal arguments. * * @return -1 if no new data block available (out of memory in data array) * @internal */ static int32_t utrie_getDataBlock(UNewTrie *trie, UChar32 c) { … } /** * @return true if the value was successfully set */ U_CAPI UBool U_EXPORT2 utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) { … } U_CAPI uint32_t U_EXPORT2 utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) { … } /** * @internal */ static void utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value, uint32_t initialValue, UBool overwrite) { … } U_CAPI UBool U_EXPORT2 utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) { … } static int32_t _findSameIndexBlock(const int32_t *idx, int32_t indexLength, int32_t otherBlock) { … } /* * Fold the normalization data for supplementary code points into * a compact area on top of the BMP-part of the trie index, * with the lead surrogates indexing this compact area. * * Duplicate the index values for lead surrogates: * From inside the BMP area, where some may be overridden with folded values, * to just after the BMP area, where they can be retrieved for * code point lookups. */ static void utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) { … } /* * Set a value in the trie index map to indicate which data block * is referenced and which one is not. * utrie_compact() will remove data blocks that are not used at all. * Set * - 0 if it is used * - -1 if it is not used */ static void _findUnusedBlocks(UNewTrie *trie) { … } static int32_t _findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t step) { … } /* * Compact a folded build-time trie. * * The compaction * - removes blocks that are identical with earlier ones * - overlaps adjacent blocks as much as possible (if overlap==true) * - moves blocks in steps of the data granularity * - moves and overlaps blocks that overlap with multiple values in the overlap region * * It does not * - try to move and overlap blocks that are not already adjacent */ static void utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) { … } /* serialization ------------------------------------------------------------ */ /* * Default function for the folding value: * Just store the offset (16 bits) if there is any non-initial-value entry. * * The offset parameter is never 0. * Returning the offset itself is safe for UTRIE_SHIFT>=5 because * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800 * which fits into 16-bit trie values; * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases. * * Theoretically, it would be safer for all possible UTRIE_SHIFT including * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS * which would always result in a value of 0x40..0x43f * (start/end 1k blocks of supplementary Unicode code points). * However, this would be uglier, and would not work for some existing * binary data file formats. * * Also, we do not plan to change UTRIE_SHIFT because it would change binary * data file formats, and we would probably not make it smaller because of * the then even larger BMP index length even for empty tries. */ static uint32_t U_CALLCONV defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { … } U_CAPI int32_t U_EXPORT2 utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity, UNewTrieGetFoldedValue *getFoldedValue, UBool reduceTo16Bits, UErrorCode *pErrorCode) { … } /* inverse to defaultGetFoldedValue() */ U_CAPI int32_t U_EXPORT2 utrie_defaultGetFoldingOffset(uint32_t data) { … } U_CAPI int32_t U_EXPORT2 utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) { … } U_CAPI int32_t U_EXPORT2 utrie_unserializeDummy(UTrie *trie, void *data, int32_t length, uint32_t initialValue, uint32_t leadUnitValue, UBool make16BitTrie, UErrorCode *pErrorCode) { … } /* enumeration -------------------------------------------------------------- */ /* default UTrieEnumValue() returns the input value itself */ static uint32_t U_CALLCONV enumSameValue(const void * /*context*/, uint32_t value) { … } /** * Enumerate all ranges of code points with the same relevant values. * The values are transformed from the raw trie entries by the enumValue function. */ U_CAPI void U_EXPORT2 utrie_enum(const UTrie *trie, UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) { … }