// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * Copyright (C) 1997-2016, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * Date Name Description * 03/22/00 aliu Adapted from original C++ ICU Hashtable. * 07/06/01 aliu Modified to support int32_t keys on * platforms with sizeof(void*) < 32. ****************************************************************************** */ #include <string_view> #include "uhash.h" #include "unicode/ustring.h" #include "cstring.h" #include "cmemory.h" #include "uassert.h" #include "ustr_imp.h" /* This hashtable is implemented as a double hash. All elements are * stored in a single array with no secondary storage for collision * resolution (no linked list, etc.). When there is a hash collision * (when two unequal keys have the same hashcode) we resolve this by * using a secondary hash. The secondary hash is an increment * computed as a hash function (a different one) of the primary * hashcode. This increment is added to the initial hash value to * obtain further slots assigned to the same hash code. For this to * work, the length of the array and the increment must be relatively * prime. The easiest way to achieve this is to have the length of * the array be prime, and the increment be any value from * 1..length-1. * * Hashcodes are 32-bit integers. We make sure all hashcodes are * non-negative by masking off the top bit. This has two effects: (1) * modulo arithmetic is simplified. If we allowed negative hashcodes, * then when we computed hashcode % length, we could get a negative * result, which we would then have to adjust back into range. It's * simpler to just make hashcodes non-negative. (2) It makes it easy * to check for empty vs. occupied slots in the table. We just mark * empty or deleted slots with a negative hashcode. * * The central function is _uhash_find(). This function looks for a * slot matching the given key and hashcode. If one is found, it * returns a pointer to that slot. If the table is full, and no match * is found, it returns nullptr -- in theory. This would make the code * more complicated, since all callers of _uhash_find() would then * have to check for a nullptr result. To keep this from happening, we * don't allow the table to fill. When there is only one * empty/deleted slot left, uhash_put() will refuse to increase the * count, and fail. This simplifies the code. In practice, one will * seldom encounter this using default UHashtables. However, if a * hashtable is set to a U_FIXED resize policy, or if memory is * exhausted, then the table may fill. * * High and low water ratios control rehashing. They establish levels * of fullness (from 0 to 1) outside of which the data array is * reallocated and repopulated. Setting the low water ratio to zero * means the table will never shrink. Setting the high water ratio to * one means the table will never grow. The ratios should be * coordinated with the ratio between successive elements of the * PRIMES table, so that when the primeIndex is incremented or * decremented during rehashing, it brings the ratio of count / length * back into the desired range (between low and high water ratios). */ /******************************************************************** * PRIVATE Constants, Macros ********************************************************************/ /* This is a list of non-consecutive primes chosen such that * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81 * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this * ratio is changed, the low and high water ratios should also be * adjusted to suit. * * These prime numbers were also chosen so that they are the largest * prime number while being less than a power of two. */ static const int32_t PRIMES[] = …; #define PRIMES_LENGTH … #define DEFAULT_PRIME_INDEX … /* These ratios are tuned to the PRIMES array such that a resize * places the table back into the zone of non-resizing. That is, * after a call to _uhash_rehash(), a subsequent call to * _uhash_rehash() should do nothing (should not churn). This is only * a potential problem with U_GROW_AND_SHRINK. */ static const float RESIZE_POLICY_RATIO_TABLE[6] = …; /* Invariants for hashcode values: * DELETED < 0 * EMPTY < 0 * Real hashes >= 0 Hashcodes may not start out this way, but internally they are adjusted so that they are always positive. We assume 32-bit hashcodes; adjust these constants for other hashcode sizes. */ #define HASH_DELETED … #define HASH_EMPTY … #define IS_EMPTY_OR_DELETED(x) … /* This macro expects a UHashTok.pointer as its keypointer and valuepointer parameters */ #define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) … /* * Constants for hinting whether a key or value is an integer * or a pointer. If a hint bit is zero, then the associated * token is assumed to be an integer. */ #define HINT_BOTH_INTEGERS … #define HINT_KEY_POINTER … #define HINT_VALUE_POINTER … #define HINT_ALLOW_ZERO … /******************************************************************** * PRIVATE Implementation ********************************************************************/ static UHashTok _uhash_setElement(UHashtable *hash, UHashElement* e, int32_t hashcode, UHashTok key, UHashTok value, int8_t hint) { … } /** * Assumes that the given element is not empty or deleted. */ static UHashTok _uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) { … } static void _uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { … } /** * Allocate internal data array of a size determined by the given * prime index. If the index is out of range it is pinned into range. * If the allocation fails the status is set to * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In * either case the previous array pointer is overwritten. * * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1. */ static void _uhash_allocate(UHashtable *hash, int32_t primeIndex, UErrorCode *status) { … } static UHashtable* _uhash_init(UHashtable *result, UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t primeIndex, UErrorCode *status) { … } static UHashtable* _uhash_create(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t primeIndex, UErrorCode *status) { … } /** * Look for a key in the table, or if no such key exists, the first * empty slot matching the given hashcode. Keys are compared using * the keyComparator function. * * First find the start position, which is the hashcode modulo * the length. Test it to see if it is: * * a. identical: First check the hash values for a quick check, * then compare keys for equality using keyComparator. * b. deleted * c. empty * * Stop if it is identical or empty, otherwise continue by adding a * "jump" value (moduloing by the length again to keep it within * range) and retesting. For efficiency, there need enough empty * values so that the searches stop within a reasonable amount of time. * This can be changed by changing the high/low water marks. * * In theory, this function can return nullptr, if it is full (no empty * or deleted slots) and if no matching key is found. In practice, we * prevent this elsewhere (in uhash_put) by making sure the last slot * in the table is never filled. * * The size of the table should be prime for this algorithm to work; * otherwise we are not guaranteed that the jump value (the secondary * hash) is relatively prime to the table length. */ static UHashElement* _uhash_find(const UHashtable *hash, UHashTok key, int32_t hashcode) { … } /** * Attempt to grow or shrink the data arrays in order to make the * count fit between the high and low water marks. hash_put() and * hash_remove() call this method when the count exceeds the high or * low water marks. This method may do nothing, if memory allocation * fails, or if the count is already in range, or if the length is * already at the low or high limit. In any case, upon return the * arrays will be valid. */ static void _uhash_rehash(UHashtable *hash, UErrorCode *status) { … } static UHashTok _uhash_remove(UHashtable *hash, UHashTok key) { … } static UHashTok _uhash_put(UHashtable *hash, UHashTok key, UHashTok value, int8_t hint, UErrorCode *status) { … } /******************************************************************** * PUBLIC API ********************************************************************/ U_CAPI UHashtable* U_EXPORT2 uhash_open(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode *status) { … } U_CAPI UHashtable* U_EXPORT2 uhash_openSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode *status) { … } U_CAPI UHashtable* U_EXPORT2 uhash_init(UHashtable *fillinResult, UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode *status) { … } U_CAPI UHashtable* U_EXPORT2 uhash_initSize(UHashtable *fillinResult, UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode *status) { … } U_CAPI void U_EXPORT2 uhash_close(UHashtable *hash) { … } U_CAPI UHashFunction *U_EXPORT2 uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) { … } U_CAPI UKeyComparator *U_EXPORT2 uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) { … } U_CAPI UValueComparator *U_EXPORT2 uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ … } U_CAPI UObjectDeleter *U_EXPORT2 uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) { … } U_CAPI UObjectDeleter *U_EXPORT2 uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) { … } U_CAPI void U_EXPORT2 uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { … } U_CAPI int32_t U_EXPORT2 uhash_count(const UHashtable *hash) { … } U_CAPI void* U_EXPORT2 uhash_get(const UHashtable *hash, const void* key) { … } U_CAPI void* U_EXPORT2 uhash_iget(const UHashtable *hash, int32_t key) { … } U_CAPI int32_t U_EXPORT2 uhash_geti(const UHashtable *hash, const void* key) { … } U_CAPI int32_t U_EXPORT2 uhash_igeti(const UHashtable *hash, int32_t key) { … } U_CAPI int32_t U_EXPORT2 uhash_getiAndFound(const UHashtable *hash, const void *key, UBool *found) { … } U_CAPI int32_t U_EXPORT2 uhash_igetiAndFound(const UHashtable *hash, int32_t key, UBool *found) { … } U_CAPI void* U_EXPORT2 uhash_put(UHashtable *hash, void* key, void* value, UErrorCode *status) { … } U_CAPI void* U_EXPORT2 uhash_iput(UHashtable *hash, int32_t key, void* value, UErrorCode *status) { … } U_CAPI int32_t U_EXPORT2 uhash_puti(UHashtable *hash, void* key, int32_t value, UErrorCode *status) { … } U_CAPI int32_t U_EXPORT2 uhash_iputi(UHashtable *hash, int32_t key, int32_t value, UErrorCode *status) { … } U_CAPI int32_t U_EXPORT2 uhash_putiAllowZero(UHashtable *hash, void *key, int32_t value, UErrorCode *status) { … } U_CAPI int32_t U_EXPORT2 uhash_iputiAllowZero(UHashtable *hash, int32_t key, int32_t value, UErrorCode *status) { … } U_CAPI void* U_EXPORT2 uhash_remove(UHashtable *hash, const void* key) { … } U_CAPI void* U_EXPORT2 uhash_iremove(UHashtable *hash, int32_t key) { … } U_CAPI int32_t U_EXPORT2 uhash_removei(UHashtable *hash, const void* key) { … } U_CAPI int32_t U_EXPORT2 uhash_iremovei(UHashtable *hash, int32_t key) { … } U_CAPI void U_EXPORT2 uhash_removeAll(UHashtable *hash) { … } U_CAPI UBool U_EXPORT2 uhash_containsKey(const UHashtable *hash, const void *key) { … } /** * Returns true if the UHashtable contains an item with this integer key. * * @param hash The target UHashtable. * @param key An integer key stored in a hashtable * @return true if the key is found. */ U_CAPI UBool U_EXPORT2 uhash_icontainsKey(const UHashtable *hash, int32_t key) { … } U_CAPI const UHashElement* U_EXPORT2 uhash_find(const UHashtable *hash, const void* key) { … } U_CAPI const UHashElement* U_EXPORT2 uhash_nextElement(const UHashtable *hash, int32_t *pos) { … } U_CAPI void* U_EXPORT2 uhash_removeElement(UHashtable *hash, const UHashElement* e) { … } /******************************************************************** * UHashTok convenience ********************************************************************/ /** * Return a UHashTok for an integer. */ /*U_CAPI UHashTok U_EXPORT2 uhash_toki(int32_t i) { UHashTok tok; tok.integer = i; return tok; }*/ /** * Return a UHashTok for a pointer. */ /*U_CAPI UHashTok U_EXPORT2 uhash_tokp(void* p) { UHashTok tok; tok.pointer = p; return tok; }*/ /******************************************************************** * PUBLIC Key Hash Functions ********************************************************************/ U_CAPI int32_t U_EXPORT2 uhash_hashUChars(const UHashTok key) { … } U_CAPI int32_t U_EXPORT2 uhash_hashChars(const UHashTok key) { … } U_CAPI int32_t U_EXPORT2 uhash_hashIChars(const UHashTok key) { … } U_CAPI int32_t U_EXPORT2 uhash_hashIStringView(const UHashTok key) { … } U_CAPI UBool U_EXPORT2 uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ … } /******************************************************************** * PUBLIC Comparator Functions ********************************************************************/ U_CAPI UBool U_EXPORT2 uhash_compareUChars(const UHashTok key1, const UHashTok key2) { … } U_CAPI UBool U_EXPORT2 uhash_compareChars(const UHashTok key1, const UHashTok key2) { … } U_CAPI UBool U_EXPORT2 uhash_compareIChars(const UHashTok key1, const UHashTok key2) { … } U_CAPI UBool U_EXPORT2 uhash_compareIStringView(const UHashTok key1, const UHashTok key2) { … } /******************************************************************** * PUBLIC int32_t Support Functions ********************************************************************/ U_CAPI int32_t U_EXPORT2 uhash_hashLong(const UHashTok key) { … } U_CAPI UBool U_EXPORT2 uhash_compareLong(const UHashTok key1, const UHashTok key2) { … }