//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_CONCURRENTHASHTABLE_H #define LLVM_ADT_CONCURRENTHASHTABLE_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/xxhash.h" #include <atomic> #include <cstddef> #include <iomanip> #include <mutex> #include <sstream> #include <type_traits> namespace llvm { /// ConcurrentHashTable - is a resizeable concurrent hashtable. /// The number of resizings limited up to x2^31. This hashtable is /// useful to have efficient access to aggregate data(like strings, /// type descriptors...) and to keep only single copy of such /// an aggregate. The hashtable allows only concurrent insertions: /// /// KeyDataTy* = insert ( const KeyTy& ); /// /// Data structure: /// /// Inserted value KeyTy is mapped to 64-bit hash value -> /// /// [------- 64-bit Hash value --------] /// [ StartEntryIndex ][ Bucket Index ] /// | | /// points to the points to /// first probe the bucket. /// position inside /// bucket entries /// /// After initialization, all buckets have an initial size. During insertions, /// buckets might be extended to contain more entries. Each bucket can be /// independently resized and rehashed(no need to lock the whole table). /// Different buckets may have different sizes. If the single bucket is full /// then the bucket is resized. /// /// BucketsArray keeps all buckets. Each bucket keeps an array of Entries /// (pointers to KeyDataTy) and another array of entries hashes: /// /// BucketsArray[BucketIdx].Hashes[EntryIdx]: /// BucketsArray[BucketIdx].Entries[EntryIdx]: /// /// [Bucket 0].Hashes -> [uint32_t][uint32_t] /// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*] /// /// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t] /// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*] /// ......................... /// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t] /// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*] /// /// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate /// KeyDataTy items. template <typename KeyTy, typename KeyDataTy, typename AllocatorTy> class ConcurrentHashTableInfoByPtr { … }; template <typename KeyTy, typename KeyDataTy, typename AllocatorTy, typename Info = ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>> class ConcurrentHashTableByPtr { public: ConcurrentHashTableByPtr( AllocatorTy &Allocator, uint64_t EstimatedSize = 100000, size_t ThreadsNum = parallel::strategy.compute_thread_count(), size_t InitialNumberOfBuckets = 128) : … { … } virtual ~ConcurrentHashTableByPtr() { … } /// Insert new value \p NewValue or return already existing entry. /// /// \returns entry and "true" if an entry is just inserted or /// "false" if an entry already exists. std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) { … } /// Print information about current state of hash table structures. void printStatistic(raw_ostream &OS) { … } protected: using ExtHashBitsTy = uint32_t; using EntryDataTy = KeyDataTy *; using HashesPtr = ExtHashBitsTy *; using DataPtr = EntryDataTy *; // Bucket structure. Keeps bucket data. struct Bucket { Bucket() = default; // Size of bucket. uint32_t Size = 0; // Number of non-null entries. uint32_t NumberOfEntries = 0; // Hashes for [Size] entries. HashesPtr Hashes = nullptr; // [Size] entries. DataPtr Entries = nullptr; #if LLVM_ENABLE_THREADS // Mutex for this bucket. std::mutex Guard; #endif }; // Reallocate and rehash bucket if this is full enough. void RehashBucket(Bucket &CurBucket) { … } uint32_t getBucketIdx(hash_code Hash) { … } uint32_t getExtHashBits(uint64_t Hash) { … } uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) { … } // Number of bits in hash mask. uint64_t HashBitsNum = 0; // Hash mask. uint64_t HashMask = 0; // Hash mask for the extended hash bits. uint64_t ExtHashMask = 0; // The maximal bucket size. uint32_t MaxBucketSize = 0; // Initial size of bucket. uint32_t InitialBucketSize = 0; // The number of buckets. uint32_t NumberOfBuckets = 0; // Array of buckets. std::unique_ptr<Bucket[]> BucketsArray; // Used for allocating KeyDataTy values. AllocatorTy &MultiThreadAllocator; }; } // end namespace llvm #endif // LLVM_ADT_CONCURRENTHASHTABLE_H