llvm/llvm/include/llvm/ADT/ConcurrentHashtable.h

//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
#define LLVM_ADT_CONCURRENTHASHTABLE_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/xxhash.h"
#include <atomic>
#include <cstddef>
#include <iomanip>
#include <mutex>
#include <sstream>
#include <type_traits>

namespace llvm {

/// ConcurrentHashTable - is a resizeable concurrent hashtable.
/// The number of resizings limited up to x2^31. This hashtable is
/// useful to have efficient access to aggregate data(like strings,
/// type descriptors...) and to keep only single copy of such
/// an aggregate. The hashtable allows only concurrent insertions:
///
/// KeyDataTy* = insert ( const KeyTy& );
///
/// Data structure:
///
/// Inserted value KeyTy is mapped to 64-bit hash value ->
///
///          [------- 64-bit Hash value --------]
///          [  StartEntryIndex ][ Bucket Index ]
///                    |                |
///              points to the     points to
///              first probe       the bucket.
///              position inside
///              bucket entries
///
/// After initialization, all buckets have an initial size. During insertions,
/// buckets might be extended to contain more entries. Each bucket can be
/// independently resized and rehashed(no need to lock the whole table).
/// Different buckets may have different sizes. If the single bucket is full
/// then the bucket is resized.
///
/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
/// (pointers to KeyDataTy) and another array of entries hashes:
///
/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
/// BucketsArray[BucketIdx].Entries[EntryIdx]:
///
/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
///
/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
///                      .........................
/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
///
/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
/// KeyDataTy items.

template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
class ConcurrentHashTableInfoByPtr {};

template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
          typename Info =
              ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
class ConcurrentHashTableByPtr {
public:
  ConcurrentHashTableByPtr(
      AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
      size_t ThreadsNum = parallel::strategy.compute_thread_count(),
      size_t InitialNumberOfBuckets = 128)
      :{}

  virtual ~ConcurrentHashTableByPtr() {}

  /// Insert new value \p NewValue or return already existing entry.
  ///
  /// \returns entry and "true" if an entry is just inserted or
  /// "false" if an entry already exists.
  std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {}

  /// Print information about current state of hash table structures.
  void printStatistic(raw_ostream &OS) {}

protected:
  using ExtHashBitsTy = uint32_t;
  using EntryDataTy = KeyDataTy *;

  using HashesPtr = ExtHashBitsTy *;
  using DataPtr = EntryDataTy *;

  // Bucket structure. Keeps bucket data.
  struct Bucket {
    Bucket() = default;

    // Size of bucket.
    uint32_t Size = 0;

    // Number of non-null entries.
    uint32_t NumberOfEntries = 0;

    // Hashes for [Size] entries.
    HashesPtr Hashes = nullptr;

    // [Size] entries.
    DataPtr Entries = nullptr;

#if LLVM_ENABLE_THREADS
    // Mutex for this bucket.
    std::mutex Guard;
#endif
  };

  // Reallocate and rehash bucket if this is full enough.
  void RehashBucket(Bucket &CurBucket) {}

  uint32_t getBucketIdx(hash_code Hash) {}

  uint32_t getExtHashBits(uint64_t Hash) {}

  uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {}

  // Number of bits in hash mask.
  uint64_t HashBitsNum = 0;

  // Hash mask.
  uint64_t HashMask = 0;

  // Hash mask for the extended hash bits.
  uint64_t ExtHashMask = 0;

  // The maximal bucket size.
  uint32_t MaxBucketSize = 0;

  // Initial size of bucket.
  uint32_t InitialBucketSize = 0;

  // The number of buckets.
  uint32_t NumberOfBuckets = 0;

  // Array of buckets.
  std::unique_ptr<Bucket[]> BucketsArray;

  // Used for allocating KeyDataTy values.
  AllocatorTy &MultiThreadAllocator;
};

} // end namespace llvm

#endif // LLVM_ADT_CONCURRENTHASHTABLE_H