//===- llvm/ADT/IntervalMap.h - A sorted interval map -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file implements a coalescing interval map for small objects. /// /// KeyT objects are mapped to ValT objects. Intervals of keys that map to the /// same value are represented in a compressed form. /// /// Iterators provide ordered access to the compressed intervals rather than the /// individual keys, and insert and erase operations use key intervals as well. /// /// Like SmallVector, IntervalMap will store the first N intervals in the map /// object itself without any allocations. When space is exhausted it switches /// to a B+-tree representation with very small overhead for small key and /// value objects. /// /// A Traits class specifies how keys are compared. It also allows IntervalMap /// to work with both closed and half-open intervals. /// /// Keys and values are not stored next to each other in a std::pair, so we /// don't provide such a value_type. Dereferencing iterators only returns the /// mapped value. The interval bounds are accessible through the start() and /// stop() iterator methods. /// /// IntervalMap is optimized for small key and value objects, 4 or 8 bytes /// each is the optimal size. For large objects use std::map instead. // //===----------------------------------------------------------------------===// // // Synopsis: // // template <typename KeyT, typename ValT, unsigned N, typename Traits> // class IntervalMap { // public: // typedef KeyT key_type; // typedef ValT mapped_type; // typedef RecyclingAllocator<...> Allocator; // class iterator; // class const_iterator; // // explicit IntervalMap(Allocator&); // ~IntervalMap(): // // bool empty() const; // KeyT start() const; // KeyT stop() const; // ValT lookup(KeyT x, Value NotFound = Value()) const; // // const_iterator begin() const; // const_iterator end() const; // iterator begin(); // iterator end(); // const_iterator find(KeyT x) const; // iterator find(KeyT x); // // void insert(KeyT a, KeyT b, ValT y); // void clear(); // }; // // template <typename KeyT, typename ValT, unsigned N, typename Traits> // class IntervalMap::const_iterator { // public: // using iterator_category = std::bidirectional_iterator_tag; // using value_type = ValT; // using difference_type = std::ptrdiff_t; // using pointer = value_type *; // using reference = value_type &; // // bool operator==(const const_iterator &) const; // bool operator!=(const const_iterator &) const; // bool valid() const; // // const KeyT &start() const; // const KeyT &stop() const; // const ValT &value() const; // const ValT &operator*() const; // const ValT *operator->() const; // // const_iterator &operator++(); // const_iterator &operator++(int); // const_iterator &operator--(); // const_iterator &operator--(int); // void goToBegin(); // void goToEnd(); // void find(KeyT x); // void advanceTo(KeyT x); // }; // // template <typename KeyT, typename ValT, unsigned N, typename Traits> // class IntervalMap::iterator : public const_iterator { // public: // void insert(KeyT a, KeyT b, Value y); // void erase(); // }; // //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_INTERVALMAP_H #define LLVM_ADT_INTERVALMAP_H #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/RecyclingAllocator.h" #include <algorithm> #include <cassert> #include <iterator> #include <new> #include <utility> namespace llvm { //===----------------------------------------------------------------------===// //--- Key traits ---// //===----------------------------------------------------------------------===// // // The IntervalMap works with closed or half-open intervals. // Adjacent intervals that map to the same value are coalesced. // // The IntervalMapInfo traits class is used to determine if a key is contained // in an interval, and if two intervals are adjacent so they can be coalesced. // The provided implementation works for closed integer intervals, other keys // probably need a specialized version. // // The point x is contained in [a;b] when !startLess(x, a) && !stopLess(b, x). // // It is assumed that (a;b] half-open intervals are not used, only [a;b) is // allowed. This is so that stopLess(a, b) can be used to determine if two // intervals overlap. // //===----------------------------------------------------------------------===// template <typename T> struct IntervalMapInfo { … }; template <typename T> struct IntervalMapHalfOpenInfo { … }; /// IntervalMapImpl - Namespace used for IntervalMap implementation details. /// It should be considered private to the implementation. namespace IntervalMapImpl { IdxPair; //===----------------------------------------------------------------------===// //--- IntervalMapImpl::NodeBase ---// //===----------------------------------------------------------------------===// // // Both leaf and branch nodes store vectors of pairs. // Leaves store ((KeyT, KeyT), ValT) pairs, branches use (NodeRef, KeyT). // // Keys and values are stored in separate arrays to avoid padding caused by // different object alignments. This also helps improve locality of reference // when searching the keys. // // The nodes don't know how many elements they contain - that information is // stored elsewhere. Omitting the size field prevents padding and allows a node // to fill the allocated cache lines completely. // // These are typical key and value sizes, the node branching factor (N), and // wasted space when nodes are sized to fit in three cache lines (192 bytes): // // T1 T2 N Waste Used by // 4 4 24 0 Branch<4> (32-bit pointers) // 8 4 16 0 Leaf<4,4>, Branch<4> // 8 8 12 0 Leaf<4,8>, Branch<8> // 16 4 9 12 Leaf<8,4> // 16 8 8 0 Leaf<8,8> // //===----------------------------------------------------------------------===// template <typename T1, typename T2, unsigned N> class NodeBase { … }; /// IntervalMapImpl::adjustSiblingSizes - Move elements between sibling nodes. /// @param Node Array of pointers to sibling nodes. /// @param Nodes Number of nodes. /// @param CurSize Array of current node sizes, will be overwritten. /// @param NewSize Array of desired node sizes. template <typename NodeT> void adjustSiblingSizes(NodeT *Node[], unsigned Nodes, unsigned CurSize[], const unsigned NewSize[]) { … } /// IntervalMapImpl::distribute - Compute a new distribution of node elements /// after an overflow or underflow. Reserve space for a new element at Position, /// and compute the node that will hold Position after redistributing node /// elements. /// /// It is required that /// /// Elements == sum(CurSize), and /// Elements + Grow <= Nodes * Capacity. /// /// NewSize[] will be filled in such that: /// /// sum(NewSize) == Elements, and /// NewSize[i] <= Capacity. /// /// The returned index is the node where Position will go, so: /// /// sum(NewSize[0..idx-1]) <= Position /// sum(NewSize[0..idx]) >= Position /// /// The last equality, sum(NewSize[0..idx]) == Position, can only happen when /// Grow is set and NewSize[idx] == Capacity-1. The index points to the node /// before the one holding the Position'th element where there is room for an /// insertion. /// /// @param Nodes The number of nodes. /// @param Elements Total elements in all nodes. /// @param Capacity The capacity of each node. /// @param CurSize Array[Nodes] of current node sizes, or NULL. /// @param NewSize Array[Nodes] to receive the new node sizes. /// @param Position Insert position. /// @param Grow Reserve space for a new element at Position. /// @return (node, offset) for Position. IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity, const unsigned *CurSize, unsigned NewSize[], unsigned Position, bool Grow); //===----------------------------------------------------------------------===// //--- IntervalMapImpl::NodeSizer ---// //===----------------------------------------------------------------------===// // // Compute node sizes from key and value types. // // The branching factors are chosen to make nodes fit in three cache lines. // This may not be possible if keys or values are very large. Such large objects // are handled correctly, but a std::map would probably give better performance. // //===----------------------------------------------------------------------===// enum { … }; template <typename KeyT, typename ValT> struct NodeSizer { … }; //===----------------------------------------------------------------------===// //--- IntervalMapImpl::NodeRef ---// //===----------------------------------------------------------------------===// // // B+-tree nodes can be leaves or branches, so we need a polymorphic node // pointer that can point to both kinds. // // All nodes are cache line aligned and the low 6 bits of a node pointer are // always 0. These bits are used to store the number of elements in the // referenced node. Besides saving space, placing node sizes in the parents // allow tree balancing algorithms to run without faulting cache lines for nodes // that may not need to be modified. // // A NodeRef doesn't know whether it references a leaf node or a branch node. // It is the responsibility of the caller to use the correct types. // // Nodes are never supposed to be empty, and it is invalid to store a node size // of 0 in a NodeRef. The valid range of sizes is 1-64. // //===----------------------------------------------------------------------===// class NodeRef { … }; //===----------------------------------------------------------------------===// //--- IntervalMapImpl::LeafNode ---// //===----------------------------------------------------------------------===// // // Leaf nodes store up to N disjoint intervals with corresponding values. // // The intervals are kept sorted and fully coalesced so there are no adjacent // intervals mapping to the same value. // // These constraints are always satisfied: // // - Traits::stopLess(start(i), stop(i)) - Non-empty, sane intervals. // // - Traits::stopLess(stop(i), start(i + 1) - Sorted. // // - value(i) != value(i + 1) || !Traits::adjacent(stop(i), start(i + 1)) // - Fully coalesced. // //===----------------------------------------------------------------------===// template <typename KeyT, typename ValT, unsigned N, typename Traits> class LeafNode : public NodeBase<std::pair<KeyT, KeyT>, ValT, N> { … }; /// insertFrom - Add mapping of [a;b] to y if possible, coalescing as much as /// possible. This may cause the node to grow by 1, or it may cause the node /// to shrink because of coalescing. /// @param Pos Starting index = insertFrom(0, size, a) /// @param Size Number of elements in node. /// @param a Interval start. /// @param b Interval stop. /// @param y Value be mapped. /// @return (insert position, new size), or (i, Capacity+1) on overflow. template <typename KeyT, typename ValT, unsigned N, typename Traits> unsigned LeafNode<KeyT, ValT, N, Traits>:: insertFrom(unsigned &Pos, unsigned Size, KeyT a, KeyT b, ValT y) { … } //===----------------------------------------------------------------------===// //--- IntervalMapImpl::BranchNode ---// //===----------------------------------------------------------------------===// // // A branch node stores references to 1--N subtrees all of the same height. // // The key array in a branch node holds the rightmost stop key of each subtree. // It is redundant to store the last stop key since it can be found in the // parent node, but doing so makes tree balancing a lot simpler. // // It is unusual for a branch node to only have one subtree, but it can happen // in the root node if it is smaller than the normal nodes. // // When all of the leaf nodes from all the subtrees are concatenated, they must // satisfy the same constraints as a single leaf node. They must be sorted, // sane, and fully coalesced. // //===----------------------------------------------------------------------===// template <typename KeyT, typename ValT, unsigned N, typename Traits> class BranchNode : public NodeBase<NodeRef, KeyT, N> { … }; //===----------------------------------------------------------------------===// //--- IntervalMapImpl::Path ---// //===----------------------------------------------------------------------===// // // A Path is used by iterators to represent a position in a B+-tree, and the // path to get there from the root. // // The Path class also contains the tree navigation code that doesn't have to // be templatized. // //===----------------------------------------------------------------------===// class Path { … }; } // end namespace IntervalMapImpl //===----------------------------------------------------------------------===// //--- IntervalMap ----// //===----------------------------------------------------------------------===// template <typename KeyT, typename ValT, unsigned N = IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize, typename Traits = IntervalMapInfo<KeyT>> class IntervalMap { using Sizer = IntervalMapImpl::NodeSizer<KeyT, ValT>; using Leaf = IntervalMapImpl::LeafNode<KeyT, ValT, Sizer::LeafSize, Traits>; using Branch = IntervalMapImpl::BranchNode<KeyT, ValT, Sizer::BranchSize, Traits>; using RootLeaf = IntervalMapImpl::LeafNode<KeyT, ValT, N, Traits>; using IdxPair = IntervalMapImpl::IdxPair; // The RootLeaf capacity is given as a template parameter. We must compute the // corresponding RootBranch capacity. enum { DesiredRootBranchCap = (sizeof(RootLeaf) - sizeof(KeyT)) / (sizeof(KeyT) + sizeof(IntervalMapImpl::NodeRef)), RootBranchCap = DesiredRootBranchCap ? DesiredRootBranchCap : 1 }; using RootBranch = IntervalMapImpl::BranchNode<KeyT, ValT, RootBranchCap, Traits>; // When branched, we store a global start key as well as the branch node. struct RootBranchData { KeyT start; RootBranch node; }; public: using Allocator = typename Sizer::Allocator; using KeyType = KeyT; using ValueType = ValT; using KeyTraits = Traits; private: // The root data is either a RootLeaf or a RootBranchData instance. union { RootLeaf leaf; RootBranchData branchData; }; // Tree height. // 0: Leaves in root. // 1: Root points to leaf. // 2: root->branch->leaf ... unsigned height = 0; // Number of entries in the root node. unsigned rootSize = 0; // Allocator used for creating external nodes. Allocator *allocator = nullptr; const RootLeaf &rootLeaf() const { … } RootLeaf &rootLeaf() { … } const RootBranchData &rootBranchData() const { … } RootBranchData &rootBranchData() { … } const RootBranch &rootBranch() const { … } RootBranch &rootBranch() { … } KeyT rootBranchStart() const { … } KeyT &rootBranchStart() { … } template <typename NodeT> NodeT *newNode() { … } template <typename NodeT> void deleteNode(NodeT *P) { … } IdxPair branchRoot(unsigned Position); IdxPair splitRoot(unsigned Position); void switchRootToBranch() { … } void switchRootToLeaf() { … } bool branched() const { … } ValT treeSafeLookup(KeyT x, ValT NotFound) const; void visitNodes(void (IntervalMap::*f)(IntervalMapImpl::NodeRef, unsigned Level)); void deleteNode(IntervalMapImpl::NodeRef Node, unsigned Level); public: explicit IntervalMap(Allocator &a) : … { … } ///@{ /// NOTE: The moved-from or copied-from object's allocator needs to have a /// lifetime equal to or exceeding the moved-to or copied-to object to avoid /// undefined behaviour. IntervalMap(IntervalMap const &RHS) : … { … } IntervalMap &operator=(IntervalMap const &RHS) { … } IntervalMap(IntervalMap &&RHS) : … { … } IntervalMap &operator=(IntervalMap &&RHS) { … } ///@} ~IntervalMap() { … } /// empty - Return true when no intervals are mapped. bool empty() const { … } /// start - Return the smallest mapped key in a non-empty map. KeyT start() const { … } /// stop - Return the largest mapped key in a non-empty map. KeyT stop() const { … } /// lookup - Return the mapped value at x or NotFound. ValT lookup(KeyT x, ValT NotFound = ValT()) const { … } /// insert - Add a mapping of [a;b] to y, coalesce with adjacent intervals. /// It is assumed that no key in the interval is mapped to another value, but /// overlapping intervals already mapped to y will be coalesced. void insert(KeyT a, KeyT b, ValT y) { … } /// clear - Remove all entries. void clear(); class const_iterator; class iterator; friend class const_iterator; friend class iterator; const_iterator begin() const { … } iterator begin() { … } const_iterator end() const { … } iterator end() { … } /// find - Return an iterator pointing to the first interval ending at or /// after x, or end(). const_iterator find(KeyT x) const { … } iterator find(KeyT x) { … } /// overlaps(a, b) - Return true if the intervals in this map overlap with the /// interval [a;b]. bool overlaps(KeyT a, KeyT b) const { … } }; /// treeSafeLookup - Return the mapped value at x or NotFound, assuming a /// branched root. template <typename KeyT, typename ValT, unsigned N, typename Traits> ValT IntervalMap<KeyT, ValT, N, Traits>:: treeSafeLookup(KeyT x, ValT NotFound) const { … } // branchRoot - Switch from a leaf root to a branched root. // Return the new (root offset, node offset) corresponding to Position. template <typename KeyT, typename ValT, unsigned N, typename Traits> IntervalMapImpl::IdxPair IntervalMap<KeyT, ValT, N, Traits>:: branchRoot(unsigned Position) { … } // splitRoot - Split the current BranchRoot into multiple Branch nodes. // Return the new (root offset, node offset) corresponding to Position. template <typename KeyT, typename ValT, unsigned N, typename Traits> IntervalMapImpl::IdxPair IntervalMap<KeyT, ValT, N, Traits>:: splitRoot(unsigned Position) { … } /// visitNodes - Visit each external node. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: visitNodes(void (IntervalMap::*f)(IntervalMapImpl::NodeRef, unsigned Height)) { … } template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: deleteNode(IntervalMapImpl::NodeRef Node, unsigned Level) { … } template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: clear() { … } //===----------------------------------------------------------------------===// //--- IntervalMap::const_iterator ----// //===----------------------------------------------------------------------===// template <typename KeyT, typename ValT, unsigned N, typename Traits> class IntervalMap<KeyT, ValT, N, Traits>::const_iterator { … }; /// pathFillFind - Complete path by searching for x. /// @param x Key to search for. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: const_iterator::pathFillFind(KeyT x) { … } /// treeFind - Find in a branched tree. /// @param x Key to search for. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: const_iterator::treeFind(KeyT x) { … } /// treeAdvanceTo - Find position after the current one. /// @param x Key to search for. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: const_iterator::treeAdvanceTo(KeyT x) { … } //===----------------------------------------------------------------------===// //--- IntervalMap::iterator ----// //===----------------------------------------------------------------------===// template <typename KeyT, typename ValT, unsigned N, typename Traits> class IntervalMap<KeyT, ValT, N, Traits>::iterator : public const_iterator { … }; /// canCoalesceLeft - Can the current interval coalesce to the left after /// changing start or value? /// @param Start New start of current interval. /// @param Value New value for current interval. /// @return True when updating the current interval would enable coalescing. template <typename KeyT, typename ValT, unsigned N, typename Traits> bool IntervalMap<KeyT, ValT, N, Traits>:: iterator::canCoalesceLeft(KeyT Start, ValT Value) { … } /// canCoalesceRight - Can the current interval coalesce to the right after /// changing stop or value? /// @param Stop New stop of current interval. /// @param Value New value for current interval. /// @return True when updating the current interval would enable coalescing. template <typename KeyT, typename ValT, unsigned N, typename Traits> bool IntervalMap<KeyT, ValT, N, Traits>:: iterator::canCoalesceRight(KeyT Stop, ValT Value) { … } /// setNodeStop - Update the stop key of the current node at level and above. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::setNodeStop(unsigned Level, KeyT Stop) { … } template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::setStart(KeyT a) { … } template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::setStop(KeyT b) { … } template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::setValue(ValT x) { … } /// insertNode - insert a node before the current path at level. /// Leave the current path pointing at the new node. /// @param Level path index of the node to be inserted. /// @param Node The node to be inserted. /// @param Stop The last index in the new node. /// @return True if the tree height was increased. template <typename KeyT, typename ValT, unsigned N, typename Traits> bool IntervalMap<KeyT, ValT, N, Traits>:: iterator::insertNode(unsigned Level, IntervalMapImpl::NodeRef Node, KeyT Stop) { … } // insert template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::insert(KeyT a, KeyT b, ValT y) { … } template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::treeInsert(KeyT a, KeyT b, ValT y) { … } /// erase - erase the current interval and move to the next position. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::erase() { … } /// treeErase - erase() for a branched tree. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::treeErase(bool UpdateRoot) { … } /// eraseNode - Erase the current node at Level from its parent and move path to /// the first entry of the next sibling node. /// The node must be deallocated by the caller. /// @param Level 1..height, the root node cannot be erased. template <typename KeyT, typename ValT, unsigned N, typename Traits> void IntervalMap<KeyT, ValT, N, Traits>:: iterator::eraseNode(unsigned Level) { … } /// overflow - Distribute entries of the current node evenly among /// its siblings and ensure that the current node is not full. /// This may require allocating a new node. /// @tparam NodeT The type of node at Level (Leaf or Branch). /// @param Level path index of the overflowing node. /// @return True when the tree height was changed. template <typename KeyT, typename ValT, unsigned N, typename Traits> template <typename NodeT> bool IntervalMap<KeyT, ValT, N, Traits>:: iterator::overflow(unsigned Level) { … } //===----------------------------------------------------------------------===// //--- IntervalMapOverlaps ----// //===----------------------------------------------------------------------===// /// IntervalMapOverlaps - Iterate over the overlaps of mapped intervals in two /// IntervalMaps. The maps may be different, but the KeyT and Traits types /// should be the same. /// /// Typical uses: /// /// 1. Test for overlap: /// bool overlap = IntervalMapOverlaps(a, b).valid(); /// /// 2. Enumerate overlaps: /// for (IntervalMapOverlaps I(a, b); I.valid() ; ++I) { ... } /// template <typename MapA, typename MapB> class IntervalMapOverlaps { … }; } // end namespace llvm #endif // LLVM_ADT_INTERVALMAP_H