//===- RewriteRope.cpp - Rope specialized for rewriter --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the RewriteRope class, which is a powerful string. // //===----------------------------------------------------------------------===// #include "llvm/ADT/RewriteRope.h" #include "llvm/Support/Casting.h" #include <algorithm> #include <cassert> #include <cstring> usingnamespacellvm; /// RewriteRope is a "strong" string class, designed to make insertions and /// deletions in the middle of the string nearly constant time (really, they are /// O(log N), but with a very low constant factor). /// /// The implementation of this datastructure is a conceptual linear sequence of /// RopePiece elements. Each RopePiece represents a view on a separately /// allocated and reference counted string. This means that splitting a very /// long string can be done in constant time by splitting a RopePiece that /// references the whole string into two rope pieces that reference each half. /// Once split, another string can be inserted in between the two halves by /// inserting a RopePiece in between the two others. All of this is very /// inexpensive: it takes time proportional to the number of RopePieces, not the /// length of the strings they represent. /// /// While a linear sequences of RopePieces is the conceptual model, the actual /// implementation captures them in an adapted B+ Tree. Using a B+ tree (which /// is a tree that keeps the values in the leaves and has where each node /// contains a reasonable number of pointers to children/values) allows us to /// maintain efficient operation when the RewriteRope contains a *huge* number /// of RopePieces. The basic idea of the B+ Tree is that it allows us to find /// the RopePiece corresponding to some offset very efficiently, and it /// automatically balances itself on insertions of RopePieces (which can happen /// for both insertions and erases of string ranges). /// /// The one wrinkle on the theory is that we don't attempt to keep the tree /// properly balanced when erases happen. Erases of string data can both insert /// new RopePieces (e.g. when the middle of some other rope piece is deleted, /// which results in two rope pieces, which is just like an insert) or it can /// reduce the number of RopePieces maintained by the B+Tree. In the case when /// the number of RopePieces is reduced, we don't attempt to maintain the /// standard 'invariant' that each node in the tree contains at least /// 'WidthFactor' children/values. For our use cases, this doesn't seem to /// matter. /// /// The implementation below is primarily implemented in terms of three classes: /// RopePieceBTreeNode - Common base class for: /// /// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece /// nodes. This directly represents a chunk of the string with those /// RopePieces concatenated. /// RopePieceBTreeInterior - An interior node in the B+ Tree, which manages /// up to '2*WidthFactor' other nodes in the tree. namespace { //===----------------------------------------------------------------------===// // RopePieceBTreeNode Class //===----------------------------------------------------------------------===// /// RopePieceBTreeNode - Common base class of RopePieceBTreeLeaf and /// RopePieceBTreeInterior. This provides some 'virtual' dispatching methods /// and a flag that determines which subclass the instance is. Also /// important, this node knows the full extend of the node, including any /// children that it has. This allows efficient skipping over entire subtrees /// when looking for an offset in the BTree. class RopePieceBTreeNode { … }; //===----------------------------------------------------------------------===// // RopePieceBTreeLeaf Class //===----------------------------------------------------------------------===// /// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece /// nodes. This directly represents a chunk of the string with those /// RopePieces concatenated. Since this is a B+Tree, all values (in this case /// instances of RopePiece) are stored in leaves like this. To make iteration /// over the leaves efficient, they maintain a singly linked list through the /// NextLeaf field. This allows the B+Tree forward iterator to be constant /// time for all increments. class RopePieceBTreeLeaf : public RopePieceBTreeNode { … }; } // namespace /// split - Split the range containing the specified offset so that we are /// guaranteed that there is a place to do an insertion at the specified /// offset. The offset is relative, so "0" is the start of the node. /// /// If there is no space in this subtree for the extra piece, the extra tree /// node is returned and must be inserted into a parent. RopePieceBTreeNode *RopePieceBTreeLeaf::split(unsigned Offset) { … } /// insert - Insert the specified RopePiece into this tree node at the /// specified offset. The offset is relative, so "0" is the start of the node. /// /// If there is no space in this subtree for the extra piece, the extra tree /// node is returned and must be inserted into a parent. RopePieceBTreeNode *RopePieceBTreeLeaf::insert(unsigned Offset, const RopePiece &R) { … } /// erase - Remove NumBytes from this node at the specified offset. We are /// guaranteed that there is a split at Offset. void RopePieceBTreeLeaf::erase(unsigned Offset, unsigned NumBytes) { … } //===----------------------------------------------------------------------===// // RopePieceBTreeInterior Class //===----------------------------------------------------------------------===// namespace { /// RopePieceBTreeInterior - This represents an interior node in the B+Tree, /// which holds up to 2*WidthFactor pointers to child nodes. class RopePieceBTreeInterior : public RopePieceBTreeNode { … }; } // namespace /// split - Split the range containing the specified offset so that we are /// guaranteed that there is a place to do an insertion at the specified /// offset. The offset is relative, so "0" is the start of the node. /// /// If there is no space in this subtree for the extra piece, the extra tree /// node is returned and must be inserted into a parent. RopePieceBTreeNode *RopePieceBTreeInterior::split(unsigned Offset) { … } /// insert - Insert the specified ropepiece into this tree node at the /// specified offset. The offset is relative, so "0" is the start of the /// node. /// /// If there is no space in this subtree for the extra piece, the extra tree /// node is returned and must be inserted into a parent. RopePieceBTreeNode *RopePieceBTreeInterior::insert(unsigned Offset, const RopePiece &R) { … } /// HandleChildPiece - A child propagated an insertion result up to us. /// Insert the new child, and/or propagate the result further up the tree. RopePieceBTreeNode * RopePieceBTreeInterior::HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS) { … } /// erase - Remove NumBytes from this node at the specified offset. We are /// guaranteed that there is a split at Offset. void RopePieceBTreeInterior::erase(unsigned Offset, unsigned NumBytes) { … } //===----------------------------------------------------------------------===// // RopePieceBTreeNode Implementation //===----------------------------------------------------------------------===// void RopePieceBTreeNode::Destroy() { … } /// split - Split the range containing the specified offset so that we are /// guaranteed that there is a place to do an insertion at the specified /// offset. The offset is relative, so "0" is the start of the node. /// /// If there is no space in this subtree for the extra piece, the extra tree /// node is returned and must be inserted into a parent. RopePieceBTreeNode *RopePieceBTreeNode::split(unsigned Offset) { … } /// insert - Insert the specified ropepiece into this tree node at the /// specified offset. The offset is relative, so "0" is the start of the /// node. /// /// If there is no space in this subtree for the extra piece, the extra tree /// node is returned and must be inserted into a parent. RopePieceBTreeNode *RopePieceBTreeNode::insert(unsigned Offset, const RopePiece &R) { … } /// erase - Remove NumBytes from this node at the specified offset. We are /// guaranteed that there is a split at Offset. void RopePieceBTreeNode::erase(unsigned Offset, unsigned NumBytes) { … } //===----------------------------------------------------------------------===// // RopePieceBTreeIterator Implementation //===----------------------------------------------------------------------===// static const RopePieceBTreeLeaf *getCN(const void *P) { … } // begin iterator. RopePieceBTreeIterator::RopePieceBTreeIterator(const void *n) { … } void RopePieceBTreeIterator::MoveToNextPiece() { … } //===----------------------------------------------------------------------===// // RopePieceBTree Implementation //===----------------------------------------------------------------------===// static RopePieceBTreeNode *getRoot(void *P) { … } RopePieceBTree::RopePieceBTree() { … } RopePieceBTree::RopePieceBTree(const RopePieceBTree &RHS) { … } RopePieceBTree::~RopePieceBTree() { … } unsigned RopePieceBTree::size() const { … } void RopePieceBTree::clear() { … } void RopePieceBTree::insert(unsigned Offset, const RopePiece &R) { … } void RopePieceBTree::erase(unsigned Offset, unsigned NumBytes) { … } //===----------------------------------------------------------------------===// // RewriteRope Implementation //===----------------------------------------------------------------------===// /// MakeRopeString - This copies the specified byte range into some instance of /// RopeRefCountString, and return a RopePiece that represents it. This uses /// the AllocBuffer object to aggregate requests for small strings into one /// allocation instead of doing tons of tiny allocations. RopePiece RewriteRope::MakeRopeString(const char *Start, const char *End) { … }