//===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// Symbol index queries consist of specific requirements for the requested /// symbol, such as high fuzzy matching score, scope, type etc. The lists of all /// symbols matching some criteria (e.g. belonging to "clang::clangd::" scope) /// are expressed in a form of Search Tokens which are stored in the inverted /// index. Inverted index maps these tokens to the posting lists - sorted (by /// symbol quality) sequences of symbol IDs matching the token, e.g. scope token /// "clangd::clangd::" is mapped to the list of IDs of all symbols which are /// declared in this namespace. Search queries are build from a set of /// requirements which can be combined with each other forming the query trees. /// The leafs of such trees are posting lists, and the nodes are operations on /// these posting lists, e.g. intersection or union. Efficient processing of /// these multi-level queries is handled by Iterators. Iterators advance through /// all leaf posting lists producing the result of search query, which preserves /// the sorted order of IDs. Having the resulting IDs sorted is important, /// because it allows receiving a certain number of the most valuable items /// (e.g. symbols with highest quality which was the sorting key in the first /// place) without processing all items with requested properties (this might /// not be computationally effective if search request is not very restrictive). // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <memory> #include <vector> namespace clang { namespace clangd { namespace dex { /// Symbol position in the list of all index symbols sorted by a pre-computed /// symbol quality. DocID; /// Iterator is the interface for Query Tree node. The simplest type of Iterator /// is DocumentIterator which is simply a wrapper around PostingList iterator /// and serves as the Query Tree leaf. More sophisticated examples of iterators /// can manage intersection, union of the elements produced by other iterators /// (their children) to form a multi-level Query Tree. The interface is designed /// to be extensible in order to support multiple types of iterators. class Iterator { … }; /// Advances the iterator until it is exhausted. Returns pairs of document IDs /// with the corresponding boosting score. /// /// Boosting can be seen as a compromise between retrieving too many items and /// calculating finals score for each of them (which might be very expensive) /// and not retrieving enough items so that items with very high final score /// would not be processed. Boosting score is a computationally efficient way /// to acquire preliminary scores of requested items. std::vector<std::pair<DocID, float>> consume(Iterator &It); namespace detail { // Variadic template machinery. inline void populateChildren(std::vector<std::unique_ptr<Iterator>> &) { … } template <typename... TailT> void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children, std::unique_ptr<Iterator> Head, TailT... Tail) { … } } // namespace detail // A corpus is a set of documents, and a factory for iterators over them. class Corpus { … }; } // namespace dex } // namespace clangd } // namespace clang #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H