llvm/clang-tools-extra/clangd/index/dex/Iterator.h

//===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Symbol index queries consist of specific requirements for the requested
/// symbol, such as high fuzzy matching score, scope, type etc. The lists of all
/// symbols matching some criteria (e.g. belonging to "clang::clangd::" scope)
/// are expressed in a form of Search Tokens which are stored in the inverted
/// index. Inverted index maps these tokens to the posting lists - sorted (by
/// symbol quality) sequences of symbol IDs matching the token, e.g. scope token
/// "clangd::clangd::" is mapped to the list of IDs of all symbols which are
/// declared in this namespace. Search queries are build from a set of
/// requirements which can be combined with each other forming the query trees.
/// The leafs of such trees are posting lists, and the nodes are operations on
/// these posting lists, e.g. intersection or union. Efficient processing of
/// these multi-level queries is handled by Iterators. Iterators advance through
/// all leaf posting lists producing the result of search query, which preserves
/// the sorted order of IDs. Having the resulting IDs sorted is important,
/// because it allows receiving a certain number of the most valuable items
/// (e.g. symbols with highest quality which was the sorting key in the first
/// place) without processing all items with requested properties (this might
/// not be computationally effective if search request is not very restrictive).
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H

#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
#include <vector>

namespace clang {
namespace clangd {
namespace dex {

/// Symbol position in the list of all index symbols sorted by a pre-computed
/// symbol quality.
DocID;

/// Iterator is the interface for Query Tree node. The simplest type of Iterator
/// is DocumentIterator which is simply a wrapper around PostingList iterator
/// and serves as the Query Tree leaf. More sophisticated examples of iterators
/// can manage intersection, union of the elements produced by other iterators
/// (their children) to form a multi-level Query Tree. The interface is designed
/// to be extensible in order to support multiple types of iterators.
class Iterator {};

/// Advances the iterator until it is exhausted. Returns pairs of document IDs
/// with the corresponding boosting score.
///
/// Boosting can be seen as a compromise between retrieving too many items and
/// calculating finals score for each of them (which might be very expensive)
/// and not retrieving enough items so that items with very high final score
/// would not be processed. Boosting score is a computationally efficient way
/// to acquire preliminary scores of requested items.
std::vector<std::pair<DocID, float>> consume(Iterator &It);

namespace detail {
// Variadic template machinery.
inline void populateChildren(std::vector<std::unique_ptr<Iterator>> &) {}
template <typename... TailT>
void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
                      std::unique_ptr<Iterator> Head, TailT... Tail) {}
} // namespace detail

// A corpus is a set of documents, and a factory for iterators over them.
class Corpus {};

} // namespace dex
} // namespace clangd
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H