//===--- FuzzyMatch.h - Approximate identifier matching ---------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements fuzzy-matching of strings against identifiers. // It indicates both the existence and quality of a match: // 'eb' matches both 'emplace_back' and 'embed', the former has a better score. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include <optional> namespace clang { namespace clangd { // Utilities for word segmentation. // FuzzyMatcher already incorporates this logic, so most users don't need this. // // A name like "fooBar_baz" consists of several parts foo, bar, baz. // Aligning segmentation of word and pattern improves the fuzzy-match. // For example: [lol] matches "LaughingOutLoud" better than "LionPopulation" // // First we classify each character into types (uppercase, lowercase, etc). // Then we look at the sequence: e.g. [upper, lower] is the start of a segment. // We distinguish the types of characters that affect segmentation. // It's not obvious how to segment digits, we treat them as lowercase letters. // As we don't decode UTF-8, we treat bytes over 127 as lowercase too. // This means we require exact (case-sensitive) match for those characters. enum CharType : unsigned char { … }; // A CharTypeSet is a bitfield representing all the character types in a word. // Its bits are 1<<Empty, 1<<Lower, etc. CharTypeSet; // Each character's Role is the Head or Tail of a segment, or a Separator. // e.g. XMLHttpRequest_Async // +--+---+------ +---- // ^Head ^Tail ^Separator enum CharRole : unsigned char { … }; // Compute segmentation of Text. // Character roles are stored in Roles (Roles.size() must equal Text.size()). // The set of character types encountered is returned, this may inform // heuristics for dealing with poorly-segmented identifiers like "strndup". CharTypeSet calculateRoles(llvm::StringRef Text, llvm::MutableArrayRef<CharRole> Roles); // A matcher capable of matching and scoring strings against a single pattern. // It's optimized for matching against many strings - match() does not allocate. class FuzzyMatcher { … }; } // namespace clangd } // namespace clang #endif