Lexer.cpp | Explore in Territory

//===- Lexer.cpp - C Language Family Lexer --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements the Lexer and Token interfaces.
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Lexer.h"
#include "UnicodeCharSets.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MultipleIncludeOpt.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Unicode.h"
#include "llvm/Support/UnicodeCharRanges.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <optional>
#include <string>
#include <tuple>
#include <utility>

#ifdef __SSE4_2__
#include <nmmintrin.h>
#endif

usingnamespaceclang;

//===----------------------------------------------------------------------===//
// Token Class Implementation
//===----------------------------------------------------------------------===//

/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { … }

/// getObjCKeywordID - Return the ObjC keyword kind.
tok::ObjCKeywordKind Token::getObjCKeywordID() const { … }

/// Determine whether the token kind starts a simple-type-specifier.
bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { … }

//===----------------------------------------------------------------------===//
// Lexer Class Implementation
//===----------------------------------------------------------------------===//

void Lexer::anchor() { … }

void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
                      const char *BufEnd) { … }

/// Lexer constructor - Create a new lexer object for the specified buffer
/// with the specified preprocessor managing the lexing process.  This lexer
/// assumes that the associated file buffer and Preprocessor objects will
/// outlive it, so it doesn't take ownership of either of them.
Lexer::Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile,
             Preprocessor &PP, bool IsFirstIncludeOfFile)
    : … { … }

/// Lexer constructor - Create a new raw lexer object.  This object is only
/// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the text
/// range will outlive it, so it doesn't take ownership of it.
Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
             const char *BufStart, const char *BufPtr, const char *BufEnd,
             bool IsFirstIncludeOfFile)
    : … { … }

/// Lexer constructor - Create a new raw lexer object.  This object is only
/// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the text
/// range will outlive it, so it doesn't take ownership of it.
Lexer::Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,
             const SourceManager &SM, const LangOptions &langOpts,
             bool IsFirstIncludeOfFile)
    : … { … }

void Lexer::resetExtendedTokenMode() { … }

/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
/// _Pragma expansion.  This has a variety of magic semantics that this method
/// sets up.  It returns a new'd Lexer that must be delete'd when done.
///
/// On entrance to this routine, TokStartLoc is a macro location which has a
/// spelling loc that indicates the bytes to be lexed for the token and an
/// expansion location that indicates where all lexed tokens should be
/// "expanded from".
///
/// TODO: It would really be nice to make _Pragma just be a wrapper around a
/// normal lexer that remaps tokens as they fly by.  This would require making
/// Preprocessor::Lex virtual.  Given that, we could just dump in a magic lexer
/// interface that could handle this stuff.  This would pull GetMappedTokenLoc
/// out of the critical path of the lexer!
///
Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
                                 SourceLocation ExpansionLocStart,
                                 SourceLocation ExpansionLocEnd,
                                 unsigned TokLen, Preprocessor &PP) { … }

void Lexer::seek(unsigned Offset, bool IsAtStartOfLine) { … }

template <typename T> static void StringifyImpl(T &Str, char Quote) { … }

std::string Lexer::Stringify(StringRef Str, bool Charify) { … }

void Lexer::Stringify(SmallVectorImpl<char> &Str) { … }

//===----------------------------------------------------------------------===//
// Token Spelling
//===----------------------------------------------------------------------===//

/// Slow case of getSpelling. Extract the characters comprising the
/// spelling of this token from the provided input buffer.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
                              const LangOptions &LangOpts, char *Spelling) { … }

/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding.  In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
StringRef Lexer::getSpelling(SourceLocation loc,
                             SmallVectorImpl<char> &buffer,
                             const SourceManager &SM,
                             const LangOptions &options,
                             bool *invalid) { … }

/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding.  In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
                               const LangOptions &LangOpts, bool *Invalid) { … }

/// getSpelling - This method is used to get the spelling of a token into a
/// preallocated buffer, instead of as an std::string.  The caller is required
/// to allocate enough space for the token, which is guaranteed to be at least
/// Tok.getLength() bytes long.  The actual length of the token is returned.
///
/// Note that this method may do two possible things: it may either fill in
/// the buffer specified with characters, or it may *change the input pointer*
/// to point to a constant buffer with the data already in it (avoiding a
/// copy).  The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.
unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
                            const SourceManager &SourceMgr,
                            const LangOptions &LangOpts, bool *Invalid) { … }

/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file.  If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes
/// that are part of that.
unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
                                   const SourceManager &SM,
                                   const LangOptions &LangOpts) { … }

/// Relex the token at the specified location.
/// \returns true if there was a failure, false on success.
bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
                        const SourceManager &SM,
                        const LangOptions &LangOpts,
                        bool IgnoreWhiteSpace) { … }

/// Returns the pointer that points to the beginning of line that contains
/// the given offset, or null if the offset if invalid.
static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) { … }

static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
                                              const SourceManager &SM,
                                              const LangOptions &LangOpts) { … }

SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) { … }

namespace {

enum PreambleDirectiveKind { … };

} // namespace

PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
                                      const LangOptions &LangOpts,
                                      unsigned MaxLines) { … }

unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts) { … }

/// Computes the source location just past the end of the
/// token at this source location.
///
/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and
/// is generally used when a diagnostic needs to point just after a
/// token where it expected something different that it received. If
/// the returned source location would not be meaningful (e.g., if
/// it points into a macro), this routine returns an invalid
/// source location.
///
/// \param Offset an offset from the end of the token, where the source
/// location should refer to. The default offset (0) produces a source
/// location pointing just past the end of the token; an offset of 1 produces
/// a source location pointing to the last character in the token, etc.
SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) { … }

/// Returns true if the given MacroID location points at the first
/// token of the macro expansion.
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
                                      const SourceManager &SM,
                                      const LangOptions &LangOpts,
                                      SourceLocation *MacroBegin) { … }

/// Returns true if the given MacroID location points at the last
/// token of the macro expansion.
bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
                                    const SourceManager &SM,
                                    const LangOptions &LangOpts,
                                    SourceLocation *MacroEnd) { … }

static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
                                             const SourceManager &SM,
                                             const LangOptions &LangOpts) { … }

// Assumes that `Loc` is in an expansion.
static bool isInExpansionTokenRange(const SourceLocation Loc,
                                    const SourceManager &SM) { … }

CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts) { … }

StringRef Lexer::getSourceText(CharSourceRange Range,
                               const SourceManager &SM,
                               const LangOptions &LangOpts,
                               bool *Invalid) { … }

StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
                                       const SourceManager &SM,
                                       const LangOptions &LangOpts) { … }

StringRef Lexer::getImmediateMacroNameForDiagnostics(
    SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { … }

bool Lexer::isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts) { … }

bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) { … }

StringRef Lexer::getIndentationForLine(SourceLocation Loc,
                                       const SourceManager &SM) { … }

//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
//===----------------------------------------------------------------------===//

/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
/// lexer buffer was all expanded at a single point, perform the mapping.
/// This is currently only used for _Pragma implementation, so it is the slow
/// path of the hot getSourceLocation method.  Do not allow it to be inlined.
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(
    Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen);
static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
                                        SourceLocation FileLoc,
                                        unsigned CharNo, unsigned TokLen) { … }

/// getSourceLocation - Return a source location identifier for the specified
/// offset in the current file.
SourceLocation Lexer::getSourceLocation(const char *Loc,
                                        unsigned TokLen) const { … }

/// Diag - Forwarding function for diagnostics.  This translate a source
/// position in the current buffer into a SourceLocation object for rendering.
DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const { … }

//===----------------------------------------------------------------------===//
// Trigraph and Escaped Newline Handling Code.
//===----------------------------------------------------------------------===//

/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
static char GetTrigraphCharForLetter(char Letter) { … }

/// DecodeTrigraphChar - If the specified character is a legal trigraph when
/// prefixed with ??, emit a trigraph warning.  If trigraphs are enabled,
/// return the result character.  Finally, emit a warning about trigraph use
/// whether trigraphs are enabled or not.
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs) { … }

/// getEscapedNewLineSize - Return the size of the specified escaped newline,
/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a
/// trigraph equivalent on entry to this function.
unsigned Lexer::getEscapedNewLineSize(const char *Ptr) { … }

/// SkipEscapedNewLines - If P points to an escaped newline (or a series of
/// them), skip over them and return the first non-escaped-newline found,
/// otherwise return P.
const char *Lexer::SkipEscapedNewLines(const char *P) { … }

std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) { … }

/// Checks that the given token is the first token that occurs after the
/// given location (this excludes comments and whitespace). Returns the location
/// immediately after the specified token. If the token is not found or the
/// location is inside a macro, the returned source location will be invalid.
SourceLocation Lexer::findLocationAfterToken(
    SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM,
    const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) { … }

/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
/// get its size, and return it.  This is tricky in several cases:
///   1. If currently at the start of a trigraph, we warn about the trigraph,
///      then either return the trigraph (skipping 3 chars) or the '?',
///      depending on whether trigraphs are enabled or not.
///   2. If this is an escaped newline (potentially with whitespace between
///      the backslash and newline), implicitly skip the newline and return
///      the char after it.
///
/// This handles the slow/uncommon case of the getCharAndSize method.  Here we
/// know that we can accumulate into Size, and that we have already incremented
/// Ptr by Size bytes.
///
/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
/// be updated to match.
Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) { … }

/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
/// getCharAndSizeNoWarn method.  Here we know that we can accumulate into Size,
/// and that we have already incremented Ptr by Size bytes.
///
/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
/// be updated to match.
Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
                                                 const LangOptions &LangOpts) { … }

//===----------------------------------------------------------------------===//
// Helper methods for lexing.
//===----------------------------------------------------------------------===//

/// Routine that indiscriminately sets the offset into the source file.
void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) { … }

static bool isUnicodeWhitespace(uint32_t Codepoint) { … }

static llvm::SmallString<5> codepointAsHexString(uint32_t C) { … }

// To mitigate https://github.com/llvm/llvm-project/issues/54732,
// we allow "Mathematical Notation Characters" in identifiers.
// This is a proposed profile that extends the XID_Start/XID_continue
// with mathematical symbols, superscipts and subscripts digits
// found in some production software.
// https://www.unicode.org/L2/L2022/22230-math-profile.pdf
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts,
                                      bool IsStart, bool &IsExtension) { … }

static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts,
                            bool &IsExtension) { … }

static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts,
                                     bool &IsExtension) { … }

static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C,
                                          CharSourceRange Range) { … }

static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin,
                                            const char *End) { … }

static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
                                      CharSourceRange Range, bool IsFirst) { … }

/// After encountering UTF-8 character C and interpreting it as an identifier
/// character, check whether it's a homoglyph for a common non-identifier
/// source character that is unlikely to be an intentional identifier
/// character and warn if so.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
                                       CharSourceRange Range) { … }

static void diagnoseInvalidUnicodeCodepointInIdentifier(
    DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint,
    CharSourceRange Range, bool IsFirst) { … }

bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
                                    Token &Result) { … }

bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr, Token &Result) { … }

bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,
                                      const char *CurPtr) { … }

static const char *
fastParseASCIIIdentifier(const char *CurPtr,
                         [[maybe_unused]] const char *BufferEnd) { … }

bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { … }

/// isHexaLiteral - Return true if Start points to a hex constant.
/// in microsoft mode (where this is supposed to be several different tokens).
bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) { … }

/// LexNumericConstant - Lex the remainder of a integer or floating point
/// constant. From[-1] is the first character lexed.  Return the end of the
/// constant.
bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { … }

/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
/// in C++11, or warn on a ud-suffix in C++98.
const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
                               bool IsStringLiteral) { … }

/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
/// either " or L" or u8" or u" or U".
bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
                             tok::TokenKind Kind) { … }

/// LexRawStringLiteral - Lex the remainder of a raw string literal, after
/// having lexed R", LR", u8R", uR", or UR".
bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
                                tok::TokenKind Kind) { … }

/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
/// after having lexed the '<' character.  This is used for #include filenames.
bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { … }

void Lexer::codeCompleteIncludedFile(const char *PathStart,
                                     const char *CompletionPoint,
                                     bool IsAngled) { … }

/// LexCharConstant - Lex the remainder of a character constant, after having
/// lexed either ' or L' or u8' or u' or U'.
bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
                            tok::TokenKind Kind) { … }

/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
/// Update BufferPtr to point to the next non-whitespace character and return.
///
/// This method forms a token and returns true if KeepWhitespaceMode is enabled.
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
                           bool &TokAtPhysicalStartOfLine) { … }

/// We have just read the // characters from input.  Skip until we find the
/// newline character that terminates the comment.  Then update BufferPtr and
/// return.
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
                            bool &TokAtPhysicalStartOfLine) { … }

/// If in save-comment mode, package up this Line comment in an appropriate
/// way and return it.
bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) { … }

/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
/// character (either \\n or \\r) is part of an escaped newline sequence.  Issue
/// a diagnostic if so.  We know that the newline is inside of a block comment.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L,
                                                  bool Trigraphs) { … }

#ifdef __SSE2__
#include <emmintrin.h>
#elif __ALTIVEC__
#include <altivec.h>
#undef bool
#endif

/// We have just read from input the / and * characters that started a comment.
/// Read until we find the * and / characters that terminate the comment.
/// Note that we don't bother decoding trigraphs or escaped newlines in block
/// comments, because they cannot cause the comment to end.  The only thing
/// that can happen is the comment could end with an escaped newline between
/// the terminating * and /.
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
                             bool &TokAtPhysicalStartOfLine) { … }

//===----------------------------------------------------------------------===//
// Primary Lexing Entry Points
//===----------------------------------------------------------------------===//

/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
/// uninterpreted string.  This switches the lexer out of directive mode.
void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { … }

/// LexEndOfFile - CurPtr points to the end of this file.  Handle this
/// condition, reporting diagnostics and handling other edge cases as required.
/// This returns true if Result contains a token, false if PP.Lex should be
/// called again.
bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { … }

/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
/// the specified lexer will return a tok::l_paren token, 0 if it is something
/// else and 2 if there are no more tokens in the buffer controlled by the
/// lexer.
unsigned Lexer::isNextPPTokenLParen() { … }

/// Find the end of a version control conflict marker.
static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
                                   ConflictMarkerKind CMK) { … }

/// IsStartOfConflictMarker - If the specified pointer is the start of a version
/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
/// and recover nicely.  This returns true if it is a conflict marker and false
/// if not.
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { … }

/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if
/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it
/// is the end of a conflict marker.  Handle it by ignoring up until the end of
/// the line.  This returns true if it is a conflict marker and false if not.
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) { … }

static const char *findPlaceholderEnd(const char *CurPtr,
                                      const char *BufferEnd) { … }

bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) { … }

bool Lexer::isCodeCompletionPoint(const char *CurPtr) const { … }

std::optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,
                                                 const char *SlashLoc,
                                                 Token *Result) { … }

std::optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,
                                               const char *SlashLoc,
                                               Token *Result) { … }

uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
                           Token *Result) { … }

bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
                                   const char *CurPtr) { … }

void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { … }

bool Lexer::Lex(Token &Result) { … }

/// LexTokenInternal - This implements a simple C family lexer.  It is an
/// extremely performance critical piece of code.  This assumes that the buffer
/// has a null character at the end of the file.  This returns a preprocessing
/// token, not a normal token, as such, it is an internal interface.  It assumes
/// that the Flags of result have been cleared before calling this.
bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { … }

const char *Lexer::convertDependencyDirectiveToken(
    const dependency_directives_scan::Token &DDTok, Token &Result) { … }

bool Lexer::LexDependencyDirectiveToken(Token &Result) { … }

bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { … }
llvm/clang/lib/Lex/Lexer.cpp