YAMLParser.cpp | Explore in Territory

//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements a YAML parser.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/YAMLParser.h"
#include "llvm/ADT/AllocatorList.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Unicode.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <utility>

usingnamespacellvm;
usingnamespaceyaml;

enum UnicodeEncodingForm { … };

/// EncodingInfo - Holds the encoding type and length of the byte order mark if
///                it exists. Length is in {0, 2, 3, 4}.
EncodingInfo;

/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
///                      encoding form of \a Input.
///
/// @param Input A string of length 0 or more.
/// @returns An EncodingInfo indicating the Unicode encoding form of the input
///          and how long the byte order mark is if one exists.
static EncodingInfo getUnicodeEncoding(StringRef Input) { … }

/// Pin the vtables to this file.
void Node::anchor() { … }
void NullNode::anchor() { … }
void ScalarNode::anchor() { … }
void BlockScalarNode::anchor() { … }
void KeyValueNode::anchor() { … }
void MappingNode::anchor() { … }
void SequenceNode::anchor() { … }
void AliasNode::anchor() { … }

namespace llvm {
namespace yaml {

/// Token - A single YAML token.
struct Token { … };

} // end namespace yaml
} // end namespace llvm

TokenQueueT;

namespace {

/// This struct is used to track simple keys.
///
/// Simple keys are handled by creating an entry in SimpleKeys for each Token
/// which could legally be the start of a simple key. When peekNext is called,
/// if the Token to be returned is referenced by a SimpleKey, we continue
/// tokenizing until that potential simple key has either been found not to be
/// a simple key (we moved on to the next line or went further than 1024
/// chars), or we run into a Value, at which point we insert a Key token (and
/// possibly others) before the SimpleKey's Tok.
struct SimpleKey { … };

} // end anonymous namespace

/// The Unicode scalar value of a UTF-8 minimal well-formed code unit
/// subsequence and the subsequence's length in code units (uint8_t).
/// A length of 0 represents an error.
UTF8Decoded;

static UTF8Decoded decodeUTF8(StringRef Range) { … }

namespace llvm {
namespace yaml {

/// Scans YAML tokens from a MemoryBuffer.
class Scanner { … };

} // end namespace yaml
} // end namespace llvm

/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
static void encodeUTF8( uint32_t UnicodeScalarValue
                      , SmallVectorImpl<char> &Result) { … }

bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { … }

bool yaml::scanTokens(StringRef Input) { … }

std::string yaml::escape(StringRef Input, bool EscapePrintable) { … }

std::optional<bool> yaml::parseBool(StringRef S) { … }

Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
                 std::error_code *EC)
    : … { … }

Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
                 std::error_code *EC)
    : … { … }

void Scanner::init(MemoryBufferRef Buffer) { … }

Token &Scanner::peekNext() { … }

Token Scanner::getNext() { … }

StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { … }

StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { … }

StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { … }

StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { … }

StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { … }

StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
                                       , StringRef::iterator Position) { … }

void Scanner::advanceWhile(SkipWhileFunc Func) { … }

static bool is_ns_hex_digit(const char C) { … }

static bool is_ns_word_char(const char C) { … }

void Scanner::scan_ns_uri_char() { … }

bool Scanner::consume(uint32_t Expected) { … }

void Scanner::skip(uint32_t Distance) { … }

bool Scanner::isBlankOrBreak(StringRef::iterator Position) { … }

bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) { … }

bool Scanner::isLineEmpty(StringRef Line) { … }

bool Scanner::consumeLineBreakIfPresent() { … }

void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
                                    , unsigned AtColumn
                                    , bool IsRequired) { … }

void Scanner::removeStaleSimpleKeyCandidates() { … }

void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { … }

bool Scanner::unrollIndent(int ToColumn) { … }

bool Scanner::rollIndent( int ToColumn
                        , Token::TokenKind Kind
                        , TokenQueueT::iterator InsertPoint) { … }

void Scanner::skipComment() { … }

void Scanner::scanToNextToken() { … }

bool Scanner::scanStreamStart() { … }

bool Scanner::scanStreamEnd() { … }

bool Scanner::scanDirective() { … }

bool Scanner::scanDocumentIndicator(bool IsStart) { … }

bool Scanner::scanFlowCollectionStart(bool IsSequence) { … }

bool Scanner::scanFlowCollectionEnd(bool IsSequence) { … }

bool Scanner::scanFlowEntry() { … }

bool Scanner::scanBlockEntry() { … }

bool Scanner::scanKey() { … }

bool Scanner::scanValue() { … }

// Forward declaration of wasEscaped, used to attach the noinline attribute
// separately from the definition that follows.
// Forbidding inlining improves performance by roughly 20%.
// FIXME: Remove once llvm optimizes this to the faster version without hints.
LLVM_ATTRIBUTE_NOINLINE static bool
wasEscaped(StringRef::iterator First, StringRef::iterator Position);

// Returns whether a character at 'Position' was escaped with a leading '\'.
// 'First' specifies the position of the first character in the string.
static bool wasEscaped(StringRef::iterator First,
                       StringRef::iterator Position) { … }

bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { … }

bool Scanner::scanPlainScalar() { … }

bool Scanner::scanAliasOrAnchor(bool IsAlias) { … }

bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
                                        char &ChompingIndicator,
                                        unsigned &IndentIndicator,
                                        bool &IsDone) { … }

char Scanner::scanBlockStyleIndicator() { … }

char Scanner::scanBlockChompingIndicator() { … }

/// Get the number of line breaks after chomping.
///
/// Return the number of trailing line breaks to emit, depending on
/// \p ChompingIndicator.
static unsigned getChompedLineBreaks(char ChompingIndicator,
                                     unsigned LineBreaks, StringRef Str) { … }

unsigned Scanner::scanBlockIndentationIndicator() { … }

bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
                                    unsigned &IndentIndicator, bool &IsDone) { … }

bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
                                    unsigned BlockExitIndent,
                                    unsigned &LineBreaks, bool &IsDone) { … }

bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
                                    unsigned BlockExitIndent, bool &IsDone) { … }

bool Scanner::scanBlockScalar(bool IsLiteral) { … }

bool Scanner::scanTag() { … }

bool Scanner::fetchMoreTokens() { … }

Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
               std::error_code *EC)
    : … { … }

Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
               std::error_code *EC)
    : … { … }

Stream::~Stream() = default;

bool Stream::failed() { … }

void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) { … }

void Stream::printError(const SMRange &Range, const Twine &Msg,
                        SourceMgr::DiagKind Kind) { … }

document_iterator Stream::begin() { … }

document_iterator Stream::end() { … }

void Stream::skip() { … }

Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
           StringRef T)
    : … { … }

std::string Node::getVerbatimTag() const { … }

Token &Node::peekNext() { … }

Token Node::getNext() { … }

Node *Node::parseBlockNode() { … }

BumpPtrAllocator &Node::getAllocator() { … }

void Node::setError(const Twine &Msg, Token &Tok) const { … }

bool Node::failed() const { … }

StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { … }

/// parseScalarValue - A common parsing routine for all flow scalar styles.
/// It handles line break characters by itself, adds regular content characters
/// to the result, and forwards escaped sequences to the provided routine for
/// the style-specific processing.
///
/// \param UnquotedValue - An input value without quotation marks.
/// \param Storage - A storage for the result if the input value is multiline or
/// contains escaped characters.
/// \param LookupChars - The set of special characters to search for in the
/// input string. Should include the line break characters and, if the scalar
/// style being processed has one, its escape character.
/// \param UnescapeCallback - This is called when the escape character is found
/// in the input.
/// \returns - The unfolded and unescaped value.
static StringRef
parseScalarValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage,
                 StringRef LookupChars,
                 std::function<StringRef(StringRef, SmallVectorImpl<char> &)>
                     UnescapeCallback) { … }

StringRef
ScalarNode::getDoubleQuotedValue(StringRef RawValue,
                                 SmallVectorImpl<char> &Storage) const { … }

StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,
                                           SmallVectorImpl<char> &Storage) { … }

StringRef ScalarNode::getPlainValue(StringRef RawValue,
                                    SmallVectorImpl<char> &Storage) { … }

Node *KeyValueNode::getKey() { … }

Node *KeyValueNode::getValue() { … }

void MappingNode::increment() { … }

void SequenceNode::increment() { … }

Document::Document(Stream &S) : … { … }

bool Document::skip()  { … }

Token &Document::peekNext() { … }

Token Document::getNext() { … }

void Document::setError(const Twine &Message, Token &Location) const { … }

bool Document::failed() const { … }

Node *Document::parseBlockNode() { … }

bool Document::parseDirectives() { … }

void Document::parseYAMLDirective() { … }

void Document::parseTAGDirective() { … }

bool Document::expectToken(int TK) { … }
llvm/llvm/lib/Support/YAMLParser.cpp