earley-parser.h | Explore in Territory

// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_TORQUE_EARLEY_PARSER_H_
#define V8_TORQUE_EARLEY_PARSER_H_

#include <map>
#include <memory>
#include <optional>
#include <vector>

#include "src/base/contextual.h"
#include "src/torque/source-positions.h"
#include "src/torque/utils.h"

namespace v8::internal::torque {

class Symbol;
class Item;

class ParseResultHolderBase { … };

enum class ParseResultHolderBase::TypeId { … };

ParseResultTypeId;

template <class T>
class ParseResultHolder : public ParseResultHolderBase { … };

template <class T>
T& ParseResultHolderBase::Cast() { … }

template <class T>
const T& ParseResultHolderBase::Cast() const { … }

class ParseResult { … };

InputPosition;

struct MatchedInput { … };

class ParseResultIterator { … };

struct LexerResult { … };

Action;

inline std::optional<ParseResult> DefaultAction(
    ParseResultIterator* child_results) { … }

template <class T, Action action>
inline Action AsSingletonVector() { … }

// A rule of the context-free grammar. Each rule can have an action attached to
// it, which is executed after the parsing is finished.
class Rule final { … };

// A Symbol represents a terminal or a non-terminal of the grammar.
// It stores the list of rules, which have this symbol as the
// left-hand side.
// Terminals have an empty list of rules, they are created by the Lexer
// instead of from rules.
// Symbols need to reside at stable memory addresses, because the addresses are
// used in the parser.
class Symbol { … };

// Items are the core datastructure of Earley's algorithm.
// They consist of a (partially) matched rule, a marked position inside of the
// right-hand side of the rule (traditionally written as a dot) and an input
// range from {start} to {pos} that matches the symbols of the right-hand side
// that are left of the mark. In addition, they store a child and a left-sibling
// pointer to reconstruct the AST in the end.
class Item { … };

inline std::optional<ParseResult> Symbol::RunAction(const Item* item,
                                                    const LexerResult& tokens) { … }

V8_EXPORT_PRIVATE const Item* RunEarleyAlgorithm(
    Symbol* start, const LexerResult& tokens,
    std::unordered_set<Item, base::hash<Item>>* processed);

inline std::optional<ParseResult> ParseTokens(Symbol* start,
                                              const LexerResult& tokens) { … }

// The lexical syntax is dynamically defined while building the grammar by
// adding patterns and keywords to the Lexer.
// The term keyword here can stand for any fixed character sequence, including
// operators and parentheses.
// Each pattern or keyword automatically gets a terminal symbol associated with
// it. These symbols form the result of the lexing.
// Patterns and keywords are matched using the longest match principle. If the
// longest matching pattern coincides with a keyword, the keyword symbol is
// chosen instead of the pattern.
// In addition, there is a single whitespace pattern which is consumed but does
// not become part of the token list.
class Lexer { … };

// A grammar can have a result, which is the results of the start symbol.
// Grammar is intended to be subclassed, with Symbol members forming the
// mutually recursive rules of the grammar.
class Grammar { … };

}  // namespace v8::internal::torque

#endif  // V8_TORQUE_EARLEY_PARSER_H_
chromium/v8/src/torque/earley-parser.h