// Copyright 2018 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_TORQUE_EARLEY_PARSER_H_ #define V8_TORQUE_EARLEY_PARSER_H_ #include <map> #include <memory> #include <optional> #include <vector> #include "src/base/contextual.h" #include "src/torque/source-positions.h" #include "src/torque/utils.h" namespace v8::internal::torque { class Symbol; class Item; class ParseResultHolderBase { … }; enum class ParseResultHolderBase::TypeId { … }; ParseResultTypeId; template <class T> class ParseResultHolder : public ParseResultHolderBase { … }; template <class T> T& ParseResultHolderBase::Cast() { … } template <class T> const T& ParseResultHolderBase::Cast() const { … } class ParseResult { … }; InputPosition; struct MatchedInput { … }; class ParseResultIterator { … }; struct LexerResult { … }; Action; inline std::optional<ParseResult> DefaultAction( ParseResultIterator* child_results) { … } template <class T, Action action> inline Action AsSingletonVector() { … } // A rule of the context-free grammar. Each rule can have an action attached to // it, which is executed after the parsing is finished. class Rule final { … }; // A Symbol represents a terminal or a non-terminal of the grammar. // It stores the list of rules, which have this symbol as the // left-hand side. // Terminals have an empty list of rules, they are created by the Lexer // instead of from rules. // Symbols need to reside at stable memory addresses, because the addresses are // used in the parser. class Symbol { … }; // Items are the core datastructure of Earley's algorithm. // They consist of a (partially) matched rule, a marked position inside of the // right-hand side of the rule (traditionally written as a dot) and an input // range from {start} to {pos} that matches the symbols of the right-hand side // that are left of the mark. In addition, they store a child and a left-sibling // pointer to reconstruct the AST in the end. class Item { … }; inline std::optional<ParseResult> Symbol::RunAction(const Item* item, const LexerResult& tokens) { … } V8_EXPORT_PRIVATE const Item* RunEarleyAlgorithm( Symbol* start, const LexerResult& tokens, std::unordered_set<Item, base::hash<Item>>* processed); inline std::optional<ParseResult> ParseTokens(Symbol* start, const LexerResult& tokens) { … } // The lexical syntax is dynamically defined while building the grammar by // adding patterns and keywords to the Lexer. // The term keyword here can stand for any fixed character sequence, including // operators and parentheses. // Each pattern or keyword automatically gets a terminal symbol associated with // it. These symbols form the result of the lexing. // Patterns and keywords are matched using the longest match principle. If the // longest matching pattern coincides with a keyword, the keyword symbol is // chosen instead of the pattern. // In addition, there is a single whitespace pattern which is consumed but does // not become part of the token list. class Lexer { … }; // A grammar can have a result, which is the results of the start symbol. // Grammar is intended to be subclassed, with Symbol members forming the // mutually recursive rules of the grammar. class Grammar { … }; } // namespace v8::internal::torque #endif // V8_TORQUE_EARLEY_PARSER_H_