//===--- LRGraph.h - Build an LR automaton ------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // LR parsers are bottom-up parsers -- they scan the input from left to right, // and collect the right-hand side of a production rule (called handle) on top // of the stack, then replace (reduce) the handle with the nonterminal defined // by the production rule. // // This file defines LRGraph, a deterministic handle-finding finite-state // automaton, which is a key component in LR parsers to recognize any of // handles in the grammar efficiently. We build the LR table (ACTION and GOTO // Table) based on the LRGraph. // // LRGraph can be constructed for any context-free grammars. // Even for a LR-ambiguous grammar, we can construct a deterministic FSA, but // interpretation of the FSA is nondeterministic -- we might in a state where // we can continue searching an handle and identify a handle (called // shift/reduce conflicts), or identify more than one handle (callled // reduce/reduce conflicts). // // LRGraph is a common model for all variants of LR automatons, from the most // basic one LR(0), the powerful SLR(1), LR(1) which uses a one-token lookahead // in making decisions. //===----------------------------------------------------------------------===// #ifndef CLANG_PSEUDO_GRAMMAR_LRGRAPH_H #define CLANG_PSEUDO_GRAMMAR_LRGRAPH_H #include "clang-pseudo/grammar/Grammar.h" #include "llvm/ADT/Hashing.h" #include <vector> namespace clang { namespace pseudo { // An LR item -- a grammar rule with a dot at some position of the body. // e.g. a production rule A := X Y yields 3 items: // A := . X Y // A := X . Y // A := X Y . // An item indicates how much of a production rule has been recognized at a // position (described by dot), for example, A := X . Y indicates that we have // recognized the X part from the input, and we hope next to see the input // derivable from Y. class Item { … }; // A state represents a node in the LR automaton graph. It is an item set, which // contains all possible rules that the LR parser may be parsing in that state. // // Conceptually, If we knew in advance what we're parsing, at any point we're // partway through parsing a production, sitting on a stack of partially parsed // productions. But because we don't know, there could be *several* productions // we're partway through. The set of possibilities is the parser state, and we // precompute all the transitions between these states. struct State { … }; // LRGraph is a deterministic finite state automaton for LR parsing. // // Intuitively, an LR automaton is a transition graph. The graph has a // collection of nodes, called States. Each state corresponds to a particular // item set, which represents a condition that could occur during the process of // parsing a production. Edges are directed from one state to another. Each edge // is labeled by a grammar symbol (terminal or nonterminal). // // LRGraph is used to construct the LR parsing table which is a core // data-structure driving the LR parser. class LRGraph { … }; } // namespace pseudo } // namespace clang namespace llvm { // Support clang::pseudo::Item as DenseMap keys. template <> struct DenseMapInfo<clang::pseudo::Item> { … }; } // namespace llvm #endif // CLANG_PSEUDO_GRAMMAR_LRGRAPH_H