//===--- GLR.cpp -----------------------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang-pseudo/GLR.h" #include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include <algorithm> #include <memory> #include <optional> #include <queue> #define DEBUG_TYPE … namespace clang { namespace pseudo { namespace { Token::Index findRecoveryEndpoint(ExtensionID Strategy, Token::Index Begin, const TokenStream &Tokens, const Language &Lang) { … } } // namespace void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads, unsigned &TokenIndex, const ParseParams &Params, const Language &Lang, std::vector<const GSS::Node *> &NewHeads) { … } StateID; llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) { … } // Apply all pending shift actions. // In theory, LR parsing doesn't have shift/shift conflicts on a single head. // But we may have multiple active heads, and each head has a shift action. // // We merge the stack -- if multiple heads will reach the same state after // shifting a token, we shift only once by combining these heads. // // E.g. we have two heads (2, 3) in the GSS, and will shift both to reach 4: // 0---1---2 // └---3 // After the shift action, the GSS is: // 0---1---2---4 // └---3---┘ void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads, const ForestNode &NewTok, const ParseParams &Params, const Language &Lang, std::vector<const GSS::Node *> &NewHeads) { … } namespace { // A KeyedQueue yields pairs of keys and values in order of the keys. KeyedQueue; template <typename T> void sortAndUnique(std::vector<T> &Vec) { … } // Perform reduces until no more are possible. // // Generally this means walking up from the heads gathering ForestNodes that // will match the RHS of the rule we're reducing into a sequence ForestNode, // and ending up at a base node. // Then we push a new GSS node onto that base, taking care to: // - pack alternative sequence ForestNodes into an ambiguous ForestNode. // - use the same GSS node for multiple heads if the parse state matches. // // Examples of reduction: // Before (simple): // 0--1(expr)--2(semi) // After reducing 2 by `stmt := expr semi`: // 0--3(stmt) // 3 is goto(0, stmt) // // Before (splitting due to R/R conflict): // 0--1(IDENTIFIER) // After reducing 1 by `class-name := IDENTIFIER` & `enum-name := IDENTIFIER`: // 0--2(class-name) // 2 is goto(0, class-name) // └--3(enum-name) // 3 is goto(0, enum-name) // // Before (splitting due to multiple bases): // 0--2(class-name)--4(STAR) // └--3(enum-name)---┘ // After reducing 4 by `ptr-operator := STAR`: // 0--2(class-name)--5(ptr-operator) // 5 is goto(2, ptr-operator) // └--3(enum-name)---6(ptr-operator) // 6 is goto(3, ptr-operator) // // Before (joining due to same goto state, multiple bases): // 0--1(cv-qualifier)--3(class-name) // └--2(cv-qualifier)--4(enum-name) // After reducing 3 by `type-name := class-name` and // 4 by `type-name := enum-name`: // 0--1(cv-qualifier)--5(type-name) // 5 is goto(1, type-name) and // └--2(cv-qualifier)--┘ // goto(2, type-name) // // Before (joining due to same goto state, the same base): // 0--1(class-name)--3(STAR) // └--2(enum-name)--4(STAR) // After reducing 3 by `pointer := class-name STAR` and // 2 by`enum-name := class-name STAR`: // 0--5(pointer) // 5 is goto(0, pointer) // // (This is a functor rather than a function to allow it to reuse scratch // storage across calls). class GLRReduce { … }; } // namespace ForestNode &glrParse(const ParseParams &Params, SymbolID StartSymbol, const Language &Lang) { … } void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead, const ParseParams &Params, const Language &Lang) { … } const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol, llvm::ArrayRef<const Node *> Parents) { … } GSS::Node *GSS::allocate(unsigned Parents) { … } void GSS::destroy(Node *N) { … } unsigned GSS::gc(std::vector<const Node *> &&Queue) { … } } // namespace pseudo } // namespace clang