cpython/Tools/peg_generator/pegen/metagrammar.gram

@subheader """\
from ast import literal_eval

from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    Group,
    Item,
    Lookahead,
    LookaheadOrCut,
    MetaTuple,
    MetaList,
    NameLeaf,
    NamedItem,
    NamedItemList,
    NegativeLookahead,
    Opt,
    Plain,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    RuleList,
    RuleName,
    Grammar,
    StringLeaf,
)
"""

start[Grammar]: grammar ENDMARKER { grammar }

grammar[Grammar]:
    | metas rules { Grammar(rules, metas) }
    | rules { Grammar(rules, []) }

metas[MetaList]:
    | meta metas { [meta] + metas }
    | meta { [meta] }

meta[MetaTuple]:
    | "@" NAME NEWLINE { (name.string, None) }
    | "@" a=NAME b=NAME NEWLINE { (a.string, b.string) }
    | "@" NAME STRING NEWLINE { (name.string, literal_eval(string.string)) }

rules[RuleList]:
    | rule rules { [rule] + rules }
    | rule { [rule] }

rule[Rule]:
    | rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT {
          Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) }
    | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT {
          Rule(rulename[0], rulename[1], more_alts, memo=opt) }
    | rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }

rulename[RuleName]:
    | NAME annotation { (name.string, annotation) }
    | NAME { (name.string, None) }

# In the future this may return something more complicated
memoflag[str]:
    | '(' "memo" ')' { "memo" }

alts[Rhs]:
    | alt "|" alts { Rhs([alt] + alts.alts)}
    | alt { Rhs([alt]) }

more_alts[Rhs]:
    | "|" alts NEWLINE more_alts { Rhs(alts.alts + more_alts.alts) }
    | "|" alts NEWLINE { Rhs(alts.alts) }

alt[Alt]:
    | items '$' action { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=action) }
    | items '$' { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=None) }
    | items action { Alt(items, action=action) }
    | items { Alt(items, action=None) }

items[NamedItemList]:
    | named_item items { [named_item] + items }
    | named_item { [named_item] }

named_item[NamedItem]:
    | NAME annotation '=' ~ item {NamedItem(name.string, item, annotation)}
    | NAME '=' ~ item {NamedItem(name.string, item)}
    | item {NamedItem(None, item)}
    | forced=forced_atom {NamedItem(None, forced)}
    | it=lookahead {NamedItem(None, it)}

forced_atom[Forced]:
    | '&''&' ~ atom {Forced(atom)}

lookahead[LookaheadOrCut]:
    | '&' ~ atom {PositiveLookahead(atom)}
    | '!' ~ atom {NegativeLookahead(atom)}
    | '~' {Cut()}

item[Item]:
    | '[' ~ alts ']' {Opt(alts)}
    |  atom '?' {Opt(atom)}
    |  atom '*' {Repeat0(atom)}
    |  atom '+' {Repeat1(atom)}
    |  sep=atom '.' node=atom '+' {Gather(sep, node)}
    |  atom {atom}

atom[Plain]:
    | '(' ~ alts ')' {Group(alts)}
    | NAME {NameLeaf(name.string) }
    | STRING {StringLeaf(string.string)}

# Mini-grammar for the actions and annotations

action[str]: "{" ~ target_atoms "}" { target_atoms }
annotation[str]: "[" ~ target_atoms "]" { target_atoms }

target_atoms[str]:
    | target_atom target_atoms { target_atom + " " + target_atoms }
    | target_atom { target_atom }

target_atom[str]:
    | "{" ~ atoms=target_atoms? "}" { "{" + (atoms or "") + "}" }
    | "[" ~ atoms=target_atoms? "]" { "[" + (atoms or "") + "]" }
    | NAME "*" { name.string + "*" }
    | NAME { name.string }
    | NUMBER { number.string }
    | STRING { string.string }
    | "?" { "?" }
    | ":" { ":" }
    | !"}" !"]" OP { op.string }