import unittest
from test import test_tools
from typing import Dict, Set
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import parse_string
from pegen.first_sets import FirstSetCalculator
from pegen.grammar import Grammar
class TestFirstSets(unittest.TestCase):
def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
grammar: Grammar = parse_string(grammar_source, GrammarParser)
return FirstSetCalculator(grammar.rules).calculate()
def test_alternatives(self) -> None:
grammar = """
start: expr NEWLINE? ENDMARKER
expr: A | B
A: 'a' | '-'
B: 'b' | '+'
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"A": {"'a'", "'-'"},
"B": {"'+'", "'b'"},
"expr": {"'+'", "'a'", "'b'", "'-'"},
"start": {"'+'", "'a'", "'b'", "'-'"},
},
)
def test_optionals(self) -> None:
grammar = """
start: expr NEWLINE
expr: ['a'] ['b'] 'c'
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"expr": {"'c'", "'a'", "'b'"},
"start": {"'c'", "'a'", "'b'"},
},
)
def test_repeat_with_separator(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_optional_operator(self) -> None:
grammar = """
start: sum NEWLINE
sum: (term)? 'b'
term: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"term": {"NUMBER"},
"sum": {"NUMBER", "'b'"},
"start": {"'b'", "NUMBER"},
},
)
def test_optional_literal(self) -> None:
grammar = """
start: sum NEWLINE
sum: '+' ? term
term: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"term": {"NUMBER"},
"sum": {"'+'", "NUMBER"},
"start": {"'+'", "NUMBER"},
},
)
def test_optional_after(self) -> None:
grammar = """
start: term NEWLINE
term: NUMBER ['+']
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"term": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_optional_before(self) -> None:
grammar = """
start: term NEWLINE
term: ['+'] NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
)
def test_repeat_0(self) -> None:
grammar = """
start: thing* "+" NEWLINE
thing: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
)
def test_repeat_0_with_group(self) -> None:
grammar = """
start: ('+' '-')* term NEWLINE
term: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
)
def test_repeat_1(self) -> None:
grammar = """
start: thing+ '-' NEWLINE
thing: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_repeat_1_with_group(self) -> None:
grammar = """
start: ('+' term)+ term NEWLINE
term: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
)
def test_gather(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_positive_lookahead(self) -> None:
grammar = """
start: expr NEWLINE
expr: &'a' opt
opt: 'a' | 'b' | 'c'
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"expr": {"'a'"},
"start": {"'a'"},
"opt": {"'b'", "'c'", "'a'"},
},
)
def test_negative_lookahead(self) -> None:
grammar = """
start: expr NEWLINE
expr: !'a' opt
opt: 'a' | 'b' | 'c'
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"opt": {"'b'", "'a'", "'c'"},
"expr": {"'b'", "'c'"},
"start": {"'b'", "'c'"},
},
)
def test_left_recursion(self) -> None:
grammar = """
start: expr NEWLINE
expr: ('-' term | expr '+' term | term)
term: NUMBER
foo: 'foo'
bar: 'bar'
baz: 'baz'
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"expr": {"NUMBER", "'-'"},
"term": {"NUMBER"},
"start": {"NUMBER", "'-'"},
"foo": {"'foo'"},
"bar": {"'bar'"},
"baz": {"'baz'"},
},
)
def test_advance_left_recursion(self) -> None:
grammar = """
start: NUMBER | sign start
sign: ['-']
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
)
def test_mutual_left_recursion(self) -> None:
grammar = """
start: foo 'E'
foo: bar 'A' | 'B'
bar: foo 'C' | 'D'
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"foo": {"'D'", "'B'"},
"bar": {"'D'"},
"start": {"'D'", "'B'"},
},
)
def test_nasty_left_recursion(self) -> None:
# TODO: Validate this
grammar = """
start: target '='
target: maybe '+' | NAME
maybe: maybe '-' | target
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
)
def test_nullable_rule(self) -> None:
grammar = """
start: sign thing $
sign: ['-']
thing: NUMBER
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"sign": {"", "'-'"},
"thing": {"NUMBER"},
"start": {"NUMBER", "'-'"},
},
)
def test_epsilon_production_in_start_rule(self) -> None:
grammar = """
start: ['-'] $
"""
self.assertEqual(
self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
)
def test_multiple_nullable_rules(self) -> None:
grammar = """
start: sign thing other another $
sign: ['-']
thing: ['+']
other: '*'
another: '/'
"""
self.assertEqual(
self.calculate_first_sets(grammar),
{
"sign": {"", "'-'"},
"thing": {"'+'", ""},
"start": {"'+'", "'-'", "'*'"},
"other": {"'*'"},
"another": {"'/'"},
},
)