# Copyright 2014 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generates a syntax tree from a Mojo IDL file."""
# Breaking parser stanzas is unhelpful so allow longer lines.
# pylint: disable=line-too-long
import os.path
import sys
from mojom import fileutil
from mojom.error import Error
from mojom.parse import ast
from mojom.parse.lexer import Lexer
fileutil.AddLocalRepoThirdPartyDirToModulePath()
from ply import lex
from ply import yacc
# Mojom ordinals (e.g. "@42") and fixed array sizes are limited to unsigned
# 32-bit values; the parse rules below reject anything larger.
_MAX_ORDINAL_VALUE = 0xffffffff
_MAX_ARRAY_SIZE = 0xffffffff
class ParseError(Error):
  """Class for errors from the parser."""

  def __init__(self, filename, message, lineno=None, snippet=None):
    # An offending source snippet, when available, is passed along as a
    # single-item addenda list.
    addenda = [snippet] if snippet else None
    super().__init__(filename, message, lineno=lineno, addenda=addenda)
# We have methods which look like they could be functions:
# pylint: disable=R0201
class Parser:
  """PLY yacc grammar for Mojo IDL.

  PLY discovers the grammar by reflection: every method whose name starts with
  `p_` defines the production(s) given in its docstring, so those docstrings
  are load-bearing grammar rules, not documentation. Each reduction builds a
  mojom.parse.ast node and records its source span via _set_lexstate().
  """

  def __init__(self, lexer, source, filename):
    # PLY requires the token names to be exposed as `tokens` on this object.
    self.tokens = lexer.tokens
    # Full source text, retained only so _GetSnippet() can quote error lines.
    self.source = source
    self.filename = filename

  def _set_lexstate(self, p, start_token, end_token):
    """Sets the lexer state in `p[0]` to track line and input stream positions.

    If `start_token` or `end_token` are AST nodes rather than LexTokens, this
    function uses the values from the node.

    Args:
      p: The YaccProduction for the parse function. `p[0]` is the output that
        will be modified.
      start_token: The index in `p` to use for the starting line and lex
        positions.
      end_token: The index in `p` to use for the ending line and lex positions.
    """
    p[0].filename = self.filename
    p[0].start = ast.Location(p.lineno(start_token), p.lexpos(start_token))
    # A lexpos of 0 here generally means the symbol at `start_token` was a
    # reduced nonterminal rather than a terminal token, so PLY carried no
    # position for it; fall back to the position recorded on the AST node.
    if not p[0].start.lexpos and isinstance(p[start_token], ast.NodeBase):
      p[0].start = p[start_token].start
    lexpos = p.lexpos(end_token)
    if lexpos:
      # The end location covers the final token's full text, hence the len()
      # offset.
      p[0].end = ast.Location(p.lineno(end_token), lexpos + len(p[end_token]))
    elif isinstance(p[end_token], ast.NodeBase):
      p[0].end = p[end_token].end

  # Names of functions
  #
  # In general, we name functions after the left-hand-side of the rule(s) that
  # they handle. E.g., |p_foo_bar| for a rule |foo_bar : ...|.
  #
  # There may be multiple functions handling rules for the same left-hand-side;
  # then we name the functions |p_foo_bar_N| (for left-hand-side |foo_bar|),
  # where N is a number (numbered starting from 1). Note that using multiple
  # functions is actually more efficient than having single functions handle
  # multiple rules (and, e.g., distinguishing them by examining |len(p)|).
  #
  # It's also possible to have a function handling multiple rules with different
  # left-hand-sides. We do not do this.
  #
  # See http://www.dabeaz.com/ply/ply.html#ply_nn25 for more details.

  # TODO(vtl): Get rid of the braces in the module "statement". (Consider
  # renaming "module" -> "package".) Then we'll be able to have a single rule
  # for root (by making module "optional").
  def p_root_1(self, p):
    """root : """
    # An empty file parses to an empty Mojom node.
    p[0] = ast.Mojom(None, ast.ImportList(), [])

  def p_root_2(self, p):
    """root : root module"""
    if p[1].module is not None:
      raise ParseError(self.filename,
                       "Multiple \"module\" statements not allowed:",
                       p[2].start.line,
                       snippet=self._GetSnippet(p[2].start.line))
    if p[1].import_list.items or p[1].definition_list:
      raise ParseError(
          self.filename,
          "\"module\" statements must precede imports and definitions:",
          p[2].start.line,
          snippet=self._GetSnippet(p[2].start.line))
    p[0] = p[1]
    p[0].module = p[2]

  def p_root_3(self, p):
    """root : root import"""
    if p[1].definition_list:
      raise ParseError(self.filename,
                       "\"import\" statements must precede definitions:",
                       p[2].start.line,
                       snippet=self._GetSnippet(p[2].start.line))
    p[0] = p[1]
    p[0].import_list.Append(p[2])

  def p_root_4(self, p):
    """root : root definition"""
    p[0] = p[1]
    p[0].definition_list.append(p[2])

  def p_import(self, p):
    """import : attribute_section IMPORT STRING_LITERAL SEMI"""
    # 'eval' the literal to strip the quotes.
    # TODO(vtl): This eval is dubious. We should unquote/unescape ourselves.
    p[0] = ast.Import(p[1], eval(p[3]))
    self._set_lexstate(p, 2, 4)

  def p_module(self, p):
    """module : attribute_section MODULE identifier SEMI"""
    p[0] = ast.Module(p[3], p[1])
    self._set_lexstate(p, 2, 4)

  def p_definition(self, p):
    """definition : struct
                  | union
                  | interface
                  | enum
                  | const
                  | feature"""
    p[0] = p[1]

  def p_attribute_section_1(self, p):
    """attribute_section : """
    # Attribute sections ("[...]") are optional everywhere they appear.
    p[0] = None

  def p_attribute_section_2(self, p):
    """attribute_section : LBRACKET attribute_list RBRACKET"""
    p[0] = p[2]
    self._set_lexstate(p, 1, 3)

  def p_attribute_list_1(self, p):
    """attribute_list : """
    p[0] = ast.AttributeList()

  def p_attribute_list_2(self, p):
    """attribute_list : nonempty_attribute_list"""
    p[0] = p[1]

  def p_nonempty_attribute_list_1(self, p):
    """nonempty_attribute_list : attribute"""
    p[0] = ast.AttributeList(p[1])

  def p_nonempty_attribute_list_2(self, p):
    """nonempty_attribute_list : nonempty_attribute_list COMMA attribute"""
    p[0] = p[1]
    p[0].Append(p[3])

  def p_attribute_1(self, p):
    """attribute : name EQUALS identifier"""
    p[0] = ast.Attribute(p[1], p[3])
    self._set_lexstate(p, 1, 3)

  def p_attribute_2(self, p):
    """attribute : name EQUALS evaled_literal
                 | name EQUALS name"""
    p[0] = ast.Attribute(p[1], p[3])
    self._set_lexstate(p, 1, 3)

  def p_attribute_3(self, p):
    """attribute : name"""
    # A bare attribute name is recorded with the value True.
    p[0] = ast.Attribute(p[1], True)
    self._set_lexstate(p, 1, 1)

  def p_evaled_literal(self, p):
    """evaled_literal : literal"""
    # 'eval' the literal to strip the quotes. Handle keywords "true" and "false"
    # specially since they cannot directly be evaluated to python boolean
    # values.
    # NOTE(review): `p[1]` is an ast.Literal here, so these comparisons rely on
    # Literal's equality semantics against plain strings -- confirm in ast.py.
    if p[1] == "true":
      p[0] = True
    elif p[1] == "false":
      p[0] = False
    else:
      p[0] = eval(p[1].value)

  def p_struct_1(self, p):
    """struct : attribute_section STRUCT name LBRACE struct_body RBRACE SEMI"""
    p[0] = ast.Struct(p[3], p[1], p[5])
    self._set_lexstate(p, 2, 7)

  def p_struct_2(self, p):
    """struct : attribute_section STRUCT name SEMI"""
    # A struct declared without a body gets body None (cf. an empty
    # StructBody from p_struct_body_1).
    p[0] = ast.Struct(p[3], p[1], None)
    self._set_lexstate(p, 2, 4)

  def p_struct_body_1(self, p):
    """struct_body : """
    p[0] = ast.StructBody()

  def p_struct_body_2(self, p):
    """struct_body : struct_body const
                   | struct_body enum
                   | struct_body struct_field"""
    p[0] = p[1]
    p[0].Append(p[2])

  def p_struct_field(self, p):
    """struct_field : attribute_section typename name ordinal default SEMI"""
    p[0] = ast.StructField(p[3], p[1], p[4], p[2], p[5])
    self._set_lexstate(p, 2, 6)

  def p_feature(self, p):
    """feature : attribute_section FEATURE name LBRACE feature_body RBRACE SEMI"""
    p[0] = ast.Feature(p[3], p[1], p[5])
    self._set_lexstate(p, 2, 7)

  def p_feature_body_1(self, p):
    """feature_body : """
    p[0] = ast.FeatureBody()

  def p_feature_body_2(self, p):
    """feature_body : feature_body const"""
    p[0] = p[1]
    p[0].Append(p[2])

  def p_union(self, p):
    """union : attribute_section UNION name LBRACE union_body RBRACE SEMI"""
    p[0] = ast.Union(p[3], p[1], p[5])
    self._set_lexstate(p, 2, 7)

  def p_union_body_1(self, p):
    """union_body : """
    p[0] = ast.UnionBody()

  def p_union_body_2(self, p):
    """union_body : union_body union_field"""
    p[0] = p[1]
    # NOTE: `p[1]` aliases `p[0]` after the assignment above, so this is
    # equivalent to `p[0].Append(...)` used by the sibling rules.
    p[1].Append(p[2])

  def p_union_field(self, p):
    """union_field : attribute_section typename name ordinal SEMI"""
    p[0] = ast.UnionField(p[3], p[1], p[4], p[2])
    self._set_lexstate(p, 2, 5)

  def p_default_1(self, p):
    """default : """
    p[0] = None

  def p_default_2(self, p):
    """default : EQUALS constant"""
    p[0] = p[2]

  def p_interface(self, p):
    """interface : attribute_section INTERFACE name LBRACE interface_body RBRACE SEMI"""
    p[0] = ast.Interface(p[3], p[1], p[5])
    self._set_lexstate(p, 2, 7)

  def p_interface_body_1(self, p):
    """interface_body : """
    p[0] = ast.InterfaceBody()

  def p_interface_body_2(self, p):
    """interface_body : interface_body const
                      | interface_body enum
                      | interface_body method"""
    p[0] = p[1]
    p[0].Append(p[2])

  def p_response_1(self, p):
    """response : """
    # No "=> (...)" response clause.
    p[0] = None

  def p_response_2(self, p):
    """response : RESPONSE LPAREN parameter_list RPAREN"""
    p[0] = p[3]

  def p_method(self, p):
    """method : attribute_section name ordinal LPAREN parameter_list RPAREN response SEMI"""
    p[0] = ast.Method(p[2], p[1], p[3], p[5], p[7])
    self._set_lexstate(p, 2, 8)

  def p_parameter_list_1(self, p):
    """parameter_list : """
    p[0] = ast.ParameterList()

  def p_parameter_list_2(self, p):
    """parameter_list : nonempty_parameter_list"""
    p[0] = p[1]
    # The list's span ends where its last parameter ends.
    p[0].end = p[0].items[-1].end

  def p_nonempty_parameter_list_1(self, p):
    """nonempty_parameter_list : parameter"""
    p[0] = ast.ParameterList(p[1])

  def p_nonempty_parameter_list_2(self, p):
    """nonempty_parameter_list : nonempty_parameter_list COMMA parameter"""
    p[0] = p[1]
    p[0].Append(p[3])

  def p_parameter(self, p):
    """parameter : attribute_section typename name ordinal"""
    p[0] = ast.Parameter(p[3], p[1], p[4], p[2])
    # Span runs from the typename (2) through the name (3); a trailing
    # ordinal, if any, is not included.
    self._set_lexstate(p, 2, 3)

  def p_typename(self, p):
    """typename : nonnullable_typename QSTN
                | nonnullable_typename"""
    # len(p) == 3 exactly when the optional '?' (QSTN) suffix is present.
    p[0] = ast.Typename(p[1], nullable=len(p) != 2)
    self._set_lexstate(p, 1, 1)

  def p_nonnullable_typename(self, p):
    """nonnullable_typename : basictypename
                            | array
                            | fixed_array
                            | associative_array"""
    p[0] = p[1]

  def p_basictypename(self, p):
    """basictypename : remotetype
                     | receivertype
                     | associatedremotetype
                     | associatedreceivertype
                     | identifier
                     | handletype"""
    p[0] = p[1]

  def p_remotetype(self, p):
    """remotetype : PENDING_REMOTE LANGLE identifier RANGLE"""
    p[0] = ast.Remote(p[3])
    self._set_lexstate(p, 1, 4)

  def p_receivertype(self, p):
    """receivertype : PENDING_RECEIVER LANGLE identifier RANGLE"""
    p[0] = ast.Receiver(p[3])
    self._set_lexstate(p, 1, 4)

  def p_associatedremotetype(self, p):
    """associatedremotetype : PENDING_ASSOCIATED_REMOTE LANGLE identifier RANGLE"""
    p[0] = ast.Remote(p[3], associated=True)
    self._set_lexstate(p, 1, 4)

  def p_associatedreceivertype(self, p):
    """associatedreceivertype : PENDING_ASSOCIATED_RECEIVER LANGLE identifier RANGLE"""
    p[0] = ast.Receiver(p[3], associated=True)
    self._set_lexstate(p, 1, 4)

  def p_handletype(self, p):
    """handletype : HANDLE
                  | HANDLE LANGLE name RANGLE"""
    if len(p) == 2:
      # A bare "handle" with no specialization.
      p[0] = ast.Identifier(p[1])
      self._set_lexstate(p, 1, 1)
    else:
      # Only this fixed set of handle specializations is legal.
      if p[3].name not in ('data_pipe_consumer', 'data_pipe_producer',
                           'message_pipe', 'shared_buffer', 'platform'):
        raise ParseError(self.filename,
                         "Invalid handle type %r:" % p[3].name,
                         lineno=p[3].start.line,
                         snippet=self._GetSnippet(p.lineno(1)))
      p[0] = ast.Identifier(f"handle<{p[3]}>")
      self._set_lexstate(p, 1, 4)

  def p_array(self, p):
    """array : ARRAY LANGLE typename RANGLE"""
    p[0] = ast.Array(p[3])
    self._set_lexstate(p, 1, 4)

  def p_fixed_array(self, p):
    """fixed_array : ARRAY LANGLE typename COMMA INT_CONST_DEC RANGLE"""
    value = int(p[5])
    # Fixed sizes must be in [1, _MAX_ARRAY_SIZE].
    if value == 0 or value > _MAX_ARRAY_SIZE:
      raise ParseError(
          self.filename,
          "Fixed array size %d invalid:" % value,
          lineno=p.lineno(5),
          snippet=self._GetSnippet(p.lineno(5)))
    p[0] = ast.Array(p[3], fixed_size=value)
    self._set_lexstate(p, 1, 6)

  def p_associative_array(self, p):
    """associative_array : MAP LANGLE identifier COMMA typename RANGLE"""
    p[0] = ast.Map(p[3], p[5])
    self._set_lexstate(p, 1, 6)

  def p_ordinal_1(self, p):
    """ordinal : """
    p[0] = None

  def p_ordinal_2(self, p):
    """ordinal : ORDINAL"""
    # ORDINAL tokens look like "@123"; strip the leading '@'.
    value = int(p[1][1:])
    if value > _MAX_ORDINAL_VALUE:
      raise ParseError(
          self.filename,
          "Ordinal value %d too large:" % value,
          lineno=p.lineno(1),
          snippet=self._GetSnippet(p.lineno(1)))
    p[0] = ast.Ordinal(value)
    self._set_lexstate(p, 1, 1)

  def p_enum_1(self, p):
    """enum : attribute_section ENUM name LBRACE enum_value_list RBRACE SEMI
            | attribute_section ENUM name LBRACE \
                nonempty_enum_value_list COMMA RBRACE SEMI"""
    p[0] = ast.Enum(p[3], p[1], p[5])
    # The closing SEMI is symbol 7 in the first production and 8 in the
    # second (which has an extra trailing COMMA).
    self._set_lexstate(p, 2, 7 if len(p) == 8 else 8)

  def p_enum_2(self, p):
    """enum : attribute_section ENUM name SEMI"""
    # An enum declared without a body gets a None value list.
    p[0] = ast.Enum(p[3], p[1], None)
    self._set_lexstate(p, 2, 4)

  def p_enum_value_list_1(self, p):
    """enum_value_list : """
    p[0] = ast.EnumValueList()

  def p_enum_value_list_2(self, p):
    """enum_value_list : nonempty_enum_value_list"""
    p[0] = p[1]

  def p_nonempty_enum_value_list_1(self, p):
    """nonempty_enum_value_list : enum_value"""
    p[0] = ast.EnumValueList(p[1])

  def p_nonempty_enum_value_list_2(self, p):
    """nonempty_enum_value_list : nonempty_enum_value_list COMMA enum_value"""
    p[0] = p[1]
    p[0].Append(p[3])

  def p_enum_value(self, p):
    """enum_value : attribute_section name
                  | attribute_section name EQUALS int
                  | attribute_section name EQUALS identifier"""
    # len(p) == 5 when an explicit "= value" is present.
    p[0] = ast.EnumValue(p[2], p[1], p[4] if len(p) == 5 else None)
    self._set_lexstate(p, 2, len(p) - 1)

  def p_const(self, p):
    """const : attribute_section CONST typename name EQUALS constant SEMI"""
    p[0] = ast.Const(p[4], p[1], p[3], p[6])
    self._set_lexstate(p, 2, 7)

  def p_constant(self, p):
    """constant : literal
                | identifier"""
    p[0] = p[1]

  def p_identifier(self, p):
    """identifier : name
                  | name DOT identifier"""
    # Dotted identifiers are flattened back into a single string.
    p[0] = ast.Identifier(''.join(map(str, p[1:])))
    self._set_lexstate(p, 1, len(p) - 1)

  # Allow 'feature' to be a name literal not just a keyword.
  def p_name(self, p):
    """name : NAME
            | FEATURE"""
    p[0] = ast.Name(p[1])
    self._set_lexstate(p, 1, 1)

  def p_literal(self, p):
    """literal : int
               | float
               | TRUE
               | FALSE
               | DEFAULT
               | STRING_LITERAL"""
    if isinstance(p[1], ast.Literal):
      # Already wrapped by the int/float sub-rules.
      p[0] = p[1]
    else:
      # Terminal tokens are wrapped using the token type (e.g. 'TRUE') as the
      # literal kind.
      p[0] = ast.Literal(p.slice[1].type, p[1])
      self._set_lexstate(p, 1, 1)

  def p_int(self, p):
    """int : int_const
           | PLUS int_const
           | MINUS int_const"""
    # Fold an optional sign token into the literal's text.
    p[0] = ast.Literal('int', ''.join(map(str, p[1:])))
    self._set_lexstate(p, 1, len(p) - 1)

  def p_int_const(self, p):
    """int_const : INT_CONST_DEC
                 | INT_CONST_HEX"""
    p[0] = ast.Literal('int', p[1])
    self._set_lexstate(p, 1, 1)

  def p_float(self, p):
    """float : FLOAT_CONST
             | PLUS FLOAT_CONST
             | MINUS FLOAT_CONST"""
    p[0] = ast.Literal('float', ''.join(p[1:]))
    self._set_lexstate(p, 1, len(p) - 1)

  # PLY hook invoked on a syntax error; `e` is the offending LexToken, or None
  # on unexpected EOF.
  def p_error(self, e):
    if e is None:
      # Unexpected EOF.
      # TODO(vtl): Can we figure out what's missing?
      raise ParseError(self.filename, "Unexpected end of file")

    if e.value == 'feature':
      raise ParseError(self.filename,
                       "`feature` is reserved for a future mojom keyword",
                       lineno=e.lineno,
                       snippet=self._GetSnippet(e.lineno))

    raise ParseError(
        self.filename,
        "Unexpected %r:" % e.value,
        lineno=e.lineno,
        snippet=self._GetSnippet(e.lineno))

  def _GetSnippet(self, lineno):
    """Returns the source text of 1-based line `lineno`, for error messages."""
    return self.source.split('\n')[lineno - 1]
def Parse(source, filename, with_comments=False):
  """Parse source file to AST.

  Args:
    source: The source text as a str (Python 2 or 3) or unicode (Python 2).
    filename: The filename that |source| originates from.
    with_comments: If True, parses comments and attaches them to AST nodes.
      Otherwise, they are discarded.

  Returns:
    The AST as a mojom.parse.ast.Mojom object.
  """
  mojom_lexer = Lexer(filename)
  lex.lex(object=mojom_lexer)
  # Build the yacc tables in-memory from the Parser's reflected grammar.
  yacc.yacc(module=Parser(mojom_lexer, source, filename),
            debug=0,
            write_tables=0)
  syntax_tree = yacc.parse(source)
  if not with_comments:
    return syntax_tree
  _AssignComments(syntax_tree, mojom_lexer.line_comments,
                  mojom_lexer.suffix_comments)
  return syntax_tree
def _AssignComments(tree, line_comments, suffix_comments):
  """Attaches comments to AST nodes in `tree`.

  Uses the algorithm described at
  https://jayconrod.com/posts/129/preserving-comments-when-parsing-and-formatting-code
  by performing two tree traversals for assigning line and suffix comments.

  Args:
    tree: The root ast.Mojom node; nodes are mutated in place.
    line_comments: Comment tokens that occupy a line of their own, assumed to
      be in source order (each token carries `lexpos` and `lineno`).
    suffix_comments: Comment tokens that trail other code on the same line.
      This list is consumed (entries are deleted) as comments are assigned.
  """
  walker = _AstWalker()
  walker.Walk(tree)

  # The only types of nodes that may not have lexpos state.
  no_lexpos_nodes = (ast.Mojom, ast.Module, ast.ParameterList)

  # Assign comments on their own line to the node that immediately follows.
  comment_index = 0
  for node in walker.pre:
    if not node.start[0]:
      assert isinstance(node, no_lexpos_nodes), f'Missing lexpos for {node}'
      continue
    # Consume every line comment that starts before this node.
    while comment_index < len(line_comments):
      comment = line_comments[comment_index]
      if node.start.lexpos > comment.lexpos:
        if isinstance(node, _BodyEnd):
          # If this is the end of an enclosing scope, then attach it to the
          # "after" comments section.
          node.node.append_comment(after=comment)
        else:
          # The comment precedes this node.
          node.append_comment(before=comment)
        comment_index += 1
      else:
        break

  # Any remaining line comments are at the end of the file; walker.pre[0] is
  # the root ast.Mojom node.
  walker.pre[0].comments_after = line_comments[comment_index:]

  # Assign suffix comments in reverse order.
  for node in reversed(walker.post):
    if not node.start[0]:
      assert isinstance(node, no_lexpos_nodes), f'Missing lexpos for {node}'
      continue
    # Don't assign suffix comments to block nodes.
    if node.start.line != node.end.line:
      continue
    comment_index = len(suffix_comments) - 1
    while comment_index >= 0:
      comment = suffix_comments[comment_index]
      if node.start.line == comment.lineno:
        if isinstance(node, _BodyEnd):
          # Transform suffix comments on the body end to be at the end of
          # the inner enclosing node.
          node.node.append_comment(after=comment)
        else:
          node.append_comment(suffix=comment)
        # Claimed comments are removed so outer nodes don't re-attach them.
        del suffix_comments[comment_index]
      comment_index -= 1
    # Iterating in reverse means comments order should be
    # reversed.
    if node.comments_suffix:
      node.comments_suffix = list(reversed(node.comments_suffix))

  # Attach any remaining suffix comments to the first sub-node of the
  # ast.Mojom.
  # NOTE(review): walker.post[-2] is the node completed immediately before the
  # root in post-order (i.e. the last-finished child, not the first) -- confirm
  # this matches the "first sub-node" intent above.
  if suffix_comments:
    for comment in suffix_comments:
      walker.post[-2].append_comment(before=comment)
class _BodyEnd(ast.NodeBase):
  """A synthetic AST node used to demarcate the end of an AST node's children.

  This can be thought of as the trailing semicolon of a struct or interface
  definition.
  """

  def __init__(self, node):
    super().__init__(filename=node.filename)
    # Remember the real node so comment assignment can redirect to it.
    self.node = node
    # The marker spans the single character immediately at `node`'s end.
    end = node.end
    self.start = end
    self.end = ast.Location(end.line, end.lexpos + 1)
class _AstWalker:
  """Iterates the AST nodes to which comments should be attached in pre- and
  post-order traversal, storing the results in two lists. Nodes of type
  NodeListBase are un-rolled by the parent node. To identify the end of scopes,
  a _BodyEnd node is inserted. Not all AST nodes are emitted, if comment nodes
  should be associated with a higher AST node instead of the lower leaf.
  """

  def __init__(self):
    # Nodes in pre-order (parent recorded before its children).
    self.pre = []
    # Nodes in post-order (children recorded before their parent).
    self.post = []

  def Walk(self, node):
    """Recursively records `node` and its comment-bearing children.

    Branches that `pass` record the node itself but deliberately do not
    descend into it (its sub-parts take no comments of their own).
    """
    if node is None:
      return
    self.pre.append(node)
    if isinstance(node, ast.Mojom):
      self.Walk(node.module)
      for item in node.import_list:
        self.Walk(item)
      for item in node.definition_list:
        self.Walk(item)
    elif isinstance(node, ast.Module):
      pass
    elif isinstance(node, ast.Import):
      pass
    elif isinstance(node, ast.Interface):
      for item in node.body:
        self.Walk(item)
      # Mark the end of the body so trailing comments inside the braces can
      # be attached to the interface itself.
      self.Walk(_BodyEnd(node))
    elif isinstance(node, ast.Method):
      for param in node.parameter_list:
        self.Walk(param)
      if node.response_parameter_list:
        for param in node.response_parameter_list:
          self.Walk(param)
    elif isinstance(node, ast.Parameter):
      pass
    elif isinstance(node, ast.Const):
      pass
    elif isinstance(node, ast.Enum):
      # Enums declared without a body (p_enum_2) have no value list to walk.
      if node.enum_value_list:
        for item in node.enum_value_list:
          self.Walk(item)
        self.Walk(_BodyEnd(node))
    elif isinstance(node, ast.EnumValue):
      pass
    elif isinstance(node, (ast.Struct, ast.Feature)):
      # Structs declared without a body (p_struct_2) have body None.
      if node.body:
        for item in node.body:
          self.Walk(item)
        self.Walk(_BodyEnd(node))
    elif isinstance(node, ast.StructField):
      pass
    elif isinstance(node, ast.Union):
      for item in node.body:
        self.Walk(item)
      self.Walk(_BodyEnd(node))
    elif isinstance(node, ast.UnionField):
      pass
    elif isinstance(node, ast.AttributeList):
      for item in node:
        self.Walk(item)
    elif isinstance(node, _BodyEnd):
      pass
    else:
      raise ValueError(f'Unexpected AST node {repr(node)}')
    self.post.append(node)