chromium/tools/json_schema_compiler/idl_schema.py

#!/usr/bin/env python3
# Copyright 2012 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import itertools
import json
import os.path
import pprint
import re
import sys

from json_parse import OrderedDict

# This file is a peer to json_schema.py. Each of these files understands a
# certain format describing APIs (either JSON or IDL), reads files written
# in that format into memory, and emits them as a Python array of objects
# corresponding to those APIs, where the objects are formatted in a way that
# the JSON schema compiler understands. compiler.py drives both idl_schema.py
# and json_schema.py.

# idl_parser expects to be able to import certain files in its directory,
# so let's set things up the way it wants.
# The directory is <two levels above this file's directory>/ppapi/generators,
# computed from the symlink-resolved location of this file.
_idl_generators_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                    os.pardir, os.pardir, 'ppapi', 'generators')
if _idl_generators_path in sys.path:
  # The generators directory is already on sys.path; import directly.
  import idl_parser
else:
  # Put the generators directory at the front of sys.path only for the
  # duration of the import. The front entry is popped again in the finally
  # clause, so this happens even if the import raises.
  sys.path.insert(0, _idl_generators_path)
  try:
    import idl_parser
  finally:
    sys.path.pop(0)


def ProcessComment(comment):
  '''
  Split a comment into a parent comment and a dict of parameter comments.

  Function comments are of the form:
    Function documentation. May contain HTML and multiple lines.

    |arg1_name|: Description of arg1. Use <var>argument</var> to refer
    to other arguments.
    |arg2_name|: Description of arg2...

  Every extracted piece of text has its leading and trailing whitespace
  stripped. If it consists of several paragraphs separated by blank lines,
  each paragraph is wrapped in <p>...</p>. Any newlines left after that are
  removed.

  Args:
    comment: The string from a Comment node.

  Returns: A tuple that looks like:
    (
      "The processed comment, minus all |parameter| mentions.",
      {
        'parameter_name_1': "The comment that followed |parameter_name_1|:",
        ...
      }
    )
  '''

  def clean(text):
    # Normalizes one slice of the comment: trim it, mark up paragraphs if
    # there is more than one, then drop the remaining newlines.
    text = text.strip()
    pieces = text.split('\n\n')
    if len(pieces) >= 2:
      text = '<p>' + '</p><p>'.join(p.strip() for p in pieces) + '</p>'
    return text.replace('\n', '')

  # Each parameter comment is introduced by text of the form '|name|: '.
  introductions = list(re.finditer(r' *\|([^|]*)\| *: *', comment))

  # Offsets at which each introduction begins, followed by the end of the
  # comment. Entry 0 is where the parent comment stops; entry i + 1 is where
  # the text of parameter i stops.
  boundaries = [intro.start() for intro in introductions]
  boundaries.append(len(comment))

  # The parent comment is everything before the first parameter comment (or
  # the whole comment if there are no parameter comments).
  parent_comment = clean(comment[:boundaries[0]])

  # A parameter's comment goes from the end of its introduction to the
  # beginning of the next parameter's introduction.
  params = OrderedDict()
  for intro, stop in zip(introductions, boundaries[1:]):
    params[intro.group(1)] = clean(comment[intro.end():stop])

  return (parent_comment, params)


class Callspec(object):
  '''
  Given a Callspec node representing an IDL function declaration, converts into
  a tuple:
      (name, list of function parameters, return type, async return)
  '''

  def __init__(self, callspec_node, comment):
    # |comment| is looked up by parameter name in process() to fill in each
    # parameter's 'description'.
    self.node = callspec_node
    self.comment = comment

  def process(self, use_returns_async, callbacks):
    '''
    Builds the (name, parameters, return_type, returns_async) tuple.

    Args:
      use_returns_async: If true, a trailing parameter of type 'function' is
        removed from the parameter list and returned as the async return
        (unless the node sets trailingCallbackIsFunctionParameter).
      callbacks: Handed through to Typeref and Param processing.
    '''
    node = self.node

    return_type = None
    typeref = node.GetProperty('TYPEREF')
    if typeref not in ('void', None):
      seed = {'name': node.GetName()}
      return_type = Typeref(typeref, node.parent, seed).process(callbacks)
      # The IDL parser doesn't allow specifying return types as optional.
      # Instead we infer any object return values to be optional.
      # TODO(asargent): fix the IDL parser to support optional return types.
      if '$ref' in return_type or return_type.get('type') == 'object':
        return_type['optional'] = True

    descriptions = self.comment
    parameters = []
    for child in node.GetChildren():
      parameter = Param(child).process(callbacks)
      parameter_name = parameter['name']
      if parameter_name in descriptions:
        parameter['description'] = descriptions[parameter_name]
      parameters.append(parameter)

    # At the moment all functions in IDL with an asynchronous return are defined
    # with a trailing callback in their parameters, but in our schema model we
    # represent this with a separate 'returns_async' field. If there is a
    # trailing callback, pop it off into the returns async property.
    # Note: We only do this for interface types of 'Functions' and 'Properties',
    # not for 'Events' and IDL callback definitions (specified by the
    # |use_returns_async| parameter) or for Function definitions with trailing
    # callbacks which are not asynchronous returns (specified by the
    # trailingCallbackIsFunctionParameter extended attribute).
    # TODO(tjudkins): Once IDL definitions are changed to describe returning
    # promises, we can condition on that instead.
    has_async_callback = (
        use_returns_async
        and not node.GetProperty('trailingCallbackIsFunctionParameter')
        and parameters and parameters[-1].get('type') == 'function')

    if not has_async_callback:
      assert not node.GetProperty('doesNotSupportPromises'), (
          'Callspec "%s" does not need to specify [doesNotSupportPromises] if '
          'it does not have a trailing callback' % node.GetName())
      return (node.GetName(), parameters, return_type, None)

    returns_async = parameters.pop()
    # The returns_async field is inherently a function, so doesn't need type
    # specified on it.
    del returns_async['type']
    does_not_support_promises = node.GetProperty('doesNotSupportPromises')
    if does_not_support_promises is None:
      assert return_type is None, (
          'Function "%s" cannot support promises and also have a '
          'return value.' % node.GetName())
    else:
      returns_async['does_not_support_promises'] = does_not_support_promises

    return (node.GetName(), parameters, return_type, returns_async)


class Param(object):
  '''
  Given a Param node representing a function parameter, converts into a Python
  dictionary that the JSON schema compiler expects to see.
  '''

  def __init__(self, param_node):
    self.node = param_node

  def process(self, callbacks):
    '''
    Returns the dictionary produced by Typeref for this parameter's TYPEREF,
    seeded with the parameter's name.
    '''
    node = self.node
    typeref = node.GetProperty('TYPEREF')
    seed = {'name': node.GetName()}
    return Typeref(typeref, node, seed).process(callbacks)


class Dictionary(object):
  '''
  Given an IDL Dictionary node, converts into a Python dictionary that the JSON
  schema compiler expects to see.
  '''

  def __init__(self, dictionary_node):
    self.node = dictionary_node

  def process(self, callbacks):
    '''
    Returns a dict with 'id', 'properties' and 'type' ('object') entries, plus
    whichever flags are set on the node.

    Args:
      callbacks: Handed to Member.process() for every Member child.
    '''
    node = self.node

    # Only Member children contribute properties; other children are skipped.
    members = OrderedDict()
    for child in node.GetChildren():
      if child.cls != 'Member':
        continue
      member_name, member_schema = Member(child).process(callbacks)
      members[member_name] = member_schema

    result = {'id': node.GetName(), 'properties': members, 'type': 'object'}

    # If this has the `ignoreAdditionalProperties` extended attribute, copy it
    # into the resulting object with a value of True.
    if node.GetProperty('ignoreAdditionalProperties'):
      result['ignoreAdditionalProperties'] = True

    # At most one documentation flag is recorded: the first one, in this
    # order, that is set on the node.
    for doc_flag in ('nodoc', 'inline_doc', 'noinline_doc'):
      if node.GetProperty(doc_flag):
        result[doc_flag] = True
        break
    return result


class Member(object):
  '''
  Given an IDL dictionary or interface member, converts into a name/value pair
  where the value is a Python dictionary that the JSON schema compiler expects
  to see.
  '''

  def __init__(self, member_node):
    self.node = member_node

  def process(self,
              callbacks,
              functions_are_properties=False,
              use_returns_async=False):
    '''
    Builds the (name, properties) pair for this member.

    Args:
      callbacks: Handed through to Callspec and Typeref processing.
      functions_are_properties: If true, a Callspec child is treated as a
        property whose type is the function's return type.
      use_returns_async: Handed through to Callspec.process().
    '''
    node = self.node
    name = node.GetName()
    properties = OrderedDict()

    deprecated = node.GetProperty('deprecated')
    if deprecated:
      properties['deprecated'] = deprecated

    for flag in ('nodoc', 'nocompile', 'nodart', 'serializableFunction'):
      if node.GetProperty(flag):
        properties[flag] = True

    if node.GetProperty('OPTIONAL'):
      properties['optional'] = True

    platforms = node.GetProperty('platforms')
    if platforms:
      properties['platforms'] = list(platforms)

    # Option values are converted before being stored under 'options':
    # maxListeners through int(), the others to whether they equal 'true'.
    def is_true(text):
      return text == 'true'

    for option_name, sanitizer in (('maxListeners', int),
                                   ('supportsFilters', is_true),
                                   ('supportsListeners', is_true),
                                   ('supportsRules', is_true)):
      raw_option = node.GetProperty(option_name)
      if raw_option:
        properties.setdefault('options',
                              {})[option_name] = sanitizer(raw_option)

    type_override = None
    parameter_comments = OrderedDict()
    for child in node.GetChildren():
      if child.cls == 'Comment':
        # The parameter comments found here are handed to a Callspec child
        # visited later in this loop.
        description, parameter_comments = ProcessComment(child.GetName())
        properties['description'] = description
        continue
      if child.cls != 'Callspec':
        continue

      name, parameters, return_type, returns_async = Callspec(
          child, parameter_comments).process(use_returns_async, callbacks)
      if not functions_are_properties:
        type_override = 'function'
        properties['parameters'] = parameters
        if return_type is not None:
          properties['returns'] = return_type
        if returns_async is not None:
          properties['returns_async'] = returns_async
        continue

      # If functions are treated as properties (which will happen if the
      # interface is named Properties) then this isn't a function, it's a
      # property which is encoded as a function with no arguments. The
      # property type is the return type. This is an egregious hack in lieu
      # of the IDL parser supporting 'const'.
      assert parameters == [], (
          'Property "%s" must be no-argument functions '
          'with a non-void return type' % name)
      assert return_type is not None, (
          'Property "%s" must be no-argument functions '
          'with a non-void return type' % name)
      assert 'type' in return_type, (
          'Property return type "%s" from "%s" must specify a '
          'fundamental IDL type.' % (pprint.pformat(return_type), name))
      type_override = return_type['type']

    properties['name'] = name
    if type_override is None:
      # No Callspec child: derive the type information from the TYPEREF.
      properties = Typeref(node.GetProperty('TYPEREF'), node,
                           properties).process(callbacks)
    else:
      properties['type'] = type_override

    value = node.GetProperty('value')
    if value is not None:
      # IDL always returns values as strings, so cast to their real type.
      properties['value'] = self.cast_from_json_type(properties['type'], value)
    return name, properties

  def cast_from_json_type(self, json_type, string_value):
    '''Casts from string |string_value| to a real Python type based on a JSON
    Schema type |json_type|. For example, a string value of '42' and a JSON
    Schema type 'integer' will cast to int('42') ==> 42.
    '''
    caster = None
    if json_type == 'integer':
      caster = int
    elif json_type == 'number':
      caster = float
    if caster is not None:
      return caster(string_value)
    # Add more as necessary.
    assert json_type == 'string', (
        'No rule exists to cast JSON Schema type "%s" to its equivalent '
        'Python type for value "%s". You must add a new rule here.' %
        (json_type, string_value))
    return string_value


class Typeref(object):
  '''
  Given a TYPEREF property representing the type of dictionary member or
  function parameter, converts into a Python dictionary that the JSON schema
  compiler expects to see.
  '''

  # IDL type names that translate to nothing more than a JSON schema 'type'.
  # ArrayBufferView is in this table because we force the APIs to specify
  # instanceOf (ArrayBufferView isn't an instantiable type), therefore no
  # isInstanceOf is recorded for it.
  _BARE_TYPES = {
      'DOMString': 'string',
      'boolean': 'boolean',
      'double': 'number',
      'long': 'integer',
      'any': 'any',
      'ArrayBufferView': 'binary',
  }

  def __init__(self, typeref, parent, additional_properties):
    self.typeref = typeref
    self.parent = parent
    self.additional_properties = additional_properties

  def process(self, callbacks):
    '''
    Adds the type information to |additional_properties| in place and returns
    that same dictionary.

    Args:
      callbacks: Mapping from callback name to its definition. A typeref found
        in it is expanded to that definition rather than emitted as a '$ref'.
    '''
    result = self.additional_properties
    parent = self.parent
    typeref = self.typeref

    if parent.GetPropertyLocal('OPTIONAL'):
      result['optional'] = True

    # The IDL parser denotes array types by adding a child 'Array' node onto
    # the Param node in the Callspec. For an array, the element type is
    # written into a nested 'items' dictionary instead of the top level.
    spec = result
    if any(child.cls == 'Array' and child.GetName() == parent.GetName()
           for child in parent.GetChildren()):
      result['type'] = 'array'
      spec = result['items'] = OrderedDict()

    if typeref in self._BARE_TYPES:
      spec['type'] = self._BARE_TYPES[typeref]
    elif typeref == 'object':
      spec['type'] = 'object'
      spec.setdefault('additionalProperties', OrderedDict())['type'] = 'any'
      instance_of = parent.GetProperty('instanceOf')
      if instance_of:
        spec['isInstanceOf'] = instance_of
    elif typeref == 'ArrayBuffer':
      spec['type'] = 'binary'
      spec['isInstanceOf'] = 'ArrayBuffer'
    elif typeref == 'FileEntry':
      spec['type'] = 'object'
      spec['isInstanceOf'] = 'FileEntry'
      spec.setdefault('additionalProperties', OrderedDict())['type'] = 'any'
    elif parent.GetPropertyLocal('Union'):
      # Each 'Option' child of the parent becomes one entry of 'choices'.
      choices = []
      for option in parent.GetChildren():
        if option.cls == 'Option':
          choices.append(
              Typeref(option.GetProperty('TYPEREF'), option,
                      OrderedDict()).process(callbacks))
      spec['choices'] = choices
    elif typeref is None:
      spec['type'] = 'function'
    elif typeref in callbacks:
      # Do not override name and description if they are already specified.
      keep = [(key, spec[key]) for key in ('description', 'name')
              if spec.get(key) is not None]
      spec.update(callbacks[typeref])
      for key, original in keep:
        spec[key] = original
    else:
      spec['$ref'] = typeref
    return result


class Enum(object):
  '''
  Given an IDL Enum node, converts into a Python dictionary that the JSON
  schema compiler expects to see.
  '''

  def __init__(self, enum_node):
    self.node = enum_node
    # Replaced by the processed text of a Comment child, if there is one.
    self.description = ''

  def _process_item(self, item):
    # Converts one EnumItem node into its {'name': ...} entry. Raises
    # ValueError if the item has a child that is not a Comment.
    entry = {'name': item.GetName()}
    if item.GetProperty('nodoc'):
      entry['nodoc'] = True
    for child in item.GetChildren():
      if child.cls != 'Comment':
        raise ValueError('Did not process %s %s' % (child.cls, child))
      entry['description'] = ProcessComment(child.GetName())[0]
    return entry

  def process(self):
    '''
    Returns a dict with 'id', 'description', 'type' ('string') and 'enum'
    entries, plus whichever extra properties are set on the node. Exits via
    sys.exit() on a child that is neither an EnumItem nor a Comment.
    '''
    node = self.node

    values = []
    for child in node.GetChildren():
      if child.cls == 'Comment':
        self.description = ProcessComment(child.GetName())[0]
      elif child.cls == 'EnumItem':
        values.append(self._process_item(child))
      else:
        sys.exit('Did not process %s %s' % (child.cls, child))

    result = {
        'id': node.GetName(),
        'description': self.description,
        'type': 'string',
        'enum': values
    }
    # Properties copied through with their own value whenever they are set.
    for property_name in ('cpp_enum_prefix_override', 'inline_doc',
                          'noinline_doc', 'nodoc', 'deprecated'):
      property_value = node.GetProperty(property_name)
      if property_value:
        result[property_name] = property_value
    return result


class Namespace(object):
  '''
  Given an IDLNode representing an IDL namespace, converts into a Python
  dictionary that the JSON schema compiler expects to see.
  '''

  def __init__(self,
               namespace_node,
               description,
               nodoc=False,
               platforms=None,
               compiler_options=None,
               deprecated=None,
               documentation_options=None):
    # Values supplied by the caller; reported as-is by process(), except that
    # falsy compiler_options/documentation_options are reported as {}.
    self.namespace = namespace_node
    self.description = description
    self.nodoc = nodoc
    self.platforms = platforms
    self.compiler_options = compiler_options
    self.deprecated = deprecated
    self.documentation_options = documentation_options
    # Filled in by process() from the namespace's children.
    self.types = []
    self.functions = []
    self.events = []
    self.properties = OrderedDict()
    self.callbacks = OrderedDict()
    self.manifest_keys = None

  def _process_child(self, node):
    # Records the contribution of one direct child of the namespace. Returns
    # False if the child is of a kind that is not handled.
    kind = node.cls
    if kind == 'Dictionary':
      is_manifest_keys = node.GetName() == 'ManifestKeys'
      schema = Dictionary(node).process(self.callbacks)
      if is_manifest_keys:
        self.manifest_keys = schema['properties']
      else:
        self.types.append(schema)
      return True
    if kind == 'Callback':
      callback_name, callback_schema = Member(node).process(self.callbacks)
      self.callbacks[callback_name] = callback_schema
      return True
    if kind == 'Enum':
      self.types.append(Enum(node).process())
      return True
    if kind != 'Interface':
      return False

    interface_name = node.GetName()
    if interface_name == 'Functions':
      self.functions = self.process_interface(node)
    elif interface_name == 'Events':
      self.events = self.process_interface(node)
    elif interface_name == 'Properties':
      # Properties are given as key-value pairs, but IDL will parse
      # it as a list. Convert back to key-value pairs.
      for prop in self.process_interface(node, functions_are_properties=True):
        prop_name = prop.pop('name')
        assert prop_name not in self.properties, (
            'Property "%s" cannot be specified more than once.' % prop_name)
        self.properties[prop_name] = prop
    else:
      return False
    return True

  def process(self):
    '''
    Walks the namespace's children and returns the namespace dictionary.
    Exits via sys.exit() on a child that cannot be processed.
    '''
    for node in self.namespace.GetChildren():
      if not self._process_child(node):
        sys.exit('Did not process %s %s' % (node.cls, node))

    compiler_options = self.compiler_options or {}
    documentation_options = self.documentation_options or {}
    return {
        'namespace': self.namespace.GetName(),
        'description': self.description,
        'nodoc': self.nodoc,
        'types': self.types,
        'functions': self.functions,
        'properties': self.properties,
        'manifest_keys': self.manifest_keys,
        'events': self.events,
        'platforms': self.platforms,
        'compiler_options': compiler_options,
        'deprecated': self.deprecated,
        'documentation_options': documentation_options
    }

  def process_interface(self, node, functions_are_properties=False):
    '''
    Returns the list of processed Member children of the interface |node|.
    '''
    # Callspec definitions for Functions and Properties with an asynchronous
    # return are defined with a trailing callback, but during parsing we move
    # the details to a returns_async field. We only want to do this for Function
    # and Property definitions, not for Event or IDL callback definitions.
    # TODO(tjudkins): Once IDL definitions are changed to describe returning
    # promises, we can condition on that rather than this special casing here.
    use_returns_async = node.GetName() in ['Functions', 'Properties']
    return [
        Member(child).process(
            self.callbacks,
            functions_are_properties=functions_are_properties,
            use_returns_async=use_returns_async)[1]
        for child in node.GetChildren() if child.cls == 'Member'
    ]


class IDLSchema(object):
  '''
  Given a list of IDLNodes and IDLAttributes, converts into a Python list
  of api_defs that the JSON schema compiler expects to see.
  '''

  def __init__(self, idl):
    self.idl = idl

  def process(self):
    '''
    Walks the top-level nodes in order and returns one dictionary per
    Namespace node.

    Comment and ExtAttribute nodes are accumulated and applied to the next
    Namespace node that follows them. Copyright nodes and unrecognized
    extended attributes are ignored. Exits via sys.exit() on any other kind
    of node.
    '''
    namespaces = []
    nodoc = False
    description = None
    platforms = None
    compiler_options = {}
    deprecated = None
    documentation_options = {}
    for node in self.idl:
      if node.cls == 'Namespace':
        if not description:
          # TODO(kalman): Go back to throwing an error here.
          print('%s must have a namespace-level comment. This will '
                'appear on the API summary page.' % node.GetName())
          description = ''
        # The namespace gets its own copy of documentation_options so that
        # documentation attributes seen later in the file cannot alter the
        # dictionary already emitted for this namespace.
        namespace = Namespace(node,
                              description,
                              nodoc,
                              platforms=platforms,
                              compiler_options=compiler_options or None,
                              deprecated=deprecated,
                              documentation_options=dict(documentation_options))
        namespaces.append(namespace.process())
        nodoc = False
        platforms = None
        # Reset to a fresh dict (not None): the ExtAttribute branch below
        # assigns items into compiler_options for any following namespace.
        compiler_options = {}
      elif node.cls == 'Copyright':
        continue
      elif node.cls == 'Comment':
        description = node.GetName()
      elif node.cls == 'ExtAttribute':
        if node.name == 'nodoc':
          nodoc = bool(node.value)
        elif node.name == 'platforms':
          platforms = list(node.value)
        elif node.name == 'implemented_in':
          compiler_options['implemented_in'] = node.value
        elif node.name == 'generate_error_messages':
          compiler_options['generate_error_messages'] = True
        elif node.name == 'deprecated':
          deprecated = str(node.value)
        elif node.name == 'documentation_title':
          documentation_options['title'] = node.value
        elif node.name == 'documentation_namespace':
          documentation_options['namespace'] = node.value
        elif node.name == 'documented_in':
          documentation_options['documented_in'] = node.value
        else:
          continue
      else:
        sys.exit('Did not process %s %s' % (node.cls, node))
    return namespaces


def Load(filename):
  '''
  Reads the IDL file named |filename|, decodes it as UTF-8 and returns the
  result of Process() on its contents: an equivalent Python structure in a
  format that the JSON schema compiler expects to see.
  '''
  # Read raw bytes and decode explicitly rather than opening in text mode.
  with open(filename, 'rb') as idl_file:
    raw_bytes = idl_file.read()

  return Process(raw_bytes.decode('utf-8'), filename)


def Process(contents, filename):
  '''
  Parses |contents| (the text of an IDL file) and returns an equivalent Python
  structure in a format that the JSON schema compiler expects to see.
  |filename| is passed to the parser alongside the contents. (Separate from
  Load primarily for testing purposes.)
  '''
  parser = idl_parser.IDLParser()
  parsed_nodes = parser.ParseData(contents, filename)
  return IDLSchema(parsed_nodes).process()


def Main():
  '''
  Dump a json serialization of parse result for the IDL files whose names
  were passed in on the command line. With no file names, the IDL is read
  from stdin instead, and must consist of ASCII characters only.
  '''
  filenames = sys.argv[1:]
  if filenames:
    for filename in filenames:
      print(json.dumps(Load(filename), indent=2))
    return

  contents = sys.stdin.read()
  # Locate the first non-ASCII character, if any, and report where it is.
  first_bad = next(((offset, char)
                    for offset, char in enumerate(contents)
                    if not char.isascii()), None)
  if first_bad is not None:
    offset, char = first_bad
    raise Exception(
        'Non-ascii character "%s" (ord %d) found at offset %d.' %
        (char, ord(char), offset))

  parsed_nodes = idl_parser.IDLParser().ParseData(contents, '<stdin>')
  print(json.dumps(IDLSchema(parsed_nodes).process(), indent=2))


# Run Main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  Main()