# chromium/testing/libfuzzer/fuzzers/generate_javascript_parser_proto.py

#!/usr/bin/env python3

# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Script for generating .proto and a conversion .cc file for a templated library
based JavaScript parser fuzzer.
"""

import sys


def ParseWord(word_string):
  """Parse one dictionary line into a "word": a list of parts.

  Each part of the line is either a string literal surrounded by double
  quotes or a placeholder of the form $<int>.

  Args:
    word_string: a line such as '"if" "(" $0 ")" $1'.

  Returns:
    A list whose elements are str (literal text) or int (placeholder
    index), in order of appearance; empty for a blank line.

  Raises:
    ValueError: if a segment is neither a quoted string nor a $<int>
      placeholder, if a quote is unterminated, or if a placeholder index
      is not a decimal integer.
  """
  word_string = word_string.strip()

  parts = []
  while word_string:
    if word_string[0] == '"':
      # Quoted literal: grab everything up to the closing quote. The
      # dictionary format has no escape sequences, so a plain index works
      # (raises ValueError if the quote is unterminated).
      end_ix = 1 + word_string[1:].index('"')
      parts.append(word_string[1:end_ix])
      word_string = word_string[(end_ix + 1):]
    elif word_string[0] == '$':
      # Placeholder: '$' followed by a decimal index, ending at the next
      # space or at end of line.
      end_ix = word_string.index(' ') if ' ' in word_string else len(
          word_string)
      parts.append(int(word_string[1:end_ix]))
      word_string = word_string[end_ix:]
    else:
      # An assert here would be silently stripped under 'python -O';
      # raise explicitly so malformed dictionary lines are always caught.
      raise ValueError('Invalid word segment: %r' % word_string)
    word_string = word_string.lstrip()
  return parts


def GenerateProtoContents(words):
  """Return the proto enum body: one 'token_value_<i> = <i>;' line per word.

  Only the number of words matters here; their contents are used by
  GenerateConversionContents. Uses str.join instead of repeated string
  concatenation, which is quadratic in the worst case.

  Args:
    words: list of parsed words from ReadDictionary.

  Returns:
    The enum entry lines as a single string (empty if words is empty).
  """
  return ''.join('    token_value_%d = %d;\n' % (ix, ix)
                 for ix in range(len(words)))


def GenerateConversionContents(words):
  """Return the C++ switch-case body mapping each token enum value to an
  expression that rebuilds the word's text, recursing into inner tokens
  for $<int> placeholders."""
  pieces = []
  for case_ix, word in enumerate(words):
    pieces.append('    case ' + str(case_ix) + ':\n')
    exprs = []
    max_placeholder = -1
    for part in word:
      if isinstance(part, str):
        exprs.append('std::string("' + part + '")')
      else:
        # Track the highest placeholder index so we can bounds-check the
        # inner token list before dereferencing it.
        max_placeholder = max(max_placeholder, part)
        exprs.append('token_to_string(token.inner_tokens(' + str(part) +
                     '), depth)')
    if max_placeholder >= 0:
      pieces.append('      if (token.inner_tokens().size() < ' +
                    str(max_placeholder + 1) + ') return std::string("");\n')
    pieces.append('      return ' +
                  ' + std::string(" ") + '.join(exprs) + ';\n')
  return ''.join(pieces)


def ReadDictionary(filename):
  """Read a fuzzer dictionary file and parse it into a list of words.

  Lines starting with '#' are comments and are skipped, as are lines that
  parse to an empty word (e.g. blank lines).

  Args:
    filename: path to the dictionary file.

  Returns:
    A list of words, each a list of str/int parts (see ParseWord).
  """
  words = []
  with open(filename) as input_file:
    for line in input_file:
      if line.startswith('#'):
        continue
      parsed = ParseWord(line)
      if parsed:
        words.append(parsed)
  return words


def main(argv):
  """Generate the .proto definition and the C++ token-to-string converter.

  Args:
    argv: [program, output_proto_file, output_cc_file, input_dict_file].
  """
  # Fail with a usage message rather than an opaque IndexError when the
  # expected three arguments are not supplied.
  if len(argv) != 4:
    sys.stderr.write(
        'Usage: %s <output proto> <output cc> <input dictionary>\n' % argv[0])
    sys.exit(1)

  output_proto_file = argv[1]
  output_cc_file = argv[2]
  input_dict_file = argv[3]

  words = ReadDictionary(input_dict_file)

  # Fixed proto scaffolding; the enum body in between gets one entry per
  # dictionary word.
  proto_header = ('// Generated by generate_javascript_parser_proto.py.\n'
                  '\n'
                  'syntax = "proto2";\n'
                  'package javascript_parser_proto_fuzzer;\n'
                  '\n'
                  'message Token {\n'
                  '  enum Value {\n')

  proto_footer = ('  }\n'
                  '  required Value value = 1;\n'
                  '  repeated Token inner_tokens = 2;\n'
                  '}\n'
                  '\n'
                  'message Source {\n'
                  '  required bool is_module = 1;\n'
                  '  repeated Token tokens = 2;\n'
                  '}\n')

  proto_contents = proto_header + GenerateProtoContents(words) + proto_footer

  with open(output_proto_file, 'w') as f:
    f.write(proto_contents)

  # C++ scaffolding around the generated switch body; the depth guard bounds
  # recursion so converted output cannot grow unboundedly.
  conversion_header = (
      '// Generated by generate_javascript_parser_proto.py.\n'
      '\n'
      '#include "testing/libfuzzer/fuzzers/'
      'javascript_parser_proto_to_string.h"\n'
      '\n'
      '// Bound calls to token_to_string to prevent memory usage from growing\n'
      '// too much.\n'
      'const int kMaxRecursiveDepth = 9;\n'
      '\n'
      'std::string token_to_string(\n'
      '    const javascript_parser_proto_fuzzer::Token& token, int depth)'
      ' {\n'
      '  if (++depth == kMaxRecursiveDepth) return std::string("");\n'
      '  switch(token.value()) {\n')

  conversion_footer = ('    default: break;\n'
                       '  }\n'
                       '  return std::string("");\n'
                       '}\n')

  conversion_contents = (conversion_header + GenerateConversionContents(words) +
                         conversion_footer)

  with open(output_cc_file, 'w') as f:
    f.write(conversion_contents)


if __name__ == '__main__':
  # Script entry point: argv[1..3] are the output .proto path, the output
  # .cc path, and the input dictionary path.
  main(sys.argv)