chromium/tools/binary_size/trybot_commit_size_checker.py

#!/usr/bin/env python3
# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Creates several files used by the size trybot to monitor size regressions.

To test locally:
1. Run diagnose_bloat.py to create some entries in out/binary-size-results
2. Run this script with:
HASH1=some hash within out/binary-size-results
HASH2=some hash within out/binary-size-results
mkdir tmp
tools/binary_size/trybot_commit_size_checker.py \
    --author Batman \
    --review-subject "Testing 123" \
    --review-url "https://google.com" \
    --size-config-json-name \
        out/binary-size-build/config/Trichrome_size_config.json \
    --before-dir out/binary-size-results/$HASH1 \
    --after-dir out/binary-size-results/$HASH2 \
    --results-path output.json \
    --staging-dir tmp \
    --local-test
"""

import argparse
import collections
import json
import logging
import os
import pathlib
import re
import sys

sys.path.append(str(pathlib.Path(__file__).parent / 'libsupersize'))
import archive
import diagnose_bloat
import diff
import describe
import dex_disassembly
import file_format
import models
import native_disassembly

# Log names. Each one is emitted as a 'log_name' value in the results JSON and
# ties a metric shown by the Gerrit plugin to the matching detailed log.
_RESOURCE_SIZES_LOG = 'resource_sizes_log'
_RESOURCE_SIZES_64_LOG = 'resource_sizes_64_log'
# Metrics with these log names are always listed, even when their delta is 0.
_MAIN_LOG_NAMES = (_RESOURCE_SIZES_LOG, _RESOURCE_SIZES_64_LOG)
_BASE_RESOURCE_SIZES_LOG = 'base_resource_sizes_log'
# NOTE(review): "contstants" looks like a misspelling of "constants". The value
# is written to the results JSON, so confirm no consumer depends on it before
# changing the spelling.
_MUTABLE_CONSTANTS_LOG = 'mutable_contstants_log'
_FOR_TESTING_LOG = 'for_test_log'
_DEX_SYMBOLS_LOG = 'dex_symbols_log'
# File name of the .sizediff written into --staging-dir; it is also listed
# under 'archive_filenames' in the results JSON.
_SIZEDIFF_FILENAME = 'supersize_diff.sizediff'
# URL of the HTML viewer. No str.format() is applied to it in this file, so the
# literal "{{supersize_diff.sizediff}}" is emitted as-is; presumably the
# consumer of the results JSON substitutes the archived file's URL - confirm.
_HTML_REPORT_URL = (
    'https://chrome-supersize.firebaseapp.com/viewer.html?load_url={{' +
    _SIZEDIFF_FILENAME + '}}')
# Limit, in bytes, applied to each uncompressed .pak entry check.
_MAX_PAK_INCREASE = 1024
# Documentation link appended to the checks summary and to each detailed log.
_TRYBOT_MD_URL = ('https://chromium.googlesource.com/chromium/src/+/main/docs/'
                  'speed/binary_size/android_binary_size_trybot.md')


# Patterns for lines of a ProGuard-style mapping file, as consumed by
# IterForTestingSymbolsFromMapping().

# Class line: "<original_name> -> <obfuscated_name>:".
_PROGUARD_CLASS_MAPPING_RE = re.compile(r'(?P<original_name>[^ ]+)'
                                        r' -> '
                                        r'(?P<obfuscated_name>[^:]+):')
# Field line: "<type> <original_name> -> <obfuscated_name>". The original name
# may not contain "(". This pattern is only tried on lines that did not match
# the method pattern.
_PROGUARD_FIELD_MAPPING_RE = re.compile(r'(?P<type>[^ ]+) '
                                        r'(?P<original_name>[^ (]+)'
                                        r' -> '
                                        r'(?P<obfuscated_name>[^:]+)')
# Method line:
# "[<start>:<end>:]<return_type> [<class>.]<name>(<params>)[...] -> <obfuscated>".
_PROGUARD_METHOD_MAPPING_RE = re.compile(
    # line_start:line_end: (optional)
    r'((?P<line_start>\d+):(?P<line_end>\d+):)?'
    r'(?P<return_type>[^ ]+)'  # original method return type
    # original method class name (if exists)
    r' (?:(?P<original_method_class>[a-zA-Z_\d.$]+)\.)?'
    r'(?P<original_method_name>[^.\(]+)'
    r'\((?P<params>[^\)]*)\)'  # original method params
    r'(?:[^ ]*)'  # original method line numbers (ignored)
    r' -> '
    r'(?P<obfuscated_name>.+)')  # obfuscated method name


class _SizeDelta(
    collections.namedtuple('SizeDelta',
                           ['name', 'units', 'expected', 'actual'])):
  """A single check: a measured change compared against its limit.

  Fields:
    name: Display name of the check.
    units: Unit label used when printing |expected| and |actual|.
    expected: Largest value of |actual| that still passes.
    actual: The measured change.
  """

  @property
  def explanation(self):
    """One-line description of the measured value and its limit."""
    return (f'{self.name}: {self.actual} {self.units} '
            f'(max is {self.expected} {self.units})')

  def IsAllowable(self):
    """Returns True when the measured change does not exceed the limit."""
    return self.expected >= self.actual

  def IsLargeImprovement(self):
    """Returns True when the change is a decrease at least as big as the limit."""
    return -self.actual >= self.expected

  def __lt__(self, other):
    # Order deltas by name only (instead of tuple order) so that sorted()
    # output is alphabetical by check name.
    return self.name < other.name


# See https://crbug.com/1426694
def _MaxSizeIncrease(author, subject):
  """Returns the size increase, in bytes, that a CL is allowed to cause.

  The limit depends on markers in the CL subject and author; the first
  matching rule wins.

  Args:
    author: CL author string. None is treated as an empty string.
    subject: CL subject line. May be None or empty, since --review-subject is
        an optional flag.

  Returns:
    The limit in bytes as an int.
  """
  # --review-subject is optional, so |subject| can be None. Fall back to an
  # empty string so the substring checks below do not raise TypeError.
  subject = subject or ''
  author = author or ''
  if 'AFDO' in subject or 'PGO Profile' in subject:
    return 1024 * 1024
  if 'Update V8' in subject:
    return 100 * 1024
  if 'autoroll' in author:
    return 50 * 1024
  return 16 * 1024


def _SymbolDiffHelper(title_fragment, symbols):
  """Builds a report of the symbols that were added or removed.

  Args:
    title_fragment: Text placed in the header of each per-container section.
    symbols: Symbol collection to inspect; it must provide
        WhereDiffStatusIs().

  Returns:
    A (lines, net_added) tuple. |lines| is a list of report lines (empty when
    nothing was added or removed). |net_added| is the number of added symbols
    minus the number of removed symbols.
  """
  added_symbols = symbols.WhereDiffStatusIs(models.DIFF_STATUS_ADDED)
  removed_symbols = symbols.WhereDiffStatusIs(models.DIFF_STATUS_REMOVED)
  net_added = len(added_symbols) - len(removed_symbols)

  changed = (added_symbols + removed_symbols).SortedByName()
  report = []
  if len(changed) == 0:
    return report, net_added

  # One section per container, each followed by a blank separator line.
  for container_group in changed.GroupedByContainer():
    status_counts = container_group.CountsByDiffStatus()
    report.append('===== {} Added & Removed ({}) ====='.format(
        title_fragment, container_group.full_name))
    report.append('Added: {}'.format(status_counts[models.DIFF_STATUS_ADDED]))
    report.append('Removed: {}'.format(
        status_counts[models.DIFF_STATUS_REMOVED]))
    report.append('')
    report.extend(describe.GenerateLines(container_group, summarize=False))
    report.append('')

  return report, net_added


def _CreateMutableConstantsDelta(symbols):
  """Checks for constant-looking native symbols that are actually mutable.

  Candidates are symbols in section 'd' whose name matches the kConstant or
  ALL_CAPS naming pattern. The WhereFullNameMatches(...).Inverted() step
  presumably drops the ones annotated with abi:logically_const - confirm
  against the symbol-group API.

  Args:
    symbols: Symbol collection to inspect.

  Returns:
    A (lines, delta) tuple: the report lines and a _SizeDelta whose limit is 0
    net added symbols.
  """
  in_data_section = symbols.WhereInSection('d')
  constant_named = in_data_section.WhereNameMatches(r'\bk[A-Z]|\b[A-Z_]+$')
  marked_logically_const = constant_named.WhereFullNameMatches(
      'abi:logically_const')
  candidates = marked_logically_const.Inverted()

  report, net_added = _SymbolDiffHelper('Mutable Constants', candidates)
  delta = _SizeDelta('Mutable Constants', 'symbols', 0, net_added)
  return report, delta


def _CreateMethodCountDelta(symbols, max_increase):
  """Reports added/removed dex classes and methods and checks method growth.

  Only symbols selected by WhereIsOnDemand(False) are considered.

  Args:
    symbols: Symbol collection to inspect.
    max_increase: Largest allowed net increase in the number of methods.

  Returns:
    A (lines, delta) tuple: the report lines (classes first, then methods) and
    a _SizeDelta for the net number of added methods.
  """
  considered = symbols.WhereIsOnDemand(False)

  methods = considered.WhereInSection(models.SECTION_DEX_METHOD)
  method_lines, net_method_added = _SymbolDiffHelper('Methods', methods)

  def _IsClassSymbol(sym):
    # Skip string literals and any symbol whose name contains '#'.
    return not sym.IsStringLiteral() and '#' not in sym.name

  classes = considered.WhereInSection(models.SECTION_DEX).Filter(_IsClassSymbol)
  # Only the class report is used; the class count is not checked.
  class_lines, _ = _SymbolDiffHelper('Classes', classes)

  report = []
  if class_lines:
    report += class_lines
    report += ['', '']  # empty lines added for clarity
  if method_lines:
    report += method_lines

  delta = _SizeDelta('Dex Methods Count', 'methods', max_increase,
                     net_method_added)
  return report, delta


def _CreateResourceSizesDelta(before_dir, after_dir, max_increase):
  """Diffs resource_sizes_32.json between the two build directories.

  Args:
    before_dir: Directory holding the reference build's outputs.
    after_dir: Directory holding the new build's outputs.
    max_increase: Largest allowed increase, in bytes.

  Returns:
    A (summary, delta) tuple: the diff's Summary() and a _SizeDelta named
    'Normalized APK Size' built from the diff's summary statistic.
  """
  differ = diagnose_bloat.ResourceSizesDiff(filename='resource_sizes_32.json')
  differ.ProduceDiff(before_dir, after_dir)

  summary = differ.Summary()
  delta = _SizeDelta('Normalized APK Size', 'bytes', max_increase,
                     differ.summary_stat.value)
  return summary, delta


def _CreateBaseModuleResourceSizesDelta(before_dir, after_dir, max_increase):
  """Diffs the 'base' section of resource_sizes_32.json between two builds.

  Args:
    before_dir: Directory holding the reference build's outputs.
    after_dir: Directory holding the new build's outputs.
    max_increase: Largest allowed increase, in bytes.

  Returns:
    A (details, delta) tuple: the diff's DetailedResults() and a _SizeDelta
    named 'Base Module Size' built from the combined size change of 'base'.
  """
  differ = diagnose_bloat.ResourceSizesDiff(filename='resource_sizes_32.json',
                                            include_sections=['base'])
  differ.ProduceDiff(before_dir, after_dir)

  details = differ.DetailedResults()
  base_change = differ.CombinedSizeChangeForSection('base')
  delta = _SizeDelta('Base Module Size', 'bytes', max_increase, base_change)
  return details, delta


def _CreateResourceSizes64Delta(before_dir, after_dir, max_increase):
  """Diffs resource_sizes_64.json between the two build directories.

  Args:
    before_dir: Directory holding the reference build's outputs.
    after_dir: Directory holding the new build's outputs.
    max_increase: Base limit in bytes; the arm64 check allows four times it.

  Returns:
    A (summary, delta) tuple: the diff's Summary() and a _SizeDelta named
    'Normalized APK Size (arm64)' built from the diff's summary statistic.
  """
  differ = diagnose_bloat.ResourceSizesDiff(filename='resource_sizes_64.json')
  differ.ProduceDiff(before_dir, after_dir)

  summary = differ.Summary()
  # Allow 4x growth of arm64 before blocking CLs.
  arm64_limit = max_increase * 4
  delta = _SizeDelta('Normalized APK Size (arm64)', 'bytes', arm64_limit,
                     differ.summary_stat.value)
  return summary, delta


def _CreateSupersizeDiff(before_size_path, after_size_path, review_subject,
                         review_url):
  """Loads two .size files and diffs them.

  Args:
    before_size_path: Path of the reference build's .size file.
    after_size_path: Path of the new build's .size file.
    review_subject: If truthy, stored as the "after" build config title.
    review_url: If truthy, stored as the "after" build config URL.

  Returns:
    A (lines, delta_size_info) tuple: the text description of the diff as a
    list of lines, and the diff object itself.
  """
  before_info = archive.LoadAndPostProcessSizeInfo(before_size_path)
  after_info = archive.LoadAndPostProcessSizeInfo(after_size_path)

  # Annotate the "after" build with whatever review metadata was provided.
  review_metadata = (
      (models.BUILD_CONFIG_TITLE, review_subject),
      (models.BUILD_CONFIG_URL, review_url),
  )
  for config_key, config_value in review_metadata:
    if config_value:
      after_info.build_config[config_key] = config_value

  delta_size_info = diff.Diff(before_info, after_info, sort=True)
  description = list(describe.GenerateLines(delta_size_info))
  return description, delta_size_info


def _CreateUncompressedPakSizeDeltas(symbols):
  """Creates one check per uncompressed, non-translated .pak entry that grew.

  Args:
    symbols: Symbol collection to inspect.

  Returns:
    A list of _SizeDelta. Each compares the full size of the entry's "after"
    symbol (not the size change) against _MAX_PAK_INCREASE.
  """

  def _IsGrownUncompressedPak(sym):
    if not sym.size > 0:
      return False
    if not sym.flags & models.FLAG_UNCOMPRESSED:
      return False
    return sym.section_name == models.SECTION_PAK_NONTRANSLATED

  deltas = []
  for pak in symbols.Filter(_IsGrownUncompressedPak):
    title = 'Uncompressed Pak Entry "{}"'.format(pak.full_name)
    deltas.append(
        _SizeDelta(title, 'bytes', _MAX_PAK_INCREASE, pak.after_symbol.size))
  return deltas


def _IsForTestSymbol(value):
  """Returns True if |value| contains "ForTest" or "FOR_TEST"."""
  return any(marker in value for marker in ('ForTest', 'FOR_TEST'))


def IterForTestingSymbolsFromMapping(contents):
  """Yields the "ForTest" symbols found in the text of a mapping file.

  Lines that are only whitespace or that contain '#' are skipped. Lines that
  do not start with a space are class lines; lines that do are members of the
  most recent class.

  Args:
    contents: Full text of a ProGuard-style mapping file.

  Yields:
    The original class name for each class whose name is a ForTest symbol, and
    'Class#member' for each method or field where the member name or its class
    name is a ForTest symbol.

  Raises:
    Exception: If a line that does not start with a space is not a class
        mapping.
  """
  owning_class = None
  # keepends=True keeps the newline on blank lines so that isspace() below
  # recognizes them ('' is not considered whitespace).
  for raw_line in contents.splitlines(keepends=True):
    if raw_line.isspace() or '#' in raw_line:
      continue

    if not raw_line.startswith(' '):
      class_match = _PROGUARD_CLASS_MAPPING_RE.search(raw_line)
      if class_match is None:
        raise Exception('Malformed class mapping')
      owning_class = class_match.group('original_name')
      if _IsForTestSymbol(owning_class):
        yield owning_class
      continue

    # Member lines are only valid after a class line has been seen.
    assert owning_class is not None
    member_line = raw_line.strip()

    method_match = _PROGUARD_METHOD_MAPPING_RE.search(member_line)
    if method_match:
      method_name = method_match.group('original_method_name')
      # A method line may name its own class; otherwise it belongs to the
      # class of the enclosing section.
      method_class = (method_match.group('original_method_class')
                      or owning_class)
      if _IsForTestSymbol(method_name) or _IsForTestSymbol(method_class):
        yield f'{method_class}#{method_name}'
    else:
      field_match = _PROGUARD_FIELD_MAPPING_RE.search(member_line)
      if field_match:
        field_name = field_match.group('original_name')
        if _IsForTestSymbol(field_name) or _IsForTestSymbol(owning_class):
          yield f'{owning_class}#{field_name}'


def _ExtractForTestingSymbolsFromMappings(mapping_paths):
  """Collects the ForTest symbols from every given mapping file.

  Args:
    mapping_paths: Iterable of mapping file paths to read.

  Returns:
    A set with the symbols yielded by IterForTestingSymbolsFromMapping() for
    all of the files.
  """
  collected = set()
  for mapping_path in mapping_paths:
    with open(mapping_path) as mapping_file:
      contents = mapping_file.read()
    for symbol in IterForTestingSymbolsFromMapping(contents):
      collected.add(symbol)
  return collected


def _CreateTestingSymbolsDeltas(before_mapping_paths, after_mapping_paths):
  """Reports ForTest symbols that were added or removed between two builds.

  Args:
    before_mapping_paths: Mapping file paths of the reference build.
    after_mapping_paths: Mapping file paths of the new build.

  Returns:
    A (lines, delta) tuple. |lines| lists the added and then the removed
    symbols, each group in sorted order (empty if nothing changed). |delta| is
    a _SizeDelta whose limit is 0 net added symbols.
  """
  before_symbols = _ExtractForTestingSymbolsFromMappings(before_mapping_paths)
  after_symbols = _ExtractForTestingSymbolsFromMappings(after_mapping_paths)
  # Sort the differences: set iteration order for strings changes from run to
  # run (hash randomization), which would make the log order unstable.
  added_symbols = sorted(after_symbols - before_symbols)
  removed_symbols = sorted(before_symbols - after_symbols)
  lines = []
  if added_symbols:
    lines.append('Added Symbols Named "ForTest"')
    lines.extend(added_symbols)
    lines.extend(['', ''])  # empty lines added for clarity
  if removed_symbols:
    lines.append('Removed Symbols Named "ForTest"')
    lines.extend(removed_symbols)
    lines.extend(['', ''])  # empty lines added for clarity
  return lines, _SizeDelta('Added symbols named "ForTest"', 'symbols', 0,
                           len(added_symbols) - len(removed_symbols))


def _GenerateBinarySizePluginDetails(metrics):
  """Builds the 'gerrit_plugin_details' section of the results JSON.

  Args:
    metrics: Iterable of (delta, log_name) pairs, where delta is a _SizeDelta.

  Returns:
    A dict with 'listings' (one entry per shown metric, sorted by name) and
    'extras' (a link to the HTML report).
  """
  # Give more friendly names to Normalized APK Size metrics.
  display_names = {
      # The Gerrit plugin looks for this name to put it in the summary.
      _RESOURCE_SIZES_LOG:
      'Android Binary Size',
      _RESOURCE_SIZES_64_LOG:
      'Android Binary Size (arm64 high end) (TrichromeLibrary64.apk)',
  }

  shown = []
  for delta, log_name in metrics:
    entry = {
        'name': display_names.get(log_name, delta.name),
        'delta': '{} {}'.format(_FormatNumber(delta.actual), delta.units),
        'limit': '{} {}'.format(_FormatNumber(delta.expected), delta.units),
        'log_name': log_name,
        'allowed': delta.IsAllowable(),
        'large_improvement': delta.IsLargeImprovement(),
    }
    # Always show the Normalized APK Size; other metrics only when non-zero.
    is_main_metric = log_name in _MAIN_LOG_NAMES
    if is_main_metric or delta.actual != 0:
      shown.append(entry)

  report_link = {
      'text': 'APK Breakdown',
      'url': _HTML_REPORT_URL,
  }
  return {
      'listings': sorted(shown, key=lambda entry: entry['name']),
      'extras': [report_link],
  }


def _FormatNumber(number):
  """Formats |number| with an explicit sign and thousands separators."""
  # '+' forces a sign on non-negative numbers; ',' groups digits by thousands.
  return format(number, '+,')


# TODO(crbug.com/40256106): If missing and file is x32y, return xy; else
# return original filename. Basically allows comparing x_32 targets with x
# targets built under 32bit target_cpu without failing the script due to
# different file names. Remove once migration is complete.
def _UseAlterantiveIfMissing(path):
  """Returns |path|, or a variant without "32" if |path| is not a file.

  When |path| is not an existing file, the first "32" in its file name (the
  directory part is left alone) is removed. The returned alternative is not
  checked for existence.
  """
  if os.path.isfile(path):
    return path
  directory, filename = os.path.split(path)
  return os.path.join(directory, filename.replace('32', '', 1))


def main():
  """Runs all size checks and writes the trybot results JSON.

  Reads the size config and the before/after build outputs, computes each
  check, saves the .sizediff into --staging-dir and writes the combined result
  to --results-path. Failing checks are reported through 'status_code' in the
  JSON; this function itself returns None.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--author', required=True, help='CL author')
  parser.add_argument('--review-subject', help='Review subject')
  parser.add_argument('--review-url', help='Review URL')
  parser.add_argument('--size-config-json-name',
                      required=True,
                      help='Filename of JSON with configs for '
                      'binary size measurement.')
  parser.add_argument(
      '--before-dir',
      required=True,
      help='Directory containing the APK from reference build.')
  parser.add_argument(
      '--after-dir',
      required=True,
      help='Directory containing APK for the new build.')
  parser.add_argument(
      '--results-path',
      required=True,
      help='Output path for the trybot result .json file.')
  parser.add_argument(
      '--staging-dir',
      required=True,
      help='Directory to write summary files to.')
  parser.add_argument(
      '--local-test',
      action='store_true',
      help='Allow input directories to be diagnose_bloat.py ones.')
  args = parser.parse_args()

  logging.basicConfig(level=logging.INFO,
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  before_path = pathlib.Path(args.before_dir)
  after_path = pathlib.Path(args.after_dir)

  # Resolvers map any path to the file with the same basename inside the
  # before/after directory.
  before_path_resolver = lambda p: str(before_path / os.path.basename(p))
  after_path_resolver = lambda p: str(after_path / os.path.basename(p))

  # In local-test mode the config path is used as given; otherwise the config
  # is looked up by basename inside --after-dir.
  if args.local_test:
    config_path = args.size_config_json_name
  else:
    config_path = after_path_resolver(args.size_config_json_name)

  with open(config_path, 'rt') as fh:
    config = json.load(fh)

  # Local-test mode uses a fixed .size file name instead of the configured one.
  if args.local_test:
    size_filename = 'Trichrome.minimal.apks.size'
  else:
    size_filename = config['supersize_input_file'] + '.size'

  before_mapping_paths = [
      _UseAlterantiveIfMissing(before_path_resolver(f))
      for f in config['mapping_files']
  ]
  after_mapping_paths = [
      _UseAlterantiveIfMissing(after_path_resolver(f))
      for f in config['mapping_files']
  ]

  max_size_increase = _MaxSizeIncrease(args.author, args.review_subject)
  # We do not care as much about method count anymore, so this limit is set
  # such that it is very unlikely to be hit.
  max_methods_increase = 200 if '-autoroll' not in args.author else 800

  logging.info('Creating Supersize diff')
  supersize_diff_lines, delta_size_info = _CreateSupersizeDiff(
      _UseAlterantiveIfMissing(before_path_resolver(size_filename)),
      _UseAlterantiveIfMissing(after_path_resolver(size_filename)),
      args.review_subject, args.review_url)

  # Every symbol whose diff status is anything other than "unchanged".
  changed_symbols = delta_size_info.raw_symbols.WhereDiffStatusIs(
      models.DIFF_STATUS_UNCHANGED).Inverted()

  # |size_deltas| holds every check and decides pass/fail. |metrics| pairs the
  # checks shown by the Gerrit plugin with the name of their detailed log.
  logging.info('Checking dex symbols')
  dex_delta_lines, dex_delta = _CreateMethodCountDelta(changed_symbols,
                                                       max_methods_increase)
  size_deltas = {dex_delta}
  metrics = {(dex_delta, _DEX_SYMBOLS_LOG)}

  # Look for native symbols called "kConstant" that are not actually constants.
  # C++ syntax makes this an easy mistake, and having symbols in .data uses more
  # RAM than symbols in .rodata (at least for multi-process apps).
  logging.info('Checking for mutable constants in native symbols')
  mutable_constants_lines, mutable_constants_delta = (
      _CreateMutableConstantsDelta(changed_symbols))
  size_deltas.add(mutable_constants_delta)
  metrics.add((mutable_constants_delta, _MUTABLE_CONSTANTS_LOG))

  # Look for symbols with 'ForTest' in their name.
  logging.info('Checking for DEX symbols named "ForTest"')
  testing_symbols_lines, test_symbols_delta = _CreateTestingSymbolsDeltas(
      before_mapping_paths, after_mapping_paths)
  size_deltas.add(test_symbols_delta)
  metrics.add((test_symbols_delta, _FOR_TESTING_LOG))

  # Check for uncompressed .pak file entries being added to avoid unnecessary
  # bloat. These checks count towards pass/fail but are not added to |metrics|.
  logging.info('Checking pak symbols')
  size_deltas.update(_CreateUncompressedPakSizeDeltas(changed_symbols))

  # Normalized APK Size is the main metric we use to monitor binary size.
  logging.info('Creating sizes diff')
  resource_sizes_lines, resource_sizes_delta = (_CreateResourceSizesDelta(
      args.before_dir, args.after_dir, max_size_increase))
  size_deltas.add(resource_sizes_delta)
  metrics.add((resource_sizes_delta, _RESOURCE_SIZES_LOG))

  logging.info('Creating base module sizes diff')
  base_resource_sizes_lines, base_resource_sizes_delta = (
      _CreateBaseModuleResourceSizesDelta(args.before_dir, args.after_dir,
                                          max_size_increase))
  size_deltas.add(base_resource_sizes_delta)
  metrics.add((base_resource_sizes_delta, _BASE_RESOURCE_SIZES_LOG))

  # The 64-bit check runs only when the config has a truthy
  # 'to_resource_sizes_py_64' entry. |resource_sizes_64_lines| is defined only
  # in that case and is read further down under the same condition.
  config_64 = config.get('to_resource_sizes_py_64')
  if config_64:
    logging.info('Creating 64-bit sizes diff')
    resource_sizes_64_lines, resource_sizes_64_delta = (
        _CreateResourceSizes64Delta(args.before_dir, args.after_dir,
                                    max_size_increase))
    size_deltas.add(resource_sizes_64_delta)
    metrics.add((resource_sizes_64_delta, _RESOURCE_SIZES_64_LOG))

  logging.info('Adding disassembly to dex symbols')
  dex_disassembly.AddDisassembly(delta_size_info, before_path_resolver,
                                 after_path_resolver)
  logging.info('Adding disassembly to native symbols')
  native_disassembly.AddDisassembly(delta_size_info, before_path_resolver,
                                    after_path_resolver)

  # .sizediff can be consumed by the html viewer.
  logging.info('Creating HTML Report')
  sizediff_path = os.path.join(args.staging_dir, _SIZEDIFF_FILENAME)
  file_format.SaveDeltaSizeInfo(delta_size_info, sizediff_path)

  # Split the checks into passing and failing ones.
  passing_deltas = set(d for d in size_deltas if d.IsAllowable())
  failing_deltas = size_deltas - passing_deltas

  # sorted() orders the checks by name (see _SizeDelta.__lt__).
  failing_checks_text = '\n'.join(d.explanation for d in sorted(failing_deltas))
  passing_checks_text = '\n'.join(d.explanation for d in sorted(passing_deltas))
  checks_text = """\
FAILING Checks:
{}

PASSING Checks:
{}

To understand what those checks are and how to pass them, see:
{}

""".format(failing_checks_text, passing_checks_text, _TRYBOT_MD_URL)

  # 1 if at least one check failed, 0 otherwise.
  status_code = int(bool(failing_deltas))
  see_docs_lines = ['\n', f'For more details: {_TRYBOT_MD_URL}\n']

  # The summary is emitted with <br> in place of newlines.
  summary = '<br>' + checks_text.replace('\n', '<br>')
  links_json = [
      {
          'name': 'Binary Size Details (arm32)',
          'lines': resource_sizes_lines + see_docs_lines,
          'log_name': _RESOURCE_SIZES_LOG,
      },
      {
          'name': 'Base Module Binary Size Details',
          'lines': base_resource_sizes_lines + see_docs_lines,
          'log_name': _BASE_RESOURCE_SIZES_LOG,
      },
      {
          'name': 'Mutable Constants Diff',
          'lines': mutable_constants_lines + see_docs_lines,
          'log_name': _MUTABLE_CONSTANTS_LOG,
      },
      {
          'name': 'ForTest Symbols Diff',
          'lines': testing_symbols_lines + see_docs_lines,
          'log_name': _FOR_TESTING_LOG,
      },
      {
          'name': 'Dex Class and Method Diff',
          'lines': dex_delta_lines + see_docs_lines,
          'log_name': _DEX_SYMBOLS_LOG,
      },
      {
          'name': 'SuperSize Text Diff',
          'lines': supersize_diff_lines,
      },
      {
          'name': 'SuperSize HTML Diff',
          'url': _HTML_REPORT_URL,
      },
  ]
  if config_64:
    # Slice assignment inserts the arm64 entry at index 2, i.e. right after
    # the 'Base Module Binary Size Details' entry.
    links_json[2:2] = [
        {
            'name': 'Binary Size Details (arm64)',
            'lines': resource_sizes_64_lines + see_docs_lines,
            'log_name': _RESOURCE_SIZES_64_LOG,
        },
    ]
  # Remove empty diffs (Mutable Constants, Dex Method, ...).
  links_json = [o for o in links_json if o.get('lines') or o.get('url')]

  binary_size_plugin_json = _GenerateBinarySizePluginDetails(metrics)

  results_json = {
      'status_code': status_code,
      'summary': summary,
      'archive_filenames': [_SIZEDIFF_FILENAME],
      'links': links_json,
      'gerrit_plugin_details': binary_size_plugin_json,
  }

  with open(args.results_path, 'w') as f:
    json.dump(results_json, f)


# Run the checks only when executed as a script, not when imported.
if __name__ == '__main__':
  main()