chromium/tools/binary_size/libsupersize/archive.py

# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Main Python API for analyzing binary size."""

import argparse
import collections
import dataclasses
import functools
import logging
import os
import posixpath
import re
import shlex
import subprocess
import time
import zipfile

import apk
import apkanalyzer
import archive_util
import data_quality
import describe
import dex_deobfuscate
import dir_metadata
import file_format
import function_signature
import json_config_parser
import models
import native
import pakfile
import parallel
import path_util
import readelf
import zip_util


@dataclasses.dataclass
class NativeSpec:
  # One (or more) of apk_so_path, map_path, elf_path must be non-None.
  # Path within the .apk of the .so file. Non-None only when apk_spec is.
  apk_so_path: str = None
  # Path to linker map file (if present).
  map_path: str = None
  # Path to unstripped ELF file (if present).
  elf_path: str = None
  # Whether to create symbols for each string literal.
  track_string_literals: bool = True
  # component to use for all symbols.
  component: str = None
  # Regular expression that will match generated files.
  gen_dir_regex: str = None
  # source_path prefix to use for all symbols.
  source_path_prefix: str = None

  @property
  def algorithm(self):
    if self.map_path:
      return 'linker_map'
    if self.elf_path:
      return 'dwarf'
    return 'sections'


@dataclasses.dataclass
class PakSpec:
  # One of pak_paths or apk_pak_paths must be non-None.
  pak_paths: list = None
  apk_pak_paths: list = None
  pak_info_path: str = None


@dataclasses.dataclass
class ApkSpec:
  # Path the .apk file. Never None.
  # This is a temp file when .apks is being analyzed.
  apk_path: str
  # Path to .minimal.apks (when analyzing bundles).
  minimal_apks_path: str = None
  # Proguard mapping path.
  mapping_path: str = None
  # Path to the .pathmap.txt file for the apk. Used to deobfuscate res/ files.
  resources_pathmap_path: str = None
  # Name of the apk split when .apks is being analyzed.
  split_name: str = None
  # Path such as: out/Release/size-info/BaseName
  size_info_prefix: str = None
  # Whether to break down classes.dex.
  analyze_dex: bool = True
  # Whether to create symbols for each string literal.
  track_string_literals: bool = True
  # Dict of apk_path -> source_path, provided by json config.
  path_defaults: dict = None
  # Component to use for symbols when not specified by DIR_METADATA, provided by
  # json config.
  default_component: str = ''
  # Paths to not create .other symbols for.
  ignore_apk_paths: set = dataclasses.field(default_factory=set)


@dataclasses.dataclass
class ContainerSpec:
  container_name: str
  apk_spec: ApkSpec
  pak_spec: PakSpec
  native_spec: NativeSpec
  source_directory: str
  output_directory: str


def _NormalizeNames(raw_symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Deriving |name| and |template_name| from |full_name|.
    - Stripping of return types (for functions).
    - Moving "vtable for" and the like to be suffixes rather than prefixes.
  """
  found_prefixes = set()
  for symbol in raw_symbols:
    full_name = symbol.full_name

    # See comment in _CalculatePadding() about when this can happen. Don't
    # process names for non-native sections.
    if symbol.IsPak():
      # full_name: "about_ui_resources.grdp: IDR_ABOUT_UI_CREDITS_HTML".
      space_idx = full_name.rindex(' ')
      name = full_name[space_idx + 1:]
      symbol.template_name = name
      symbol.name = name
    elif (full_name.startswith('**') or symbol.IsOverhead()
          or symbol.IsOther()):
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsStringLiteral():  # Handles native and DEX strings.
      symbol.full_name = full_name
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsDex():
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.ParseJava(full_name))
    elif symbol.IsNative():
      # Remove [clone] suffix, and set flag accordingly.
      # Search from left-to-right, as multiple [clone]s can exist.
      # Example name suffixes:
      #     [clone .part.322]  # GCC
      #     [clone .isra.322]  # GCC
      #     [clone .constprop.1064]  # GCC
      #     [clone .11064]  # clang
      # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
      idx = full_name.find(' [clone ')
      if idx != -1:
        full_name = full_name[:idx]
        symbol.flags |= models.FLAG_CLONE

      # Clones for C symbols.
      if symbol.section == 't':
        idx = full_name.rfind('.')
        if idx != -1 and full_name[idx + 1:].isdigit():
          new_name = full_name[:idx]
          # Generated symbols that end with .123 but are not clones.
          # Find these via:
          # size_info.symbols.WhereInSection('t').WhereIsGroup().SortedByCount()
          if new_name not in ('__tcf_0', 'startup'):
            full_name = new_name
            symbol.flags |= models.FLAG_CLONE
            # Remove .part / .isra / .constprop.
            idx = full_name.rfind('.', 0, idx)
            if idx != -1:
              full_name = full_name[:idx]

      # E.g.: vtable for FOO
      idx = full_name.find(' for ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 4])
        full_name = '{} [{}]'.format(full_name[idx + 5:], full_name[:idx])

      # E.g.: virtual thunk to FOO
      idx = full_name.find(' to ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 3])
        full_name = '{} [{}]'.format(full_name[idx + 4:], full_name[:idx])

      # Strip out return type, and split out name, template_name.
      # Function parsing also applies to non-text symbols.
      # E.g. Function statics.
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.Parse(full_name))

      # Remove anonymous namespaces (they just harm clustering).
      symbol.template_name = symbol.template_name.replace(
          '(anonymous namespace)::', '')
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')
      non_anonymous_name = symbol.name.replace('(anonymous namespace)::', '')
      if symbol.name != non_anonymous_name:
        symbol.flags |= models.FLAG_ANONYMOUS
        symbol.name = non_anonymous_name

    # Allow using "is" to compare names (and should help with RAM). This applies
    # to all symbols.
    function_signature.InternSameNames(symbol)

  logging.debug('Found name prefixes of: %r', found_prefixes)


def LoadAndPostProcessSizeInfo(path, file_obj=None):
  """Returns a SizeInfo for the given |path|."""
  logging.debug('Loading results from: %s', path)
  size_info = file_format.LoadSizeInfo(path, file_obj=file_obj)
  logging.info('Normalizing symbol names')
  _NormalizeNames(size_info.raw_symbols)
  logging.info('Loaded %d symbols', len(size_info.raw_symbols))
  return size_info


def LoadAndPostProcessDeltaSizeInfo(path, file_obj=None):
  """Returns a tuple of SizeInfos for the given |path|."""
  logging.debug('Loading results from: %s', path)
  before_size_info, after_size_info, _, _ = (file_format.LoadDeltaSizeInfo(
      path, file_obj=file_obj))
  logging.info('Normalizing symbol names')
  _NormalizeNames(before_size_info.raw_symbols)
  _NormalizeNames(after_size_info.raw_symbols)
  logging.info('Loaded %d + %d symbols', len(before_size_info.raw_symbols),
               len(after_size_info.raw_symbols))
  return before_size_info, after_size_info


def CreateBuildConfig(output_directory, source_directory, url=None, title=None):
  """Creates the dict to use for SizeInfo.build_info."""
  logging.debug('Constructing build_config')
  build_config = {}
  if output_directory:
    gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn'))
    build_config[models.BUILD_CONFIG_GN_ARGS] = gn_args
    build_config[models.BUILD_CONFIG_OUT_DIRECTORY] = os.path.relpath(
        output_directory, start=source_directory)
  git_rev = _DetectGitRevision(source_directory)
  if git_rev:
    build_config[models.BUILD_CONFIG_GIT_REVISION] = git_rev
  if url is not None:
    build_config[models.BUILD_CONFIG_URL] = url
  if title is not None:
    build_config[models.BUILD_CONFIG_TITLE] = title

  return build_config


def _CreateMetadata(container_spec, elf_info):
  logging.debug('Constructing metadata')
  metadata = {}
  apk_spec = container_spec.apk_spec
  native_spec = container_spec.native_spec
  output_directory = container_spec.output_directory

  # Ensure all paths are relative to output directory to make them hermetic.
  if output_directory:
    shorten_path = lambda path: os.path.relpath(path, output_directory)
  else:
    # If output directory is unavailable, just store basenames.
    shorten_path = os.path.basename

  if apk_spec:
    apk_metadata = apk.CreateMetadata(apk_spec=apk_spec,
                                      include_file_details=not native_spec,
                                      shorten_path=shorten_path)
    assert not (metadata.keys() & apk_metadata.keys())
    metadata.update(apk_metadata)

  if native_spec:
    native_metadata = native.CreateMetadata(native_spec=native_spec,
                                            elf_info=elf_info,
                                            shorten_path=shorten_path)
    assert not (metadata.keys() & native_metadata.keys())
    metadata.update(native_metadata)

  logging.debug('Constructing metadata (done)')
  return metadata


def _CreatePakSymbols(*, pak_spec, pak_id_map, apk_spec, output_directory):
  logging.debug('Creating Pak symbols')
  section_ranges = {}
  if apk_spec:
    assert apk_spec.size_info_prefix
    # Can modify |section_ranges|.
    raw_symbols = pakfile.CreatePakSymbolsFromApk(section_ranges,
                                                  apk_spec.apk_path,
                                                  pak_spec.apk_pak_paths,
                                                  apk_spec.size_info_prefix,
                                                  pak_id_map)
  else:
    # Can modify |section_ranges|.
    raw_symbols = pakfile.CreatePakSymbolsFromFiles(section_ranges,
                                                    pak_spec.pak_paths,
                                                    pak_spec.pak_info_path,
                                                    output_directory,
                                                    pak_id_map)
  return section_ranges, raw_symbols


def _CreateContainerSymbols(container_spec, apk_file_manager,
                            apk_analyzer_results, pak_id_map,
                            component_overrides, dex_deobfuscator_cache):
  container_name = container_spec.container_name
  apk_spec = container_spec.apk_spec
  pak_spec = container_spec.pak_spec
  native_spec = container_spec.native_spec
  output_directory = container_spec.output_directory
  source_directory = container_spec.source_directory

  logging.info('Starting on container: %s', container_spec)

  raw_symbols = []
  section_sizes = {}
  metrics_by_file = {}
  default_component = apk_spec.default_component if apk_spec else ''

  def add_syms(section_ranges,
               new_raw_symbols,
               source_path_prefix=None,
               component=None,
               paths_already_normalized=False):
    new_section_sizes = {
        k: size
        for k, (address, size) in section_ranges.items()
    }
    if models.SECTION_OTHER in new_section_sizes:
      section_sizes[models.SECTION_OTHER] = section_sizes.get(
          models.SECTION_OTHER, 0) + new_section_sizes[models.SECTION_OTHER]
      del new_section_sizes[models.SECTION_OTHER]

    assert not (set(section_sizes) & set(new_section_sizes)), (
        'Section collision: {}\n\n {}'.format(section_sizes, new_section_sizes))
    section_sizes.update(new_section_sizes)

    # E.g.: native.CreateSymbols() already calls NormalizePaths().
    if not paths_already_normalized:
      archive_util.NormalizePaths(new_raw_symbols)

    if source_path_prefix:
      # Prefix the source_path for all symbols that have a source_path assigned,
      # and that don't have it set to $APK or $GOOGLE3.
      for s in new_raw_symbols:
        if s.source_path and s.source_path[0] != '$':
          s.source_path = source_path_prefix + s.source_path

    if component is not None:
      for s in new_raw_symbols:
        s.component = component
    else:
      dir_metadata.PopulateComponents(new_raw_symbols,
                                      source_directory,
                                      component_overrides,
                                      default_component=default_component)
    raw_symbols.extend(new_raw_symbols)

  elf_info = None
  if native_spec:
    section_ranges, native_symbols, elf_info, native_metrics_by_file = (
        native.CreateSymbols(apk_spec=apk_spec,
                             native_spec=native_spec,
                             output_directory=output_directory,
                             pak_id_map=pak_id_map))
    add_syms(section_ranges,
             native_symbols,
             source_path_prefix=native_spec.source_path_prefix,
             component=native_spec.component,
             paths_already_normalized=True)
    metrics_by_file.update(native_metrics_by_file)
  elif apk_spec and apk_spec.analyze_dex:
    logging.info('Analyzing DEX')
    apk_infolist = apk_file_manager.InfoList(apk_spec.apk_path)
    dex_total_size = sum(i.file_size for i in apk_infolist
                         if i.filename.endswith('.dex'))
    if dex_total_size > 0:
      mapping_path = apk_spec.mapping_path  # May be None.
      class_deobfuscation_map = (
          dex_deobfuscator_cache.GetForMappingFile(mapping_path))
      section_ranges, dex_symbols, dex_metrics_by_file = (
          apkanalyzer.CreateDexSymbols(apk_spec.apk_path,
                                       apk_analyzer_results[container_name],
                                       dex_total_size, class_deobfuscation_map,
                                       apk_spec.size_info_prefix,
                                       apk_spec.track_string_literals))
      add_syms(section_ranges, dex_symbols)
      metrics_by_file.update(dex_metrics_by_file)

  if pak_spec:
    section_ranges, pak_symbols = _CreatePakSymbols(
        pak_spec=pak_spec,
        pak_id_map=pak_id_map,
        apk_spec=apk_spec,
        output_directory=output_directory)
    add_syms(section_ranges, pak_symbols)
  apk_metadata = {}

  # This function can get called multiple times for the same APK file, to
  # process .so files that are treated as containers. The |not native_spec|
  # condition below skips these cases to prevent redundant symbol creation.
  if not native_spec and apk_spec:
    logging.info('Analyzing ARSC')
    arsc_section_ranges, arsc_symbols, arsc_metrics_by_file = (
        apk.CreateArscSymbols(apk_spec))
    add_syms(arsc_section_ranges, arsc_symbols)
    metrics_by_file.update(arsc_metrics_by_file)

    other_section_ranges, other_symbols, apk_metadata, apk_metrics_by_file = (
        apk.CreateApkOtherSymbols(apk_spec))
    add_syms(other_section_ranges, other_symbols)
    metrics_by_file.update(apk_metrics_by_file)

  metadata = _CreateMetadata(container_spec, elf_info)
  assert not (metadata.keys() & apk_metadata.keys())
  metadata.update(apk_metadata)
  container = models.Container(name=container_name,
                               metadata=metadata,
                               section_sizes=section_sizes,
                               metrics_by_file=metrics_by_file)
  for symbol in raw_symbols:
    symbol.container = container

  return raw_symbols


def _DetectGitRevision(directory):
  """Runs git rev-parse to get the SHA1 hash of the current revision.

  Args:
    directory: Path to directory where rev-parse command will be run.

  Returns:
    A string with the SHA1 hash, or None if an error occured.
  """
  try:
    git_rev = subprocess.check_output(
        ['git', '-C', directory, 'rev-parse', 'HEAD']).decode('ascii')
    return git_rev.rstrip()
  except Exception:
    logging.warning('Failed to detect git revision for file metadata.')
    return None


def _ParseGnArgs(args_path):
  """Returns a list of normalized "key=value" strings."""
  args = {}
  with open(args_path) as f:
    for l in f:
      # Strips #s even if within string literal. Not a problem in practice.
      parts = l.split('#')[0].split('=')
      if len(parts) != 2:
        continue
      args[parts[0].strip()] = parts[1].strip()
  return ["%s=%s" % x for x in sorted(args.items())]


def _AddContainerArguments(parser, is_top_args=False):
  """Add arguments applicable to a single container."""

  # Main file argument: Exactly one should be specified (perhaps via -f).
  # _IdentifyInputFile() should be kept updated.
  group = parser.add_argument_group(title='Main Input')
  group = group.add_mutually_exclusive_group(required=True)
  group.add_argument('-f',
                     metavar='FILE',
                     help='Auto-identify input file type.')
  group.add_argument('--apk-file',
                     help='.apk file to measure. Other flags can generally be '
                     'derived when this is used.')
  group.add_argument('--minimal-apks-file',
                     help='.minimal.apks file to measure. Other flags can '
                     'generally be derived when this is used.')
  group.add_argument('--elf-file', help='Path to input ELF file.')
  group.add_argument('--map-file',
                     help='Path to input .map(.gz) file. Defaults to '
                     '{{elf_file}}.map(.gz)?. If given without '
                     '--elf-file, no size metadata will be recorded.')
  group.add_argument('--pak-file',
                     action='append',
                     default=[],
                     dest='pak_files',
                     help='Paths to pak files.')
  if is_top_args:
    group.add_argument('--ssargs-file',
                       help='Path to SuperSize multi-container arguments file.')

  group = parser.add_argument_group(title='What to Analyze')
  group.add_argument('--java-only',
                     action='store_true',
                     help='Run on only Java symbols')
  group.add_argument('--native-only',
                     action='store_true',
                     help='Run on only native symbols')
  group.add_argument('--no-java',
                     action='store_true',
                     help='Do not run on Java symbols')
  group.add_argument('--no-native',
                     action='store_true',
                     help='Do not run on native symbols')
  if is_top_args:
    group.add_argument('--container-filter',
                       help='Regular expression for which containers to create')

  group = parser.add_argument_group(title='Analysis Options for Native Code')
  group.add_argument('--no-map-file',
                     dest='ignore_linker_map',
                     action='store_true',
                     help='Use debug information to capture symbol sizes '
                     'instead of linker map file.')
  # Used by tests to override path to APK-discovered files.
  group.add_argument('--aux-elf-file', help=argparse.SUPPRESS)
  group.add_argument(
      '--aux-map-file',
      help='Path to linker map to use when --elf-file is provided')

  group = parser.add_argument_group(title='APK options')
  group.add_argument('--mapping-file',
                     help='Proguard .mapping file for deobfuscation.')
  group.add_argument('--resources-pathmap-file',
                     help='.pathmap.txt file that contains a maping from '
                     'original resource paths to shortened resource paths.')
  group.add_argument('--abi-filter',
                     dest='abi_filters',
                     action='append',
                     help='For apks with multiple ABIs, break down native '
                     'libraries for this ABI. Defaults to 64-bit when both '
                     '32 and 64 bit are present.')

  group = parser.add_argument_group(title='Analysis Options for Pak Files')
  group.add_argument('--pak-info-file',
                     help='This file should contain all ids found in the pak '
                     'files that have been passed in. If not specified, '
                     '${pak_file}.info is assumed.')

  group = parser.add_argument_group(title='Analysis Options (shared)')
  group.add_argument('--source-directory',
                     help='Custom path to the root source directory.')
  group.add_argument('--output-directory',
                     help='Path to the root build directory.')
  group.add_argument('--symbols-dir',
                     default='lib.unstripped',
                     help='Relative path containing unstripped .so files '
                     '(for symbols) w.r.t. the output directory.')
  group.add_argument('--no-string-literals',
                     action='store_true',
                     help=('Do not create symbols for string literals '
                           '(applies to DEX and Native).'))
  if is_top_args:
    group.add_argument('--json-config', help='Path to a supersize.json.')
    group.add_argument('--no-output-directory',
                       action='store_true',
                       help='Do not auto-detect --output-directory.')
    group.add_argument('--check-data-quality',
                       action='store_true',
                       help='Perform sanity checks to ensure there is no '
                       'missing data.')


def AddArguments(parser):
  parser.add_argument('size_file', help='Path to output .size file.')
  parser.add_argument('--title',
                      help='Value for the "title" build_config entry.')
  parser.add_argument('--url', help='Value for the "url" build_config entry.')
  _AddContainerArguments(parser, is_top_args=True)


def _IdentifyInputFile(args, on_config_error):
  """Identifies main input file type from |args.f|, and updates |args|.

  Identification is performed on filename alone, i.e., the file need not exist.
  The result is written to a field in |args|. If the field exists then it
  simply gets overwritten.

  If '.' is missing from |args.f| then --elf-file is assumed.

  Returns:
    The primary input file.
"""
  if args.f:
    if args.f.endswith('.minimal.apks'):
      args.minimal_apks_file = args.f
    elif args.f.endswith('.apk'):
      args.apk_file = args.f
    elif args.f.endswith('.so') or '.' not in os.path.basename(args.f):
      args.elf_file = args.f
    elif args.f.endswith('.map') or args.f.endswith('.map.gz'):
      args.map_file = args.f
    elif args.f.endswith('.pak'):
      args.pak_files.append(args.f)
    elif args.f.endswith('.ssargs'):
      # Fails if trying to nest them, which should never happen.
      args.ssargs_file = args.f
    else:
      on_config_error('Cannot identify file ' + args.f)
    args.f = None

  ret = [
      args.apk_file, args.elf_file, args.minimal_apks_file,
      args.__dict__.get('ssargs_file'), args.map_file
  ] + (args.pak_files or [])
  ret = [v for v in ret if v]
  if not ret:
    on_config_error(
        'Must pass at least one of --apk-file, --minimal-apks-file, '
        '--elf-file, --map-file, --pak-file, --ssargs-file')
  return ret[0]


def ParseSsargs(lines):
  """Parses .ssargs data.

  An .ssargs file is a text file to specify multiple containers as input to
  SuperSize-archive. After '#'-based comments, start / end whitespaces, and
  empty lines are stripped, each line specifies a distinct container. Format:
  * Positional argument: |name| for the container.
  * Main input file specified by -f, --apk-file, --elf-file, etc.:
    * Can be an absolute path.
    * Can be a relative path. In this case, it's up to the caller to supply the
      base directory.
    * -f switch must not specify another .ssargs file.
  * For supported switches: See _AddContainerArguments().

  Args:
    lines: An iterator containing lines of .ssargs data.
  Returns:
    A list of arguments, one for each container.
  Raises:
    ValueError: Parse error, including input line number.
  """
  sub_args_list = []
  parser = argparse.ArgumentParser(add_help=False)
  parser.error = lambda msg: (_ for _ in ()).throw(ValueError(msg))
  parser.add_argument('name')
  _AddContainerArguments(parser)
  try:
    for lineno, line in enumerate(lines, 1):
      toks = shlex.split(line, comments=True)
      if not toks:  # Skip if line is empty after stripping comments.
        continue
      sub_args_list.append(parser.parse_args(toks))
  except ValueError as e:
    e.args = ('Line %d: %s' % (lineno, e.args[0]), )
    raise e
  return sub_args_list


def _MakeNativeSpec(json_config, **kwargs):
  native_spec = NativeSpec(**kwargs)
  if native_spec.elf_path or native_spec.map_path:
    basename = os.path.basename(native_spec.elf_path or native_spec.map_path)
    native_spec.component = json_config.ComponentForNativeFile(basename)
    native_spec.gen_dir_regex = json_config.GenDirRegexForNativeFile(basename)
    native_spec.source_path_prefix = json_config.SourcePathPrefixForNativeFile(
        basename)

  if not native_spec.map_path:
    # TODO(crbug.com/40757867): Implement string literal tracking without map
    #     files. nm emits some string literal symbols, but most are missing.
    native_spec.track_string_literals = False
    return native_spec

  return native_spec


def _ElfIsMainPartition(elf_path):
  section_ranges = readelf.SectionInfoFromElf(elf_path)
  return models.SECTION_PART_END in section_ranges.keys()


def _DeduceMapPath(elf_path):
  if _ElfIsMainPartition(elf_path):
    map_path = elf_path.replace('.so', '__combined.so') + '.map'
  else:
    map_path = elf_path + '.map'
  if not os.path.exists(map_path):
    map_path += '.gz'
    if not os.path.exists(map_path):
      map_path = None

  if map_path:
    logging.debug('Detected map_path=%s', map_path)
  return map_path


def _CreateNativeSpecs(*, tentative_output_dir, symbols_dir, apk_infolist,
                       elf_path, map_path, abi_filters, auto_abi_filters,
                       track_string_literals, ignore_linker_map, json_config,
                       on_config_error):
  if ignore_linker_map:
    map_path = None
  elif (map_path and not map_path.endswith('.map')
        and not map_path.endswith('.map.gz')):
    on_config_error('Expected --map-file to end with .map or .map.gz')
  elif elf_path and not map_path:
    map_path = _DeduceMapPath(elf_path)

  ret = []
  # if --elf-path or --map-path (rather than --aux-elf-path, --aux-map-path):
  if not apk_infolist:
    if map_path or elf_path:
      ret.append(
          _MakeNativeSpec(json_config,
                          apk_so_path=None,
                          map_path=map_path,
                          elf_path=elf_path,
                          track_string_literals=track_string_literals))
    return abi_filters, ret

  lib_infos = [
      f for f in apk_infolist if f.filename.endswith('.so') and f.file_size > 0
  ]

  # Sort so elf_path/map_path applies largest non-filtered library.
  matches_abi = lambda n: not abi_filters or any(f in n for f in abi_filters)
  lib_infos.sort(key=lambda x: (not matches_abi(x.filename), -x.file_size))

  for lib_info in lib_infos:
    apk_so_path = lib_info.filename
    cur_elf_path = None
    cur_map_path = None
    if not matches_abi(apk_so_path):
      logging.debug('Not breaking down %s: secondary ABI', apk_so_path)
    elif apk_so_path.endswith('_partition.so'):
      # TODO(agrieve): Support symbol breakdowns for partitions (they exist in
      #     the __combined .map file. Debug information (nm output) is shared
      #     with base partition.
      logging.debug('Not breaking down %s: partitioned library', apk_so_path)
    else:
      if elf_path:
        # Consume --aux-elf-file for the largest matching binary.
        cur_elf_path = elf_path
        elf_path = None
      elif tentative_output_dir:
        # TODO(crbug.com/40229168): Remove handling the legacy library prefix
        # 'crazy.' when there is no longer interest in size comparisons for
        # these pre-N APKs.
        cur_elf_path = os.path.join(
            tentative_output_dir, symbols_dir,
            posixpath.basename(apk_so_path.replace('crazy.', '')))
        if os.path.exists(cur_elf_path):
          logging.debug('Detected elf_path=%s', cur_elf_path)
        else:
          # TODO(agrieve): Not able to find libcrashpad_handler_trampoline.so.
          logging.debug('Not breaking down %s because file does not exist: %s',
                        apk_so_path, cur_elf_path)
          cur_elf_path = None

      if map_path:
        # Consume --aux-map-file for first non-skipped elf.
        cur_map_path = map_path
        map_path = None
      elif cur_elf_path and not ignore_linker_map:
        cur_map_path = _DeduceMapPath(cur_elf_path)

      if auto_abi_filters:
        abi_filters = [posixpath.basename(posixpath.dirname(apk_so_path))]
        logging.info('Detected --abi-filter %s', abi_filters[0])
        auto_abi_filters = False

    ret.append(
        _MakeNativeSpec(json_config,
                        apk_so_path=apk_so_path,
                        map_path=cur_map_path,
                        elf_path=cur_elf_path,
                        track_string_literals=track_string_literals))

  return abi_filters, ret


# Cache to prevent excess log messages.
@functools.lru_cache
def _DeduceMappingPath(mapping_path, apk_prefix):
  if apk_prefix:
    if not mapping_path:
      possible_mapping_path = apk_prefix + '.mapping'
      if os.path.exists(possible_mapping_path):
        mapping_path = possible_mapping_path
        logging.debug('Detected --mapping-file=%s', mapping_path)
      else:
        logging.warning('Could not find proguard mapping file at %s',
                        possible_mapping_path)
  return mapping_path


# Cache to prevent excess log messages.
@functools.lru_cache
def _DeducePathmapPath(resources_pathmap_path, apk_prefix):
  if apk_prefix:
    if not resources_pathmap_path:
      possible_pathmap_path = apk_prefix + '.pathmap.txt'
      # This could be pointing to a stale pathmap file if path shortening was
      # previously enabled but is disabled for the current build. However, since
      # current apk/aab will have unshortened paths, looking those paths up in
      # the stale pathmap which is keyed by shortened paths would not find any
      # mapping and thus should not cause any issues.
      if os.path.exists(possible_pathmap_path):
        resources_pathmap_path = possible_pathmap_path
        logging.debug('Detected --resources-pathmap-file=%s',
                      resources_pathmap_path)
      # Path shortening is optional, so do not warn for missing file.
  return resources_pathmap_path


def _ReadMultipleArgsFromStream(lines, base_dir, err_prefix, on_config_error):
  try:
    ret = ParseSsargs(lines)
  except ValueError as e:
    on_config_error('%s: %s' % (err_prefix, e.args[0]))
  for sub_args in ret:
    for k, v in sub_args.__dict__.items():
      # Translate file arguments to be relative to |sub_dir|.
      if (k.endswith('_file') or k == 'f') and isinstance(v, str):
        sub_args.__dict__[k] = os.path.join(base_dir, v)
  return ret


def _ReadMultipleArgsFromFile(ssargs_file, on_config_error):
  with open(ssargs_file, 'r') as fh:
    lines = list(fh)
  err_prefix = 'In file ' + ssargs_file
  # Supply |base_dir| as the directory containing the .ssargs file, to ensure
  # consistent behavior wherever SuperSize-archive runs.
  base_dir = os.path.dirname(os.path.abspath(ssargs_file))
  return _ReadMultipleArgsFromStream(lines, base_dir, err_prefix,
                                     on_config_error)


# Both |top_args| and |sub_args| may be modified.
def _CreateContainerSpecs(apk_file_manager,
                          top_args,
                          sub_args,
                          json_config,
                          base_container_name,
                          on_config_error,
                          split_name=None):
  sub_args.source_directory = (sub_args.source_directory
                               or top_args.source_directory)
  sub_args.output_directory = (sub_args.output_directory
                               or top_args.output_directory)
  analyze_native = not (sub_args.java_only or sub_args.no_native
                        or top_args.java_only or top_args.no_native)
  analyze_dex = not (sub_args.native_only or sub_args.no_java
                     or top_args.native_only or top_args.no_java)

  if split_name:
    apk_path = apk_file_manager.SplitPath(sub_args.minimal_apks_file,
                                          split_name)
    base_container_name = f'{base_container_name}/{split_name}.apk'
    # Make on-demand a part of the name so that:
    # * It's obvious from the name which DFMs are on-demand.
    # * Diffs that change an on-demand status show as adds/removes.
    if _IsOnDemand(apk_path):
      base_container_name += '?'
  else:
    apk_path = sub_args.apk_file

  apk_prefix = sub_args.minimal_apks_file or sub_args.apk_file
  if apk_prefix:
    # Allow either .minimal.apks or just .apks.
    apk_prefix = apk_prefix.replace('.minimal.apks', '.aab')
    apk_prefix = apk_prefix.replace('.apks', '.aab')

  mapping_path = None
  if analyze_dex:
    mapping_path = _DeduceMappingPath(sub_args.mapping_file, apk_prefix)
  resources_pathmap_path = _DeducePathmapPath(sub_args.resources_pathmap_file,
                                              apk_prefix)
  apk_spec = None
  if apk_prefix:
    apk_spec = ApkSpec(apk_path=apk_path,
                       minimal_apks_path=sub_args.minimal_apks_file,
                       mapping_path=mapping_path,
                       resources_pathmap_path=resources_pathmap_path,
                       split_name=split_name)
    if top_args.output_directory:
      apk_spec.size_info_prefix = os.path.join(top_args.output_directory,
                                               'size-info',
                                               os.path.basename(apk_prefix))
    apk_spec.analyze_dex = analyze_dex
    apk_spec.track_string_literals = not (top_args.no_string_literals
                                          or sub_args.no_string_literals)
    apk_spec.default_component = json_config.DefaultComponentForSplit(
        split_name)
    apk_spec.path_defaults = json_config.ApkPathDefaults()

  pak_spec = None
  apk_pak_paths = None
  apk_infolist = None
  if apk_spec:
    apk_infolist = apk_file_manager.InfoList(apk_path)
    apk_pak_paths = [
        f.filename for f in apk_infolist
        if archive_util.RemoveAssetSuffix(f.filename).endswith('.pak')
    ]
  if not top_args.no_output_directory and (apk_pak_paths or sub_args.pak_files):
    pak_spec = PakSpec(pak_paths=sub_args.pak_files,
                       pak_info_path=sub_args.pak_info_file,
                       apk_pak_paths=apk_pak_paths)

  if analyze_native:
    # Allow top-level --abi-filter to override values set in .ssargs.
    abi_filters = top_args.abi_filters or sub_args.abi_filters
    aux_elf_file = sub_args.aux_elf_file
    aux_map_file = sub_args.aux_map_file
    if split_name not in (None, 'base'):
      aux_elf_file = None
      aux_map_file = None

    auto_abi_filters = not abi_filters and split_name == 'base'
    abi_filters, native_specs = _CreateNativeSpecs(
        tentative_output_dir=top_args.output_directory,
        symbols_dir=sub_args.symbols_dir,
        apk_infolist=apk_infolist,
        elf_path=sub_args.elf_file or aux_elf_file,
        map_path=sub_args.map_file or aux_map_file,
        abi_filters=abi_filters,
        auto_abi_filters=auto_abi_filters,
        track_string_literals=not (top_args.no_string_literals
                                   or sub_args.no_string_literals),
        ignore_linker_map=(top_args.ignore_linker_map
                           or sub_args.ignore_linker_map),
        json_config=json_config,
        on_config_error=on_config_error)

    # For app bundles, use a consistent ABI for all splits.
    if auto_abi_filters:
      top_args.abi_filters = abi_filters
  else:
    native_specs = []

  ret = [
      ContainerSpec(container_name=base_container_name,
                    apk_spec=apk_spec,
                    pak_spec=pak_spec,
                    native_spec=None,
                    source_directory=sub_args.source_directory,
                    output_directory=sub_args.output_directory)
  ]
  if apk_spec is None:
    # Special case for when pointed at a single ELF, use just one container.
    assert len(native_specs) <= 1
    ret[0].native_spec = native_specs[0] if native_specs else None
  else:
    apk_spec.ignore_apk_paths.update(s.apk_so_path for s in native_specs)
    if pak_spec and pak_spec.apk_pak_paths:
      apk_spec.ignore_apk_paths.update(pak_spec.apk_pak_paths)
    if apk_spec.analyze_dex:
      apk_spec.ignore_apk_paths.update(i.filename for i in apk_infolist
                                       if i.filename.endswith('.dex'))
    apk_spec.ignore_apk_paths.add(apk.RESOURCES_ARSC_FILE)

    for native_spec in native_specs:
      so_name = posixpath.basename(native_spec.apk_so_path)
      abi = posixpath.basename(posixpath.dirname(native_spec.apk_so_path))
      container_name = f'{base_container_name}/{so_name} ({abi})'
      # Use same apk_spec so that all containers for the apk_spec can be found.
      ret.append(
          ContainerSpec(container_name=container_name,
                        apk_spec=apk_spec,
                        pak_spec=None,
                        native_spec=native_spec,
                        source_directory=sub_args.source_directory,
                        output_directory=sub_args.output_directory))
  return ret


def _IsOnDemand(apk_path):
  # Check if the manifest specifies whether or not to extract native libs.
  output = subprocess.check_output([
      path_util.GetAapt2Path(), 'dump', 'xmltree', '--file',
      'AndroidManifest.xml', apk_path
  ]).decode('ascii')

  def parse_attr(namespace, name):
    # A: http://schemas.android.com/apk/res/android:isFeatureSplit(0x...)=true
    # A: http://schemas.android.com/apk/distribution:onDemand=true
    m = re.search(f'A: (?:.*?/{namespace}:)?{name}' + r'(?:\(.*?\))?=(\w+)',
                  output)
    return m and m.group(1) == 'true'

  is_feature_split = parse_attr('android', 'isFeatureSplit')
  # Can use <dist:on-demand>, or <module dist:onDemand="true">.
  on_demand = parse_attr('distribution', 'onDemand') or 'on-demand' in output
  on_demand = bool(on_demand and is_feature_split)

  return on_demand


def _CreateAllContainerSpecs(apk_file_manager, top_args, json_config,
                             on_config_error):
  main_file = _IdentifyInputFile(top_args, on_config_error)
  if top_args.no_output_directory:
    top_args.output_directory = None
  else:
    output_directory_finder = path_util.OutputDirectoryFinder(
        value=top_args.output_directory,
        any_path_within_output_directory=main_file)
    top_args.output_directory = output_directory_finder.Finalized()

  if not top_args.source_directory:
    top_args.source_directory = path_util.GetSrcRootFromOutputDirectory(
        top_args.output_directory)
    assert top_args.source_directory

  if top_args.ssargs_file:
    sub_args_list = _ReadMultipleArgsFromFile(top_args.ssargs_file,
                                              on_config_error)
  else:
    sub_args_list = [top_args]

  # Do a quick first pass to ensure inputs have been built.
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    if not os.path.exists(main_file):
      raise Exception('Input does not exist: ' + main_file)

  # Each element in |sub_args_list| specifies a container.
  ret = []
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    if hasattr(sub_args, 'name'):
      container_name = sub_args.name
    else:
      container_name = os.path.basename(main_file)
    if set(container_name) & set('<>?'):
      parser.error('Container name cannot have characters in "<>?"')


    if sub_args.minimal_apks_file:
      split_names = apk_file_manager.ExtractSplits(sub_args.minimal_apks_file)
      for split_name in split_names:
        ret += _CreateContainerSpecs(apk_file_manager,
                                     top_args,
                                     sub_args,
                                     json_config,
                                     container_name,
                                     on_config_error,
                                     split_name=split_name)
    else:
      ret += _CreateContainerSpecs(apk_file_manager, top_args, sub_args,
                                   json_config, container_name, on_config_error)
  all_names = [c.container_name for c in ret]
  assert len(set(all_names)) == len(all_names), \
      'Found duplicate container names: ' + '\n'.join(sorted(all_names))

  return ret


def _FilterContainerSpecs(container_specs, container_re=None):
  ret = []
  seen_container_names = set()
  for container_spec in container_specs:
    container_name = container_spec.container_name
    if container_name in seen_container_names:
      raise ValueError('Duplicate container name: {}'.format(container_name))
    seen_container_names.add(container_name)

    if container_re and not container_re.search(container_name):
      logging.info('Skipping filtered container %s', container_name)
      continue
    ret.append(container_spec)
  return ret


def CreateSizeInfo(container_specs, build_config, json_config,
                   apk_file_manager):
  def sort_key(container_spec):
    # Native containers come first to ensure pak_id_map is populated before
    # any pak_spec is encountered.
    if container_spec.native_spec:
      # Do the most complicated container first, since its most likely to fail.
      if container_spec.native_spec.algorithm == 'linker_map':
        native_key = 0
      elif container_spec.native_spec.algorithm == 'dwarf':
        native_key = 1
      else:
        native_key = 2
    else:
      native_key = 3
    return (native_key, container_spec.container_name)

  container_specs.sort(key=sort_key)

  dex_containers = [
      c for c in container_specs
      if not c.native_spec and c.apk_spec and c.apk_spec.analyze_dex
  ]
  # Running ApkAnalyzer concurrently saves ~30 seconds for Monochrome.apks.
  logging.info('Kicking of ApkAnalyzer for %d .apk files', len(dex_containers))
  apk_analyzer_results = {}
  for container_spec in dex_containers:
    apk_analyzer_results[container_spec.container_name] = (
        apkanalyzer.RunApkAnalyzerAsync(container_spec.apk_spec.apk_path,
                                        container_spec.apk_spec.mapping_path))

  raw_symbols_list = []
  pak_id_map = pakfile.PakIdMap()
  dex_deobfuscator_cache = dex_deobfuscate.CachedDexDeobfuscators()
  for container_spec in container_specs:
    raw_symbols = _CreateContainerSymbols(container_spec, apk_file_manager,
                                          apk_analyzer_results, pak_id_map,
                                          json_config.ComponentOverrides(),
                                          dex_deobfuscator_cache)
    assert raw_symbols, f'{container_spec.container_name} had no symbols.'
    raw_symbols_list.append(raw_symbols)

  # Normalize names before sorting.
  logging.info('Normalizing symbol names')
  for raw_symbols in raw_symbols_list:
    _NormalizeNames(raw_symbols)

  # Sorting must happen after normalization.
  logging.info('Sorting symbols')
  for raw_symbols in raw_symbols_list:
    file_format.SortSymbols(raw_symbols)

  logging.debug('Accumulating symbols')
  # Containers should always have at least one symbol.
  container_list = [syms[0].container for syms in raw_symbols_list]
  all_raw_symbols = []
  for raw_symbols in raw_symbols_list:
    all_raw_symbols += raw_symbols

  file_format.CalculatePadding(all_raw_symbols)

  return models.SizeInfo(build_config, container_list, all_raw_symbols)


def Run(top_args, on_config_error):
  path_util.CheckLlvmToolsAvailable()

  if not top_args.size_file.endswith('.size'):
    on_config_error('size_file must end with .size')
  if top_args.check_data_quality:
    start_time = time.time()

  container_re = None
  if top_args.container_filter:
    try:
      container_re = re.compile(top_args.container_filter)
    except Exception as e:
      on_config_error(f'Bad --container-filter input: {e}')

  json_config_path = top_args.json_config
  if not json_config_path:
    json_config_path = path_util.GetDefaultJsonConfigPath()
    logging.info('Using --json-config=%s', json_config_path)
  json_config = json_config_parser.Parse(json_config_path, on_config_error)

  with zip_util.ApkFileManager() as apk_file_manager:
    container_specs = _CreateAllContainerSpecs(apk_file_manager, top_args,
                                               json_config, on_config_error)
    container_specs = _FilterContainerSpecs(container_specs, container_re)

    build_config = CreateBuildConfig(top_args.output_directory,
                                     top_args.source_directory,
                                     url=top_args.url,
                                     title=top_args.title)
    size_info = CreateSizeInfo(container_specs, build_config, json_config,
                               apk_file_manager)

  if logging.getLogger().isEnabledFor(logging.DEBUG):
    for line in data_quality.DescribeSizeInfoCoverage(size_info):
      logging.debug(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  for container in size_info.containers:
    logging.info('Recording metadata: \n  %s',
                 '\n  '.join(describe.DescribeDict(container.metadata)))

  logging.info('Saving result to %s', top_args.size_file)
  file_format.SaveSizeInfo(size_info, top_args.size_file)
  size_in_mb = os.path.getsize(top_args.size_file) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)

  if top_args.check_data_quality:
    logging.info('Checking data quality')
    data_quality.CheckDataQuality(size_info, not top_args.no_string_literals)
    duration = (time.time() - start_time) / 60
    if duration > 10:
      raise data_quality.QualityCheckError(
          'Command should not take longer than 10 minutes.'
          ' Took {:.1f} minutes.'.format(duration))