chromium/third_party/puffin/scripts/measure_patch_size.py

#!/usr/bin/env python3
#
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#

"""A tool for running diffing tools and measuring patch sizes."""

import argparse
import logging
import os
import subprocess
import sys
import tempfile


class Error(Exception):
  """Signals invalid corpus arguments or a failed diffing command."""


def ParseArguments(argv):
  """Parses and validates command line arguments.

  Both --src-corpus and --tgt-corpus must be given and must name existing
  directories.

  Args:
    argv: command line arguments to parse (without the program name).

  Returns:
    The argparse.Namespace with the parsed values (src_corpus, tgt_corpus
    and debug).

  Raises:
    Error: if a corpus option is missing or is not an existing directory.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument('--src-corpus', metavar='DIR',
                      help='The source corpus directory with compressed files.')
  parser.add_argument('--tgt-corpus', metavar='DIR',
                      help='The target corpus directory with compressed files.')
  parser.add_argument('--debug', action='store_true',
                      help='Turns on verbosity.')

  # Parse command-line arguments.
  args = parser.parse_args(argv)

  # The options are validated here (rather than with required=True) so that
  # every failure is reported through Error.
  for option, corpus in (('--src-corpus', args.src_corpus),
                         ('--tgt-corpus', args.tgt_corpus)):
    if not corpus:
      # Name the option instead of reporting "Corpus directory None ...".
      raise Error('Missing required option {}'.format(option))
    if not os.path.isdir(corpus):
      raise Error('Corpus directory {} is non-existent or inaccessible'
                  .format(corpus))
  return args


def _ListFiles(corpus):
  """Returns the sorted paths of the regular files directly inside |corpus|."""
  paths = (os.path.join(corpus, name) for name in os.listdir(corpus))
  # os.listdir() gives no ordering guarantee; sort so runs are reproducible.
  return sorted(path for path in paths if os.path.isfile(path))


def main(argv):
  """Generates puffdiff and bsdiff patches per corpus file and logs sizes.

  For every regular file in the source corpus, a patch against the file with
  the same name in the target corpus is produced with both the 'puffin' and
  the 'bsdiff' commands. The source, target and patch sizes are logged at
  DEBUG level, which the --debug flag enables.

  Args:
    argv: the full command line; argv[0] (the program name) is skipped.

  Returns:
    0 on success.

  Raises:
    Error: if the arguments are invalid, if a source file has no counterpart
      in the target corpus, or if a diffing command exits with non-zero
      status.
  """
  args = ParseArguments(argv[1:])

  if args.debug:
    logging.getLogger().setLevel(logging.DEBUG)

  # Construct list of appropriate files.
  src_files = _ListFiles(args.src_corpus)
  tgt_names = {os.path.basename(f) for f in _ListFiles(args.tgt_corpus)}

  # Check if all files in src_files have a target file in the target corpus.
  # Sorted so the error message is stable from run to run.
  files_mismatch = sorted(
      {os.path.basename(f) for f in src_files} - tgt_names)
  if files_mismatch:
    raise Error('Target files {} do not exist in corpus: {}'
                .format(files_mismatch, args.tgt_corpus))

  for src in src_files:
    # The temporary files only provide paths for the tools to write the
    # patches to; they are removed when the with block exits.
    with tempfile.NamedTemporaryFile() as puffdiff_patch, \
         tempfile.NamedTemporaryFile() as bsdiff_patch:

      tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

      operation = 'puffdiff'
      cmd = ['puffin',
             '--operation={}'.format(operation),
             '--src_file={}'.format(src),
             '--dst_file={}'.format(tgt),
             '--patch_file={}'.format(puffdiff_patch.name)]
      # Running the puffdiff operation
      if subprocess.call(cmd) != 0:
        raise Error('Puffin failed to do {} command: {}'
                    .format(operation, cmd))

      operation = 'bsdiff'
      cmd = ['bsdiff', '--type', 'bz2', src, tgt, bsdiff_patch.name]
      # Running the bsdiff operation
      if subprocess.call(cmd) != 0:
        raise Error('Failed to do {} command: {}'
                    .format(operation, cmd))

      # Sizes are read from disk by path, since the patches were written by
      # the external commands rather than through the Python file objects.
      logging.debug('%s(%d -> %d) : bsdiff(%d), puffdiff(%d)',
                    os.path.basename(src),
                    os.stat(src).st_size, os.stat(tgt).st_size,
                    os.stat(bsdiff_patch.name).st_size,
                    os.stat(puffdiff_patch.name).st_size)

  return 0


if __name__ == '__main__':
  # Pass the complete argv (program name included) to main() and use its
  # return value as the process exit status.
  exit_status = main(sys.argv)
  sys.exit(exit_status)