#!/usr/bin/env python3
#
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
"""A tool for running diffing tools and measuring patch sizes."""
import argparse
import logging
import os
import subprocess
import sys
import tempfile
class Error(Exception):
"""Puffin general processing error."""
def ParseArguments(argv):
"""Parses and Validates command line arguments.
Args:
argv: command line arguments to parse.
Returns:
The arguments list.
"""
parser = argparse.ArgumentParser()
parser.add_argument('--src-corpus', metavar='DIR',
help='The source corpus directory with compressed files.')
parser.add_argument('--tgt-corpus', metavar='DIR',
help='The target corpus directory with compressed files.')
parser.add_argument('--debug', action='store_true',
help='Turns on verbosity.')
# Parse command-line arguments.
args = parser.parse_args(argv)
for corpus in (args.src_corpus, args.tgt_corpus):
if not corpus or not os.path.isdir(corpus):
raise Error('Corpus directory {} is non-existent or inaccesible'
.format(corpus))
return args
def main(argv):
"""The main function."""
args = ParseArguments(argv[1:])
if args.debug:
logging.getLogger().setLevel(logging.DEBUG)
# Construct list of appropriate files.
src_files = list(filter(os.path.isfile,
[os.path.join(args.src_corpus, f)
for f in os.listdir(args.src_corpus)]))
tgt_files = list(filter(os.path.isfile,
[os.path.join(args.tgt_corpus, f)
for f in os.listdir(args.tgt_corpus)]))
# Check if all files in src_files have a target file in tgt_files.
files_mismatch = (set(map(os.path.basename, src_files)) -
set(map(os.path.basename, tgt_files)))
if files_mismatch:
raise Error('Target files {} do not exist in corpus: {}'
.format(files_mismatch, args.tgt_corpus))
for src in src_files:
with tempfile.NamedTemporaryFile() as puffdiff_patch, \
tempfile.NamedTemporaryFile() as bsdiff_patch:
tgt = os.path.join(args.tgt_corpus, os.path.basename(src))
operation = 'puffdiff'
cmd = ['puffin',
'--operation={}'.format(operation),
'--src_file={}'.format(src),
'--dst_file={}'.format(tgt),
'--patch_file={}'.format(puffdiff_patch.name)]
# Running the puffdiff operation
if subprocess.call(cmd) != 0:
raise Error('Puffin failed to do {} command: {}'
.format(operation, cmd))
operation = 'bsdiff'
cmd = ['bsdiff', '--type', 'bz2', src, tgt, bsdiff_patch.name]
# Running the bsdiff operation
if subprocess.call(cmd) != 0:
raise Error('Failed to do {} command: {}'
.format(operation, cmd))
logging.debug('%s(%d -> %d) : bsdiff(%d), puffdiff(%d)',
os.path.basename(src),
os.stat(src).st_size, os.stat(tgt).st_size,
os.stat(bsdiff_patch.name).st_size,
os.stat(puffdiff_patch.name).st_size)
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv))