#!/usr/bin/env python3
# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Prints the large commits given a .csv file from a telemetry size graph."""
import argparse
import re
import subprocess

# Commit ranges where perf bot was giving invalid results.
# Range objects implement __contains__ for fast "in" operators.
_BAD_COMMIT_RANGES = [
    range(1045024, 1045552),  # https://crbug.com/1361952
]
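

# Sketch of the expected input, inferred from the parsing below (values are
# hypothetical; a "revision" header row is skipped and sizes may be floats):
#   revision,value
#   1044900,41943040.0
#   1044901,41974528.0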
def _ReadCsv(path):
"""Returns the contents of the .csv as a list of (int, int)."""
ret = []
with open(path) as f:
for line in f:
parts = line.rstrip().split(',')
if len(parts) == 2 and parts[0] != 'revision':
ret.append((int(parts[0]), int(float(parts[1]))))
return ret
def _FindBigDeltas(revs_and_sizes, increase_threshold, decrease_threshold):
"""Filters revs_and_sizes for entries that grow/shrink too much."""
  big_jumps = []
  prev_rev, prev_size = revs_and_sizes[0]
  for rev, size in revs_and_sizes:
    delta = size - prev_size
    if delta > increase_threshold or -delta > decrease_threshold:
      big_jumps.append((rev, delta, prev_rev))
    prev_rev = rev
    prev_size = size
  return big_jumps


def _LookupCommitInfo(rev):
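  # 'git crrev-parse' (presumably a depot_tools helper) maps the commit
  # position number from the .csv to a git hash; it needs a fetched checkout.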
  sha1 = subprocess.check_output(
      ['git', 'crrev-parse', str(rev)], encoding='utf-8').strip()
  if not sha1:
    raise Exception(f'git crrev-parse for {rev} failed. Probably need to '
                    '"git fetch origin main"')
  desc = subprocess.check_output(['git', 'log', '-n1', sha1], encoding='utf-8')
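  # The regexes below assume git's default log format, e.g.:
  #   Author: Jane Doe <jane@example.com>
  #   Date:   Fri Sep 30 12:34:56 2022 -0700
  #
  #       Commit subject line (indented by four spaces)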
  author = re.search(r'Author: .*?<(.*?)>', desc).group(1)
  day, year = re.search(r'Date:\s+\w+\s+(\w+ \d+)\s+.*?\s+(\d+)',
                        desc).groups()
  date = '{} {}'.format(day, year)
  title = re.search(r'\n +(\S.*)', desc).group(1).replace('\t', ' ')
  return sha1, author, date, title


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--increase-threshold',
      type=int,
      default=30 * 1024,
      help='Minimum increase in bytes for a commit to be considered notable.')
  parser.add_argument(
      '--decrease-threshold',
      type=int,
      default=30 * 1024,
      help='Minimum decrease in bytes for a commit to be considered notable.')
  parser.add_argument(
      'points_csv', help='Input .csv file with columns: revision,value')
  options = parser.parse_args()
  revs_and_sizes = _ReadCsv(options.points_csv)
  big_deltas = _FindBigDeltas(revs_and_sizes, options.increase_threshold,
                              options.decrease_threshold)
  print('Printing info for up to {} commits in the range {}-{}'.format(
      len(big_deltas), revs_and_sizes[0][0], revs_and_sizes[-1][0]))
  # Rows are tab-separated (titles may contain commas), so use a tab header.
  print('\t'.join(['Revision', 'Hash', 'Title', 'Author', 'Delta', 'Date']))
  num_bad_commits = 0
  for rev, delta, prev_rev in big_deltas:
    if any(rev in r for r in _BAD_COMMIT_RANGES):
      num_bad_commits += 1
      continue
    sha1, author, date, title = _LookupCommitInfo(rev)
    rev_str = str(rev)
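    # When the .csv skips revisions, the delta spans several commits; report
    # the whole candidate range rather than a single revision.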
    if rev - prev_rev > 1:
      rev_str = f'{prev_rev}..{rev}'
    print('\t'.join([rev_str, sha1, title, author, str(delta), date]))
  if num_bad_commits:
    print(f'Ignored {num_bad_commits} commits from bad ranges')


if __name__ == '__main__':
  main()