#!/usr/bin/env python3
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generates stats on modularization efforts. Stats include:
- Percentage of added lines in modularized files over legacy ones.
- The top 50 contributors to the modularized files.
"""
import argparse
import datetime
import json
import os
import subprocess
import sys
from collections import OrderedDict
from collections import defaultdict
from typing import List, Tuple
# Output json keys
KEY_LOC_MODULARIZED = 'loc_modularized'  # lines added under modularized dirs
KEY_LOC_LEGACY = 'loc_legacy'  # lines added under the legacy dir
KEY_RANKINGS_MODULARIZED = 'rankings'  # author -> added lines (modularized)
KEY_RANKINGS_LEGACY = 'rankings_legacy'  # author -> added lines (legacy)
KEY_START_DATE = 'start_date'
KEY_END_DATE = 'end_date'
# Directory prefixes whose files count as "modularized".
_M12N_DIRS = [
    'chrome/browser',
    'components',
]
# Directory prefix whose files count as legacy (non-modularized).
_LEGACY_DIR = 'chrome/android'
def GenerateLOCStats(start_date,
                     end_date,
                     *,
                     quiet=False,
                     json_format=False,
                     git_dir=None):
  """Generate modularization LOC stats.

  Args:
    start_date: The date (YYYY-MM-DD) to analyze the stat from.
    end_date: The date (YYYY-MM-DD) to analyze the stat to.
    quiet: True if no message is output during the processing.
    json_format: True if the output should be in json format. Otherwise
        a plain, human-readable table is generated.
    git_dir: Git repo directory to use for stats. If None, the current
        directory is used.

  Return:
    Text string containing the stat in a specified format.

  Raises:
    OSError: If the git command cannot be launched at all.
  """
  # Each CL is output in the following format:
  #
  # :thanhdng:2020-08-17:Use vector icons for zero state file results
  #
  # 118    98    chromeos/ui/base/file_icon_util.cc
  # 2      1     chromeos/ui/base/file_icon_util.h
  # 0      20    chromeos/ui/base/file_icon_util_unittest.cc
  #
  # i.e.:
  #
  # :author:commit-date:subject
  #
  # added-lines  deleted-lines  file-path1
  # added-lines  deleted-lines  file-path2
  # ...
  repo_dir = git_dir or os.getcwd()
  command = [
      'git', '-C', repo_dir, 'log', '--numstat', '--no-renames',
      '--format=#:%al:%cs:%s', '--after=' + start_date, '--before=' + end_date,
      'chrome', 'components'
  ]
  try:
    proc = subprocess.Popen(
        command,
        bufsize=1,  # line-buffered mode
        stdout=subprocess.PIPE,
        universal_newlines=True)
  except OSError as e:
    # Popen failures (git missing, bad repo_dir) raise OSError, which has no
    # returncode/stdout/stderr attributes; the old SubprocessError handler
    # would itself have crashed with AttributeError here.
    print(f'{command} failed to start: {e}', file=sys.stderr)
    raise

  author_stat_m12n = defaultdict(int)
  author_stat_legacy = defaultdict(int)
  total_m12n = 0
  total_legacy = 0
  prev_msg_len = 0
  revert_cl = False
  # Populated by the first summary line; initialized defensively so a
  # malformed stream cannot trigger NameError below.
  author = commit_date = ''
  # The context manager closes the pipe and reaps the child process even if
  # parsing raises.
  with proc:
    for raw_line in proc.stdout:
      if raw_line.isspace():
        continue
      line = raw_line.strip()
      if line.startswith('#'):  # patch summary line
        # '#:<author>:<commit-date>:<subject>'. maxsplit=3 keeps colons that
        # appear inside the subject, and never leaves the field missing (the
        # previous `*subject` form could IndexError on an empty tail).
        _, author, commit_date, subject = line.split(':', 3)
        # Skip reverts and relands so the same change is not counted twice.
        revert_cl = subject.startswith(('Revert', 'Reland'))
      else:
        if revert_cl or not line.endswith('.java'):
          continue
        # Do not take into account the number of deleted lines, which can
        # turn the overall changes to negative. If a class was renamed,
        # for instance, what's deleted is added somewhere else, so counting
        # only for addition works. Other kinds of deletion will be ignored.
        # maxsplit=2 keeps paths containing whitespace in one piece.
        added, _deleted, path = line.split(maxsplit=2)
        if not added.isdigit():
          continue  # git reports '-' for binary files
        diff = int(added)
        if _is_m12n_path(path):
          total_m12n += diff
          author_stat_m12n[author] += diff
        elif _is_legacy_path(path):
          total_legacy += diff
          author_stat_legacy[author] += diff
      msg = f'\rProcessing {commit_date} by {author}'
      if not quiet: _print_progress(msg, prev_msg_len)
      prev_msg_len = len(msg)
  if not quiet:
    _print_progress('Processing complete', prev_msg_len)
    print('\n')

  # Contributors sorted by added lines, descending.
  rankings_modularized = OrderedDict(
      sorted(author_stat_m12n.items(), key=lambda x: x[1], reverse=True))
  rankings_legacy = OrderedDict(
      sorted(author_stat_legacy.items(), key=lambda x: x[1], reverse=True))
  if json_format:
    return json.dumps({
        KEY_LOC_MODULARIZED: total_m12n,
        KEY_LOC_LEGACY: total_legacy,
        KEY_RANKINGS_MODULARIZED: rankings_modularized,
        KEY_RANKINGS_LEGACY: rankings_legacy,
        KEY_START_DATE: start_date,
        KEY_END_DATE: end_date,
    })
  else:
    output = []
    total = total_m12n + total_legacy
    # Guard against an empty date range producing a division by zero.
    percentage = 100.0 * total_m12n / total if total > 0 else 0
    output.append(f'# of lines added in modularized files: {total_m12n}')
    output.append(f'# of lines added in non-modularized files: {total_legacy}')
    output.append(f'% of lines landing in modularized files: {percentage:2.2f}')
    # Shows the top 50 contributors in each category.
    output.extend(
        _print_ranking(rankings_modularized, total_m12n,
                       'modules and components'))
    output.extend(
        _print_ranking(rankings_legacy, total_legacy, 'legacy and glue'))
    return '\n'.join(output)
def _print_ranking(rankings: OrderedDict, total: int, label: str) -> List[str]:
if not rankings:
return []
output = []
output.append(f'\nTop contributors ({label}):')
output.append('No lines % author')
for rank, author in enumerate(list(rankings.keys())[:50], 1):
lines = rankings[author]
if lines == 0:
break
ratio = 100 * lines / total
output.append(f'{rank:2d} {lines:6d} {ratio:5.1f} {author}')
return output
def _is_m12n_path(path):
  """Returns True if |path| lives under one of the modularized directories."""
  return any(path.startswith(directory) for directory in _M12N_DIRS)
def _is_legacy_path(path):
  """Returns True if |path| lives under the legacy (non-modularized) tree."""
  prefix = _LEGACY_DIR
  return path[:len(prefix)] == prefix
def _print_progress(msg, prev_msg_len):
msg_len = len(msg)
# Add spaces to remove the previous progress output completely.
if msg_len < prev_msg_len:
msg += ' ' * (prev_msg_len - msg_len)
print(msg, end='\r')
def GetDateRange(*, past_days: int) -> Tuple[str, str]:
  """Returns start and end date for a period of past days.

  Use the results as start_date and end_date of GenerateLOCStats.
  """
  # date arithmetic works directly with timedelta; no need to round-trip
  # through a midnight datetime.
  end = datetime.date.today()
  start = end - datetime.timedelta(days=past_days)
  return (start.isoformat(), end.isoformat())
if __name__ == "__main__":
  parser = argparse.ArgumentParser(
      description="Generates LOC stats for modularization effort.")
  # Exactly one of --date / --past-days must be given.
  date_group = parser.add_mutually_exclusive_group(required=True)
  date_group.add_argument('--date',
                          type=str,
                          metavar=('<date-from>', '<date-to>'),
                          nargs=2,
                          help='date range (YYYY-MM-DD)~(YYYY-MM-DD)')
  date_group.add_argument('--past-days',
                          type=int,
                          help='The number of days to look back for stats. '
                          '0 for today only.')
  parser.add_argument('-q',
                      '--quiet',
                      action='store_true',
                      help='Do not output any message while processing')
  parser.add_argument('-j',
                      '--json',
                      action='store_true',
                      help='Output result in json format. '
                      'If not specified, output in more human-readable table.')
  parser.add_argument('-o',
                      '--output',
                      type=str,
                      help='File to write the result to in json format. '
                      'If not specified, outputs to console.')
  parser.add_argument('--git-dir',
                      type=str,
                      help='Root directory of the git repo to look into. '
                      'If not specified, use the current directory.')
  args = parser.parse_args()
  # parser.error() exits the process itself (NoReturn), so the previous
  # `raise parser.error(...)` was dead code. Compare against None explicitly
  # rather than relying on truthiness (0 is a valid value).
  if args.past_days is not None and args.past_days < 0:
    parser.error('--past-days must be non-negative.')
  if args.date:
    start_date, end_date = args.date
  else:
    start_date, end_date = GetDateRange(past_days=args.past_days)
  result = GenerateLOCStats(start_date,
                            end_date,
                            quiet=args.quiet,
                            json_format=args.json,
                            git_dir=args.git_dir)
  if args.output:
    with open(args.output, 'w') as f:
      f.write(result)
  else:
    print(result)