# The kinds of item yielded by parse_table_lines().  iter_sections() uses a
# section kind's position in this list as its nesting depth.
KINDS = ['section-major', 'section-minor', 'section-group', 'row']
def iter_clean_lines(lines):
    """Yield ``(stripped, raw)`` pairs for every line that is not a comment.

    A line is treated as a comment (and dropped) when its stripped text
    starts with '#' while the raw line itself does not start with '##'.
    Blank lines are kept and come out with an empty stripped value.
    """
    for rawline in lines:
        stripped = rawline.strip()
        # Lines that begin with '##' in column 0 are section markup,
        # not comments, so they are passed through.
        if not stripped.startswith('#') or rawline.startswith('##'):
            yield stripped, rawline
def parse_table_lines(lines):
    """Yield ``(kind, value)`` pairs parsed from the lines of a table file.

    The lines are first filtered through iter_clean_lines().  Section
    titles come out as ``(kind, title)`` where kind is 'section-major',
    'section-minor' or 'section-group'; every other non-blank line comes
    out as ``('row', stripped_line)``.

    A '## title' line is a major section when the previous line is a
    divider made only of '#' (at least five), a minor section when the
    previous line is a '##---...' divider, and otherwise a group.  For
    major and minor sections the next cleaned line after the title must
    be blank.

    Raises NotImplementedError when a divider is not immediately followed
    by a '## title' line, or when the line after a major/minor title is
    not blank (including running out of lines).
    """
    cleaned = iter_clean_lines(lines)

    # Set by a divider line: (kind, line expected after the title, divider).
    pending = None
    prev = ''
    for line, rawline in cleaned:
        if line.startswith('## '):
            assert not rawline.startswith(' '), (line, rawline)
            if pending:
                # The title belongs to the divider on the previous line.
                assert prev, (line, rawline)
                kind, after, _ = pending
                assert kind and kind != 'section-group', (pending, line, rawline)
                assert after is not None, (pending, line, rawline)
            else:
                # A bare title: it must follow a blank line.
                assert not prev, (prev, line, rawline)
                pending = ('section-group', None)
                kind, after = pending
            title = line[3:].lstrip()
            assert title, (line, rawline)
            if after is not None:
                # Consume the line after the title; when the input is
                # exhausted, line becomes None and rawline is left as-is.
                line, rawline = next(cleaned, (None, rawline))
                if line != after:
                    raise NotImplementedError((pending, line, rawline))
            yield kind, title
            pending = None
        elif pending:
            # A divider must be followed directly by its title.
            raise NotImplementedError((pending, line, rawline))
        elif line.startswith('##---'):
            assert line.rstrip('-') == '##', (line, rawline)
            pending = ('section-minor', '', line)
        elif line.startswith('#####'):
            assert not line.strip('#'), (line, rawline)
            pending = ('section-major', '', line)
        elif line:
            yield 'row', line
        prev = line
def iter_sections(lines):
    """Yield ``(section, row)`` for each row found under a section.

    *section* is a tuple of the titles of the sections enclosing the row,
    outermost first.  The nesting depth of a section kind is its index in
    KINDS; starting a section discards the current titles at that depth
    and at every deeper one.

    The first row is taken as the table header and is not yielded when it
    appears before any section.  Any other row found outside a section
    raises NotImplementedError.
    """
    header = None
    # Maps nesting depth -> title of the section currently open at that depth.
    levels = {}
    section = ()
    for kind, value in parse_table_lines(lines):
        if kind != 'row':
            if header is None:
                header = False
            depth = KINDS.index(kind)
            # Track titles per depth rather than by list position.  With a
            # plain list, ``section[depth:] = [value]`` appends when a
            # level was skipped, which nested sibling sections under each
            # other instead of replacing the previous one.
            levels = {d: title for d, title in levels.items() if d < depth}
            levels[depth] = value
            section = tuple(levels[d] for d in sorted(levels))
            continue
        if section:
            yield section, value
        elif header is None:
            header = value
        else:
            raise NotImplementedError(repr(value))
def collect_sections(lines):
    """Return a dict mapping each section tuple to the list of its rows.

    Sections and rows come from iter_sections(); rows keep the order in
    which they were encountered.
    """
    grouped = {}
    for section, row in iter_sections(lines):
        grouped.setdefault(section, []).append(row)
    return grouped
def collate_sections(lines):
    """Return the sections of the table as a nested tree.

    Each level of the tree is a dict mapping a section name to a
    ``(children, rows, totalrows)`` tuple: *children* is the dict for the
    next level down, *rows* are the rows that belong to exactly that
    section, and *totalrows* are the rows of that section together with
    those of every section nested below it.
    """
    tree = {}
    for section, rows in collect_sections(lines).items():
        level = tree
        deepest = len(section) - 1
        for depth, name in enumerate(section):
            if name not in level:
                level[name] = ({}, [], [])
            children, own_rows, all_rows = level[name]
            # Only the innermost name owns the rows directly; every
            # enclosing section just counts them toward its total.
            if depth == deepest:
                own_rows.extend(rows)
            all_rows.extend(rows)
            level = children
    return tree
#############################
# the commands
def cmd_count_by_section(lines):
    """Yield the lines of a report of row counts per section.

    Every section gets one line holding, in order: the cumulative row
    count in parentheses (left empty when it equals the section's own
    rows), the number of rows directly in the section (left empty when
    there are none), and the section name indented by nesting depth.
    A divider line precedes each top-level section.  The report ends with
    a divider and a grand total of all rows.
    """
    divider = ' ' + '-' * 50

    def walk(tree, depth=0):
        # Yields the report lines for one level of the tree and returns
        # the number of rows owned by the sections at and below it.
        pad = ' ' * depth
        counted = 0
        for name, (subtree, own, cumulative) in tree.items():
            if cumulative != own:
                sectotal = f'({len(cumulative)})'
            else:
                sectotal = ''
            count = len(own) if own else ''
            if depth == 0:
                yield divider
            yield f'{sectotal:>7} {count:>4} {pad}{name}'
            counted += (yield from walk(subtree, depth + 1))
            counted += len(own)
        return counted

    grand_total = yield from walk(collate_sections(lines))
    yield divider
    yield f'(total: {grand_total})'
#############################
# the script
def parse_args(argv=None, prog=None):
    """Parse the command line and return the arguments as a dict.

    *argv* and *prog* are passed through to argparse.  The only argument
    is the positional 'filename', so the result has a single 'filename'
    key, suitable for passing to main() as keyword arguments.
    """
    import argparse

    parser = argparse.ArgumentParser(prog=prog)
    parser.add_argument('filename')
    return vars(parser.parse_args(argv))
def main(filename):
    """Print the per-section row counts for the table file *filename*."""
    with open(filename) as table:
        for rendered in cmd_count_by_section(table):
            print(rendered)
if __name__ == '__main__':
    # Script entry point: the parsed arguments become main()'s keywords.
    main(**parse_args())