#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Processes the raw output from containers_memory_benchmark into CSV files.
# Each CSV file contains the results for all tested container types for a given
# key and value type.
#
# Usage:
# $ out/release/containers_memory_benchmark &> output.txt
# $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results
import argparse
from collections.abc import Sequence
import csv
import os.path
import re
import sys
from typing import Optional
# Line patterns recognised in the benchmark output. All of them are applied
# with `match()`, i.e. anchored at the start of the (stripped) line.
#
# Banner that opens a container's section; `name` is the container type.
_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
# Iteration marker; `iter` is the decimal iteration number.
_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
# Allocation record; `alloc_addr` is treated as an opaque string and `size` is
# the decimal size of the allocation.
_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
# Free record; only the address is captured because the record has no size.
_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')
class ContainerStatsProcessor:
  """Accumulates the heap usage of one container type, iteration by iteration.

  Allocation and free events adjust a running byte total. Whenever a new
  iteration begins (or the caller flushes explicitly), the total accumulated
  so far is recorded in `data` under the number of the iteration that just
  ended.
  """

  def __init__(self, name: str):
    # e.g. base::flat_map
    self._name = name
    # current number of elements in the container; None until the first
    # iteration marker has been processed.
    self._n: Optional[int] = None
    # map of address to size for currently active allocations. Needed because
    # the free handler only records an address, and not a size.
    self._addr_to_size: dict[str, int] = {}
    # running count of the number of bytes needed at the current iteration
    self._running_size = 0
    # map of container size to number of bytes used to store a container of that
    # size. Keys are expected to be contiguous from 0 to the total iteration
    # count.
    self._data: dict[int, int] = {}

  @property
  def name(self) -> str:
    """The container type name this processor was created with."""
    return self._name

  @property
  def data(self) -> dict[int, int]:
    """Map of iteration number to the bytes in use when it was flushed."""
    return self._data

  def did_alloc(self, addr: str, size: int) -> None:
    """Records a live allocation of `size` bytes at `addr`."""
    self._addr_to_size[addr] = size
    self._running_size += size

  def did_free(self, addr: str) -> None:
    """Releases the allocation previously recorded at `addr`.

    Raises:
      KeyError: if `addr` is not a currently tracked allocation.
    """
    self._running_size -= self._addr_to_size.pop(addr)

  def did_iterate(self, n: int) -> None:
    """Closes out the previous iteration (if any) and starts iteration `n`."""
    self.flush_current_iteration_if_needed()
    self._n = n

  def flush_current_iteration_if_needed(self) -> None:
    """Stores the running byte total for the current iteration.

    Does nothing when no iteration has started yet, so that a container
    section without any iteration marker does not leave a `None` key in
    `data`.
    """
    if self._n is None:
      return
    self._data[self._n] = self._running_size
class TestCaseProcessor:
  """Collects the stats of every container type benchmarked for one test case.

  A test case is one key/value type combination. Containers are processed one
  at a time; once all of them have been seen, the results are written to a
  single CSV file with one column per container type.
  """

  def __init__(self, name: str):
    # e.g. int -> std::string
    self._name = name
    # containers for which all allocation data has been processed and finalized.
    self._finalized_stats: list[ContainerStatsProcessor] = []
    # the current container being processed.
    self._current_container_stats: Optional[ContainerStatsProcessor] = None

  @property
  def current_container_stats(self):
    """The container currently receiving events, or None if there is none."""
    return self._current_container_stats

  def did_begin_container_stats(self, container_type: str):
    """Finalizes the container in progress, if any, and starts a new one."""
    self._finalize_current_container_stats_if_needed()
    self._current_container_stats = ContainerStatsProcessor(container_type)

  def did_finish_container_stats(self, output_dir: str):
    """Writes `<output_dir>/<test case name>.csv` for all processed containers.

    The first column holds the container size; each following column holds the
    recorded byte count for one container type at that size. `output_dir` is
    created if it does not exist yet.

    Raises:
      AssertionError: if the containers did not record the same set of sizes.
        The check runs before the file is opened, so no partial CSV is left
        behind.
    """
    self._finalize_current_container_stats_if_needed()

    # In theory, all processed containers should have the same number of keys,
    # but assert just to be sure. Every container is compared against the first
    # one, including the case where the first one recorded nothing at all.
    key_lists = [sorted(stats.data.keys()) for stats in self._finalized_stats]
    keys = key_lists[0] if key_lists else []
    for stats, other_keys in zip(self._finalized_stats, key_lists):
      assert keys == other_keys, (
          f'{stats.name} recorded a different set of sizes for {self._name}'
      )

    # The usual invocation names a directory that may not exist yet.
    os.makedirs(output_dir, exist_ok=True)
    with open(
        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
    ) as f:
      writer = csv.writer(f)
      # First the column headers...
      writer.writerow(
          ['size'] + [stats.name for stats in self._finalized_stats]
      )
      # ...then one row per container size.
      for key in keys:
        writer.writerow(
            [key] + [stats.data[key] for stats in self._finalized_stats]
        )

  def _finalize_current_container_stats_if_needed(self):
    """Flushes and archives the container in progress; no-op if none."""
    current = self._current_container_stats
    if current is None:
      return
    current.flush_current_iteration_if_needed()
    self._finalized_stats.append(current)
    self._current_container_stats = None
def main(argv: Sequence[str]) -> None:
  """Converts benchmark output read from stdin into one CSV per test case.

  Args:
    argv: the full command line, including the program name in `argv[0]`.
      Must contain `-o <dir>`, the directory the CSV files are written to.

  Input is processed line by line:
    * a line containing `->` starts a new test case (and finishes the
      previous one, writing its CSV);
    * a `===== name =====` banner starts a new container within the test case;
    * iteration / alloc / freed lines are forwarded to the current container.

  Lines that cannot be attributed to a test case and container (because they
  appear before the first test case line or before the first container
  banner) and lines matching none of the patterns are ignored.
  """
  parser = argparse.ArgumentParser(
      description='Processes raw output from containers_memory_usage into CSVs.'
  )
  parser.add_argument(
      '-o', help='directory to write CSV files to', required=True
  )
  # Parse the arguments that were actually passed in, minus the program name,
  # instead of letting argparse silently fall back to sys.argv.
  args = parser.parse_args(argv[1:])

  # It would be nicer to use a ContextManager, but that complicates splitting up
  # the input and iterating through it. This is "good enough".
  processor: Optional[TestCaseProcessor] = None
  for line in sys.stdin:
    line = line.strip()

    if '->' in line:
      if processor is not None:
        processor.did_finish_container_stats(args.o)
      processor = TestCaseProcessor(line)
      continue

    if processor is None:
      # Nothing before the first test case line can be attributed to anything.
      continue

    if match := _HEADER_RE.match(line):
      processor.did_begin_container_stats(match.group('name'))
      continue

    stats = processor.current_container_stats
    if stats is None:
      # Events outside of a container section have no owner.
      continue

    if match := _ITER_RE.match(line):
      stats.did_iterate(int(match.group('iter')))
    elif match := _ALLOC_RE.match(line):
      stats.did_alloc(match.group('alloc_addr'), int(match.group('size')))
    elif match := _FREED_RE.match(line):
      stats.did_free(match.group('freed_addr'))

  # The last test case has no following `->` line to trigger its output.
  if processor is not None:
    processor.did_finish_container_stats(args.o)


if __name__ == '__main__':
  main(sys.argv)