# compare.py | Explore in Territory

#!/usr/bin/env python3

# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import logging
import os
import sys

import numpy
import pandas as pd
from scipy import stats as scipy_stats


def get_diamond_string(diamond_count: int):
  """Render a diamond count as the text shown in the comparison table.

  A count of 0 maps to "~"; counts 1 through 4 map to that many "◆"
  characters. Any other value matches nothing and yields None.
  """
  labels = ("~", "◆", "◆◆", "◆◆◆", "◆◆◆◆")

  # Match by equality rather than by indexing so that float counts such as
  # 2.0 (the kind of value numpy.floor() produces) still find their label.
  for count, label in enumerate(labels):
    if diamond_count == count:
      return label
  return None


def get_diamonds_count(significance: pd.DataFrame):
  """
  Convert significance levels into "diamond" counts, emulating the
  significance representation that is familiar to UMA users.

  |significance| must only hold values within [0, 1]. The result has the
  same shape and holds, for each entry, the floored order of magnitude of
  the two-tailed p-value, limited to the range [0, 4].
  """

  assert not (significance > 1).any().any()
  assert not (significance < 0).any().any()

  # A significance of exactly 1 would give a p-value of 0, and log10(0) is
  # undefined, so cap the input just below 1.
  capped = numpy.clip(significance, 0, 0.999999)

  # The test is two-tailed: scipy_stats.norm.cdf(1.96) = 0.975, yet a
  # z-score of 1.96 corresponds to a 0.05 p-value. Doubling the one-sided
  # tail corrects for that.
  two_tailed_p = 2 * (1 - capped)

  # absolute() turns the (negative) exponent into a positive magnitude;
  # floor() rounds it down so that results are never exaggerated.
  magnitude = numpy.floor(numpy.absolute(numpy.log10(two_tailed_p)))

  # 4 diamonds is the maximum, however small the p-value is.
  return numpy.clip(magnitude, 0, 4)


def compute_mean_and_stderr(summary_path: str):
  """Load a summary csv and compute the mean and standard error per column.

  Args:
    summary_path: Location of the csv file to read.

  Returns:
    A dataframe indexed by the csv's column names, with one "mean" and one
    "stderr" column.
  """
  samples = pd.read_csv(summary_path)

  # skipna because no line has all the measurements. This is because of the
  # different sampling rates of the data sources in power_sampler
  # and power_metrics.
  column_means = samples.mean(skipna=True)

  # Standard error of each column: standard deviation over the square root
  # of the number of non-missing samples.
  sample_counts = samples.count()
  column_stderrs = samples.std(skipna=True) / numpy.sqrt(sample_counts)

  return column_means.to_frame(name="mean").join(
      column_stderrs.to_frame(name="stderr"))


def percent_difference(first_value: pd.DataFrame, second_value: pd.DataFrame):
  """
  Returns the comparative percentage difference between two
  values/columns.

  The result is to be read as :
    |second_value| is X% smaller/larger than |first_value|.

  Ex: percent_difference(20, 10) --> -50
  Ex: percent_difference(10, 50) --> 400

  Args:
    first_value: The reference value(s) the difference is relative to.
    second_value: The value(s) being compared against the reference.

  Returns:
    The signed difference, in percent of |first_value|. Negative when
    |second_value| is the smaller of the two.
  """

  delta = second_value - first_value
  return (delta / first_value) * 100


def compare(data_dir: str, baseline_summary: str, alternative_summary: str):
  """Open two summary files and compare their values. Saves the results
  in data_dir.

  For every column of the summaries, the output holds the mean and standard
  error of both runs, the percent difference between the means, a
  two-sample z-score, the matching significance level and its "diamond"
  representation. The result is logged and written to
  comparison_summary.csv.

  Args:
    data_dir: The directory to save the comparison csv in.
    baseline_summary: summary.csv for the baseline.
    alternative_summary: summary.csv for the comparison.
  """

  # Get names of the browsers being compared from the paths: the part of
  # the containing directory's name that precedes the first underscore.
  baseline_name = os.path.basename(
      os.path.dirname(baseline_summary)).split("_")[0]
  alternative_name = os.path.basename(
      os.path.dirname(alternative_summary)).split("_")[0]

  # Extract mean and std values for each column of |summary| into a new
  # dataframe.
  baseline_stats = compute_mean_and_stderr(baseline_summary)
  alternative_stats = compute_mean_and_stderr(alternative_summary)

  # Join the calculated values for both browsers into a single dataframe.
  # NOTE(review): when both directory names share the same prefix the two
  # suffixes are identical; confirm the join still yields usable columns.
  comparison_summary = baseline_stats.join(alternative_stats,
                                           lsuffix=f"_{baseline_name}",
                                           rsuffix=f"_{alternative_name}")

  # Calculate the difference in percent between the baseline and comparison.
  comparison_summary["difference"] = percent_difference(
      baseline_stats["mean"], alternative_stats["mean"])

  # See https://www.cliffsnotes.com/study-guides/statistics/univariate-inferential-tests/two-sample-z-test-for-comparing-two-means
  combined_stderr = numpy.sqrt(
      pow(baseline_stats["stderr"], 2) + pow(alternative_stats["stderr"], 2))
  comparison_summary["z_score"] = (
      baseline_stats["mean"] - alternative_stats["mean"]) / combined_stderr

  # See  https://machinelearningmastery.com/critical-values-for-statistical-hypothesis-testing/
  comparison_summary["significance_level"] = scipy_stats.norm.cdf(
      abs(comparison_summary["z_score"]))

  diamond_count = get_diamonds_count(comparison_summary["significance_level"])
  comparison_summary["diamonds"] = diamond_count.apply(get_diamond_string)

  # Drop results for which comparing the mean makes no sense. Rows that are
  # absent from the summaries are skipped instead of raising KeyError, so a
  # summary lacking one of these columns can still be compared.
  comparison_summary = comparison_summary.drop([
      'battery_max_capacity', 'battery_current_capacity', 'sample_time',
      'elapsed_ns'
  ],
                                               errors="ignore")

  # Display and save results.
  logging.info(comparison_summary)
  comparison_summary.to_csv(os.path.join(data_dir, "comparison_summary.csv"))


def main():
  """Parse the command line and compare the two benchmark summaries.

  Expects a summary.csv in both --baseline_dir and --alternative_dir. When
  either is missing, an error is logged and the process exits with -1.
  Otherwise the comparison is written to --output_dir.
  """
  parser = argparse.ArgumentParser(
      description='Compares two summary files for analysis.')
  parser.add_argument("--output_dir",
                      help="Directory where to write the comparison file.",
                      required=True)
  parser.add_argument("--baseline_dir",
                      help="Directory containing the baseline benchmark data.",
                      required=True)
  parser.add_argument(
      "--alternative_dir",
      help="Directory containing the alternative benchmark data.",
      required=True)
  parser.add_argument('--verbose',
                      action='store_true',
                      help='Print verbose output.')
  args = parser.parse_args()

  log_level = logging.DEBUG if args.verbose else logging.INFO
  logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

  # Validate both inputs before doing any work. The baseline comes first so
  # that summaries[0] is the baseline and summaries[1] the alternative.
  summaries = []
  for benchmark_dir in (args.baseline_dir, args.alternative_dir):
    summary = os.path.join(benchmark_dir, "summary.csv")
    if not os.path.isfile(summary):
      # Report the directory that was searched, not the file path itself.
      logging.error("summary.csv missing in %s.", benchmark_dir)
      sys.exit(-1)
    summaries.append(summary)

  compare(args.output_dir, summaries[0], summaries[1])


if __name__ == "__main__":
  # Print floats as fixed-point with six decimals so that dataframes are
  # never displayed using scientific notation.
  pd.set_option("display.float_format", "{:.6f}".format)

  main()
# chromium/tools/mac/power/compare.py