#!/usr/bin/env python
# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script which analyze power measurement test results from bots.

Such analysis provides insights into power data to better understand
Intel Power Gadget.

Related design doc:
https://docs.google.com/document/d/1s3L2IYguQmPHInsKkbHh06hXCXo8ggo5iPIhOaCNwVw
"""

import enum
import json
import logging
import math
import os
import sys

_TESTS = [
    'Basic', 'Video_720_MP4', 'Video_720_MP4_Fullscreen',
    'Video_720_MP4_Underlay', 'Video_720_MP4_Underlay_Fullscreen'
]
_MEASUREMENTS = ['DRAM', 'Processor']

_RESULTS_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'win10_intel_hd_630')

_RESULTS_JSON_FILES = [
    'build_4370_4425_repeat3.json',
    'build_4426_4759_repeat3.json',
    'build_4760_5047_repeat3.json',
]
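# The file names appear to encode the covered build range and the number of
# repeats per build (e.g. builds 4370-4425, 3 repeats each).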

# Bots with fewer recorded runs than this are skipped by the experiments.
MIN_RUNS_PER_BOT = 8


class RepeatStrategy(enum.Enum):
  COUNT_EACH = 1  # count each run individually
  COUNT_MINIMUM = 2  # count the run with minimum power
  COUNT_AVERAGE = 3  # count the average power of all runs
  COUNT_MEDIAN = 4  # count the median_low (power wise) of all runs
  COUNT_MINIMUM_FIRST_TWO = 5  # for the first 2, count the run with less power
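
  # For example, given per-run power totals [5.0, 4.0, 6.0]:
  #   COUNT_EACH              -> [5.0, 4.0, 6.0]
  #   COUNT_MINIMUM           -> [4.0]
  #   COUNT_AVERAGE           -> [5.0]
  #   COUNT_MEDIAN            -> [5.0]
  #   COUNT_MINIMUM_FIRST_TWO -> [4.0]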

  @classmethod
  def ToString(cls, strategy):
    if strategy == cls.COUNT_EACH:
      return 'each'
    if strategy == cls.COUNT_MINIMUM:
      return 'minimum'
    if strategy == cls.COUNT_AVERAGE:
      return 'average'
    if strategy == cls.COUNT_MEDIAN:
      return 'median'
    assert strategy == cls.COUNT_MINIMUM_FIRST_TWO
    return 'minimum (first two)'


def LoadResultsJsonFiles():
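  """Loads each file in _RESULTS_JSON_FILES and returns the parsed dicts."""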
  jsons = []
  for json_filename in _RESULTS_JSON_FILES:
    json_path = os.path.join(_RESULTS_PATH, json_filename)
    with open(json_path, 'r') as json_file:
      logging.debug('Loading %s', json_path)
      jsons.append(json.load(json_file))
  return jsons


def DetermineResultsFromMultipleRuns(measurements, repeat_strategy):
  """Collapses the per-run |measurements| of one build into the list of
  values to record, according to |repeat_strategy|."""
  if repeat_strategy == RepeatStrategy.COUNT_EACH:
    return measurements
  if repeat_strategy == RepeatStrategy.COUNT_MINIMUM:
    return [min(measurements)]
  if repeat_strategy == RepeatStrategy.COUNT_AVERAGE:
    return [Mean(measurements)]
  if repeat_strategy == RepeatStrategy.COUNT_MEDIAN:
    # MedianLow() expects sorted input, so sort a copy first.
    return [MedianLow(sorted(measurements))]
  if repeat_strategy == RepeatStrategy.COUNT_MINIMUM_FIRST_TWO:
    assert len(measurements) >= 2
    return [min(measurements[0:2])]

  raise ValueError('Unknown repeat strategy: %s' % repeat_strategy)


# pylint: disable=too-many-locals
def ProcessJsonData(jsons,
                    per_bot=False,
                    repeat_strategy=RepeatStrategy.COUNT_MINIMUM):
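  """Collects the total power (summed across _MEASUREMENTS) of every run in
  |jsons|, collapsing each build's repeats via |repeat_strategy|.

  Returns a dict with the processed build range ('min_build'/'max_build'),
  the list of 'bots' seen, and 'results' keyed by test name (then by bot
  name when |per_bot| is True).
  """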
  min_build = sys.maxsize
  max_build = -1
  results = {}
  bots = set()
  for j in jsons:
    for build in j.get('builds', []):
      build_number = build.get('number', -1)
      if build_number > 0:
        min_build = min(build_number, min_build)
        max_build = max(build_number, max_build)

      bot = build['bot']
      bots.add(bot)

      for test in build['tests']:
        name = test['name'].split('.')[-1]
        assert name in _TESTS
        if name not in results:
          results[name] = {} if per_bot else []
        test_data = results[name]
        if per_bot:
          test_data = test_data.setdefault(bot, [])

        # Sum DRAM and Processor power per run: measurements[ii] becomes the
        # total power of the ii-th repeat of this test on this build.
        measurements = [0]
        for measurement_name in _MEASUREMENTS:
          actual_measurement_name = measurement_name + ' Power_0'
          data = test[actual_measurement_name]
          count = len(data)
          while len(measurements) < count:
            measurements.append(0)
          for ii in range(count):
            measurements[ii] = measurements[ii] + data[ii]
        assert measurements
        test_data.extend(
            DetermineResultsFromMultipleRuns(measurements, repeat_strategy))

  return {
      'min_build': min_build,
      'max_build': max_build,
      'bots': list(bots),
      'results': results,
  }


# pylint: enable=too-many-locals


def Mean(data):
  assert len(data) > 0
  return float(sum(data)) / len(data)


def Stdev(data):
  """Population standard deviation (divides by N rather than N - 1)."""
  assert len(data) > 0
  mean = Mean(data)
  total = sum((num - mean) * (num - mean) for num in data)
  return math.sqrt(total / len(data))


def MedianLow(data):
  """Returns the lower median of |data|, which must already be sorted.

  Equivalent to Python 3's statistics.median_low() for sorted input.
  """
  assert len(data) > 0
  return data[(len(data) - 1) // 2]


def MarkSection():
  print('')


def MarkExperiment(description):
  print('')
  print('**************************************************************')
  print(description)
  print('**************************************************************')
  print('')


def GetBotBuilds(jsons, bot_name):
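  """Returns the build numbers recorded for |bot_name| across |jsons|."""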
  build_numbers = []
  for j in jsons:
    builds = j.get('builds', [])
    for build in builds:
      build_number = build.get('number', -1)
      if build_number > 0:
        bot = build['bot']
        if bot == bot_name:
          build_numbers.append(build_number)
  return build_numbers


def GetOutliers(data, variation_threshold):
  """Returns the values in |data| that are more than |variation_threshold|
  (a fraction of the mean) away from the mean.

  E.g. GetOutliers([10.0, 11.0, 12.0, 15.0], 0.12) returns [10.0, 15.0]:
  the mean is 12.0, so the allowed band is [10.56, 13.44].
  """
  mean = Mean(data)
  max_value = mean + mean * variation_threshold
  min_value = mean - mean * variation_threshold
  return [value for value in data if value > max_value or value < min_value]


# pylint: disable=too-many-locals
def FindBuild(jsons, selected_bots, test_name, result):
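  """Returns the bot/build on which the minimum per-run total power of
  |test_name| equals |result|, or None. Exact float comparison is safe here
  because |result| was computed from the same data by ProcessJsonData().
  """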
  for j in jsons:
    for build in j.get('builds', []):
      build_number = build.get('number', -1)
      if build_number < 0:
        continue
      bot = build['bot']
      if bot not in selected_bots:
        continue

      for test in build['tests']:
        name = test['name'].split('.')[-1]
        assert name in _TESTS
        if name != test_name:
          continue

        # Recompute the per-run total power exactly as ProcessJsonData() does,
        # then apply RepeatStrategy.COUNT_MINIMUM.
        measurements = [0]
        for measurement_name in _MEASUREMENTS:
          actual_measurement_name = measurement_name + ' Power_0'
          data = test[actual_measurement_name]
          count = len(data)
          while len(measurements) < count:
            measurements.append(0)
          for ii in range(count):
            measurements[ii] = measurements[ii] + data[ii]
        assert measurements
        measurements.sort()
        if measurements[0] == result:
          return {'bot': bot, 'build': build_number}
  return None
# pylint: enable=too-many-locals


def RunExperiment_BadBots(jsons,
                          stdev_threshold,
                          repeat_strategy=RepeatStrategy.COUNT_MINIMUM):
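  """Flags bots whose per-bot power stdev exceeds |stdev_threshold| on any
  test other than Basic, and returns the sorted list of flagged bots."""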
  MarkExperiment('Locate potential bad bots: thresh=%0.2f, repeat=%s' %
                 (stdev_threshold, RepeatStrategy.ToString(repeat_strategy)))
  outcome = ProcessJsonData(
      jsons, per_bot=True, repeat_strategy=repeat_strategy)
  logging.debug('Processed builds: [%d, %d]', outcome['min_build'],
                outcome['max_build'])
  logging.debug('Total number of bots: %d', len(outcome['bots']))
  results = outcome['results']
  total_bad_bots = set()
  for test_name, test_results in results.items():
    if test_name == 'Basic':
      # Ignore Basic test results. They seem more unstable.
      continue
    MarkSection()
    logging.debug('Results for test: %s', test_name)
    bots_considered = 0
    bad_bots = []
    for bot_name, bot_results in test_results.items():
      if len(bot_results) < MIN_RUNS_PER_BOT:
        continue
      bots_considered = bots_considered + 1
      stdev = Stdev(bot_results)
      bot_results.sort()
      if stdev > stdev_threshold:
        bad_bots.append(bot_name)
        logging.debug('Potential bad bot %s: stdev = %f', bot_name, stdev)
    total_bad_bots |= set(bad_bots)
    logging.debug('Total bots considered: %d', bots_considered)
    logging.debug('Bad bots: %d', len(bad_bots))
    logging.debug('%s', bad_bots)
  MarkSection()
  total_bad_bots = list(total_bad_bots)
  total_bad_bots.sort()
  logging.debug('All potential bad bots: %d', len(total_bad_bots))
  logging.debug('%s', total_bad_bots)
  MarkSection()
  for bot in total_bad_bots:
    build_numbers = GetBotBuilds(jsons, bot)
    build_numbers.sort()
    logging.debug('Bad bot %s builds: %s', bot, build_numbers)
  return total_bad_bots


# pylint: disable=too-many-locals
def RunExperiment_GoodBots(jsons,
                           bad_bots=None,
                           repeat_strategy=RepeatStrategy.COUNT_MINIMUM):
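  """Finds bots, excluding |bad_bots|, whose power stdev stays below a fixed
  threshold on every test other than Basic. Also logs runs falling outside a
  relative range around each bot's mean. Returns the sorted good-bot list."""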
  bad_bots = bad_bots or []
  STDEV_GOOD_BOT_THRESHOLD = 0.2
  GOOD_BOT_RANGE_PERC = 0.08
  REGULAR_BOT_RANGE_PERC = 0.15
  MarkExperiment(
      'Locate potential good bots: thresh=%0.2f, repeat=%s' %
      (STDEV_GOOD_BOT_THRESHOLD, RepeatStrategy.ToString(repeat_strategy)))
  outcome = ProcessJsonData(
      jsons, per_bot=True, repeat_strategy=repeat_strategy)
  logging.debug('Processed builds: [%d, %d]', outcome['min_build'],
                outcome['max_build'])
  logging.debug('Total number of bots: %d', len(outcome['bots']))
  total_good_bots = set(outcome['bots'])
  for test_name, test_results in outcome['results'].items():
    if test_name == 'Basic':
      # Ignore Basic test results. They seem more unstable.
      continue
    MarkSection()
    logging.debug('Results for test: %s', test_name)
    bots_considered = 0
    stdev_list = []
    good_bots = []
    for bot_name, bot_results in test_results.items():
      if len(bot_results) < MIN_RUNS_PER_BOT:
        continue
      stdev = Stdev(bot_results)
      stdev_list.append(stdev)
      if bot_name in bad_bots:
        continue
      bot_results.sort()
      bots_considered = bots_considered + 1
      mean = Mean(bot_results)
      if stdev < STDEV_GOOD_BOT_THRESHOLD:
        good_bots.append(bot_name)
        logging.debug('Potential good bot %s: mean = %f, stdev = %f', bot_name,
                      mean, stdev)
        outliers = GetOutliers(bot_results, GOOD_BOT_RANGE_PERC)
        if outliers:
          logging.debug('Good bot %s: %d runs out of %d%% range', bot_name,
                        len(outliers), GOOD_BOT_RANGE_PERC * 100)
      else:
        outliers = GetOutliers(bot_results, REGULAR_BOT_RANGE_PERC)
        if outliers:
          logging.debug('Regular bot %s: %d runs out of %d%% range', bot_name,
                        len(outliers), REGULAR_BOT_RANGE_PERC * 100)
    total_good_bots &= set(good_bots)
    logging.debug('Total bots considered: %d', bots_considered)
    logging.debug('Good bots: %d', len(good_bots))
    logging.debug('%s', good_bots)
    logging.debug('Average per bot stdev: %f', Mean(stdev_list))
  MarkSection()
  total_good_bots = list(total_good_bots)
  total_good_bots.sort()
  logging.debug('All potential good bots: %d', len(total_good_bots))
  logging.debug('%s', total_good_bots)
  MarkSection()
  for bot in total_good_bots:
    build_numbers = GetBotBuilds(jsons, bot)
    build_numbers.sort()
    logging.debug('Good bot %s builds: %s', bot, build_numbers)
  return total_good_bots
# pylint: enable=too-many-locals


# This could definitely use some refactoring to be more readable and make
# pylint happier, but currently difficult to change confidently without any
# unittests.
# pylint: disable=too-many-locals,too-many-branches,too-many-statements
def RunExperiment_BestVariations(jsons, find_m_bots, variation_threshold):
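  """Searches for |find_m_bots| bots whose results are mutually consistent.

  Per test: drop the bots with the worst stdev, then slide a window of size
  |find_m_bots| over the sorted per-bot means and keep the tightest cluster.
  Bots present in every test's cluster are selected, and their pooled
  results are validated against |variation_threshold|.
  """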
  GET_RID_OF_N_BOTS_WITH_WORST_STDEV = 10

  MarkExperiment('Find %d bots with best variations, threshold = %0.2f%%' %
                 (find_m_bots, (variation_threshold * 100)))

  outcome = ProcessJsonData(
      jsons, per_bot=True, repeat_strategy=RepeatStrategy.COUNT_MINIMUM)
  candidates_per_test = {}
  candidate_bots_per_test = {}
  for test_name, test_results in outcome['results'].items():
    if test_name == 'Basic':
      # Ignore Basic test results. They seem more unstable.
      continue
    bots_considered = 0
    candidates = []
    stdev_list = []
    # Remove N bots with worst stdev
    for bot_name, bot_results in test_results.items():
      if len(bot_results) < MIN_RUNS_PER_BOT:
        continue
      bots_considered = bots_considered + 1
      mean = Mean(bot_results)
      stdev = Stdev(bot_results)
      candidates.append({
          'bot': bot_name,
          'mean': mean,
          'stdev': stdev,
          'data': bot_results,
      })
      stdev_list.append(stdev)
    stdev_list.sort()
    # Keep only bots strictly below the Nth-worst stdev. This assumes there
    # are no exact ties at the guard value; the assert below checks the count.
    guard_stdev = stdev_list[-GET_RID_OF_N_BOTS_WITH_WORST_STDEV]
    candidates_with_good_stdev = []
    mean_list = []
    for candidate in candidates:
      if candidate['stdev'] < guard_stdev:
        candidates_with_good_stdev.append(candidate)
        mean_list.append(candidate['mean'])
    assert (len(candidates) - GET_RID_OF_N_BOTS_WITH_WORST_STDEV ==
            len(candidates_with_good_stdev))

    assert len(candidates_with_good_stdev) > find_m_bots

    # Find the find_m_bots bots with the minimum range of means: slide a
    # window of that size over the sorted means and keep the tightest one.
    mean_list.sort()
    min_range = mean_list[-1] - mean_list[0]
    candidate_index = 0
    for low_index in range(len(candidates_with_good_stdev) - find_m_bots + 1):
      high_index = low_index + find_m_bots - 1
      mean_range = mean_list[high_index] - mean_list[low_index]
      if mean_range < min_range:
        min_range = mean_range
        candidate_index = low_index
    min_mean = mean_list[candidate_index]
    max_mean = mean_list[candidate_index + find_m_bots - 1]
    candidates = []
    candidate_bots = []
    for candidate in candidates_with_good_stdev:
      if candidate['mean'] >= min_mean and candidate['mean'] <= max_mean:
        candidates.append(candidate)
        candidate_bots.append(candidate['bot'])
    assert len(candidates) == find_m_bots
    candidate_bots_per_test[test_name] = set(candidate_bots)
    candidates_per_test[test_name] = candidates

  # Now find the list of bots that work well for all tests.
  selected_bots = None
  for bots in candidate_bots_per_test.values():
    if selected_bots is None:
      selected_bots = bots
    else:
      selected_bots = selected_bots & bots
  logging.debug('Intended to find %d bots, actually found %d', find_m_bots,
                len(selected_bots))
  selected_bots = list(selected_bots)
  selected_bots.sort()
  logging.debug(selected_bots)

  # Validate: check variations are within a range
  for test_name, candidates in candidates_per_test.items():
    MarkSection()
    results = []
    for candidate in candidates:
      if candidate['bot'] in selected_bots:
        results.extend(candidate['data'])
    mean = Mean(results)
    stdev = Stdev(results)
    logging.debug('Validate test %s: mean = %f, stdev = %f', test_name, mean,
                  stdev)
    outliers = GetOutliers(results, variation_threshold)
    if outliers:
      # Find corresponding builds
      builds = []
      for outlier in outliers:
        build = FindBuild(jsons, selected_bots, test_name, outlier)
        assert build is not None
        builds.append(build)
      logging.debug('%d runs out of %d are not within %0.2f%% range: %s',
                    len(outliers), len(results), (variation_threshold * 100),
                    outliers)
      logging.debug(builds)
# pylint: enable=too-many-locals,too-many-branches,too-many-statements


def main():
  logging.basicConfig(level=logging.DEBUG)

  jsons = LoadResultsJsonFiles()
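
  # Run the bad-bot / good-bot experiments under each repeat strategy, then
  # search for the most mutually consistent set of bots.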

  bad_bots = RunExperiment_BadBots(jsons, 0.5, RepeatStrategy.COUNT_EACH)
  RunExperiment_GoodBots(jsons, bad_bots, RepeatStrategy.COUNT_EACH)

  bad_bots = RunExperiment_BadBots(jsons, 0.5, RepeatStrategy.COUNT_AVERAGE)
  RunExperiment_GoodBots(jsons, bad_bots, RepeatStrategy.COUNT_AVERAGE)

  bad_bots = RunExperiment_BadBots(jsons, 0.5, RepeatStrategy.COUNT_MEDIAN)
  RunExperiment_GoodBots(jsons, bad_bots, RepeatStrategy.COUNT_MEDIAN)

  bad_bots = RunExperiment_BadBots(jsons, 0.5,
                                   RepeatStrategy.COUNT_MINIMUM_FIRST_TWO)
  RunExperiment_GoodBots(jsons, bad_bots,
                         RepeatStrategy.COUNT_MINIMUM_FIRST_TWO)

  bad_bots = RunExperiment_BadBots(jsons, 0.5, RepeatStrategy.COUNT_MINIMUM)
  RunExperiment_GoodBots(jsons, bad_bots, RepeatStrategy.COUNT_MINIMUM)

  RunExperiment_BestVariations(jsons, 25, 0.12)

  return 0


if __name__ == '__main__':
  sys.exit(main())