chromium/content/test/gpu/find_unknown_typ_tags.py

#!/usr/bin/env vpython3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script for comparing known typ tags to what's generated on the bots.

This is important for making unknown generated tags fatal, as we need an easy
way to check if any bots are producing tags we don't know about.

Depends on the `bq` tool, which is available as part of the Google Cloud SDK:
https://cloud.google.com/sdk/docs/quickstarts.
"""

import argparse
import json
import os
import subprocess

from gpu_path_util import setup_typ_paths  # pylint: disable=unused-import

from gpu_tests import gpu_integration_test

from typ import expectations_parser

# BigQuery standard SQL query template. `{table}` is filled in with the fully
# qualified name of a test results table via str.format(). For every row in the
# table whose partition_time falls within the last 30 days, the `tags` CTE
# collects the values of all tags whose key is "typ_tag" into an array. The
# outer SELECT then unnests those arrays and returns each distinct value once.
BQ_QUERY_TEMPLATE = """\
WITH
    tags AS (
        SELECT (
            ARRAY(
                SELECT value
                FROM tr.tags
                WHERE key = "typ_tag"
            )
        ) AS typ_tags
        FROM `{table}` tr
        WHERE DATE(partition_time) > DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)
    )
SELECT DISTINCT typ_tags
FROM tags
CROSS JOIN UNNEST(tags.typ_tags) as typ_tags
"""
# Value passed to bq's --max_rows flag. Presumably chosen as the largest signed
# 32-bit integer so that query results are never truncated -- TODO confirm.
MAX_ROWS = (2**31) - 1


def ParseArgs():
  """Parses the script's command line arguments from sys.argv.

  Returns:
    An argparse.Namespace whose `project` attribute holds the value given to
    the required --project flag.
  """
  # The summary must be passed as |description|. ArgumentParser's first
  # positional parameter is |prog|, so passing the text positionally would
  # replace the program name in the usage line instead of describing the tool.
  parser = argparse.ArgumentParser(
      description='Script for finding cases where the typ tags generated on '
      'the bots and the typ tags we know about are out of sync.')
  parser.add_argument('--project',
                      required=True,
                      help='The billing project to use for BigQuery queries. '
                      'Must have access to the ResultDB BQ tables, e.g. '
                      '"chrome-luci-data.chromium.gpu_ci_test_results".')
  return parser.parse_args()


def _GetUsedTags():
  """Returns the set of tags declared in the info collection expectations.

  The file gpu_tests/test_expectations/info_collection_expectations.txt, found
  relative to this script, is parsed and the tags from all of its tag sets are
  merged into a single set.
  """
  # Tags are synced between all expectation files, so parsing any single one
  # of them is enough.
  script_dir = os.path.dirname(__file__)
  path = os.path.join(script_dir, 'gpu_tests', 'test_expectations',
                      'info_collection_expectations.txt')
  with open(path) as infile:
    contents = infile.read()
  parsed = expectations_parser.TaggedTestListParser(contents)
  # Flatten the per-group tag sets into one set.
  return set().union(*parsed.tag_sets)


def _GetGeneratedTags(args):
  """Helper function to get all currently generated tags from bots.

  Runs BQ_QUERY_TEMPLATE through the `bq` command line tool once for the GPU CI
  results table and once for the GPU try results table, and merges the results.

  Args:
    args: Parsed command line arguments. `args.project` is passed to `bq` as
        the --project_id.

  Returns:
    A set containing every value of every row in the JSON output of the
    queries.

  Raises:
    subprocess.CalledProcessError: if `bq` exits with a non-zero status. The
        stdout captured from `bq` is printed before the error is re-raised.
  """
  tables = (
      'chrome-luci-data.chromium.gpu_ci_test_results',
      'chrome-luci-data.chromium.gpu_try_test_results',
  )
  generated_tags = set()
  for table in tables:
    cmd = [
        'bq',
        'query',
        '--max_rows=%d' % MAX_ROWS,
        '--format=json',
        '--project_id=%s' % args.project,
        '--use_legacy_sql=false',
        BQ_QUERY_TEMPLATE.format(table=table),
    ]
    try:
      # stderr is discarded so that only the JSON written to stdout is
      # captured.
      stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
      # The captured output is bytes; decode it so that it is printed as
      # readable text rather than as a b'...' repr with escaped newlines.
      print(e.output.decode('utf-8', errors='replace'))
      raise
    # The JSON output is a list of objects, one per row.
    for row in json.loads(stdout):
      generated_tags.update(row.values())
  return generated_tags


def main():
  """Compares known typ tags against generated ones and prints the differences.

  Known tags are the union of the tags found in the expectation file and the
  tags ignored by GpuIntegrationTest. Generated tags are those returned by the
  BigQuery queries. Tags present on only one side are printed, sorted so that
  the output is stable from run to run.
  """
  args = ParseArgs()

  used_tags = _GetUsedTags()
  # Get the list of ignored tags from the GPU tests.
  ignored_tags = set(gpu_integration_test.GpuIntegrationTest.IgnoredTags())
  generated_tags = _GetGeneratedTags(args)
  known_tags = used_tags | ignored_tags

  unused_tags = generated_tags - known_tags
  stale_tags = known_tags - generated_tags

  if unused_tags:
    print('Tags that were generated but unused:')
    # Set iteration order varies between runs, so sort for stable output.
    for t in sorted(unused_tags):
      print(t)
    print('')

  if stale_tags:
    print('Tags that are known but not generated:')
    for t in sorted(stale_tags):
      print(t)

  if not (unused_tags or stale_tags):
    print('Known and generated tags are in sync.')


# Only run the comparison when executed as a script, not when imported.
if __name__ == '__main__':
  main()