chromium/tools/perf/cli_tools/tbmv3/trace_downloader.py

# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import os

from py_utils import cloud_storage

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_TRACE_DIR = os.path.join(_SCRIPT_DIR, 'traces')

HTML_URL_PREFIX = ('https://storage.cloud.google.com/chrome-telemetry-output/')


def _GetSubpathInBucket(html_url):
  """Returns the path minus the HTML_URL_PREFIX.

  Given https://storage.../chrome-telemetry-output/foo/bar/trace.html,
  it returns foo/bar/trace.html."""
  if not html_url.startswith(HTML_URL_PREFIX):
    raise Exception('Html trace url must start with %s' % HTML_URL_PREFIX)
  return html_url.replace(HTML_URL_PREFIX, "")


def _GetProtoTraceLinkFromTraceEventsDir(link_prefix):
  """Returns the first proto trace in |link_prefix|/trace/traceEvents/"""
  proto_link_prefix = '/'.join([link_prefix, 'trace/traceEvents/**'])
  try:
    for link in cloud_storage.List(cloud_storage.TELEMETRY_OUTPUT,
                                   proto_link_prefix):
      if link.endswith('.pb.gz') or link.endswith('.pb'):
        return link[1:]  # Strip the initial '/'.
  except cloud_storage.NotFoundError as e:
    # This directory doesn't exist at all.
    raise cloud_storage.NotFoundError('No URLs match the prefix %s: %s' %
                                      (proto_link_prefix, str(e)))
  # The directory exists, but no proto trace found.
  raise cloud_storage.NotFoundError(
      'Proto trace not found in cloud storage. Path: %s.' % proto_link_prefix)


def GetFileExtension(file_path):
  """Given foo/bar/baz.pb.gz, returns '.pb.gz'."""
  # Get the filename only because the directory names can contain "." like
  # "v8.browsing".
  filename = file_path.split('/')[-1]
  first_dot_index = filename.find('.')
  if first_dot_index == -1:
    return ''
  return filename[first_dot_index:]


def GetLocalTraceFileName(html_url):
  """Returns a local filename derived from the html trace url.

  Given https://storage.../chrome-telemetry-output/foo/bar/trace.html, it
  returns foo_bar_trace as the local filename. The filename does not contain
  extensions. It's up to the caller to add .html or .pb etc."""
  subpath = _GetSubpathInBucket(html_url)
  extension = GetFileExtension(subpath)
  no_extension_subpath = subpath[:-len(extension)]
  return '_'.join(no_extension_subpath.split('/'))


def FindProtoTracePath(html_url):
  """
  Finds the proto trace path given a html trace url.

  In the simple case foo/bar/trace.pb is the proto trace for foo/bar/trace.html.
  But sometimes that's not available so we have to look for a .pb.gz file in a
  special directory."""
  subpath = _GetSubpathInBucket(html_url)
  if subpath.endswith('trace.html'):
    proto_path = subpath.replace('trace.html', 'trace.pb')
    if cloud_storage.Exists(cloud_storage.TELEMETRY_OUTPUT, proto_path):
      return proto_path
    proto_path += '.gz'
    if cloud_storage.Exists(cloud_storage.TELEMETRY_OUTPUT, proto_path):
      return proto_path

  directory_path = '/'.join(subpath.split('/')[:-1])
  return _GetProtoTraceLinkFromTraceEventsDir(directory_path)


def DownloadHtmlTrace(html_url, download_dir=DEFAULT_TRACE_DIR):
  """Downloads html trace given the url. Returns local path.

  Skips downloading if file was already downloaded once."""
  local_filename = os.path.join(download_dir, GetLocalTraceFileName(html_url))
  local_path = local_filename + '.html'
  if os.path.exists(local_path):
    logging.info('%s already downloaded. Skipping.' % local_path)
    return local_path

  remote_path = _GetSubpathInBucket(html_url)
  if not cloud_storage.Exists(cloud_storage.TELEMETRY_OUTPUT, remote_path):
    raise cloud_storage.NotFoundError(
        'HTML trace %s not found in cloud storage.' % html_url)

  cloud_storage.Get(cloud_storage.TELEMETRY_OUTPUT, remote_path, local_path)
  return local_path


def DownloadProtoTrace(html_url, download_dir=DEFAULT_TRACE_DIR):
  """Downloads the associated proto trace for html trace url. Returns path.

  Skips downloading if file was already downloaded once."""
  local_filename = os.path.join(download_dir, GetLocalTraceFileName(html_url))
  for local_path in [local_filename + '.pb', local_filename + '.pb.gz']:
    if os.path.exists(local_path):
      logging.info('%s already downloaded. Skipping.' % local_path)
      return local_path

  remote_path = FindProtoTracePath(html_url)
  extension = GetFileExtension(remote_path)
  local_path = local_filename + extension

  cloud_storage.Get(cloud_storage.TELEMETRY_OUTPUT, remote_path, local_path)
  return local_path