# wpt_manifest.py | Explore in Territory

# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""WPTManifest is responsible for handling MANIFEST.json.

The MANIFEST.json file contains metadata about files in web-platform-tests,
such as what tests exist, and extra information about each test, including
test type, options, URLs to use, and reference file paths if applicable.

Naming conventions:
* A (file) path is a relative file system path from the root of WPT.
* A (test) URL is the path (with an optional query string) to the test on
  wptserve relative to url_base.
Neither has a leading slash.
"""

import json
import logging
from typing import (
    Any,
    Dict,
    List,
    Literal,
    NamedTuple,
    Optional,
    Sequence,
    Tuple,
)

from blinkpy.common.memoized import memoized
from blinkpy.common.path_finder import PathFinder

_log = logging.getLogger(__name__)

# The default filename of manifest expected by `wpt`.
MANIFEST_NAME = 'MANIFEST.json'

# Generating the WPT manifest entirely from scratch is a slow process; it takes
# >10 seconds real-time on a powerful Linux desktop. To avoid paying this cost,
# we keep a cached version of the manifest in the source tree, the 'base
# manifest', and update it automatically whenever we import WPT. We utilize a
# separate file for this and then copy it to MANIFEST_NAME so that modifications
# or corruptions (which often happen if the test runner is killed by the user
# mid-run) do not cause trouble.
#
# The filename used for the base manifest includes the version as a
# workaround for trouble landing huge changes to the base manifest when
# the version changes. See https://crbug.com/876717.
#
# NOTE: If this is changed, be sure to update other instances of
# "WPT_BASE_MANIFEST_8" in the code.
BASE_MANIFEST_NAME = 'WPT_BASE_MANIFEST_8.json'

# TODO(robertma): Use the official wpt.manifest module.


Relation = Literal['==', '!=']
Reference = Tuple[str, Relation]

FuzzyRange = Tuple[int, int]
FuzzyParameters = Tuple[Optional[FuzzyRange], Optional[FuzzyRange]]


class _Test(NamedTuple):
    """Per-test record built from a single manifest item."""
    # Path of the file defining the test. Stored as `None` whenever it is
    # identical to the test URL (true for most tests) to save space.
    file_path: Optional[str]
    # The WPT test type this item was listed under (e.g. 'reftest').
    test_type: str
    # (reference URL, relation) pairs taken from the manifest item.
    references: List[Reference]
    # The item's `extras` dict, kept exactly as found in the manifest.
    extras: Dict[str, Any]

    @property
    def slow(self) -> bool:
        """Whether the item's extras request the long timeout."""
        timeout = self.extras.get('timeout')
        return timeout == 'long'

    @property
    def pac(self) -> Optional[str]:
        """The 'pac' value from the item's extras, or None if absent."""
        return self.extras.get('pac', None)

    @property
    def fuzzy_params(self) -> FuzzyParameters:
        """The (max difference, total pixels) ranges of the first fuzzy entry.

        Returns:
            `(None, None)` when the extras have no (or empty) 'fuzzy' data;
            otherwise the two ranges of the first entry, each of which must
            have exactly two elements.
        """
        fuzzy_entries = self.extras.get('fuzzy')
        if fuzzy_entries:
            # Only the first entry is consulted; its first element (the key
            # identifying the reference) is ignored.
            _key, ranges = fuzzy_entries[0]
            max_diff, total_pixels = ranges
            assert len(max_diff) == 2, max_diff
            assert len(total_pixels) == 2, total_pixels
            return max_diff, total_pixels
        return None, None

    @property
    def jsshell(self) -> bool:
        """Whether this manifest item is a jsshell test.

        "jsshell" is one of the scopes generated automatically from .any.js
        tests. Such tests are meant for a thin JavaScript shell rather than a
        full browser, so web tests usually ignore them. (crbug.com/871950)
        """
        is_jsshell = self.extras.get('jsshell', False)
        return is_jsshell


class WPTManifest:
    """A simple abstraction of WPT MANIFEST.json.

    The high-level structure of the manifest is as follows:
        {
            "items": {
                "crashtest": {
                    "dir1": {
                        "dir2": {
                            "filename1": [
                                "git object ID",
                                [manifest item],
                                [manifest item],
                                ...
                            ],
                        },
                    },
                },
                "manual": {...},
                "reftest": {...},
                "print-reftest": {...},
                "testharness": {...},
            },
            // other info...
        }

    The 'git object ID' is the ID the git repository has assigned to the file
    blob, i.e. via git hash-object.

    The format of a manifest item depends on:
        https://github.com/web-platform-tests/wpt/blob/master/tools/manifest/item.py
    which can be roughly summarized as follows:
        * testharness test: [url, extras]
        * reftest: [url, references, extras]
        * print-reftest: [url, references, extras]
    where `extras` is a dict with the following optional items:
        * testharness test: {"timeout": "long", "testdriver": True}
        * reftest: {"timeout": "long", "viewport_size": ..., "dpi": ...}
        * print-reftest: {"timeout": "long", "viewport_size": ..., "dpi": ..., "page_ranges": ...}
    and `references` is a list that looks like:
        [[reference_url1, "=="], [reference_url2, "!="], ...]
    """

    def __init__(self,
                 raw_dict,
                 wpt_dir: str,
                 test_types: Optional[Sequence[str]] = None,
                 exclude_jsshell: bool = True):
        self._raw_dict = raw_dict
        self.wpt_dir = wpt_dir
        self.test_types = test_types or (
            'manual',
            'reftest',
            'print-reftest',
            'testharness',
            'crashtest',
        )
        self._tests_by_url = {}
        self._exclude_jsshell = exclude_jsshell

        items = self._raw_dict.get('items', {})
        for test_type in self.test_types:
            self._map_tests(test_type, items.get(test_type, {}))

    def _map_tests(self, test_type: str, trie, path: str = ''):
        """Record tests present in a trie for some test type.

        Arguments:
            test_type: The WPT test type.
            trie: Either:
              * A list, which represents a test file (a leaf in the trie).
              * A map representing a test directory. It maps the next path
                component to the corresponding child.
            path: The path so far to this test file or directory.

        Note:
            When constructing the external manifest, this recursive walk must
            traverse all 50k+ items, so it impacts the startup performance of
            many tools.
        """
        if isinstance(trie, dict):
            for component, child in trie.items():
                # URLs always use `/` for path separators. Don't add a leading
                # `/`, since that's the convention in `blinkpy` for test paths.
                child_path = f'{path}/{component}' if path else component
                self._map_tests(test_type, child, child_path)
            return

        assert len(trie) >= 2, f'{trie!r} must contain at least one test'
        # Ignore the first element, which is the file's Git tree ID.
        for url, *maybe_refs, extras in trie[1:]:
            assert len(maybe_refs) <= 1, f'extra item data: {maybe_refs!r}'
            refs = maybe_refs[0] if maybe_refs else []
            # To save space, the v8 manifest omits the URL if it's
            # identical to the file path, which it is for most tests.
            if url:
                # Trim any leading `/`, which WPT URLs use by convention.
                if url.startswith('/'):
                    url = url[1:]
                test = _Test(path, test_type, refs, extras)
            else:
                url, test = path, _Test(None, test_type, refs, extras)
            assert url not in self._tests_by_url, f'duplicate URL {url!r}'
            if not self._exclude_jsshell or not test.jsshell:
                self._tests_by_url[url] = test

    @classmethod
    def from_file(cls,
                  port,
                  manifest_path: str,
                  test_types: Optional[Sequence[str]] = None,
                  exclude_jsshell: bool = True) -> 'WPTManifest':
        fs = port.host.filesystem
        with fs.open_text_file_for_reading(manifest_path) as manifest_file:
            raw_dict = json.load(manifest_file)
        return cls(raw_dict,
                   fs.dirname(fs.relpath(manifest_path, port.web_tests_dir())),
                   test_types, exclude_jsshell)

    @memoized
    def all_urls(self):
        """Returns a set of the URLs for all items in the manifest."""
        return frozenset(self._tests_by_url)

    def get_test_type(self, url: str) -> Optional[str]:
        """Returns the test type of the given test file path."""
        assert not url.startswith('/')
        test = self._tests_by_url.get(url)
        return test and test.test_type

    def is_test_file(self, file_path: str) -> bool:
        """Checks if file_path is a test file according to the manifest."""
        assert not file_path.startswith('/')
        components = file_path.split('/')
        assert components, file_path
        tries_by_type = self._raw_dict.get('items', {})
        return any(
            self._contains_file(tries_by_type.get(test_type, {}), components)
            for test_type in self.test_types)

    def _contains_file(self, trie, components: Sequence[str]) -> bool:
        """Determine if a test trie contains a test file."""
        if isinstance(trie, list):
            # Not a test file if there are still components at a leaf.
            return not bool(components)
        if not components:
            # This is a test directory, not a test file.
            return False
        next_component, *rest = components
        child = trie.get(next_component)
        return bool(child) and self._contains_file(child, rest)

    def is_test_url(self, url):
        """Checks if url is a valid test in the manifest."""
        assert not url.startswith('/')
        return url in self.all_urls()

    def is_crash_test(self, url):
        """Checks if a WPT is a crashtest according to the manifest."""
        return self.get_test_type(url) == 'crashtest'

    def is_manual_test(self, url):
        """Checks if a WPT is a manual according to the manifest."""
        return self.get_test_type(url) == 'manual'

    def is_print_reftest(self, url):
        """Checks if a WPT is a print reftest according to the manifest."""
        return self.get_test_type(url) == 'print-reftest'

    def is_slow_test(self, url):
        """Checks if a WPT is slow (long timeout) according to the manifest.

        Args:
            url: A WPT URL.

        Returns:
            True if the test is found and is slow, False otherwise.
        """
        test = self._tests_by_url.get(url)
        return test.slow if test else False

    def extract_test_pac(self, url):
        """Get the proxy configuration (PAC) for the test

        Args:
            url: A WPT URL.

        Returns:
            A relative PAC url if noted by the test, None otherwise.
        """
        test = self._tests_by_url.get(url)
        return test and test.pac

    def extract_reference_list(self, url: str) -> List[Tuple[Relation, str]]:
        """Extracts reference information of the specified (print) reference test.

        The return value is a list of (match/not-match, reference path in wpt)
        pairs, like:
           [("==", "/foo/bar/baz-match.html"),
            ("!=", "/foo/bar/baz-mismatch.html")]
        """
        test = self._tests_by_url.get(url)
        if not test:
            return []
        return [(relation, ref) for ref, relation in test.references]

    def extract_fuzzy_metadata(self, url: str) -> FuzzyParameters:
        """Extracts the fuzzy reftest metadata for the specified (print) reference test.

        Although WPT supports multiple fuzzy references for a given test (one
        for each reference file), blinkpy only supports a single reference per
        test. As such, we just return the first fuzzy reference that we find.

        FIXME: It is possible for the references and the fuzzy metadata to be
        listed in different orders, which would then make our 'choose first'
        logic incorrect. Instead we should return a dictionary and let our
        caller select the reference being used.

        See https://web-platform-tests.org/writing-tests/reftests.html#fuzzy-matching

        Args:
            url: A WPT URL.

        Returns:
            A pair of lists representing the maxDifference and totalPixel ranges
            for the test. If the test isn't a reference test or doesn't have
            fuzzy information, a pair of Nones are returned.
        """
        test = self._tests_by_url.get(url)
        test_type = self.get_test_type(url)
        if test_type not in {'reftest', 'print-reftest'}:
            return None, None
        return test.fuzzy_params

    def file_path_for_test_url(self, url: str) -> Optional[str]:
        """Finds the file path for the given test URL.

        Args:
            url: a WPT test URL.

        Returns:
            The path to the file containing this test URL, or None if not found.
        """
        test = self._tests_by_url.get(url)
        return (test.file_path or url) if test else None

    @staticmethod
    def ensure_manifest(port, path=None):
        """Regenerates the WPT MANIFEST.json file.

        Any existing MANIFEST.json under the WPT root is deleted first. For
        roots under `external`, the checked-in base manifest is copied into
        place when it exists. The manifest is then (re)generated, and if no
        manifest file exists afterwards an empty `{}` manifest is written.

        Args:
            port: A blinkpy.web_tests.port.Port object.
            path: The path to a WPT root (relative to web_tests, optional).
                Defaults to `external/wpt`. Roots that are neither under
                `external` nor `wpt_internal` are generated with the root
                URL base `/`.
        """
        fs = port.host.filesystem
        if path is None:
            path = fs.join('external', 'wpt')
        wpt_path = fs.join(port.web_tests_dir(), path)
        manifest_path = fs.join(wpt_path, MANIFEST_NAME)

        # Unconditionally delete local MANIFEST.json to avoid regenerating the
        # manifest from scratch (when version is bumped) or invalid/out-of-date
        # local manifest breaking the runner.
        if fs.exists(manifest_path):
            _log.debug('Removing existing manifest file "%s".', manifest_path)
            fs.remove(manifest_path)

        # TODO(crbug.com/853815): perhaps also cache the manifest for wpt_internal.
        #
        # `url_base` should match those of `web_tests/wptrunner.blink.ini` (or
        # the implicit root `/` URL base).
        #
        # Start from the implicit root so `url_base` is always bound. Without
        # this default, a `path` outside `external`/`wpt_internal` raised
        # UnboundLocalError below, after the old manifest was already removed.
        url_base = '/'
        if path.startswith('external'):
            base_manifest_path = fs.join(port.web_tests_dir(), 'external',
                                         BASE_MANIFEST_NAME)
            if fs.exists(base_manifest_path):
                _log.debug('Copying base manifest from "%s" to "%s".',
                           base_manifest_path, manifest_path)
                fs.copyfile(base_manifest_path, manifest_path)
            else:
                _log.error('Manifest base not found at "%s".',
                           base_manifest_path)
        elif path.startswith('wpt_internal'):
            url_base = '/wpt_internal/'

        WPTManifest.generate_manifest(port, wpt_path, url_base)

        if fs.isfile(manifest_path):
            _log.info(
                f'Manifest generation completed for {url_base!r} ({path})')
        else:
            # Leave a parseable (empty) manifest behind so later readers do
            # not fail on a missing file.
            _log.error(
                f'Manifest generation failed for {url_base!r} ({path}); '
                'creating an empty MANIFEST.json...')
            fs.write_text_file(manifest_path, '{}')

    @staticmethod
    def generate_manifest(port, dest_path, url_base: str = '/'):
        """Runs the `wpt manifest` command for a WPT root.

        Args:
            port: Object providing `host.filesystem`, `host.executive`, and
                `python3_command()`.
            dest_path: The WPT root, passed to the command as `--tests-root`.
            url_base: The URL base, passed to the command as `--url-base`.
        """
        finder = PathFinder(port.host.filesystem)
        wpt_exec_path = finder.path_from_chromium_base('third_party',
                                                       'wpt_tools', 'wpt',
                                                       'wpt')
        cmd = [port.python3_command(), wpt_exec_path, 'manifest']
        cmd += ['-v', '--no-download']
        cmd += [f'--tests-root={dest_path}', f'--url-base={url_base}']

        # ScriptError will be raised if the command fails.
        # This will also include stderr in the exception message.
        output = port.host.executive.run_command(cmd, timeout_seconds=600)
        if not output:
            return
        _log.debug('Output: %s', output)
# chromium/third_party/blink/tools/blinkpy/w3c/wpt_manifest.py