chromium/third_party/wpt_tools/wpt/tools/gitignore/gitignore.py

import re
import os
import itertools
from collections import defaultdict
from typing import (Any, Dict, Iterable, List, MutableMapping, Optional, Pattern, Tuple, TypeVar,
                    Union, cast)


# Type variable for the payload paired with each name in the
# ``(name, payload)`` tuples that PathFilter.filter passes through.
T = TypeVar('T')

# Matches, anchored at the end of the string, zero or more two-character
# groups each made of a non-backslash character followed by a whitespace
# character. Note this is a str pattern while the rest of this module works
# on bytes, and it is not referenced anywhere in the visible part of this file.
# NOTE(review): presumably intended for gitignore trailing-space handling --
# confirm whether it is still needed.
end_space = re.compile(r"([^\\]\s)*$")


def fnmatch_translate(pat: bytes) -> Tuple[bool, Pattern[bytes]]:
    """Translate a gitignore-style glob into a compiled bytes regex.

    Supported syntax is ``*`` and ``?`` (neither matches ``/``), ``**`` as a
    complete path component, ``[...]`` character classes (negated with a
    leading ``!`` or ``^``) and backslash escapes. A character class never
    matches ``/``.

    :param pat: The glob to translate (without any leading ``!``).
    :returns: A ``(name_pattern, regex)`` tuple. ``name_pattern`` is True when
        ``pat`` contains no unescaped ``/`` outside a character class other
        than a leading or trailing one; in that case the regex starts with a
        plain ``^`` and is meant to be matched against a bare name.
        Otherwise the regex is meant to be matched against a
        ``/``-separated path.
    :raises ValueError: If the pattern ends in a lone backslash, misuses
        ``**``, contains an unterminated, nested or empty negated character
        class, or does not compile to a valid regex.
    """
    original = pat
    parts: List[bytes] = []
    # Index in `pat` at which the currently open character class started, or
    # None when not inside a class. Only ever compared against None.
    seq: Optional[int] = None
    i = 0
    any_char = b"[^/]"
    # Opening token of a character class. The negative lookahead prevents the
    # class (in particular a negated one) from matching a path separator,
    # consistent with how * and ? are translated.
    seq_open = b"(?!/)["
    if pat[0:1] == b"/":
        parts.append(b"^")
        pat = pat[1:]
    else:
        # By default match the entire path up to a /
        # but if / doesn't appear in the pattern we will mark it as
        # a name pattern and just produce a pattern that matches against
        # the filename
        parts.append(b"^(?:.*/)?")

    name_pattern = True
    if pat[-1:] == b"/":
        # If the last character is / match this directory or any subdirectory
        pat = pat[:-1]
        suffix = b"(?:/|$)"
    else:
        suffix = b"$"
    while i < len(pat):
        c = pat[i:i+1]
        if c == b"\\":
            # A backslash makes the following character literal, both inside
            # and outside a character class.
            if i < len(pat) - 1:
                i += 1
                c = pat[i:i+1]
                parts.append(re.escape(c))
            else:
                raise ValueError("pattern %r ends with a dangling backslash" % (original,))
        elif seq is not None:
            # TODO: this doesn't really handle invalid sequences in the right way
            if c == b"]":
                seq = None
                if parts[-1] == seq_open:
                    # "[]" contributes nothing; drop the opening token again
                    parts.pop()
                elif parts[-1] == b"^" and parts[-2] == seq_open:
                    raise ValueError("pattern %r contains an empty negated character class" % (original,))
                else:
                    parts.append(c)
            elif c == b"-":
                # Left unescaped so that it can form a range
                parts.append(c)
            elif c == b"[":
                raise ValueError("pattern %r contains a nested '['" % (original,))
            else:
                parts.append(re.escape(c))
        elif c == b"[":
            parts.append(seq_open)
            if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"):
                parts.append(b"^")
                i += 1
            seq = i
        elif c == b"*":
            if i < len(pat) - 1 and pat[i+1:i+2] == b"*":
                # "**" is only accepted as a whole path component: it must be
                # at the start or follow "/", and be at the end or precede "/"
                if i > 0 and pat[i-1:i] != b"/":
                    raise ValueError("'**' must follow '/' in pattern %r" % (original,))
                parts.append(b".*")
                i += 1
                if i < len(pat) - 1 and pat[i+1:i+2] != b"/":
                    raise ValueError("'**' must be followed by '/' in pattern %r" % (original,))
            else:
                parts.append(any_char + b"*")
        elif c == b"?":
            parts.append(any_char)
        elif c == b"/":
            # Only reached outside a character class (see the branch above)
            name_pattern = False
            parts.append(c)
        else:
            parts.append(re.escape(c))
        i += 1

    if name_pattern:
        # Name patterns are matched against a bare name, so the optional
        # leading-directories prefix is replaced by a plain anchor
        parts[0] = b"^"

    if seq is not None:
        raise ValueError("pattern %r contains an unterminated character class" % (original,))
    parts.append(suffix)
    try:
        return name_pattern, re.compile(b"".join(parts))
    except Exception as e:
        # Any compilation failure is reported as ValueError, keeping the
        # underlying error attached as the cause
        raise ValueError("pattern %r does not translate to a valid regex" % (original,)) from e

# Regexp matching rules that have to be converted to patterns: any line that
# contains at least one of the glob metacharacters "*", "[" or "?".
pattern_re = re.compile(br".*[\*\[\?]")


def parse_line(line: bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]]:
    """Parse a single gitignore line into a rule description.

    :param line: One raw line; trailing whitespace (including the newline) is
        stripped before parsing.
    :returns: None for blank lines and ``#`` comments, otherwise a tuple
        ``(invert, dir_only, literal, pattern)`` where

        * ``invert`` is True when the line started with ``!``;
        * ``dir_only`` is True when the rule ended with ``/``;
        * ``literal`` is True when ``pattern`` is a plain path split at its
          last ``/`` into ``(name,)`` or ``(directory, name)``, and False
          when ``pattern`` is the ``(name_pattern, regex)`` result of
          fnmatch_translate.
    :raises ValueError: Propagated from fnmatch_translate for malformed
        glob patterns.
    """
    line = line.rstrip()
    if not line or line[0:1] == b"#":
        return None

    invert = line[0:1] == b"!"
    if invert:
        line = line[1:]

    dir_only = line[-1:] == b"/"

    if dir_only:
        line = line[:-1]

    literal_rule: Tuple[bytes, ...] = tuple(line.rsplit(b"/", 1))

    # Could make a special case for **/foo, but we don't have any patterns like that
    if invert or pattern_re.match(line):
        return invert, dir_only, False, fnmatch_translate(line)

    if b"\\" in line:
        # No glob metacharacters, but a backslash escape (e.g. "\#foo" or
        # "\!foo"): the rule has to be translated so that the escape is
        # removed, otherwise the stored literal keeps the backslash and can
        # never equal the real name.
        try:
            return invert, dir_only, False, fnmatch_translate(line)
        except ValueError:
            # Dangling trailing backslash: keep treating the line as a
            # literal, as before, instead of failing on it.
            pass

    return invert, dir_only, True, literal_rule


class PathFilter:
    """Filter the entries of a directory walk using gitignore-style rules.

    Rules come from ``<root>/.gitignore`` (when ``root`` is non-empty and the
    file exists) followed by any ``extras`` lines. Instances are callable:
    given an iterable of ``(dirpath, dirnames, filenames)`` tuples, whose
    name lists hold ``(name, payload)`` tuples, they yield the same tuples
    with the ignored entries removed.
    """

    def __init__(self, root: bytes, extras: Optional[List[bytes]] = None, cache: Optional[MutableMapping[bytes, bool]] = None) -> None:
        """Load the ignore rules.

        :param root: Directory whose ``.gitignore`` is read; when falsy no
            file is consulted.
        :param extras: Additional rule lines, processed after the file.
        :param cache: Optional mapping from path (directories carry a
            trailing ``/``) to True if that path is ignored. It is read and
            updated by :meth:`filter`; a fresh dict is used when omitted.
        """
        if root:
            ignore_path: Optional[bytes] = os.path.join(root, b".gitignore")
        else:
            ignore_path = None

        # All state is initialised before the trivial short-circuit below so
        # that every instance, trivial or not, has the same attributes.
        #
        # Literal rules: {directory or None: {name: [exclusion rules]}}, where
        # None means "in any directory".
        self.literals_file: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
        self.literals_dir: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
        # Glob rules: [((name_only, regex), [exclusion rules])]
        self.patterns_file: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []
        self.patterns_dir: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []

        if cache is None:
            cache = {}
        self.cache: MutableMapping[bytes, bool] = cache

        # With neither a .gitignore location nor extra rules there is nothing
        # to filter and __call__ passes its input straight through.
        self.trivial = not ignore_path and not extras
        if self.trivial:
            return

        if extras is None:
            extras = []

        if ignore_path is not None and not os.path.exists(ignore_path):
            ignore_path = None
        self._read_ignore(ignore_path, extras)

    def _read_ignore(self, ignore_path: Optional[bytes], extras: List[bytes]) -> None:
        """Register the rules from the file at ``ignore_path`` (if not None), then from ``extras``."""
        if ignore_path is not None:
            with open(ignore_path, "rb") as f:
                for line in f:
                    self._read_line(line)
        for line in extras:
            self._read_line(line)

    def _read_line(self, line: bytes) -> None:
        """Parse one rule line and record it in the rule tables."""
        parsed = parse_line(line)
        if not parsed:
            return
        invert, dir_only, literal, rule = parsed

        if invert:
            # For exclude rules, we attach the rules to all preceding patterns, so
            # that we can match patterns out of order and check if they were later
            # overridden by an exclude rule
            assert not literal
            rule = cast(Tuple[bool, Pattern[bytes]], rule)
            targets: List[Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]]] = [
                itertools.chain.from_iterable(item.items() for item in self.literals_dir.values()),
                self.patterns_dir,
            ]
            if not dir_only:
                # A rule not restricted to directories also overrides file rules
                targets.append(
                    itertools.chain.from_iterable(item.items() for item in self.literals_file.values()))
                targets.append(self.patterns_file)

            for _, exclusions in itertools.chain.from_iterable(targets):
                exclusions.append(rule)
            return

        if literal:
            rule = cast(Tuple[bytes, ...], rule)
            dir_name: Optional[bytes]
            if len(rule) == 1:
                # No "/" in the rule: it applies in any directory
                dir_name, pattern = None, rule[0]
            else:
                dir_name, pattern = rule
            # (Re)registering a literal starts it off with no exclusions
            self.literals_dir[dir_name][pattern] = []
            if not dir_only:
                self.literals_file[dir_name][pattern] = []
        else:
            rule = cast(Tuple[bool, Pattern[bytes]], rule)
            self.patterns_dir.append((rule, []))
            if not dir_only:
                self.patterns_file.append((rule, []))

    @staticmethod
    def _rule_matches(rule: Tuple[bool, Pattern[bytes]], name: bytes, paths: Tuple[bytes, ...]) -> bool:
        """Return True if ``rule`` matches ``name`` (name-only rules) or any of ``paths``."""
        name_only, regex = rule
        if name_only:
            return regex.match(name) is not None
        return any(regex.match(path) is not None for path in paths)

    def _is_ignored(self,
                    name: bytes,
                    paths: Tuple[bytes, ...],
                    rule_dirs: Tuple[Optional[bytes], ...],
                    literals: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]],
                    patterns: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]]
                    ) -> bool:
        """Return True if some rule ignores the entry and none of that rule's exclusions rescues it."""
        for rule_dir in rule_dirs:
            exclude = literals.get(rule_dir, {}).get(name)
            if exclude is not None and not any(
                    self._rule_matches(rule, name, paths) for rule in exclude):
                return True
        for rule, exclude in patterns:
            if self._rule_matches(rule, name, paths) and not any(
                    self._rule_matches(excl, name, paths) for excl in exclude):
                return True
        return False

    def filter(self,
               iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
               ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
        """Yield each walk entry with the ignored directories and files removed.

        Each ``dirnames`` list is updated in place before being yielded; the
        yielded ``dirpath`` is the one received, unmodified. Decisions are
        stored in, and reused from, ``self.cache``.
        """
        path_sep = os.path.sep.encode()
        for dirpath, dirnames, filenames in iterator:
            orig_dirpath = dirpath
            # Rules always use "/" as the separator
            if path_sep != b"/":
                dirpath = dirpath.replace(path_sep, b"/")

            # Literal rules are keyed by None (any directory) or by the
            # directory they are anchored to; "." is treated as the root "".
            rule_dirs = (None, dirpath if dirpath != b"." else b"")

            keep_dirs: List[Tuple[bytes, T]] = []
            keep_files: List[Tuple[bytes, T]] = []

            for iter_items, literals, patterns, target, suffix in [
                    (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"),
                    (filenames, self.literals_file, self.patterns_file, keep_files, b"")]:
                for item in iter_items:
                    name = item[0]
                    if dirpath:
                        bare_path = b"%s/%s" % (dirpath, name)
                    else:
                        bare_path = name
                    # Cache keys for directories carry a trailing "/"
                    path = bare_path + suffix
                    if path in self.cache:
                        if not self.cache[path]:
                            target.append(item)
                        continue
                    # Translated path patterns end in "$", so a directory is
                    # also tried without its trailing "/"; otherwise a path
                    # pattern such as "a/b*" could never match a directory.
                    paths = (path, bare_path) if suffix else (path,)
                    ignored = self._is_ignored(name, paths, rule_dirs, literals, patterns)
                    self.cache[path] = ignored
                    if not ignored:
                        target.append(item)

            dirnames[:] = keep_dirs
            # Sanity check that no .git directory survives the filtering
            assert not any(b".git" == name for name, _ in dirnames)
            yield orig_dirpath, dirnames, keep_files

    def __call__(self,
                 iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
                 ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
        """Apply the filter to ``iterator``; a trivial filter returns it unchanged."""
        if self.trivial:
            return iterator

        return self.filter(iterator)


def has_ignore(dirpath: bytes) -> bool:
    """Report whether ``dirpath`` directly contains a ``.gitignore`` entry."""
    candidate = os.path.join(dirpath, b".gitignore")
    return os.path.exists(candidate)