// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Every type declared below lives in the url_pattern_index.flat namespace.
namespace url_pattern_index.flat;
// NOTE: Increment url_pattern_index::kUrlPatternIndexFormatVersion whenever
// making a breaking change to this schema (for example reordering, removing or
// retyping existing enum values or table fields).
// The format in which UrlRule.url_pattern is written.
// Corresponds to url_pattern_index::proto::UrlPatternType.
// Stored as a ubyte. Values are assigned implicitly in declaration order
// (SUBSTRING = 0, WILDCARDED = 1, REGEXP = 2), so existing entries must not be
// reordered or removed.
enum UrlPatternType : ubyte {
SUBSTRING,
WILDCARDED,
REGEXP,
}
// The anchor kind used by UrlRule.anchor_left and UrlRule.anchor_right.
// Corresponds to url_pattern_index::proto::AnchorType; see that definition for
// the meaning of each anchor.
// Stored as a ubyte. Values are assigned implicitly in declaration order
// (NONE = 0, BOUNDARY = 1, SUBDOMAIN = 2), so existing entries must not be
// reordered or removed.
enum AnchorType : ubyte {
NONE,
BOUNDARY,
SUBDOMAIN,
}
// URL rule matching options. These correspond to multiple fields of
// url_pattern_index::proto::UrlRule, but here, they are represented as flags
// of the same bitmask to allow for compact storage.
// Because of (bit_flags), the N-th enumerator (counting from 0) has the value
// 1 << N. The combined mask is stored in UrlRule.options.
enum OptionFlag : ubyte (bit_flags) {
IS_ALLOWLIST, // 1 << 0
APPLIES_TO_FIRST_PARTY, // 1 << 1
APPLIES_TO_THIRD_PARTY, // 1 << 2
IS_CASE_INSENSITIVE, // 1 << 3
}
// The options controlling whether or not to activate filtering for subresources
// of documents that match the URL pattern of the rule.
// Corresponds to url_pattern_index::proto::ActivationType.
// Because of (bit_flags), the N-th enumerator (counting from 0) has the value
// 1 << N. The combined mask is stored in UrlRule.activation_types.
enum ActivationType : ubyte (bit_flags) {
DOCUMENT, // 1 << 0. Disable all rules on the page.
GENERIC_BLOCK, // 1 << 1. Disable generic URL rules on the page.
}
// The types of subresource requests that a URL rule should be applied to.
// Because of (bit_flags), the N-th enumerator (counting from 0) has the value
// 1 << N. The combined mask is stored in UrlRule.element_types.
// The 16 enumerators below occupy every bit of the ushort underlying type, so
// another value cannot be added without widening the type.
enum ElementType : ushort (bit_flags) {
OTHER, // 1 << 0
SCRIPT, // 1 << 1
IMAGE, // 1 << 2
STYLESHEET, // 1 << 3
OBJECT, // 1 << 4
XMLHTTPREQUEST, // 1 << 5
// TODO(crbug.com/40516600): Remove OBJECT_SUBREQUEST type once
// url_pattern_index no longer has a dependency on proto::UrlRule.
OBJECT_SUBREQUEST, // 1 << 6
SUBDOCUMENT, // 1 << 7
PING, // 1 << 8
MEDIA, // 1 << 9
FONT, // 1 << 10
WEBSOCKET, // 1 << 11
WEBTRANSPORT, // 1 << 12
WEBBUNDLE, // 1 << 13
CSP_REPORT, // 1 << 14
MAIN_FRAME, // 1 << 15. The only bit left out of the UrlRule default mask.
// Note: Update the default value for |element_types| field in UrlRule, on
// adding/removing values from this enum.
}
// The request methods that a URL rule should be applied to.
// Because of (bit_flags), the N-th enumerator (counting from 0) has the value
// 1 << N. The combined mask is stored in UrlRule.request_methods, whose
// default of 1023 has all ten bits below set.
enum RequestMethod : ushort (bit_flags) {
CONNECT, // 1 << 0
DELETE, // 1 << 1
GET, // 1 << 2
HEAD, // 1 << 3
OPTIONS, // 1 << 4
PATCH, // 1 << 5
POST, // 1 << 6
PUT, // 1 << 7
OTHER_HTTP, // 1 << 8. Matches HTTP(s) request methods not listed above.
NON_HTTP, // 1 << 9. Matches non-HTTP(s) requests.
// Note: Update the default value for `request_methods` field in UrlRule, on
// adding/removing values from this enum.
}
// The flat representation of a single URL rule. For more details regarding the
// fields please see the comments to url_pattern_index::proto::UrlRule.
// No field carries an explicit id attribute, so FlatBuffers assigns field slots
// in declaration order: do not reorder or remove fields, and append new ones at
// the end of the table.
table UrlRule {
// Rule matching options, a bitmask consisting of OptionFlags. No default is
// given, so it is 0 (no flags set).
options : ubyte;
// A bitmask of ElementType. Equals ElementType_ANY & ~ElementType_MAIN_FRAME
// by default for compactness. We expect most rules to not use
// ElementType_MAIN_FRAME. Keep this in sync with
// url_pattern_index::kDefaultFlatElementTypesMask.
// 32767 == 0x7FFF: bits 0-14 set, bit 15 (MAIN_FRAME) clear.
element_types : ushort = 32767;
// A bitmask of RequestMethod. Equals RequestMethod_ANY by default.
// 1023 == 0x3FF: bits 0-9 set, one per RequestMethod enumerator.
request_methods : ushort = 1023;
// A bitmask of ActivationType. Disables all activation types by default.
activation_types : ubyte = 0;
// Use SUBSTRING as default, since it's the most used pattern type. Same as
// the corresponding proto::UrlRule::url_pattern_type.
url_pattern_type : UrlPatternType = SUBSTRING;
// Use NONE as default, since most of the rules are not anchored.
anchor_left : AnchorType = NONE;
anchor_right : AnchorType = NONE;
// The list of initiator and request domains to be included/excluded from the
// filter's affected set. Each should either be null or have at least a single
// element. The domains should be in lower-case and kept sorted as defined by
// url_pattern_index::CompareDomains. The entries must consist of only ascii
// characters. Use punycode encoding for internationalized domains.
initiator_domains_included : [string];
initiator_domains_excluded : [string];
request_domains_included : [string];
request_domains_excluded : [string];
// A URL pattern in the format defined by |url_pattern_type|. This should
// only consist of ascii characters, since it's matched against a url where
// the host is encoded in the punycode format (in case of internationalized
// domains) and any other non-ascii characters are percent-escaped in utf-8.
// This should be in lower case if the rule is case-insensitive.
url_pattern : string;
// An id which uniquely identifies the rule. Clients must ensure uniqueness if
// they use this field.
id : uint;
// Priority of the rule. The larger the value, the greater the priority.
priority : uint64;
// Binary blob for any embedder specific rule conditions. When specified,
// these should be interpreted by the url_pattern_index embedder.
embedder_conditions : [ubyte];
}
// Contains an N-gram (acting as a key in a hash table) and a list of URL rules
// associated with that N-gram. Instances are the elements of
// UrlPatternIndex.ngram_index.
table NGramToRules {
// A string consisting of N (up to 8) ascii-only non-special characters, which
// are stored in the lowest N non-zero bytes, lower bytes corresponding to
// later symbols. These are lower-cased to support case-insensitive matching.
// ulong is 64 bits wide, which is where the limit of 8 characters comes from.
ngram : ulong;
// The list of rules containing |ngram| as a substring of their URL pattern.
// Sorted in descending order of rule priority.
rule_list : [UrlRule];
}
// A data structure used to select only a handful of URL rule candidates that
// need to be matched against a certain resource URL.
table UrlPatternIndex {
// The N of an N-gram index. Note: |n| should be between 1 and 8.
n : uint;
// A hash table with open addressing. The keys of the table are N-grams.
ngram_index : [NGramToRules];
// The slot that is pointed to by all empty slots of |ngram_index| hash table.
// Note: This is a workaround needed because null offsets are not allowed as
// elements of FlatBuffer arrays.
ngram_index_empty_slot : NGramToRules;
// A list storing the rules that don't contain any valid N-grams in their
// URL patterns. Contains all the REGEXP rules as well. Sorted in descending
// order of rule priority.
// TODO(pkalinnikov): Think about better implementation for the fallback
// index. Possibly make it a hash map and maybe merge it with the N-gram
// index, since we can treat any sequence of characters shorter than N as an
// N-gram with zero bytes used for padding.
fallback_rules : [UrlRule];
}
// Declares UrlPatternIndex as the root table of a buffer serialized with this
// schema.
root_type UrlPatternIndex;