// Copyright 2017 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_INDEX_H_ #define COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_INDEX_H_ #include <stddef.h> #include <stdint.h> #include <map> #include <optional> #include <string_view> #include <vector> #include "base/containers/flat_set.h" #include "base/functional/callback_forward.h" #include "base/memory/raw_ptr.h" #include "components/url_pattern_index/closed_hash_map.h" #include "components/url_pattern_index/flat/url_pattern_index_generated.h" #include "components/url_pattern_index/proto/rules.pb.h" #include "components/url_pattern_index/uint64_hasher.h" #include "components/url_pattern_index/url_pattern.h" #include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" class GURL; namespace url { class Origin; } namespace url_pattern_index { // The integer type used to represent N-grams. NGram; // The hasher used for hashing N-grams. NGramHasher; // The hash table probe sequence used both by UrlPatternIndex and its builder. NGramHashTableProber; // FlatBuffer offset aliases. UrlRuleOffset; UrlPatternIndexOffset; FlatStringOffset; FlatDomains; FlatDomainsOffset; struct OffsetVectorCompare { … }; FlatDomainMap; constexpr size_t kNGramSize = …; static_assert …; // The default element types mask as specified by the flatbuffer schema. constexpr uint16_t kDefaultFlatElementTypesMask = …; // The default element types mask used by a proto::UrlRule. constexpr uint32_t kDefaultProtoElementTypesMask = …; // Serializes the |rule| to the FlatBuffer |builder|, and returns an offset to // it in the resulting buffer. Returns null offset iff the |rule| could not be // serialized because of unsupported options or it is otherwise invalid. // // |domain_map| Should point to a non-nullptr map of domain vectors to their // existing offsets. 
It is used to de-dupe domain vectors in the serialized // rules. UrlRuleOffset SerializeUrlRule(const proto::UrlRule& rule, flatbuffers::FlatBufferBuilder* builder, FlatDomainMap* domain_map); // Performs a three-way comparison between two domains. In the total order defined // by this predicate, the lengths of domains will be monotonically decreasing. // Domains of the same length are ordered in lexicographic order. // Returns a negative value if |lhs_domain| should be ordered before // |rhs_domain|, zero if |lhs_domain| is equal to |rhs_domain| and a positive // value if |lhs_domain| should be ordered after |rhs_domain|. int CompareDomains(std::string_view lhs_domain, std::string_view rhs_domain); // The current format version of UrlPatternIndex. // Increase this value when introducing an incompatible change to the // UrlPatternIndex schema (flat/url_pattern_index.fbs). url_pattern_index // clients can use this as a signal to rebuild rulesets. constexpr int kUrlPatternIndexFormatVersion = …; // The class used to construct an index over the URL patterns of a set of URL // rules. The rules themselves need to be converted to FlatBuffers format by the // client of this class, as well as persisted into the |flat_builder| that is // supplied in the constructor. class UrlPatternIndexBuilder { … }; // Encapsulates a read-only index built over the URL patterns of a set of URL // rules, and provides fast matching of network requests against these rules. class UrlPatternIndexMatcher { … }; // Returns whether the `rule` is considered "generic". A generic rule is one // whose initiator domain list is either empty or contains only negative domains. bool IsRuleGeneric(const flat::UrlRule& rule); // Returns whether the `origin` matches the initiator domain list of the `rule`. // A match means that the longest domain in `domains` that `origin` is a // sub-domain of is not an exception OR all the `domains` are exceptions and // none of them matches the `origin`. 
Thus, domain filters with more domain // components trump filters with fewer domain components, i.e. the more specific // a filter is, the higher the priority. bool DoesOriginMatchInitiatorDomainList(const url::Origin& origin, const flat::UrlRule& rule); // Returns whether the request URL matches the request domain list of the // `rule`. See `DoesOriginMatchInitiatorDomainList` for an explanation of the // matching logic. bool DoesURLMatchRequestDomainList(const UrlPattern::UrlInfo& url, const flat::UrlRule& rule); // Returns whether the request matches flags of the specified `rule`. Takes into // account: // - `element_type` of the requested resource, if not *_NONE. // - `activation_type` for a subdocument request, if not *_NONE. // - `request_method` of the request, if not *_NONE. // - Whether the resource `is_third_party` w.r.t. its embedding document. // - Options specified by the embedder via `embedder_conditions_matcher`. bool DoesRuleFlagsMatch(const flat::UrlRule& rule, flat::ElementType element_type, flat::ActivationType activation_type, flat::RequestMethod request_method, bool is_third_party, const UrlPatternIndexMatcher::EmbedderConditionsMatcher& embedder_conditions_matcher); } // namespace url_pattern_index #endif // COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_INDEX_H_