chromium/components/url_pattern_index/url_pattern_index.h

// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_INDEX_H_
#define COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_INDEX_H_

#include <stddef.h>
#include <stdint.h>

#include <map>
#include <optional>
#include <string_view>
#include <vector>

#include "base/containers/flat_set.h"
#include "base/functional/callback_forward.h"
#include "base/memory/raw_ptr.h"
#include "components/url_pattern_index/closed_hash_map.h"
#include "components/url_pattern_index/flat/url_pattern_index_generated.h"
#include "components/url_pattern_index/proto/rules.pb.h"
#include "components/url_pattern_index/uint64_hasher.h"
#include "components/url_pattern_index/url_pattern.h"
#include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h"

class GURL;

namespace url {
class Origin;
}

namespace url_pattern_index {

// The integer type used to represent N-grams (see also kNGramSize below).
NGram;
// The hasher used for hashing N-grams, i.e. values of type NGram.
NGramHasher;
// The hash table probe sequence used both by UrlPatternIndexBuilder (when the
// index is built) and by the code that reads the resulting index (presumably
// UrlPatternIndexMatcher).
NGramHashTableProber;

// FlatBuffer offset aliases. UrlRuleOffset is what SerializeUrlRule() returns
// for a serialized rule.
UrlRuleOffset;
UrlPatternIndexOffset;

// FlatBuffer aliases for strings and domain lists. FlatDomainsOffset is
// presumably the mapped type of FlatDomainMap below - confirm against the
// alias definitions.
FlatStringOffset;
FlatDomains;
FlatDomainsOffset;

// Comparator over offset vectors; presumably the ordering used by
// FlatDomainMap - confirm against the alias definition.
struct OffsetVectorCompare {};
// Map of domain vectors to their already-serialized offsets. A pointer to such
// a map is passed to SerializeUrlRule() to de-dupe domain vectors across
// serialized rules.
FlatDomainMap;

// The size of the N-grams used by the index.
constexpr size_t kNGramSize =;
// NOTE(review): presumably checks that an N-gram of kNGramSize fits into the
// NGram integer type - confirm.
static_assert;

// The default element types mask as specified by the flatbuffer schema
// (flat/url_pattern_index.fbs).
constexpr uint16_t kDefaultFlatElementTypesMask =;

// The default element types mask used by a proto::UrlRule. Note that this is
// the proto-side counterpart of kDefaultFlatElementTypesMask and has a wider
// integer type.
constexpr uint32_t kDefaultProtoElementTypesMask =;

// Serializes the |rule| to the FlatBuffer |builder|.
//
// |rule|       The proto rule to serialize.
// |builder|    The FlatBuffer builder that receives the serialized rule.
// |domain_map| Should point to a non-nullptr map of domain vectors to their
//              existing offsets. It is used to de-dupe domain vectors in the
//              serialized rules.
//
// Returns an offset to the serialized rule in the resulting buffer. Returns a
// null offset iff the |rule| could not be serialized, either because it uses
// unsupported options or because it is otherwise invalid.
UrlRuleOffset SerializeUrlRule(const proto::UrlRule& rule,
                               flatbuffers::FlatBufferBuilder* builder,
                               FlatDomainMap* domain_map);

// Performs three-way comparison between two domains. In the total order defined
// by this predicate, the lengths of domains are monotonically decreasing, i.e.
// longer domains are ordered before shorter ones. Domains of the same length
// are ordered lexicographically.
//
// Returns:
//  - a negative value if |lhs_domain| should be ordered before |rhs_domain|,
//  - zero if |lhs_domain| is equal to |rhs_domain|,
//  - a positive value if |lhs_domain| should be ordered after |rhs_domain|.
int CompareDomains(std::string_view lhs_domain, std::string_view rhs_domain);

// The current format version of UrlPatternIndex.
// Increase this value whenever an incompatible change is made to the
// UrlPatternIndex schema (flat/url_pattern_index.fbs). Clients of
// url_pattern_index can use a change in this value as a signal to rebuild
// their rulesets.
constexpr int kUrlPatternIndexFormatVersion =;

// The class used to construct an index over the URL patterns of a set of URL
// rules. This class only builds the index: the client is responsible for
// converting the rules themselves to FlatBuffers format and for persisting them
// into the |flat_builder| that is supplied in the constructor.
class UrlPatternIndexBuilder {};

// Encapsulates a read-only index built over the URL patterns of a set of URL
// rules, and provides fast matching of network requests against these rules.
// The nested EmbedderConditionsMatcher type is also used by the free function
// DoesRuleFlagsMatch().
class UrlPatternIndexMatcher {};

// Returns whether the `rule` is considered "generic". A generic rule is one
// whose initiator domain list is either empty or contains only negative
// domains.
bool IsRuleGeneric(const flat::UrlRule& rule);

// Returns whether the `origin` matches the initiator domain list of the `rule`
// (referred to as `domains` below). A match means that either:
//  - the longest domain in `domains` that `origin` is a sub-domain of is not
//    an exception, OR
//  - all the `domains` are exceptions and none of them matches the `origin`.
// Thus, domain filters with more domain components trump filters with fewer
// domain components, i.e. the more specific a filter is, the higher its
// priority.
bool DoesOriginMatchInitiatorDomainList(const url::Origin& origin,
                                        const flat::UrlRule& rule);

// Returns whether the request URL (`url`) matches the request domain list of
// the `rule`. See `DoesOriginMatchInitiatorDomainList` for an explanation of
// the matching logic.
bool DoesURLMatchRequestDomainList(const UrlPattern::UrlInfo& url,
                                   const flat::UrlRule& rule);

// Returns whether the request matches the flags of the specified `rule`. Takes
// into account:
//  - `element_type` of the requested resource, if not *_NONE.
//  - `activation_type` for a subdocument request, if not *_NONE.
//  - `request_method` of the request, if not *_NONE.
//  - Whether the resource `is_third_party` w.r.t. its embedding document.
//  - Options specified by the embedder via `embedder_conditions_matcher`.
bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
                        flat::ElementType element_type,
                        flat::ActivationType activation_type,
                        flat::RequestMethod request_method,
                        bool is_third_party,
                        const UrlPatternIndexMatcher::EmbedderConditionsMatcher&
                            embedder_conditions_matcher);

}  // namespace url_pattern_index

#endif  // COMPONENTS_URL_PATTERN_INDEX_URL_PATTERN_INDEX_H_