// Copyright 2013 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef COMPONENTS_URL_MATCHER_URL_MATCHER_H_ #define COMPONENTS_URL_MATCHER_URL_MATCHER_H_ #include <stddef.h> #include <memory> #include <set> #include <vector> #include "base/memory/raw_ptr.h" #include "base/memory/ref_counted.h" #include "base/substring_set_matcher/substring_set_matcher.h" #include "base/types/expected.h" #include "components/url_matcher/regex_set_matcher.h" #include "components/url_matcher/url_matcher_export.h" #include "net/base/ip_address.h" class GURL; namespace url_matcher { // This class represents a single URL matching condition, e.g. a match on the // host suffix or the containment of a string in the query component of a GURL. // // The difference from a simple MatcherStringPattern is that this also supports // checking whether the {Host, Path, Query} of a URL contains a string. The // reduction of URL matching conditions to MatcherStringPatterns conducted by // URLMatcherConditionFactory is not capable of expressing that alone. // // Also supported is matching regular expressions against the URL (URL_MATCHES). class URL_MATCHER_EXPORT URLMatcherCondition { … }; // Class to map the problem of finding {host, path, query} {prefixes, suffixes, // containments, and equality} in GURLs to the substring matching problem. // // Say, you want to check whether the path of a URL starts with "/index.html". // This class preprocesses a URL like "www.google.com/index.html" into something // like "www.google.com|/index.html". After preprocessing, you can search for // "|/index.html" in the string and see that this candidate URL actually has // a path that starts with "/index.html". On the contrary, // "www.google.com/images/index.html" would be normalized to // "www.google.com|/images/index.html". It is easy to see that it contains // "/index.html" but the path of the URL does not start with "/index.html". 
// // This preprocessing is important if you want to match a URL against many // patterns because it reduces the matching to a "discover all substrings // of a dictionary in a text" problem, which can be solved very efficiently // by the Aho-Corasick algorithm. // // IMPORTANT: The URLMatcherConditionFactory owns the MatcherStringPatterns // referenced by created URLMatcherConditions. Therefore, it must outlive // all created URLMatcherConditions and the SubstringSetMatcher. class URL_MATCHER_EXPORT URLMatcherConditionFactory { … }; // This class represents a single URL query matching condition. The query // matching is done as a search for a key and optionally a value. // The matching makes use of CanonicalizeURLForComponentSearches to ensure that // the key starts and ends (optionally) with the right marker. class URL_MATCHER_EXPORT URLQueryElementMatcherCondition { … }; // This class represents a filter for the URL scheme to be hooked up into a // URLMatcherConditionSet. class URL_MATCHER_EXPORT URLMatcherSchemeFilter { … }; // This class represents a filter for port numbers to be hooked up into a // URLMatcherConditionSet. class URL_MATCHER_EXPORT URLMatcherPortFilter { … }; // This class represents a filter for CIDR blocks to be hooked up into a // URLMatcherConditionSet. class URL_MATCHER_EXPORT URLMatcherCidrBlockFilter { … }; // This class represents a set of conditions that all need to match on a // given URL in order to be considered a match. class URL_MATCHER_EXPORT URLMatcherConditionSet : public base::RefCounted<URLMatcherConditionSet> { … }; // This class allows matching one URL against a large set of // URLMatcherConditionSets at the same time. class URL_MATCHER_EXPORT URLMatcher { … }; } // namespace url_matcher #endif // COMPONENTS_URL_MATCHER_URL_MATCHER_H_