chromium/components/url_matcher/url_matcher.h

// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_URL_MATCHER_URL_MATCHER_H_
#define COMPONENTS_URL_MATCHER_URL_MATCHER_H_

#include <stddef.h>

#include <memory>
#include <set>
#include <vector>

#include "base/memory/raw_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/substring_set_matcher/substring_set_matcher.h"
#include "base/types/expected.h"
#include "components/url_matcher/regex_set_matcher.h"
#include "components/url_matcher/url_matcher_export.h"
#include "net/base/ip_address.h"

class GURL;

namespace url_matcher {

// This class represents a single URL matching condition, e.g. a match on the
// host suffix or the containment of a string in the query component of a GURL.
//
// The difference from a simple MatcherStringPattern is that this also supports
// checking whether the {Host, Path, Query} of a URL contains a string. The
// reduction of URL matching conditions to MatcherStringPatterns conducted by
// URLMatcherConditionFactory is not capable of expressing that alone.
//
// Also supported is matching regular expressions against the URL (URL_MATCHES).
class URL_MATCHER_EXPORT URLMatcherCondition {};

// Class to map the problem of finding {host, path, query} {prefixes, suffixes,
// containments, and equality} in GURLs to the substring matching problem.
//
// Say, you want to check whether the path of a URL starts with "/index.html".
// This class preprocesses a URL like "www.google.com/index.html" into something
// like "www.google.com|/index.html". After preprocessing, you can search for
// "|/index.html" in the string and see that this candidate URL actually has
// a path that starts with "/index.html". On the contrary,
// "www.google.com/images/index.html" would be normalized to
// "www.google.com|/images/index.html". It is easy to see that it contains
// "/index.html" but the path of the URL does not start with "/index.html".
//
// This preprocessing is important if you want to match a URL against many
// patterns because it reduces the matching to a "discover all substrings
// of a dictionary in a text" problem, which can be solved very efficiently
// by the Aho-Corasick algorithm.
//
// IMPORTANT: The URLMatcherConditionFactory owns the MatcherStringPattern
// referenced by created URLMatcherConditions. Therefore, it must outlive
// all created URLMatcherCondition and the SubstringSetMatcher.
class URL_MATCHER_EXPORT URLMatcherConditionFactory {};

// This class represents a single URL query matching condition. The query
// matching is done as a search for a key and optionally a value.
// The matching makes use of CanonicalizeURLForComponentSearches to ensure that
// the key starts and ends (optionally) with the right marker.
class URL_MATCHER_EXPORT URLQueryElementMatcherCondition {};

// This class represents a filter for the URL scheme to be hooked up into a
// URLMatcherConditionSet.
class URL_MATCHER_EXPORT URLMatcherSchemeFilter {};

// This class represents a filter for port numbers to be hooked up into a
// URLMatcherConditionSet.
class URL_MATCHER_EXPORT URLMatcherPortFilter {};

// This class represents a filter for CIDR blocks to be hooked up into a
// URLMatcherConditionSet.
class URL_MATCHER_EXPORT URLMatcherCidrBlockFilter {};

// This class represents a set of conditions that all need to match on a
// given URL in order to be considered a match.
class URL_MATCHER_EXPORT URLMatcherConditionSet
    : public base::RefCounted<URLMatcherConditionSet> {};

// This class allows matching one URL against a large set of
// URLMatcherConditionSets at the same time.
class URL_MATCHER_EXPORT URLMatcher {};

}  // namespace url_matcher

#endif  // COMPONENTS_URL_MATCHER_URL_MATCHER_H_