// Copyright 2015 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ #define NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_ #include <stddef.h> #include <cstdint> #include <string_view> #include "base/containers/span.h" #include "base/memory/raw_ptr_exclusion.h" #include "base/memory/raw_span.h" #include "net/base/net_export.h" namespace net { enum { … }; // Looks up the string |key| with length |key_length| in a fixed set of // strings. The set of strings must be known at compile time. It is converted to // a graph structure named a DAFSA (Deterministic Acyclic Finite State // Automaton) by the script make_dafsa.py during compilation. This permits // efficient (in time and space) lookup. The graph generated by make_dafsa.py // takes the form of a constant byte array which should be supplied via the // |graph| parameter. The return value is kDafsaNotFound, kDafsaFound, or a // bitmap consisting of one or more of kDafsaExceptionRule, kDafsaWildcardRule // and kDafsaPrivateRule ORed together. // // TODO(nick): Replace this with FixedSetIncrementalLookup everywhere. NET_EXPORT int LookupStringInFixedSet(base::span<const uint8_t> graph, const char* key, size_t key_length); // Looks up the longest matching suffix for |host| with length |length| in a // reversed DAFSA. Partial matches must begin at a new component, i.e. host // itself could match or a host part starting after a dot could match. // If no match was found a value of 0 is written to |suffix_length| and the // value kDafsaNotFound is returned, otherwise the length of the longest match // is written to |suffix_length| and the type of the longest match is returned. int LookupSuffixInReversedSet(base::span<const uint8_t> graph, bool include_private, std::string_view host, size_t* suffix_length); // FixedSetIncrementalLookup provides efficient membership and prefix queries // against a fixed set of strings. The set of strings must be known at compile // time. The set is converted to a graph structure named a DAFSA (Deterministic // Acyclic Finite State Automaton) by the script //net/tools/dafsa/make_dafsa.py // during compilation. The conversion generates a C++ header file defining the // encoded graph as a constant byte array. This class provides a fast, constant- // space lookup operation against such byte arrays. // // The lookup proceeds incrementally, with input characters provided one at a // time. This approach allow queries of the form: "given an input string, which // prefixes of that string that appear in the fixed set?" As the matching // prefixes (and their result codes) are enumerated, the most suitable match // among them can be selected in a single pass. // // This class can also be used to perform suffix queries (instead of prefix // queries) against a fixed set, so long as the DAFSA is constructed on reversed // values, and the input is provided in reverse order. // // Example usage for simple membership query; |input| is a std::string: // // FixedSetIncrementalLookup lookup(kDafsa, sizeof(kDafsa)); // for (char c : input) { // if (!lookup.Advance(c)) // return false; // } // return lookup.GetResultForCurrentSequence() != kDafsaNotFound; // // Example usage for 'find longest prefix in set with result code == 3' query: // // FixedSetIncrementalLookup prefix_lookup(kDafsa, sizeof(kDafsa)); // size_t longest_match_end = 0; // for (size_t i = 0; i < input.length(); ++i) { // if (!prefix_lookup.Advance(input[i])) // break; // if (prefix_lookup.GetResultForCurrentSequence() == 3) // longest_match_end = (i + 1); // } // return input.substr(0, longest_match_end); // class NET_EXPORT FixedSetIncrementalLookup { … }; } // namespace net #endif // NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_