chromium/net/base/lookup_string_in_fixed_set.h

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_
#define NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_

#include <stddef.h>

#include <cstdint>
#include <string_view>

#include "base/containers/span.h"
#include "base/memory/raw_ptr_exclusion.h"
#include "base/memory/raw_span.h"
#include "net/base/net_export.h"

namespace net {

enum {};

// Looks up the string |key| with length |key_length| in a fixed set of
// strings. The set of strings must be known at compile time. It is converted to
// a graph structure named a DAFSA (Deterministic Acyclic Finite State
// Automaton) by the script make_dafsa.py during compilation. This permits
// efficient (in time and space) lookup. The graph generated by make_dafsa.py
// takes the form of a constant byte array which should be supplied via the
// |graph| parameter.  The return value is kDafsaNotFound, kDafsaFound, or a
// bitmap consisting of one or more of kDafsaExceptionRule, kDafsaWildcardRule
// and kDafsaPrivateRule ORed together.
//
// TODO(nick): Replace this with FixedSetIncrementalLookup everywhere.
NET_EXPORT int LookupStringInFixedSet(base::span<const uint8_t> graph,
                                      const char* key,
                                      size_t key_length);

// Looks up the longest matching suffix for |host| with length |length| in a
// reversed DAFSA. Partial matches must begin at a new component, i.e. host
// itself could match or a host part starting after a dot could match.
// If no match was found a value of 0 is written to |suffix_length| and the
// value kDafsaNotFound is returned, otherwise the length of the longest match
// is written to |suffix_length| and the type of the longest match is returned.
int LookupSuffixInReversedSet(base::span<const uint8_t> graph,
                              bool include_private,
                              std::string_view host,
                              size_t* suffix_length);

// FixedSetIncrementalLookup provides efficient membership and prefix queries
// against a fixed set of strings. The set of strings must be known at compile
// time. The set is converted to a graph structure named a DAFSA (Deterministic
// Acyclic Finite State Automaton) by the script //net/tools/dafsa/make_dafsa.py
// during compilation. The conversion generates a C++ header file defining the
// encoded graph as a constant byte array. This class provides a fast, constant-
// space lookup operation against such byte arrays.
//
// The lookup proceeds incrementally, with input characters provided one at a
// time. This approach allow queries of the form: "given an input string, which
// prefixes of that string that appear in the fixed set?" As the matching
// prefixes (and their result codes) are enumerated, the most suitable match
// among them can be selected in a single pass.
//
// This class can also be used to perform suffix queries (instead of prefix
// queries) against a fixed set, so long as the DAFSA is constructed on reversed
// values, and the input is provided in reverse order.
//
// Example usage for simple membership query; |input| is a std::string:
//
//    FixedSetIncrementalLookup lookup(kDafsa, sizeof(kDafsa));
//    for (char c : input) {
//      if (!lookup.Advance(c))
//         return false;
//    }
//    return lookup.GetResultForCurrentSequence() != kDafsaNotFound;
//
// Example usage for 'find longest prefix in set with result code == 3' query:
//
//    FixedSetIncrementalLookup prefix_lookup(kDafsa, sizeof(kDafsa));
//    size_t longest_match_end = 0;
//    for (size_t i = 0; i < input.length(); ++i) {
//      if (!prefix_lookup.Advance(input[i]))
//         break;
//      if (prefix_lookup.GetResultForCurrentSequence() == 3)
//        longest_match_end = (i + 1);
//    }
//    return input.substr(0, longest_match_end);
//
class NET_EXPORT FixedSetIncrementalLookup {};

}  // namespace net

#endif  // NET_BASE_LOOKUP_STRING_IN_FIXED_SET_H_