chromium/components/omnibox/browser/autocomplete_match_classification.h

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_
#define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_

#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/in_memory_url_index_types.h"

// Finds the matches for |find_text| in |text|, classifies those matches,
// merges those classifications with |original_class|, and returns the merged
// classifications.
//
// |text_is_search_query| selects which spans receive the MATCH style:
//   - false: matches are classified as MATCH, and non-matches are classified
//            as NONE.
//   - true:  matches are classified as NONE, and non-matches are classified
//            as MATCH. This is done to mimic the behavior of SearchProvider,
//            which decorates matches according to the approach used by Google
//            Suggest.
// |find_text| and |text| will be lowercased.
// |original_class| defaults to an empty ACMatchClassifications.
//
//   For example, given
//     |find_text| is "sp new",
//     |text| is "Sports and News at sports.somesite.com - visit us!",
//     |text_is_search_query| is false, and
//     |original_class| is {{0, NONE}, {19, URL}, {38, NONE}} (marking
//     "sports.somesite.com" as a URL),
//   Then this will return
//     {{0, MATCH}, {2, NONE}, {11, MATCH}, {14, NONE}, {19, URL|MATCH},
//     {21, URL}, {38, NONE}}; i.e.,
//     "Sports and News at sports.somesite.com - visit us!"
//      ^ ^        ^  ^    ^ ^                ^
//      0 2        11 14  19 21               38
//      M N        M  N  U|M U                N
//
//   For example, given
//     |find_text| is "canal",
//     |text| is "panama canal",
//     |text_is_search_query| is true, and
//     |original_class| is {{0, NONE}},
//   Then this will return
//     {{0, MATCH}, {7, NONE}}; i.e.,
//     "panama canal"
//      ^      ^
//      0 M    7 N
ACMatchClassifications ClassifyAllMatchesInString(
    const std::u16string& find_text,
    const std::u16string& text,
    bool text_is_search_query,
    const ACMatchClassifications& original_class = ACMatchClassifications());

// Finds the terms of |find_text| that occur in |text|.
//
// Cleans |text|, splits |find_text| into terms by breaking on whitespaces and
// most symbols, looks for those terms in cleaned |text|, and returns the
// matched terms sorted, deduped, and possibly filtered-by-word-boundary.
//
// |allow_prefix_matching| (default true): if true, and |find_text| is an exact
// prefix (ignoring case but considering symbols) of |text|, then only a single
// term representing the prefix will be returned. E.g., for |find_text|
// "how to tie" and |text| "how to tie a tie", this will return
// "[how to tie] a tie". On the other hand, for |find_text| "to tie", this will
// return "how [to] [tie] a [tie]".
//
// |allow_mid_word_matching| (default false): if false, the returned terms will
// be filtered-by-word-boundary. E.g., for |find_text| "ho to ie" and |text|
// "how to tie a tie", this will return "[ho]w [to] tie a tie". On the other
// hand, for |allow_mid_word_matching| true, this will return
// "[ho]w [to] t[ie] a t[ie]".
TermMatches FindTermMatches(std::u16string find_text,
                            std::u16string text,
                            bool allow_prefix_matching = true,
                            bool allow_mid_word_matching = false);

// A utility function called by `FindTermMatches` to find valid matches in
// `cleaned_text` for the given `find_terms`. Matched terms are sorted, deduped,
// and possibly filtered-by-word-boundary.
//
// If `allow_mid_word_matching` is false (the default), the returned terms will
// be filtered-by-word-boundary. E.g., for the terms of the find text
// "ho to ie", the text "how to tie a tie", and `allow_mid_word_matching` false,
// this will return "[ho]w [to] tie a tie". On the other hand, for
// `allow_mid_word_matching` true, this will return "[ho]w [to] t[ie] a t[ie]".
//
// NOTE(review): `find_terms_word_starts` and `text_word_starts` are presumably
// the word-start offsets for `find_terms` and `cleaned_text` respectively;
// confirm against the definition.
TermMatches FindTermMatchesForTerms(const String16Vector& find_terms,
                                    const WordStarts& find_terms_word_starts,
                                    const std::u16string& cleaned_text,
                                    const WordStarts& text_word_starts,
                                    bool allow_mid_word_matching = false);

// Returns an ACMatchClassifications structure given the |matches| to
// highlight.
//
// |matches| can be retrieved by calling FindTermMatches.
// |text_length| should be the full length of the text being classified (not
// the length of the truncated text that cleaning returns). It is used to
// ensure the trailing classification is correct; i.e., if matches end at 20,
// and |text_length| is greater than 20, ClassifyTermMatches will add a
// |non_match_style| classification with offset 20.
// |match_style| and |non_match_style| specify the classifications to use for
// matched and non-matched text, respectively.
ACMatchClassifications ClassifyTermMatches(TermMatches matches,
                                           size_t text_length,
                                           int match_style,
                                           int non_match_style);

#endif  // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_MATCH_CLASSIFICATION_H_