chromium/chrome/browser/ash/app_list/search/local_image_search/search_utils.cc

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/ash/app_list/search/local_image_search/search_utils.h"

#include <string>
#include <string_view>

#include "base/containers/fixed_flat_set.h"
#include "base/no_destructor.h"
#include "base/strings/string_util.h"
#include "chrome/browser/ash/app_list/search/local_image_search/file_search_result.h"

namespace app_list {

// Returns the entries whose `file_path` occurs in both `vec1` and `vec2`.
//
// The two vectors are walked in lock-step with one cursor each, always
// advancing the cursor that points at the smaller path. That walk only finds
// every common path when both inputs are ordered ascending by `file_path`;
// no sorting is performed here.
//
// For each common path the emitted result takes `file_path` and
// `last_modified` from the `vec1` entry, and its relevance is the sum of the
// relevances of the two matching entries. Results appear in the order the
// matches are encountered during the walk.
std::vector<FileSearchResult> FindIntersection(
    const std::vector<FileSearchResult>& vec1,
    const std::vector<FileSearchResult>& vec2) {
  std::vector<FileSearchResult> result;

  auto it1 = vec1.begin();
  auto it2 = vec2.begin();

  while (it1 != vec1.end() && it2 != vec2.end()) {
    if (it1->file_path < it2->file_path) {
      ++it1;
    } else if (it2->file_path < it1->file_path) {
      ++it2;
    } else {
      // Neither path orders before the other, so they are treated as equal.
      // Forward the constructor arguments so the element is built in place
      // rather than constructed as a temporary and then moved.
      result.emplace_back(it1->file_path, it1->last_modified,
                          it1->relevance + it2->relevance);
      ++it1;
      ++it2;
    }
  }

  return result;
}

bool IsStopWord(const std::string& word) {
  DCHECK(!base::IsAsciiUpper(word[0]));
  static constexpr auto kEnglishStopwords =
      base::MakeFixedFlatSet<std::string_view>(base::sorted_unique,
                                               {
                                                   "'ll",
                                                   "'ve",
                                                   "I",
                                                   "a",
                                                   "a's",
                                                   "able",
                                                   "about",
                                                   "above",
                                                   "abroad",
                                                   "abst",
                                                   "accordance",
                                                   "according",
                                                   "accordingly",
                                                   "across",
                                                   "act",
                                                   "actually",
                                                   "added",
                                                   "adj",
                                                   "adopted",
                                                   "affected",
                                                   "affecting",
                                                   "affects",
                                                   "after",
                                                   "afterwards",
                                                   "again",
                                                   "against",
                                                   "ago",
                                                   "ah",
                                                   "ahead",
                                                   "ain't",
                                                   "all",
                                                   "allow",
                                                   "allows",
                                                   "almost",
                                                   "alone",
                                                   "along",
                                                   "alongside",
                                                   "already",
                                                   "also",
                                                   "although",
                                                   "always",
                                                   "am",
                                                   "amid",
                                                   "amidst",
                                                   "among",
                                                   "amongst",
                                                   "amount",
                                                   "an",
                                                   "and",
                                                   "announce",
                                                   "another",
                                                   "any",
                                                   "anybody",
                                                   "anyhow",
                                                   "anymore",
                                                   "anyone",
                                                   "anything",
                                                   "anyway",
                                                   "anyways",
                                                   "anywhere",
                                                   "apart",
                                                   "apparently",
                                                   "appear",
                                                   "appreciate",
                                                   "appropriate",
                                                   "approximately",
                                                   "are",
                                                   "aren",
                                                   "aren't",
                                                   "arent",
                                                   "arise",
                                                   "around",
                                                   "as",
                                                   "aside",
                                                   "ask",
                                                   "asking",
                                                   "associated",
                                                   "at",
                                                   "auth",
                                                   "available",
                                                   "away",
                                                   "awfully",
                                                   "b",
                                                   "back",
                                                   "backward",
                                                   "backwards",
                                                   "be",
                                                   "became",
                                                   "because",
                                                   "become",
                                                   "becomes",
                                                   "becoming",
                                                   "been",
                                                   "before",
                                                   "beforehand",
                                                   "begin",
                                                   "beginning",
                                                   "beginnings",
                                                   "begins",
                                                   "behind",
                                                   "being",
                                                   "believe",
                                                   "below",
                                                   "beside",
                                                   "besides",
                                                   "best",
                                                   "better",
                                                   "between",
                                                   "beyond",
                                                   "bill",
                                                   "biol",
                                                   "both",
                                                   "bottom",
                                                   "brief",
                                                   "briefly",
                                                   "but",
                                                   "by",
                                                   "c",
                                                   "c'mon",
                                                   "c's",
                                                   "ca",
                                                   "call",
                                                   "came",
                                                   "can",
                                                   "can't",
                                                   "cannot",
                                                   "cant",
                                                   "caption",
                                                   "cause",
                                                   "causes",
                                                   "certain",
                                                   "certainly",
                                                   "changes",
                                                   "clearly",
                                                   "co",
                                                   "co.",
                                                   "com",
                                                   "come",
                                                   "comes",
                                                   "computer",
                                                   "con",
                                                   "concerning",
                                                   "consequently",
                                                   "consider",
                                                   "considering",
                                                   "contain",
                                                   "containing",
                                                   "contains",
                                                   "corresponding",
                                                   "could",
                                                   "couldn't",
                                                   "couldnt",
                                                   "course",
                                                   "cry",
                                                   "currently",
                                                   "d",
                                                   "dare",
                                                   "daren't",
                                                   "date",
                                                   "de",
                                                   "definitely",
                                                   "describe",
                                                   "described",
                                                   "despite",
                                                   "detail",
                                                   "did",
                                                   "didn't",
                                                   "different",
                                                   "directly",
                                                   "do",
                                                   "does",
                                                   "doesn't",
                                                   "doing",
                                                   "don't",
                                                   "done",
                                                   "down",
                                                   "downwards",
                                                   "due",
                                                   "during",
                                                   "e",
                                                   "each",
                                                   "ed",
                                                   "edu",
                                                   "effect",
                                                   "eg",
                                                   "eight",
                                                   "eighty",
                                                   "either",
                                                   "eleven",
                                                   "else",
                                                   "elsewhere",
                                                   "empty",
                                                   "end",
                                                   "ending",
                                                   "enough",
                                                   "entirely",
                                                   "especially",
                                                   "et",
                                                   "et-al",
                                                   "etc",
                                                   "even",
                                                   "ever",
                                                   "evermore",
                                                   "every",
                                                   "everybody",
                                                   "everyone",
                                                   "everything",
                                                   "everywhere",
                                                   "ex",
                                                   "exactly",
                                                   "example",
                                                   "except",
                                                   "f",
                                                   "fairly",
                                                   "far",
                                                   "farther",
                                                   "few",
                                                   "fewer",
                                                   "ff",
                                                   "fifteen",
                                                   "fifth",
                                                   "fifty",
                                                   "fill",
                                                   "find",
                                                   "fire",
                                                   "first",
                                                   "five",
                                                   "fix",
                                                   "followed",
                                                   "following",
                                                   "follows",
                                                   "for",
                                                   "forever",
                                                   "former",
                                                   "formerly",
                                                   "forth",
                                                   "forty",
                                                   "forward",
                                                   "found",
                                                   "four",
                                                   "from",
                                                   "front",
                                                   "full",
                                                   "further",
                                                   "furthermore",
                                                   "g",
                                                   "gave",
                                                   "get",
                                                   "gets",
                                                   "getting",
                                                   "give",
                                                   "given",
                                                   "gives",
                                                   "giving",
                                                   "go",
                                                   "goes",
                                                   "going",
                                                   "gone",
                                                   "got",
                                                   "gotten",
                                                   "greetings",
                                                   "h",
                                                   "had",
                                                   "hadn't",
                                                   "half",
                                                   "happens",
                                                   "hardly",
                                                   "has",
                                                   "hasn't",
                                                   "hasnt",
                                                   "have",
                                                   "haven't",
                                                   "having",
                                                   "he",
                                                   "he'd",
                                                   "he'll",
                                                   "he's",
                                                   "hello",
                                                   "help",
                                                   "hence",
                                                   "her",
                                                   "here",
                                                   "here's",
                                                   "hereafter",
                                                   "hereby",
                                                   "herein",
                                                   "heres",
                                                   "hereupon",
                                                   "hers",
                                                   "herse",
                                                   "herself",
                                                   "hes",
                                                   "hi",
                                                   "hid",
                                                   "him",
                                                   "himse",
                                                   "himself",
                                                   "his",
                                                   "hither",
                                                   "home",
                                                   "hopefully",
                                                   "how",
                                                   "how's",
                                                   "howbeit",
                                                   "however",
                                                   "hundred",
                                                   "i",
                                                   "i'd",
                                                   "i'll",
                                                   "i'm",
                                                   "i've",
                                                   "id",
                                                   "ie",
                                                   "if",
                                                   "ignored",
                                                   "im",
                                                   "immediate",
                                                   "immediately",
                                                   "importance",
                                                   "important",
                                                   "in",
                                                   "inasmuch",
                                                   "inc",
                                                   "inc.",
                                                   "indeed",
                                                   "index",
                                                   "indicate",
                                                   "indicated",
                                                   "indicates",
                                                   "information",
                                                   "inner",
                                                   "inside",
                                                   "insofar",
                                                   "instead",
                                                   "interest",
                                                   "into",
                                                   "invention",
                                                   "inward",
                                                   "is",
                                                   "isn't",
                                                   "it",
                                                   "it'd",
                                                   "it'll",
                                                   "it's",
                                                   "itd",
                                                   "its",
                                                   "itself",
                                                   "itse”",
                                                   "j",
                                                   "just",
                                                   "k",
                                                   "keep",
                                                   "keeps",
                                                   "kept",
                                                   "keys",
                                                   "kg",
                                                   "km",
                                                   "know",
                                                   "known",
                                                   "knows",
                                                   "l",
                                                   "largely",
                                                   "last",
                                                   "lately",
                                                   "later",
                                                   "latter",
                                                   "latterly",
                                                   "least",
                                                   "less",
                                                   "lest",
                                                   "let",
                                                   "let's",
                                                   "lets",
                                                   "like",
                                                   "liked",
                                                   "likely",
                                                   "likewise",
                                                   "line",
                                                   "little",
                                                   "look",
                                                   "looking",
                                                   "looks",
                                                   "low",
                                                   "lower",
                                                   "ltd",
                                                   "m",
                                                   "made",
                                                   "mainly",
                                                   "make",
                                                   "makes",
                                                   "many",
                                                   "may",
                                                   "maybe",
                                                   "mayn't",
                                                   "me",
                                                   "mean",
                                                   "means",
                                                   "meantime",
                                                   "meanwhile",
                                                   "merely",
                                                   "mg",
                                                   "might",
                                                   "mightn't",
                                                   "mill",
                                                   "million",
                                                   "mine",
                                                   "minus",
                                                   "miss",
                                                   "ml",
                                                   "more",
                                                   "moreover",
                                                   "most",
                                                   "mostly",
                                                   "move",
                                                   "mr",
                                                   "mrs",
                                                   "much",
                                                   "mug",
                                                   "must",
                                                   "mustn't",
                                                   "my",
                                                   "myself",
                                                   "myse”",
                                                   "n",
                                                   "na",
                                                   "name",
                                                   "namely",
                                                   "nay",
                                                   "nd",
                                                   "near",
                                                   "nearly",
                                                   "necessarily",
                                                   "necessary",
                                                   "need",
                                                   "needn't",
                                                   "needs",
                                                   "neither",
                                                   "never",
                                                   "neverf",
                                                   "neverless",
                                                   "nevertheless",
                                                   "new",
                                                   "next",
                                                   "nine",
                                                   "ninety",
                                                   "no",
                                                   "no-one",
                                                   "nobody",
                                                   "non",
                                                   "none",
                                                   "nonetheless",
                                                   "noone",
                                                   "nor",
                                                   "normally",
                                                   "nos",
                                                   "not",
                                                   "noted",
                                                   "nothing",
                                                   "notwithstanding",
                                                   "novel",
                                                   "now",
                                                   "nowhere",
                                                   "o",
                                                   "obtain",
                                                   "obtained",
                                                   "obviously",
                                                   "of",
                                                   "off",
                                                   "often",
                                                   "oh",
                                                   "ok",
                                                   "okay",
                                                   "old",
                                                   "omitted",
                                                   "on",
                                                   "once",
                                                   "one",
                                                   "one's",
                                                   "ones",
                                                   "only",
                                                   "onto",
                                                   "opposite",
                                                   "or",
                                                   "ord",
                                                   "other",
                                                   "others",
                                                   "otherwise",
                                                   "ought",
                                                   "oughtn't",
                                                   "our",
                                                   "ours",
                                                   "ourselves",
                                                   "out",
                                                   "outside",
                                                   "over",
                                                   "overall",
                                                   "owing",
                                                   "own",
                                                   "p",
                                                   "page",
                                                   "pages",
                                                   "part",
                                                   "particular",
                                                   "particularly",
                                                   "past",
                                                   "per",
                                                   "perhaps",
                                                   "placed",
                                                   "please",
                                                   "plus",
                                                   "poorly",
                                                   "possible",
                                                   "possibly",
                                                   "potentially",
                                                   "pp",
                                                   "predominantly",
                                                   "present",
                                                   "presumably",
                                                   "previously",
                                                   "primarily",
                                                   "probably",
                                                   "promptly",
                                                   "proud",
                                                   "provided",
                                                   "provides",
                                                   "put",
                                                   "q",
                                                   "que",
                                                   "quickly",
                                                   "quite",
                                                   "qv",
                                                   "r",
                                                   "ran",
                                                   "rather",
                                                   "rd",
                                                   "re",
                                                   "readily",
                                                   "really",
                                                   "reasonably",
                                                   "recent",
                                                   "recently",
                                                   "ref",
                                                   "refs",
                                                   "regarding",
                                                   "regardless",
                                                   "regards",
                                                   "related",
                                                   "relatively",
                                                   "research",
                                                   "respectively",
                                                   "resulted",
                                                   "resulting",
                                                   "results",
                                                   "right",
                                                   "round",
                                                   "run",
                                                   "s",
                                                   "said",
                                                   "same",
                                                   "saw",
                                                   "say",
                                                   "saying",
                                                   "says",
                                                   "sec",
                                                   "second",
                                                   "secondly",
                                                   "section",
                                                   "see",
                                                   "seeing",
                                                   "seem",
                                                   "seemed",
                                                   "seeming",
                                                   "seems",
                                                   "seen",
                                                   "self",
                                                   "selves",
                                                   "sensible",
                                                   "sent",
                                                   "serious",
                                                   "seriously",
                                                   "seven",
                                                   "several",
                                                   "shall",
                                                   "shan't",
                                                   "she",
                                                   "she'd",
                                                   "she'll",
                                                   "she's",
                                                   "shed",
                                                   "shes",
                                                   "should",
                                                   "shouldn't",
                                                   "show",
                                                   "showed",
                                                   "shown",
                                                   "showns",
                                                   "shows",
                                                   "side",
                                                   "significant",
                                                   "significantly",
                                                   "similar",
                                                   "similarly",
                                                   "since",
                                                   "sincere",
                                                   "six",
                                                   "sixty",
                                                   "slightly",
                                                   "so",
                                                   "some",
                                                   "somebody",
                                                   "someday",
                                                   "somehow",
                                                   "someone",
                                                   "somethan",
                                                   "something",
                                                   "sometime",
                                                   "sometimes",
                                                   "somewhat",
                                                   "somewhere",
                                                   "soon",
                                                   "sorry",
                                                   "specifically",
                                                   "specified",
                                                   "specify",
                                                   "specifying",
                                                   "state",
                                                   "states",
                                                   "still",
                                                   "stop",
                                                   "strongly",
                                                   "sub",
                                                   "substantially",
                                                   "successfully",
                                                   "such",
                                                   "sufficiently",
                                                   "suggest",
                                                   "sup",
                                                   "sure",
                                                   "system",
                                                   "t",
                                                   "t's",
                                                   "take",
                                                   "taken",
                                                   "taking",
                                                   "tell",
                                                   "ten",
                                                   "tends",
                                                   "th",
                                                   "than",
                                                   "thank",
                                                   "thanks",
                                                   "thanx",
                                                   "that",
                                                   "that'll",
                                                   "that's",
                                                   "that've",
                                                   "thats",
                                                   "the",
                                                   "their",
                                                   "theirs",
                                                   "them",
                                                   "themselves",
                                                   "then",
                                                   "thence",
                                                   "there",
                                                   "there'd",
                                                   "there'll",
                                                   "there're",
                                                   "there's",
                                                   "there've",
                                                   "thereafter",
                                                   "thereby",
                                                   "thered",
                                                   "therefore",
                                                   "therein",
                                                   "thereof",
                                                   "therere",
                                                   "theres",
                                                   "thereto",
                                                   "thereupon",
                                                   "these",
                                                   "they",
                                                   "they'd",
                                                   "they'll",
                                                   "they're",
                                                   "they've",
                                                   "theyd",
                                                   "theyre",
                                                   "thick",
                                                   "thin",
                                                   "thing",
                                                   "things",
                                                   "think",
                                                   "third",
                                                   "thirty",
                                                   "this",
                                                   "thorough",
                                                   "thoroughly",
                                                   "those",
                                                   "thou",
                                                   "though",
                                                   "thoughh",
                                                   "thousand",
                                                   "three",
                                                   "throug",
                                                   "through",
                                                   "throughout",
                                                   "thru",
                                                   "thus",
                                                   "til",
                                                   "till",
                                                   "tip",
                                                   "to",
                                                   "together",
                                                   "too",
                                                   "took",
                                                   "top",
                                                   "toward",
                                                   "towards",
                                                   "tried",
                                                   "tries",
                                                   "truly",
                                                   "try",
                                                   "trying",
                                                   "ts",
                                                   "twelve",
                                                   "twenty",
                                                   "twice",
                                                   "two",
                                                   "u",
                                                   "un",
                                                   "under",
                                                   "underneath",
                                                   "undoing",
                                                   "unfortunately",
                                                   "unless",
                                                   "unlike",
                                                   "unlikely",
                                                   "until",
                                                   "unto",
                                                   "up",
                                                   "upon",
                                                   "ups",
                                                   "upwards",
                                                   "us",
                                                   "use",
                                                   "used",
                                                   "useful",
                                                   "usefully",
                                                   "usefulness",
                                                   "uses",
                                                   "using",
                                                   "usually",
                                                   "uucp",
                                                   "v",
                                                   "value",
                                                   "various",
                                                   "versus",
                                                   "very",
                                                   "via",
                                                   "viz",
                                                   "vol",
                                                   "vols",
                                                   "vs",
                                                   "w",
                                                   "want",
                                                   "wants",
                                                   "was",
                                                   "wasn't",
                                                   "way",
                                                   "we",
                                                   "we'd",
                                                   "we'll",
                                                   "we're",
                                                   "we've",
                                                   "wed",
                                                   "welcome",
                                                   "well",
                                                   "went",
                                                   "were",
                                                   "weren't",
                                                   "what",
                                                   "what'll",
                                                   "what's",
                                                   "what've",
                                                   "whatever",
                                                   "whats",
                                                   "when",
                                                   "when's",
                                                   "whence",
                                                   "whenever",
                                                   "where",
                                                   "where's",
                                                   "whereafter",
                                                   "whereas",
                                                   "whereby",
                                                   "wherein",
                                                   "wheres",
                                                   "whereupon",
                                                   "wherever",
                                                   "whether",
                                                   "which",
                                                   "whichever",
                                                   "while",
                                                   "whilst",
                                                   "whim",
                                                   "whither",
                                                   "who",
                                                   "who'd",
                                                   "who'll",
                                                   "who's",
                                                   "whod",
                                                   "whoever",
                                                   "whole",
                                                   "whom",
                                                   "whomever",
                                                   "whos",
                                                   "whose",
                                                   "why",
                                                   "why's",
                                                   "widely",
                                                   "will",
                                                   "willing",
                                                   "wish",
                                                   "with",
                                                   "within",
                                                   "without",
                                                   "won't",
                                                   "wonder",
                                                   "words",
                                                   "world",
                                                   "would",
                                                   "wouldn't",
                                                   "www",
                                                   "x",
                                                   "y",
                                                   "yes",
                                                   "yet",
                                                   "you",
                                                   "you'd",
                                                   "you'll",
                                                   "you're",
                                                   "you've",
                                                   "youd",
                                                   "your",
                                                   "youre",
                                                   "yours",
                                                   "yourself",
                                                   "yourselves",
                                                   "z",
                                                   "zero",
                                               });

  return kEnglishStopwords.contains(word);
}

}  // namespace app_list