chromium/chromeos/ash/components/string_matching/tokenized_string_match.cc

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chromeos/ash/components/string_matching/tokenized_string_match.h"

#include <stddef.h>

#include <cmath>

#include "base/i18n/string_search.h"
#include "base/strings/string_util.h"
#include "chromeos/ash/components/string_matching/prefix_matcher.h"

namespace ash::string_matching {

namespace {

// Used for each character if there is no prefix match.
const double kIsSubstringMultiplier = 0.4;

// A relevance score that represents no match.
const double kNoMatchScore = 0.0;

}  // namespace

TokenizedStringMatch::TokenizedStringMatch() : relevance_(kNoMatchScore) {}

TokenizedStringMatch::~TokenizedStringMatch() = default;

double TokenizedStringMatch::Calculate(const TokenizedString& query,
                                       const TokenizedString& text) {
  relevance_ = kNoMatchScore;
  hits_.clear();

  // If there is an exact match, relevance will be 1.0 and there is only 1 hit
  // that is the entire text/query.
  const auto& query_text = query.text();
  const auto& text_text = text.text();
  const auto query_size = query_text.size();
  const auto text_size = text_text.size();
  if (query_size > 0 && query_size == text_size &&
      base::EqualsCaseInsensitiveASCII(query_text, text_text)) {
    hits_.emplace_back(0, query_size);
    relevance_ = 1.0;
    return true;
  }

  PrefixMatcher matcher(query, text);
  if (matcher.Match()) {
    relevance_ = matcher.relevance();
    hits_.assign(matcher.hits().begin(), matcher.hits().end());
  }

  // Substring match as a fallback.
  if (relevance_ == kNoMatchScore) {
    size_t substr_match_start = 0;
    size_t substr_match_length = 0;
    if (base::i18n::StringSearchIgnoringCaseAndAccents(
            query.text(), text.text(), &substr_match_start,
            &substr_match_length)) {
      relevance_ = kIsSubstringMultiplier * substr_match_length;
      hits_.emplace_back(substr_match_start,
                         substr_match_start + substr_match_length);
    }
  }

  // Temper the relevance score with an exponential curve. Each point of
  // relevance (roughly, each keystroke) is worth less than the last. This means
  // that typing a few characters of a word is enough to promote matches very
  // high, with any subsequent characters being worth comparatively less.
  relevance_ = 1.0 - std::pow(0.5, relevance_);

  return relevance_;
}

double TokenizedStringMatch::Calculate(const std::u16string& query,
                                       const std::u16string& text) {
  const TokenizedString tokenized_query(query);
  const TokenizedString tokenized_text(text);
  return Calculate(tokenized_query, tokenized_text);
}

}  // namespace ash::string_matching