// chromium/chrome/browser/ash/app_list/search/files/file_search_provider.cc

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/ash/app_list/search/files/file_search_provider.h"

#include <cmath>
#include <utility>

#include "base/containers/fixed_flat_map.h"
#include "base/files/file_enumerator.h"
#include "base/i18n/case_conversion.h"
#include "base/i18n/rtl.h"
#include "base/metrics/histogram_macros.h"
#include "base/ranges/algorithm.h"
#include "base/strings/strcat.h"
#include "base/strings/utf_string_conversions.h"
#include "base/task/task_traits.h"
#include "base/task/thread_pool.h"
#include "base/time/time.h"
#include "chrome/browser/ash/app_list/search/files/diacritics_checker.h"
#include "chrome/browser/ash/app_list/search/files/file_result.h"
#include "chrome/browser/ash/app_list/search/search_features.h"
#include "chrome/browser/ash/app_list/search/types.h"
#include "chrome/browser/ash/file_manager/path_util.h"
#include "chrome/browser/ash/file_manager/trash_common_util.h"
#include "chrome/browser/profiles/profile.h"

namespace app_list {

namespace {

using ::ash::string_matching::TokenizedString;

// Prefix prepended to a file's path to form the ID of its search result.
constexpr char kFileSearchSchema[] = "file_search://";
// Upper bound on the number of matches collected by one enumeration.
constexpr int kMaxResults = 25;
// Time budget in milliseconds, measured from the start of the query, after
// which the enumeration stops collecting matches.
constexpr int kSearchTimeoutMs = 100;
// Results scoring below this relevance are dropped when the
// fuzzy-match-across-providers launcher feature is enabled.
constexpr double kRelevanceThreshold = 0.79;

// Builds a case-insensitive, accent-insensitive fnmatch pattern from
// |query_input| and returns it as UTF-8.
//
// The input is lower-cased first. Each ASCII letter, and each code unit that
// HasDiacritics() reports as accented, is replaced by a bracket expression:
//   * letters listed in the table below expand to the letter, its upper-case
//     form and a set of Latin-script accented variants;
//   * anything else expands to "[<lower><upper>]", so an accented character
//     typed by the user stays accented and only becomes case-insensitive.
// Every other code unit is copied through unchanged. The whole pattern is
// wrapped in '*' on both sides.
//
// E.g. abc123 yields *[aAáàâäāåÁÀÂÄĀÅ][bB][cçCÇ]123*, and ádd yields
// *[áÁ][dD][dD]*.
std::string CreateFnmatchQuery(const std::u16string& query_input) {
  static constexpr auto kAccentClasses =
      base::MakeFixedFlatMap<char16_t, std::u16string_view>({
          {u'a', u"[aAáàâäāåÁÀÂÄĀÅ]"},
          {u'c', u"[cçCÇ]"},
          {u'e', u"[eEéèêëēÉÈÊËĒ]"},
          {u'i', u"[iIíìîïīÍÌÎÏĪ]"},
          {u'n', u"[nNñÑ]"},
          {u'o', u"[oOóòôöōøÓÒÔÖŌØ]"},
          {u'u', u"[uUúùûüūÚÙÛÜŪ]"},
          {u'y', u"[yYýỳŷÿȳÝỲŶŸȲ]"},
      });

  const std::u16string lowered = base::i18n::ToLower(query_input);

  std::u16string pattern = u"*";
  for (size_t pos = 0; pos < lowered.size(); ++pos) {
    const char16_t unit = lowered[pos];
    const std::u16string unit_str(1, unit);

    // Only ASCII letters and accented characters get a bracket expression;
    // everything else is emitted literally.
    const bool is_ascii_letter = unit >= u'a' && unit <= u'z';
    if (!is_ascii_letter && !HasDiacritics(unit_str)) {
      pattern.push_back(unit);
      continue;
    }

    if (const auto entry = kAccentClasses.find(unit);
        entry != kAccentClasses.end()) {
      pattern.append(entry->second);
    } else {
      pattern.push_back(u'[');
      pattern.append(unit_str);
      pattern.append(base::i18n::ToUpper(unit_str));
      pattern.push_back(u']');
    }
  }
  pattern.push_back(u'*');

  return base::UTF16ToUTF8(pattern);
}

// Recursively enumerates |root_path| for entries of |file_type| that match the
// fnmatch pattern built from |query|, and returns one FileInfo per match: the
// path, whether it is a directory, and its last-access time taken from the
// entry's stat data.
//
// Entries parented at any of |trash_paths| are excluded. Collection stops once
// kMaxResults matches have been gathered, or once more than kSearchTimeoutMs
// has elapsed since |query_start_time|; whether the time limit was hit is
// recorded in the "Apps.AppList.FileSearchProvider.TimedOut" histogram.
std::vector<FileSearchProvider::FileInfo> SearchFilesByPattern(
    const base::FilePath& root_path,
    const std::u16string& query,
    const base::TimeTicks& query_start_time,
    const std::vector<base::FilePath> trash_paths,
    const int file_type) {
  base::FileEnumerator enumerator(
      root_path,
      /*recursive=*/true, file_type, CreateFnmatchQuery(query),
      base::FileEnumerator::FolderSearchPolicy::ALL);

  const auto time_limit = base::Milliseconds(kSearchTimeoutMs);
  bool timed_out = false;
  std::vector<FileSearchProvider::FileInfo> matched_paths;
  for (base::FilePath path = enumerator.Next(); !path.empty();
       path = enumerator.Next()) {
    // Exclude any paths that are parented at an enabled trash location.
    const bool in_trash = base::ranges::any_of(
        trash_paths, [&path](const base::FilePath& trash_path) {
          return trash_path.IsParent(path);
        });

    if (!in_trash) {
      const auto info = enumerator.GetInfo();
      matched_paths.emplace_back(path, info.IsDirectory(),
                                 base::Time::FromTimeT(info.stat().st_atime));

      if (matched_paths.size() == kMaxResults) {
        break;
      }
    }

    // Check the time budget for every entry the enumerator yields, including
    // trash entries that were skipped above. Otherwise a run of matches inside
    // a trash directory would keep the loop going past the limit.
    // NOTE: the budget is still only checked when the enumerator yields an
    // entry, not while it is walking directories without matches.
    if (base::TimeTicks::Now() - query_start_time > time_limit) {
      timed_out = true;
      break;
    }
  }
  UMA_HISTOGRAM_BOOLEAN("Apps.AppList.FileSearchProvider.TimedOut", timed_out);
  return matched_paths;
}

}  // namespace

// Creates a provider in the kFiles search category for |profile|. Searches are
// rooted at the profile's MyFiles folder and restricted to |file_type|, which
// is later forwarded unchanged to the file enumerator.
FileSearchProvider::FileSearchProvider(Profile* profile, int file_type)
    : SearchProvider(SearchCategory::kFiles),
      profile_(profile),
      thumbnail_loader_(profile),
      root_path_(file_manager::util::GetMyFilesFolderForProfile(profile)),
      file_type_(file_type) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  // A profile and a non-empty search root are both required.
  DCHECK(profile_);
  DCHECK(!root_path_.empty());
}

// Defaulted: no explicit teardown is performed beyond destroying the members.
FileSearchProvider::~FileSearchProvider() = default;

// Reports the result type for this provider, which is always kFileSearch.
ash::AppListSearchResultType FileSearchProvider::ResultType() const {
  return ash::AppListSearchResultType::kFileSearch;
}

// Begins a new search for |query|. Any reply still pending from an earlier
// query is dropped by invalidating outstanding weak pointers, the query and
// its tokenized form are stored for use when results arrive, and the file
// enumeration is posted to the thread pool with OnSearchComplete() as the
// reply.
void FileSearchProvider::Start(const std::u16string& query) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  query_start_time_ = base::TimeTicks::Now();

  weak_factory_.InvalidateWeakPtrs();

  last_query_ = query;
  last_tokenized_query_.emplace(query, TokenizedString::Mode::kWords);

  // Build the list of trash directories once, the first time it is needed
  // (i.e. while the cached list is still empty).
  if (trash_paths_.empty()) {
    for (const auto& [parent_path, location] :
         file_manager::trash::GenerateEnabledTrashLocationsForProfile(
             profile_, /*base_path=*/base::FilePath())) {
      trash_paths_.emplace_back(
          parent_path.Append(location.relative_folder_path));
    }
  }

  // Trash can be dynamically turned on/off via an enterprise policy, so
  // whether the cached paths are excluded is decided on every search. The
  // search task receives its own copy of the list.
  std::vector<base::FilePath> excluded_paths;
  if (file_manager::trash::IsTrashEnabledForProfile(profile_)) {
    excluded_paths = trash_paths_;
  }

  base::ThreadPool::PostTaskAndReplyWithResult(
      FROM_HERE, {base::MayBlock(), base::TaskPriority::USER_BLOCKING},
      base::BindOnce(SearchFilesByPattern, root_path_, query, query_start_time_,
                     std::move(excluded_paths), file_type_),
      base::BindOnce(&FileSearchProvider::OnSearchComplete,
                     weak_factory_.GetWeakPtr()));
}

// Abandons the current query: forgets the stored query text and its tokenized
// form, and invalidates outstanding weak pointers so that a reply bound to one
// of them is not delivered.
void FileSearchProvider::StopQuery() {
  last_tokenized_query_.reset();
  last_query_.clear();

  weak_factory_.InvalidateWeakPtrs();
}

// Reply handler for the enumeration started in Start(). Scores every entry in
// |paths| against the stored tokenized query, converts the survivors into
// results and publishes them, then records the time elapsed since the query
// started in the "Apps.AppList.FileSearchProvider.Latency" histogram.
void FileSearchProvider::OnSearchComplete(
    std::vector<FileSearchProvider::FileInfo> paths) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  SearchProvider::Results results;
  for (const auto& info : paths) {
    const double relevance = FileResult::CalculateRelevance(
        last_tokenized_query_, info.path, info.last_accessed);
    DCHECK((relevance >= 0.0) && (relevance <= 1.0));

    // The relevance cut-off only applies while the fuzzy-match-across-providers
    // feature is enabled; the feature is queried per result, before the score
    // comparison.
    const bool filtered_out =
        search_features::IsLauncherFuzzyMatchAcrossProvidersEnabled() &&
        relevance < kRelevanceThreshold;
    if (!filtered_out) {
      results.push_back(MakeResult(info, relevance));
    }
  }

  SwapResults(&results);

  const auto latency = base::TimeTicks::Now() - query_start_time_;
  UMA_HISTOGRAM_TIMES("Apps.AppList.FileSearchProvider.Latency", latency);
}

// Builds the list-style launcher result for the matched entry |path| with the
// given |relevance|. The result ID is kFileSearchSchema followed by the full
// path, and the details text is the name of the entry's parent directory.
std::unique_ptr<FileResult> FileSearchProvider::MakeResult(
    const FileSearchProvider::FileInfo& path,
    const double relevance) {
  FileResult::Type type = FileResult::Type::kFile;
  if (path.is_directory) {
    type = FileResult::Type::kDirectory;
  }

  // The parent directory name is displayed directly, so collapse whitespace
  // (which removes newlines) and sanitize it for RTL before handing it over.
  const base::FilePath parent_dir = path.path.DirName().BaseName();
  std::u16string details = base::CollapseWhitespace(
      parent_dir.LossyDisplayName(), /*trim_sequences_with_line_breaks=*/true);
  base::i18n::SanitizeUserSuppliedString(&details);

  std::string id(kFileSearchSchema);
  id += path.path.value();

  return std::make_unique<FileResult>(
      id, path.path, details, ash::AppListSearchResultType::kFileSearch,
      ash::SearchResultDisplayType::kList, relevance, last_query_, type,
      profile_, &thumbnail_loader_);
}

}  // namespace app_list