// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/ash/components/file_manager/indexing/file_index.h"
#include "base/time/time.h"
namespace ash::file_manager {
FileIndex::FileIndex(std::unique_ptr<IndexStorage> storage)
: storage_(std::move(storage)) {}
// Defaulted out of line: there is no custom teardown, each member (including
// `storage_`) is destroyed by its own destructor.
FileIndex::~FileIndex() = default;
// Initializes the underlying storage. Returns kSuccess when the storage's
// Init() reports success and kUninitialized otherwise.
OpResults FileIndex::Init() {
  if (!storage_->Init()) {
    return OpResults::kUninitialized;
  }
  return OpResults::kSuccess;
}
// Hands `file_info` to the storage. The storage signals failure by returning
// -1, which is reported as kGenericError; any other value means kSuccess.
OpResults FileIndex::PutFileInfo(const FileInfo& file_info) {
  if (storage_->PutFileInfo(file_info) == -1) {
    return OpResults::kGenericError;
  }
  return OpResults::kSuccess;
}
// Makes `terms` the complete set of terms associated with `url`.
//
// Returns kArgumentError if `terms` is empty, kFileMissing if the storage has
// no ID for `url` (GetUrlId() yields -1), and kSuccess otherwise.
OpResults FileIndex::SetTerms(const std::vector<Term>& terms, const GURL& url) {
  if (terms.empty()) {
    return OpResults::kArgumentError;
  }
  const int64_t url_id = storage_->GetUrlId(url);
  if (url_id == -1) {
    return OpResults::kFileMissing;
  }

  // Convert the terms to internal IDs; collecting them in a set removes
  // duplicates.
  std::set<int64_t> wanted_ids = ConvertToTermIds(terms);

  // Term IDs that are currently attached to the URL but are not part of the
  // requested set have to go. For example, if the URL currently has
  // {t1, t3, t8} and the request is {t1, t2}, then {t3, t8} is removed.
  const std::set<int64_t> current_ids = storage_->GetTermIdsForUrl(url_id);
  if (!current_ids.empty()) {
    std::set<int64_t> stale_ids;
    for (const int64_t current_id : current_ids) {
      if (wanted_ids.find(current_id) == wanted_ids.end()) {
        stale_ids.insert(current_id);
      }
    }
    storage_->DeleteTermIdsForUrl(stale_ids, url_id);
  }

  storage_->AddTermIdsForUrl(wanted_ids, url_id);
  return OpResults::kSuccess;
}
// Re-keys the file stored under `old_url` so that it is stored under
// `new_url`. Both URLs are expected to be valid (DCHECKed).
//
// Returns:
//   kSuccess      - the URLs are equal (nothing to do) or the move succeeded.
//   kFileMissing  - `old_url` has no ID, or no file info is stored for it.
//   kFileExists   - `new_url` already has an ID.
//   kGenericError - the storage's MoveUrl() returned -1.
OpResults FileIndex::MoveFile(const GURL& old_url, const GURL& new_url) {
  DCHECK(old_url.is_valid());
  DCHECK(new_url.is_valid());

  // Moving a file onto itself is a no-op.
  if (old_url == new_url) {
    return OpResults::kSuccess;
  }

  // Phase 1: diagnostics. These checks are not strictly required, but they
  // allow reporting a more precise error than the move itself would.
  const int64_t source_id = storage_->GetUrlId(old_url);
  if (source_id < 0) {
    return OpResults::kFileMissing;
  }
  const int64_t target_id = storage_->GetUrlId(new_url);
  if (target_id != -1) {
    return OpResults::kFileExists;
  }
  const std::optional<FileInfo> source_info = storage_->GetFileInfo(source_id);
  if (!source_info) {
    return OpResults::kFileMissing;
  }

  // Phase 2: perform the move by updating the URL.
  if (storage_->MoveUrl(old_url, new_url) == -1) {
    return OpResults::kGenericError;
  }
  return OpResults::kSuccess;
}
// Removes everything the index stores for `url`: its entries in the posting
// lists of all terms associated with it, its file info and the URL itself.
//
// Always returns kSuccess; a URL the storage has no ID for is treated as
// already removed.
OpResults FileIndex::RemoveFile(const GURL& url) {
  const int64_t url_id = storage_->GetUrlId(url);
  if (url_id < 0) {
    return OpResults::kSuccess;
  }
  // Iterate over an owned snapshot of the term IDs (as SetTerms() does) rather
  // than over a reference to the storage's result: the loop body mutates the
  // storage, and a snapshot stays valid whether GetTermIdsForUrl() returns by
  // value or by reference.
  const std::set<int64_t> url_term_ids = storage_->GetTermIdsForUrl(url_id);
  for (const int64_t term_id : url_term_ids) {
    storage_->DeleteFromPostingList(term_id, url_id);
  }
  storage_->DeleteFileInfo(url_id);
  storage_->DeleteUrl(url);
  return OpResults::kSuccess;
}
// Dissociates `terms` from `url`. For every term, both its field-specific ID
// and the ID of the same token with an empty field are removed from the
// posting lists for `url`; terms the storage does not know (ID -1) are
// skipped.
//
// Always returns kSuccess, including when the storage has no ID for `url`.
OpResults FileIndex::RemoveTerms(const std::vector<Term>& terms,
                                 const GURL& url) {
  const int64_t url_id = storage_->GetUrlId(url);
  if (url_id < 0) {
    return OpResults::kSuccess;
  }

  std::set<int64_t> ids_to_drop;
  // Remembers `id` for deletion unless it is the "unknown term" marker.
  const auto remember_if_known = [&ids_to_drop](int64_t id) {
    if (id != -1) {
      ids_to_drop.insert(id);
    }
  };
  for (const Term& term : terms) {
    remember_if_known(storage_->GetTermId(term));
    remember_if_known(storage_->GetTermId(Term("", term.token())));
  }

  for (const int64_t id : ids_to_drop) {
    storage_->DeleteFromPostingList(id, url_id);
  }
  return OpResults::kSuccess;
}
// Associates `terms` with `url` in addition to whatever is already stored.
//
// Returns kSuccess for an empty `terms` vector (nothing to do) and after the
// term IDs were handed to the storage; returns kFileMissing if the storage
// has no ID for `url` (GetUrlId() yields -1).
OpResults FileIndex::AddTerms(const std::vector<Term>& terms, const GURL& url) {
  // An empty request succeeds without consulting the storage.
  if (terms.empty()) {
    return OpResults::kSuccess;
  }

  const int64_t file_url_id = storage_->GetUrlId(url);
  if (file_url_id == -1) {
    return OpResults::kFileMissing;
  }

  std::set<int64_t> ids_to_add = ConvertToTermIds(terms);
  storage_->AddTermIdsForUrl(ids_to_add, file_url_id);
  return OpResults::kSuccess;
}
// Searches the index for file info matching the specified query. A file
// matches only if it is associated with every term of the query.
//
// Returns empty results when the query has no terms, when any term is unknown
// to the storage (GetTermId() yields -1) or has no URLs, or when no URL is
// shared by all terms. Otherwise returns one Match per URL ID, in ascending
// URL ID order, each with a placeholder score of 1.
SearchResults FileIndex::Search(const Query& query) {
  const std::vector<Term>& terms = query.terms();
  SearchResults results;
  if (terms.empty()) {
    // Technically, an empty query matches every file, but we treat this
    // as empty match.
    return results;
  }

  std::set<int64_t> matched_url_ids;
  bool first = true;
  for (const Term& term : terms) {
    const int64_t term_id = storage_->GetTermId(term);
    if (term_id == -1) {
      return results;
    }
    std::set<int64_t> url_ids = storage_->GetUrlIdsForTermId(term_id);
    if (url_ids.empty()) {
      return results;
    }
    if (first) {
      // The first posting list seeds the candidates; it is not needed
      // afterwards, so move it instead of copying.
      matched_url_ids = std::move(url_ids);
      first = false;
    } else {
      std::set<int64_t> intersection;
      std::set_intersection(matched_url_ids.begin(), matched_url_ids.end(),
                            url_ids.begin(), url_ids.end(),
                            std::inserter(intersection, intersection.begin()));
      matched_url_ids = std::move(intersection);
    }
    // Once no candidate is left, further terms cannot add any.
    if (matched_url_ids.empty()) {
      return results;
    }
  }

  // `terms` is non-empty and every iteration above either returned or left at
  // least one candidate, so `matched_url_ids` is non-empty here.
  for (const int64_t url_id : matched_url_ids) {
    std::optional<FileInfo> file_info = storage_->GetFileInfo(url_id);
    DCHECK(file_info.has_value());
    // TODO(b:327535200): Add true score.
    results.matches.emplace_back(Match(1, file_info.value()));
  }
  // TODO(b:327535200): Correctly compute total_matches.
  results.total_matches = results.matches.size();
  return results;
}
// Maps `terms` to internal term IDs, creating IDs in the storage for terms it
// has not seen yet. Every term contributes two IDs: one for the term as given
// and one for the same token with an empty field. Each term is expected to
// carry a non-empty field (DCHECKed). The returned set holds no duplicates.
std::set<int64_t> FileIndex::ConvertToTermIds(const std::vector<Term>& terms) {
  std::set<int64_t> ids;
  for (const Term& term : terms) {
    DCHECK(!term.field().empty());
    const int64_t field_scoped_id = storage_->GetOrCreateTermId(term);
    ids.insert(field_scoped_id);
    const int64_t field_less_id =
        storage_->GetOrCreateTermId(Term("", term.token()));
    ids.insert(field_less_id);
  }
  return ids;
}
} // namespace ash::file_manager