#include "components/history_embeddings/history_embeddings_service.h"
#include <algorithm>
#include <numeric>
#include <tuple>
#include "base/feature_list.h"
#include "base/files/file_path.h"
#include "base/functional/bind.h"
#include "base/metrics/histogram_functions.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/task_traits.h"
#include "base/task/thread_pool.h"
#include "base/time/time.h"
#include "base/timer/elapsed_timer.h"
#include "base/token.h"
#include "base/uuid.h"
#include "components/history/core/browser/history_types.h"
#include "components/history/core/browser/url_database.h"
#include "components/history/core/browser/url_row.h"
#include "components/history_embeddings/history_embeddings_features.h"
#include "components/history_embeddings/ml_answerer.h"
#include "components/history_embeddings/ml_embedder.h"
#include "components/history_embeddings/mock_answerer.h"
#include "components/history_embeddings/mock_embedder.h"
#include "components/history_embeddings/scheduling_embedder.h"
#include "components/history_embeddings/sql_database.h"
#include "components/history_embeddings/vector_database.h"
#include "components/optimization_guide/core/model_quality/feature_type_map.h"
#include "components/optimization_guide/core/model_quality/model_quality_log_entry.h"
#include "components/optimization_guide/core/optimization_guide_model_executor.h"
#include "components/os_crypt/async/browser/os_crypt_async.h"
#include "components/page_content_annotations/core/page_content_annotations_service.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/weak_document_ptr.h"
#include "mojo/public/cpp/bindings/callback_helpers.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "services/service_manager/public/cpp/interface_provider.h"
#include "third_party/blink/public/mojom/content_extraction/inner_text.mojom.h"
#include "third_party/farmhash/src/src/farmhash.h"
#include "url/gurl.h"
namespace history_embeddings {
// Takes a `QueryFiltered` status by value and returns nothing.
// NOTE(review): the name suggests the status is reported to a metric (this
// file includes histogram_functions.h); the body is not visible here, so
// confirm the histogram name and bucket range before relying on it.
void RecordQueryFiltered(QueryFiltered status) { … }
// Takes an `ExtractionCancelled` reason by value and returns nothing.
// NOTE(review): presumably reports why a passage extraction was abandoned to
// a metric; the body is not visible here - confirm.
void RecordExtractionCancelled(ExtractionCancelled reason) { … }
// Maps a string view to a 32-bit unsigned value.
// NOTE(review): this file includes farmhash.h, so the hash is presumably
// computed with farmhash - confirm the exact function, since the value may be
// persisted or logged and would then need to stay stable.
uint32_t HashString(std::string_view str) { … }
// Reply handler shaped for an `InnerTextAgent` request: receives the mojo
// frame result plus the state bound when the request was issued.
//   remote     - the agent remote, taken by value so this function owns it.
//                NOTE(review): presumably bound here only to keep the
//                connection alive until the reply arrives - confirm.
//   start_time - a tick count captured by the caller.
//                NOTE(review): presumably used to measure extraction latency.
//   callback   - run-once callback that accepts a vector of strings.
//   mojo_frame - the inner-text frame returned over mojo; may need a null
//                check (not visible in this view).
void OnGotInnerText(mojo::Remote<blink::mojom::InnerTextAgent> remote,
base::TimeTicks start_time,
base::OnceCallback<void(std::vector<std::string>)> callback,
blink::mojom::InnerTextFramePtr mojo_frame) { … }
// Receives an in-progress `SearchResult`, the scored rows found for it, and
// raw pointers to the history backend and URL database.
// NOTE(review): the trailing (HistoryBackend*, URLDatabase*) pair looks like
// the signature of a history DB task, so this presumably runs on the history
// sequence, fills in URL row data, and then posts `callback` back through
// `task_runner` - confirm against the call site.
// Neither pointer is owned by this function (both are raw, non-owning
// parameters); whether they can be null is not visible here.
void FinishSearchResultWithHistory(
const scoped_refptr<base::SequencedTaskRunner> task_runner,
SearchResultCallback callback,
SearchResult result,
std::vector<ScoredUrlRow> scored_url_rows,
history::HistoryBackend* history_backend,
history::URLDatabase* url_database) { … }
// Returns a count derived from the string `s`.
// NOTE(review): presumably the number of whitespace-separated words; the
// tokenization rule is not visible in this view - confirm.
size_t CountWords(const std::string& s) { … }
// Returns a floating-point score threshold given the embedder metadata.
// NOTE(review): presumably chooses between a feature-configured value and a
// value carried by the metadata; precedence is not visible here - confirm.
float GetScoreThreshold(const EmbedderMetadata& embedder_metadata) { … }
// Constructs a row from a `ScoredUrl` taken by value.
// NOTE(review): the initializer list is elided in this view; which members
// are populated from `scored_url` should be confirmed there.
ScoredUrlRow::ScoredUrlRow(ScoredUrl scored_url)
: … { … }
// Copy construction, move construction, destruction, copy assignment and move
// assignment all use the compiler-generated implementations. They are
// defaulted here, out of line, rather than in the class definition.
ScoredUrlRow::ScoredUrlRow(const ScoredUrlRow&) = default;
ScoredUrlRow::ScoredUrlRow(ScoredUrlRow&&) = default;
ScoredUrlRow::~ScoredUrlRow() = default;
ScoredUrlRow& ScoredUrlRow::operator=(const ScoredUrlRow&) = default;
ScoredUrlRow& ScoredUrlRow::operator=(ScoredUrlRow&&) = default;
// Returns one passage as a string copy; does not modify this row (const).
// NOTE(review): presumably the passage with the highest score; behavior when
// the row holds no passages is not visible here - confirm.
std::string ScoredUrlRow::GetBestPassage() const { … }
// Returns a list of indices; does not modify this row (const).
//   min_count      - NOTE(review): presumably the minimum number of indices
//                    to return - confirm.
//   min_word_count - NOTE(review): presumably a minimum total word count the
//                    selected passages should reach - confirm.
// The ordering of the returned indices is not visible in this view.
std::vector<size_t> ScoredUrlRow::GetBestScoreIndices(
size_t min_count,
size_t min_word_count) const { … }
// Default construction, move construction, destruction and move assignment
// use the compiler-generated implementations, defaulted out of line.
// NOTE(review): no copy constructor or copy assignment is defined in this
// file; duplication presumably goes through SearchResult::Clone() - confirm
// against the class declaration.
SearchResult::SearchResult() = default;
SearchResult::SearchResult(SearchResult&&) = default;
SearchResult::~SearchResult() = default;
SearchResult& SearchResult::operator=(SearchResult&&) = default;
// Returns a new `SearchResult` by value. The method is non-const.
// NOTE(review): which fields are duplicated and whether the source object is
// modified cannot be seen in this view - confirm before assuming a deep copy.
SearchResult SearchResult::Clone() { … }
// Returns a const reference to the answer text; does not modify this result.
// The reference refers to storage this function does not hand over, so
// callers should not hold it past that storage's lifetime.
// NOTE(review): which member backs the reference is not visible - confirm.
const std::string& SearchResult::AnswerText() const { … }
// Returns an index as `size_t`; does not modify this result.
// NOTE(review): presumably the position of the row that the answer refers to;
// the value returned when no such row exists is not visible here - confirm.
size_t SearchResult::AnswerIndex() const { … }
// Constructs the service from its collaborators. Every dependency is passed
// as a raw pointer, so none of them is owned through this signature.
// NOTE(review): which pointers may be null, and which must outlive the
// service, cannot be determined from this view (initializer list and body are
// elided) - confirm against the header and the factory that builds it.
HistoryEmbeddingsService::HistoryEmbeddingsService(
history::HistoryService* history_service,
page_content_annotations::PageContentAnnotationsService*
page_content_annotations_service,
optimization_guide::OptimizationGuideModelProvider*
optimization_guide_model_provider,
optimization_guide::OptimizationGuideDecider* optimization_guide_decider,
PassageEmbeddingsServiceController* service_controller,
os_crypt_async::OSCryptAsync* os_crypt_async,
optimization_guide::OptimizationGuideModelExecutor*
optimization_guide_model_executor)
: … { … }
// Destructor uses the compiler-generated implementation, so members are
// destroyed in reverse declaration order with no extra teardown here.
HistoryEmbeddingsService::~HistoryEmbeddingsService() = default;
// Returns whether `url` is eligible.
// NOTE(review): presumably decides whether a page should have passages
// extracted and embedded; the criteria are not visible here - confirm.
bool HistoryEmbeddingsService::IsEligible(const GURL& url) { … }
// Callback shaped for an OSCryptAsync instance request: receives an
// `Encryptor` and a success flag, plus embedder metadata bound by the caller.
// NOTE(review): how `success == false` is handled is not visible in this
// view - confirm that a failed encryptor is never handed to storage.
void HistoryEmbeddingsService::OnOsCryptAsyncReady(
EmbedderMetadata metadata,
os_crypt_async::Encryptor encryptor,
bool success) { … }
// Receives the embedder's metadata by value.
// NOTE(review): presumably invoked once the embedding model is available and
// kicks off the encryptor request - confirm against the embedder interface.
void HistoryEmbeddingsService::OnEmbedderMetadataReady(
EmbedderMetadata metadata) { … }
// Entry point identified by a URL id, visit id and visit time, together with
// a weak handle to the document the passages would come from.
// The document is held as a `WeakDocumentPtr`, so it may already be gone by
// the time it is dereferenced.
// NOTE(review): presumably looks up existing data for `url_id` and continues
// in RetrievePassagesWithUrlData() - confirm.
void HistoryEmbeddingsService::RetrievePassages(
history::URLID url_id,
history::VisitID visit_id,
base::Time visit_time,
content::WeakDocumentPtr weak_render_frame_host) { … }
// Starts a search for `query` and reports through `callback`.
//   query            - the query text, taken by value.
//   time_range_start - optional lower bound on time; absent means no bound is
//                      supplied by the caller.
//   count            - NOTE(review): presumably the maximum number of results
//                      to return - confirm.
//   callback         - receives the search result(s).
// NOTE(review): whether `callback` can run more than once (e.g. once for the
// rows and again for an answer) is not visible here - confirm against the
// `SearchResultCallback` typedef.
void HistoryEmbeddingsService::Search(
std::string query,
std::optional<base::Time> time_range_start,
size_t count,
SearchResultCallback callback) { … }
// Continuation that receives the embedder's output for a query: the passages
// that were embedded, their embeddings, and a status code, along with the
// pending `callback` and partially built `result`.
// NOTE(review): handling of a non-success `status` and of an empty
// `query_embeddings` vector is not visible in this view - confirm both.
void HistoryEmbeddingsService::OnQueryEmbeddingComputed(
SearchResultCallback callback,
SearchResult result,
std::vector<std::string> query_passages,
std::vector<Embedding> query_embeddings,
ComputeEmbeddingsStatus status) { … }
// Returns a `base::WeakPtr` to this service so callers can refer to it
// without extending its lifetime.
// NOTE(review): presumably vended from a WeakPtrFactory member - confirm.
base::WeakPtr<HistoryEmbeddingsService> HistoryEmbeddingsService::AsWeakPtr() { … }
// Emits a quality log for a completed search.
//   result                     - taken by non-const reference, so the body is
//                                able to modify it; whether it does is not
//                                visible in this view.
//   user_feedback              - feedback value from the optimization guide
//                                proto enum.
//   selections                 - set of indices.
//                                NOTE(review): presumably the result rows the
//                                user selected - confirm.
//   num_entered_characters     - NOTE(review): presumably how many characters
//                                the user typed for the query - confirm.
//   from_omnibox_history_scope - flag describing where the query originated.
void HistoryEmbeddingsService::SendQualityLog(
SearchResult& result,
optimization_guide::proto::UserFeedback user_feedback,
std::set<size_t> selections,
size_t num_entered_characters,
bool from_omnibox_history_scope) { … }
// Shutdown hook; takes no arguments and returns nothing.
// NOTE(review): presumably the KeyedService shutdown override, where raw
// pointers to other services should stop being used - confirm.
void HistoryEmbeddingsService::Shutdown() { … }
// Receives notification of history deletions, described by `deletion_info`.
// NOTE(review): the signature matches a history-service observer callback;
// presumably forwards the deleted rows/visits to Storage so stored passages
// and embeddings are removed as well - confirm.
void HistoryEmbeddingsService::OnHistoryDeletions(
history::HistoryService* history_service,
const history::DeletionInfo& deletion_info) { … }
// Constructs the storage object for the given directory path.
// NOTE(review): the initializer list is elided here; whether any file is
// opened or created at construction time should be confirmed there.
HistoryEmbeddingsService::Storage::Storage(const base::FilePath& storage_dir)
: … { … }
// Supplies the storage with embedder metadata and an encryptor, both taken
// by value.
// NOTE(review): presumably must be called before the database can be read or
// written - confirm the ordering requirement.
void HistoryEmbeddingsService::Storage::SetEmbedderMetadata(
EmbedderMetadata metadata,
os_crypt_async::Encryptor encryptor) { … }
// Takes one URL's passages and a vector of embeddings, both by value.
// NOTE(review): presumably `embeddings[i]` corresponds to the i-th passage
// and both are persisted together; handling of a size mismatch is not
// visible in this view - confirm.
void HistoryEmbeddingsService::Storage::ProcessAndStorePassages(
UrlPassages url_passages,
std::vector<Embedding> embeddings) { … }
// Searches stored data with `query_embedding` and returns scored rows.
//   weak_latest_query_id - weak pointer to an atomic counter.
//   query_id             - the id assigned to this particular query.
//                          NOTE(review): presumably compared against the
//                          atomic so a superseded query can stop early -
//                          confirm, including how a null weak pointer is
//                          treated.
//   time_range_start     - optional lower time bound.
//   count                - NOTE(review): presumably the maximum number of
//                          rows to return - confirm.
std::vector<ScoredUrlRow> HistoryEmbeddingsService::Storage::Search(
base::WeakPtr<std::atomic<size_t>> weak_latest_query_id,
size_t query_id,
Embedding query_embedding,
std::optional<base::Time> time_range_start,
size_t count) { … }
// Applies a history deletion to stored data.
//   for_all_history   - NOTE(review): presumably true means everything is
//                       wiped and the other two arguments are ignored -
//                       confirm.
//   deleted_rows      - URL rows that were deleted.
//   deleted_visit_ids - ids of the visits that were deleted.
void HistoryEmbeddingsService::Storage::HandleHistoryDeletions(
bool for_all_history,
history::URLRows deleted_rows,
std::set<history::VisitID> deleted_visit_ids) { … }
// Test-only helper (per the `ForTesting` suffix) with two independent flags
// selecting what to delete: passages and/or embeddings.
// NOTE(review): whether deletion covers every URL or a subset is not visible
// in this view - confirm.
void HistoryEmbeddingsService::Storage::DeleteDataForTesting(
bool delete_passages,
bool delete_embeddings) { … }
// Returns a vector of `UrlPassages`; takes no arguments.
// NOTE(review): presumably the stored passages that have no matching
// embeddings yet, for use by a rebuild step - confirm.
std::vector<UrlPassages>
HistoryEmbeddingsService::Storage::CollectPassagesWithoutEmbeddings() { … }
// Looks up stored data for `url_id`. The result is optional, so callers must
// handle the empty case.
// NOTE(review): presumably empty means nothing is stored for this URL;
// whether a read error is also reported as empty is not visible - confirm.
std::optional<UrlPassagesEmbeddings>
HistoryEmbeddingsService::Storage::GetUrlData(history::URLID url_id) { … }
// Returns a `QualityLogEntry` by value; takes no arguments.
// NOTE(review): presumably creates a fresh, empty entry for the caller to
// fill in; whether the result can be null/empty is not visible - confirm.
QualityLogEntry HistoryEmbeddingsService::PrepareQualityLogEntry() { … }
// Continuation that receives freshly extracted passage strings together with
// the URL's passage record and, optionally, data already stored for that URL.
// NOTE(review): `existing_url_data` is presumably used to reuse embeddings
// for passages that have not changed - confirm.
void HistoryEmbeddingsService::OnPassagesRetrieved(
std::optional<UrlPassagesEmbeddings> existing_url_data,
UrlPassages url_passages,
std::vector<std::string> passages) { … }
// Continuation that receives the embedder's output for a page's passages.
//   embedding_cache - map from passage text to an embedding.
//                     NOTE(review): presumably previously known embeddings
//                     to merge with the newly computed ones - confirm.
//   url_passages    - the URL's passage record.
//   passages        - the passage strings that were sent to the embedder.
//   embeddings      - the embeddings that came back.
//   status          - outcome reported by the embedder; handling of a
//                     non-success value is not visible in this view.
void HistoryEmbeddingsService::OnPassagesEmbeddingsComputed(
std::unordered_map<std::string, Embedding> embedding_cache,
UrlPassages url_passages,
std::vector<std::string> passages,
std::vector<Embedding> embeddings,
ComputeEmbeddingsStatus status) { … }
// Continuation that receives the scored rows for a query, along with the
// pending `callback` and the `result` being built.
// NOTE(review): presumably invoked with the output of Storage::Search() and
// hands off to the visibility step - confirm.
void HistoryEmbeddingsService::OnSearchCompleted(
SearchResultCallback callback,
SearchResult result,
std::vector<ScoredUrlRow> scored_url_rows) { … }
// Takes the pending callback, the result under construction and the scored
// rows, all by value.
// NOTE(review): presumably submits the rows' passages to the page content
// annotations service for a visibility score and continues in
// OnPassageVisibilityCalculated() - confirm.
void HistoryEmbeddingsService::DeterminePassageVisibility(
SearchResultCallback callback,
SearchResult result,
std::vector<ScoredUrlRow> scored_url_rows) { … }
// Continuation that receives batch annotation results for the scored rows.
// `annotation_results` is borrowed (const reference), so it must not be
// retained beyond this call without copying.
// NOTE(review): presumably rows whose passages fall below a visibility
// threshold are dropped; how results are matched back to rows is not
// visible in this view - confirm.
void HistoryEmbeddingsService::OnPassageVisibilityCalculated(
SearchResultCallback callback,
SearchResult result,
std::vector<ScoredUrlRow> scored_url_rows,
const std::vector<page_content_annotations::BatchAnnotationResult>&
annotation_results) { … }
// Receives the pending callback and a `SearchResult`, both by value.
// NOTE(review): "primary" presumably means the row results before any answer
// is generated, with the answerer optionally invoked afterwards - confirm.
void HistoryEmbeddingsService::OnPrimarySearchResultReady(
SearchResultCallback callback,
SearchResult result) { … }
// Continuation that receives the answerer's result for a search, together
// with the pending callback and the search result it belongs to.
// NOTE(review): presumably attaches the answer to `search_result` and runs
// `callback`; handling of a failed answer is not visible here - confirm.
void HistoryEmbeddingsService::OnAnswerComputed(
SearchResultCallback callback,
SearchResult search_result,
AnswererResult answerer_result) { … }
// Takes a vector of `UrlPassages` by value.
// NOTE(review): presumably schedules embedding computation for each entry,
// fed by Storage::CollectPassagesWithoutEmbeddings() - confirm.
void HistoryEmbeddingsService::RebuildAbsentEmbeddings(
std::vector<UrlPassages> all_url_passages) { … }
// Takes the same identifying arguments as RetrievePassages(), plus a
// timestamp and optional data already stored for the URL.
//   weak_render_frame_host      - weak handle to the document; it may no
//                                 longer resolve by the time this runs.
//   time_before_database_access - NOTE(review): presumably captured before
//                                 the storage lookup so its duration can be
//                                 measured - confirm.
//   existing_url_data           - stored data for `url_id`, if any.
void HistoryEmbeddingsService::RetrievePassagesWithUrlData(
history::URLID url_id,
history::VisitID visit_id,
base::Time visit_time,
content::WeakDocumentPtr weak_render_frame_host,
base::Time time_before_database_access,
std::optional<UrlPassagesEmbeddings> existing_url_data) { … }
// Returns whether `raw_query` is filtered; does not modify the service.
// NOTE(review): presumably true means the query must not be searched; the
// filtering rules are not visible in this view - confirm.
bool HistoryEmbeddingsService::QueryIsFiltered(
const std::string& raw_query) const { … }
}