// chromium/components/history_embeddings/history_embeddings_service.cc

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/history_embeddings/history_embeddings_service.h"

#include <algorithm>
#include <numeric>
#include <tuple>

#include "base/feature_list.h"
#include "base/files/file_path.h"
#include "base/functional/bind.h"
#include "base/metrics/histogram_functions.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/task_traits.h"
#include "base/task/thread_pool.h"
#include "base/time/time.h"
#include "base/timer/elapsed_timer.h"
#include "base/token.h"
#include "base/uuid.h"
#include "components/history/core/browser/history_types.h"
#include "components/history/core/browser/url_database.h"
#include "components/history/core/browser/url_row.h"
#include "components/history_embeddings/history_embeddings_features.h"
#include "components/history_embeddings/ml_answerer.h"
#include "components/history_embeddings/ml_embedder.h"
#include "components/history_embeddings/mock_answerer.h"
#include "components/history_embeddings/mock_embedder.h"
#include "components/history_embeddings/scheduling_embedder.h"
#include "components/history_embeddings/sql_database.h"
#include "components/history_embeddings/vector_database.h"
#include "components/optimization_guide/core/model_quality/feature_type_map.h"
#include "components/optimization_guide/core/model_quality/model_quality_log_entry.h"
#include "components/optimization_guide/core/optimization_guide_model_executor.h"
#include "components/os_crypt/async/browser/os_crypt_async.h"
#include "components/page_content_annotations/core/page_content_annotations_service.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/weak_document_ptr.h"
#include "mojo/public/cpp/bindings/callback_helpers.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "services/service_manager/public/cpp/interface_provider.h"
#include "third_party/blink/public/mojom/content_extraction/inner_text.mojom.h"
#include "third_party/farmhash/src/src/farmhash.h"
#include "url/gurl.h"

namespace history_embeddings {

// Takes a `QueryFiltered` status value. Presumably reports it as a metric
// (base/metrics/histogram_functions.h is included above) — TODO confirm.
// The body is empty in this view, so as written this is a no-op.
void RecordQueryFiltered(QueryFiltered status) {}

// Takes an `ExtractionCancelled` reason value. Presumably reports it as a
// metric (base/metrics/histogram_functions.h is included above) — TODO
// confirm. The body is empty in this view, so as written this is a no-op.
void RecordExtractionCancelled(ExtractionCancelled reason) {}

// Maps `str` to a 32-bit unsigned value. Presumably a farmhash-based hash
// (farmhash.h is included above) — TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
uint32_t HashString(std::string_view str) {}

// Presumably the reply handler for an inner-text request made through
// `blink::mojom::InnerTextAgent` — TODO confirm against the caller.
//   remote:     the agent remote, taken by value; presumably held so the
//               connection stays alive until this runs — TODO confirm.
//   start_time: a `base::TimeTicks` supplied by the caller; presumably the
//               time the request started — TODO confirm.
//   callback:   one-shot callback accepting a vector of strings.
//   mojo_frame: the `InnerTextFramePtr` passed in with the reply.
// The body is empty in this view, so as written no argument is used and
// `callback` is never run.
void OnGotInnerText(mojo::Remote<blink::mojom::InnerTextAgent> remote,
                    base::TimeTicks start_time,
                    base::OnceCallback<void(std::vector<std::string>)> callback,
                    blink::mojom::InnerTextFramePtr mojo_frame) {}

// Presumably completes a search result using data from the history database
// and delivers it through `callback` — TODO confirm; inferred from the name
// and parameters only.
//   task_runner:     sequenced task runner; presumably where `callback` should
//                    be run — TODO confirm.
//   callback:        `SearchResultCallback` for the finished result.
//   result:          the in-progress `SearchResult`, taken by value.
//   scored_url_rows: scored rows, taken by value.
//   history_backend: raw, non-owning pointer to the history backend.
//   url_database:    raw, non-owning pointer to the URL database.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void FinishSearchResultWithHistory(
    const scoped_refptr<base::SequencedTaskRunner> task_runner,
    SearchResultCallback callback,
    SearchResult result,
    std::vector<ScoredUrlRow> scored_url_rows,
    history::HistoryBackend* history_backend,
    history::URLDatabase* url_database) {}

// Returns the number of words in `s`.
//
// A word is a maximal run of characters other than the ASCII space ' '. Only
// ' ' is treated as a separator; tabs, newlines and other whitespace count as
// word characters. Leading, trailing and repeated spaces do not produce empty
// words, so "" and "   " both yield 0.
//
// NOTE(review): the previous body was empty, so control flowed off the end of
// a non-void function (undefined behavior). Confirm the space-only word
// definition against the callers' expectations.
size_t CountWords(const std::string& s) {
  size_t word_count = 0;
  // True while the scan is inside a run of non-space characters.
  bool in_word = false;
  for (const char c : s) {
    const bool is_space = (c == ' ');
    // A word starts at every space -> non-space transition (and at the first
    // character when it is not a space).
    if (!is_space && !in_word) {
      ++word_count;
    }
    in_word = !is_space;
  }
  return word_count;
}

// Returns the search score threshold to use for the embedder described by
// `embedder_metadata`.
//
// When `kSearchScoreThreshold` is set to a value <0, the threshold in the
// model metadata will be used. If the metadata also doesn't specify a
// threshold (old models don't), then 0.9 will be used. This allows finch and
// command line to override the threshold if necessary while ensuring
// different users with different models are all using the correct threshold
// for their model.
//
// NOTE(review): the body is empty in this view, so the selection logic
// described above is not visible here and control flows off the end of a
// non-void function — undefined behavior if called.
float GetScoreThreshold(const EmbedderMetadata& embedder_metadata) {}

////////////////////////////////////////////////////////////////////////////////

// Constructs a row from `scored_url` (taken by value).
//
// NOTE(review): this definition previously ended in `:{}` — a ctor-initializer
// colon with no member initializers, which is ill-formed C++. The dangling
// colon is removed so the definition parses. The intended member initializers
// are not visible here, so `scored_url` is currently unused and all members
// are default-initialized; restore the initializer list from the class
// declaration.
ScoredUrlRow::ScoredUrlRow(ScoredUrl scored_url) {}
// Copy/move construction, destruction, and copy/move assignment are all
// compiler-generated; they are defaulted here, out of line.
ScoredUrlRow::ScoredUrlRow(const ScoredUrlRow&) = default;
ScoredUrlRow::ScoredUrlRow(ScoredUrlRow&&) = default;
ScoredUrlRow::~ScoredUrlRow() = default;
ScoredUrlRow& ScoredUrlRow::operator=(const ScoredUrlRow&) = default;
ScoredUrlRow& ScoredUrlRow::operator=(ScoredUrlRow&&) = default;

// Returns a passage string by value without modifying the row (const member).
// Presumably the highest-scoring passage of this row — TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
std::string ScoredUrlRow::GetBestPassage() const {}

// Returns a vector of indices without modifying the row (const member).
// Presumably indices of the best-scoring passages — TODO confirm.
//   min_count:      presumably the minimum number of indices to return —
//                   TODO confirm.
//   min_word_count: presumably a minimum total word count the selected
//                   passages should reach — TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
std::vector<size_t> ScoredUrlRow::GetBestScoreIndices(
    size_t min_count,
    size_t min_word_count) const {}

////////////////////////////////////////////////////////////////////////////////

// Default construction, move construction, destruction, and move assignment
// are compiler-generated; they are defaulted here, out of line. No copy
// constructor or copy assignment is defined in this file.
SearchResult::SearchResult() = default;
SearchResult::SearchResult(SearchResult&&) = default;
SearchResult::~SearchResult() = default;
SearchResult& SearchResult::operator=(SearchResult&&) = default;

// Returns a `SearchResult` by value. Presumably an explicit copy of this
// result — TODO confirm. Note this member is not const-qualified.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
SearchResult SearchResult::Clone() {}

// Returns a const reference to a string; presumably the answer text held by
// this result — TODO confirm. The lifetime of the referenced string cannot be
// determined from this view.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
const std::string& SearchResult::AnswerText() const {}

// Returns a `size_t`; presumably the index of the scored row the answer
// refers to — TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
size_t SearchResult::AnswerIndex() const {}

////////////////////////////////////////////////////////////////////////////////

// Constructs the service from its collaborators. Every argument is a raw,
// non-owning pointer; whether any of them may be null, and how long each must
// outlive the service, cannot be determined from this view.
//
// NOTE(review): this definition previously ended in `:{}` — a ctor-initializer
// colon with no member initializers, which is ill-formed C++. The dangling
// colon is removed so the definition parses. The intended member initializers
// are not visible here, so none of the parameters is stored or used; restore
// the initializer list and body from the class declaration.
HistoryEmbeddingsService::HistoryEmbeddingsService(
    history::HistoryService* history_service,
    page_content_annotations::PageContentAnnotationsService*
        page_content_annotations_service,
    optimization_guide::OptimizationGuideModelProvider*
        optimization_guide_model_provider,
    optimization_guide::OptimizationGuideDecider* optimization_guide_decider,
    PassageEmbeddingsServiceController* service_controller,
    os_crypt_async::OSCryptAsync* os_crypt_async,
    optimization_guide::OptimizationGuideModelExecutor*
        optimization_guide_model_executor) {}

// Destructor is compiler-generated; defaulted here, out of line.
HistoryEmbeddingsService::~HistoryEmbeddingsService() = default;

// Returns a bool for `url`; presumably whether the URL is eligible for
// passage extraction/embedding — TODO confirm the criteria.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
bool HistoryEmbeddingsService::IsEligible(const GURL& url) {}

// Presumably the completion handler for an `os_crypt_async` encryptor request
// — TODO confirm against the call site.
//   metadata:  `EmbedderMetadata`, taken by value.
//   encryptor: `os_crypt_async::Encryptor`, taken by value.
//   success:   presumably whether the encryptor was obtained successfully —
//              TODO confirm.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::OnOsCryptAsyncReady(
    EmbedderMetadata metadata,
    os_crypt_async::Encryptor encryptor,
    bool success) {}

// Presumably invoked once the embedder's metadata becomes available — TODO
// confirm against the call site. `metadata` is taken by value.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::OnEmbedderMetadataReady(
    EmbedderMetadata metadata) {}

// Presumably starts passage retrieval for a visited page — TODO confirm.
//   url_id:                 history URL row id.
//   visit_id:               history visit id.
//   visit_time:             `base::Time` associated with the visit.
//   weak_render_frame_host: weak document pointer; presumably identifies the
//                           frame whose content is read — TODO confirm.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::RetrievePassages(
    history::URLID url_id,
    history::VisitID visit_id,
    base::Time visit_time,
    content::WeakDocumentPtr weak_render_frame_host) {}

// Entry point for a search over `query`; results are presumably delivered
// through `callback` — TODO confirm.
//   query:            the query text, taken by value.
//   time_range_start: optional `base::Time`; presumably a lower bound on the
//                     visit time of results — TODO confirm.
//   count:            presumably the maximum number of results — TODO confirm.
//   callback:         `SearchResultCallback`.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void HistoryEmbeddingsService::Search(
    std::string query,
    std::optional<base::Time> time_range_start,
    size_t count,
    SearchResultCallback callback) {}

// Presumably the completion handler for computing the query's embedding —
// TODO confirm against the call site.
//   callback:         `SearchResultCallback` carried through the pipeline.
//   result:           the in-progress `SearchResult`, taken by value.
//   query_passages:   strings, taken by value; presumably the text that was
//                     embedded — TODO confirm.
//   query_embeddings: embeddings, taken by value; presumably one per entry of
//                     `query_passages` — TODO confirm.
//   status:           `ComputeEmbeddingsStatus` reported by the embedder.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void HistoryEmbeddingsService::OnQueryEmbeddingComputed(
    SearchResultCallback callback,
    SearchResult result,
    std::vector<std::string> query_passages,
    std::vector<Embedding> query_embeddings,
    ComputeEmbeddingsStatus status) {}

// Returns a `base::WeakPtr` to a `HistoryEmbeddingsService`; presumably a weak
// pointer to this instance — TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
base::WeakPtr<HistoryEmbeddingsService> HistoryEmbeddingsService::AsWeakPtr() {}

// Presumably uploads a model-quality log entry for a search — TODO confirm.
//   result:                     the `SearchResult`, taken by non-const
//                               reference, so the callee may modify it.
//   user_feedback:              `optimization_guide::proto::UserFeedback`.
//   selections:                 set of `size_t`; presumably indices of the
//                               result items the user selected — TODO confirm.
//   num_entered_characters:     presumably the number of characters the user
//                               typed — TODO confirm.
//   from_omnibox_history_scope: presumably whether the query came from the
//                               omnibox history scope — TODO confirm.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::SendQualityLog(
    SearchResult& result,
    optimization_guide::proto::UserFeedback user_feedback,
    std::set<size_t> selections,
    size_t num_entered_characters,
    bool from_omnibox_history_scope) {}

// Shutdown hook; presumably the KeyedService-style teardown entry point —
// TODO confirm against the class declaration.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::Shutdown() {}

// Presumably a history-service observer callback invoked when history entries
// are deleted — TODO confirm against the class declaration.
//   history_service: raw, non-owning pointer to the reporting service.
//   deletion_info:   description of what was deleted.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::OnHistoryDeletions(
    history::HistoryService* history_service,
    const history::DeletionInfo& deletion_info) {}

// Constructs the storage for the directory `storage_dir`.
//
// NOTE(review): this definition previously ended in `:{}` — a ctor-initializer
// colon with no member initializers, which is ill-formed C++. The dangling
// colon is removed so the definition parses. The intended member initializers
// are not visible here, so `storage_dir` is currently unused; restore the
// initializer list from the class declaration.
HistoryEmbeddingsService::Storage::Storage(const base::FilePath& storage_dir) {}

// Supplies the storage with embedder metadata and an encryptor, both taken by
// value. How they are used is not visible here.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::Storage::SetEmbedderMetadata(
    EmbedderMetadata metadata,
    os_crypt_async::Encryptor encryptor) {}

// Presumably persists the passages of one URL together with their embeddings
// — TODO confirm.
//   url_passages: `UrlPassages`, taken by value.
//   embeddings:   embeddings, taken by value; presumably parallel to the
//                 passages in `url_passages` — TODO confirm.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::Storage::ProcessAndStorePassages(
    UrlPassages url_passages,
    std::vector<Embedding> embeddings) {}

// Returns scored URL rows for `query_embedding`.
//   weak_latest_query_id: weak pointer to an atomic `size_t`; presumably holds
//                         the id of the most recent query so that a stale
//                         search can be abandoned — TODO confirm.
//   query_id:             id of this query.
//   query_embedding:      embedding to search with, taken by value.
//   time_range_start:     optional `base::Time`; presumably a lower bound on
//                         visit time — TODO confirm.
//   count:                presumably the maximum number of rows to return —
//                         TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
std::vector<ScoredUrlRow> HistoryEmbeddingsService::Storage::Search(
    base::WeakPtr<std::atomic<size_t>> weak_latest_query_id,
    size_t query_id,
    Embedding query_embedding,
    std::optional<base::Time> time_range_start,
    size_t count) {}

// Presumably removes stored data that corresponds to deleted history — TODO
// confirm.
//   for_all_history:   presumably true when all history was deleted — TODO
//                      confirm.
//   deleted_rows:      URL rows, taken by value.
//   deleted_visit_ids: set of visit ids, taken by value.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::Storage::HandleHistoryDeletions(
    bool for_all_history,
    history::URLRows deleted_rows,
    std::set<history::VisitID> deleted_visit_ids) {}

// Test-only hook (per the `ForTesting` suffix). The two flags presumably
// select which kind of stored data to delete — TODO confirm.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::Storage::DeleteDataForTesting(
    bool delete_passages,
    bool delete_embeddings) {}

// Returns a vector of `UrlPassages`; presumably the stored passages that have
// no corresponding embeddings yet — TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
std::vector<UrlPassages>
HistoryEmbeddingsService::Storage::CollectPassagesWithoutEmbeddings() {}

// Returns an optional `UrlPassagesEmbeddings` for `url_id`; presumably empty
// when nothing is stored for that URL — TODO confirm.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
std::optional<UrlPassagesEmbeddings>
HistoryEmbeddingsService::Storage::GetUrlData(history::URLID url_id) {}

// Returns a `QualityLogEntry` by value. How the entry is built is not visible
// here.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
QualityLogEntry HistoryEmbeddingsService::PrepareQualityLogEntry() {}

// Presumably invoked when the passages of a page have been extracted — TODO
// confirm against the call site.
//   existing_url_data: optional previously stored data; presumably for the
//                      same URL — TODO confirm.
//   url_passages:      `UrlPassages`, taken by value.
//   passages:          passage strings, taken by value.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::OnPassagesRetrieved(
    std::optional<UrlPassagesEmbeddings> existing_url_data,
    UrlPassages url_passages,
    std::vector<std::string> passages) {}

// Presumably the completion handler for computing passage embeddings — TODO
// confirm against the call site.
//   embedding_cache: map from string to `Embedding`; presumably embeddings
//                    already known for some passages — TODO confirm.
//   url_passages:    `UrlPassages`, taken by value.
//   passages:        passage strings, taken by value.
//   embeddings:      computed embeddings, taken by value; presumably parallel
//                    to `passages` — TODO confirm.
//   status:          `ComputeEmbeddingsStatus` reported by the embedder.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::OnPassagesEmbeddingsComputed(
    std::unordered_map<std::string, Embedding> embedding_cache,
    UrlPassages url_passages,
    std::vector<std::string> passages,
    std::vector<Embedding> embeddings,
    ComputeEmbeddingsStatus status) {}

// Presumably invoked when the storage search has produced scored rows — TODO
// confirm against the call site.
//   callback:        `SearchResultCallback` carried through the pipeline.
//   result:          the in-progress `SearchResult`, taken by value.
//   scored_url_rows: scored rows, taken by value.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void HistoryEmbeddingsService::OnSearchCompleted(
    SearchResultCallback callback,
    SearchResult result,
    std::vector<ScoredUrlRow> scored_url_rows) {}

// Presumably starts a visibility check on the passages in `scored_url_rows`
// — TODO confirm.
//   callback:        `SearchResultCallback` carried through the pipeline.
//   result:          the in-progress `SearchResult`, taken by value.
//   scored_url_rows: scored rows, taken by value.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void HistoryEmbeddingsService::DeterminePassageVisibility(
    SearchResultCallback callback,
    SearchResult result,
    std::vector<ScoredUrlRow> scored_url_rows) {}

// Presumably the completion handler for the passage visibility check — TODO
// confirm against the call site.
//   callback:           `SearchResultCallback` carried through the pipeline.
//   result:             the in-progress `SearchResult`, taken by value.
//   scored_url_rows:    scored rows, taken by value.
//   annotation_results: batch annotation results from page content
//                       annotations, passed by const reference.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void HistoryEmbeddingsService::OnPassageVisibilityCalculated(
    SearchResultCallback callback,
    SearchResult result,
    std::vector<ScoredUrlRow> scored_url_rows,
    const std::vector<page_content_annotations::BatchAnnotationResult>&
        annotation_results) {}

// Presumably invoked once the primary (pre-answer) search result is complete
// — TODO confirm against the call site.
//   callback: `SearchResultCallback` carried through the pipeline.
//   result:   the `SearchResult`, taken by value.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void HistoryEmbeddingsService::OnPrimarySearchResultReady(
    SearchResultCallback callback,
    SearchResult result) {}

// Presumably the completion handler for the answerer — TODO confirm against
// the call site.
//   callback:        `SearchResultCallback` carried through the pipeline.
//   search_result:   the `SearchResult`, taken by value.
//   answerer_result: the `AnswererResult`, taken by value.
// The body is empty in this view, so as written this is a no-op and `callback`
// is never run.
void HistoryEmbeddingsService::OnAnswerComputed(
    SearchResultCallback callback,
    SearchResult search_result,
    AnswererResult answerer_result) {}

// Presumably recomputes embeddings for the passages in `all_url_passages`
// that currently have none — TODO confirm. The vector is taken by value.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::RebuildAbsentEmbeddings(
    std::vector<UrlPassages> all_url_passages) {}

// Takes the same four leading parameters as `RetrievePassages()` plus two
// more; presumably the continuation that runs once existing data for the URL
// has been looked up — TODO confirm against the call site.
//   url_id:                      history URL row id.
//   visit_id:                    history visit id.
//   visit_time:                  `base::Time` associated with the visit.
//   weak_render_frame_host:      weak document pointer for the frame.
//   time_before_database_access: `base::Time`; presumably captured before the
//                                lookup, for timing — TODO confirm.
//   existing_url_data:           optional previously stored data for the URL.
// The body is empty in this view, so as written this is a no-op.
void HistoryEmbeddingsService::RetrievePassagesWithUrlData(
    history::URLID url_id,
    history::VisitID visit_id,
    base::Time visit_time,
    content::WeakDocumentPtr weak_render_frame_host,
    base::Time time_before_database_access,
    std::optional<UrlPassagesEmbeddings> existing_url_data) {}

// Returns a bool for `raw_query` without modifying the service (const
// member); presumably true when the query should be rejected by a filter —
// TODO confirm the filtering rules.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a non-void function — undefined behavior if called.
bool HistoryEmbeddingsService::QueryIsFiltered(
    const std::string& raw_query) const {}

}  // namespace history_embeddings