chromium/components/history_embeddings/history_embeddings_service.h

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_HISTORY_EMBEDDINGS_HISTORY_EMBEDDINGS_SERVICE_H_
#define COMPONENTS_HISTORY_EMBEDDINGS_HISTORY_EMBEDDINGS_SERVICE_H_

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>

#include "base/callback_list.h"
#include "base/files/file_path.h"
#include "base/functional/callback.h"
#include "base/functional/callback_helpers.h"
#include "base/gtest_prod_util.h"
#include "base/memory/weak_ptr.h"
#include "base/threading/sequence_bound.h"
#include "base/time/time.h"
#include "components/history/core/browser/history_service.h"
#include "components/history/core/browser/history_service_observer.h"
#include "components/history/core/browser/history_types.h"
#include "components/history/core/browser/url_database.h"
#include "components/history/core/browser/url_row.h"
#include "components/history_embeddings/answerer.h"
#include "components/history_embeddings/passage_embeddings_service_controller.h"
#include "components/history_embeddings/sql_database.h"
#include "components/history_embeddings/vector_database.h"
#include "components/keyed_service/core/keyed_service.h"
#include "components/optimization_guide/core/model_quality/model_quality_log_entry.h"
#include "components/optimization_guide/core/optimization_guide_decider.h"
#include "components/optimization_guide/proto/features/common_quality_data.pb.h"
#include "components/os_crypt/async/common/encryptor.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/weak_document_ptr.h"

class HistoryEmbeddingsInteractiveTest;

namespace optimization_guide {
class OptimizationGuideModelExecutor;
class OptimizationGuideModelProvider;
}  // namespace optimization_guide

namespace page_content_annotations {
class BatchAnnotationResult;
class PageContentAnnotationsService;
}  // namespace page_content_annotations

namespace os_crypt_async {
class OSCryptAsync;
}  // namespace os_crypt_async

namespace history_embeddings {

class Answerer;
class Embedder;

// Counts the # of ' ' vanilla-space characters in `s`.
// NOTE(review): the function name suggests a word count while the sentence
//   above describes a space count. The definition is not in this header, so
//   confirm which value is actually returned (e.g. for empty or single-word
//   input) before relying on either reading.
// TODO(crbug.com/343256907): Should work on international inputs which may:
//   a) Use special whitespace, OR
//   b) Not use whitespace for word breaks (e.g. Thai).
//   `String16VectorFromString16()` is the omnibox solution. We could probably
//   just replace-all `CountWords(s)` ->
//   `String16VectorFromString16(CleanUpTitleForMatching(s, nullptr)).size()`.
size_t CountWords(const std::string& s);

// A single item that forms part of a search result; combines metadata found in
// the history embeddings database with additional info from history database.
// NOTE(review): the struct body is empty here, so which fields carry the
// embeddings metadata versus the history database info cannot be confirmed
// from this declaration.
struct ScoredUrlRow {};

// Result type for a history embeddings search.
// NOTE(review): presumably aggregates `ScoredUrlRow` items (that struct is
// described as "a single item that forms part of a search result"), but no
// members are declared here -- confirm against the full declaration.
struct SearchResult {};

// NOTE(review): a bare identifier followed by `;` is not a complete
// namespace-scope declaration. This looks like the remains of a type alias,
// presumably a callback type that receives a `SearchResult` -- TODO restore
// the full declaration and confirm the callback signature.
SearchResultCallback;

// NOTE(review): a bare identifier followed by `;` is not a complete
// namespace-scope declaration. This looks like the remains of a type alias,
// presumably related to the optimization_guide model quality log entry header
// included by this file -- TODO restore the full declaration and confirm the
// aliased type.
QualityLogEntry;

// Service class for history embeddings. It derives publicly from both
// `KeyedService` and `history::HistoryServiceObserver`.
// NOTE(review): the class body is empty in this declaration, so its public
// API and the observer methods it overrides cannot be documented from here --
// confirm against the full class definition.
class HistoryEmbeddingsService : public KeyedService,
                                 public history::HistoryServiceObserver {};

// This corresponds to UMA histogram enum `EmbeddingsQueryFiltered`
// in tools/metrics/histograms/metadata/history/enums.xml
// Values of this type are passed to `RecordQueryFiltered()`.
// NOTE(review): no enumerators are listed in this declaration; the
// enumerators and their numeric values presumably must match the enums.xml
// entry -- confirm before adding or reordering entries.
enum class QueryFiltered {};

// Record UMA histogram with query filter status.
// `status` is the `QueryFiltered` value to record. The histogram name is
// chosen by the out-of-line definition and is not visible in this header.
void RecordQueryFiltered(QueryFiltered status);

// This corresponds to UMA histogram enum `EmbeddingsExtractionCancelled`
// in tools/metrics/histograms/metadata/history/enums.xml
// Values of this type are passed to `RecordExtractionCancelled()`.
// NOTE(review): no enumerators are listed in this declaration; the
// enumerators and their numeric values presumably must match the enums.xml
// entry -- confirm before adding or reordering entries.
enum class ExtractionCancelled {};

// Record UMA histogram with cancellation reason when extraction,
// embedding, etc. is cancelled before completion and storage.
// `reason` is the `ExtractionCancelled` value to record. The histogram name is
// chosen by the out-of-line definition and is not visible in this header.
void RecordExtractionCancelled(ExtractionCancelled reason);

// Hash function used for query filtering.
// Takes the text to hash as `str` and returns the hash as a 32-bit unsigned
// value. The hashing algorithm is defined out of line and is not visible in
// this header.
// NOTE(review): whether the result must stay stable across versions (e.g. to
// match precomputed filter hashes) cannot be determined here -- confirm
// before changing the implementation.
uint32_t HashString(std::string_view str);

}  // namespace history_embeddings

#endif  // COMPONENTS_HISTORY_EMBEDDINGS_HISTORY_EMBEDDINGS_SERVICE_H_