chromium/third_party/blink/common/privacy_budget/aggregating_sample_collector.h

// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef THIRD_PARTY_BLINK_COMMON_PRIVACY_BUDGET_AGGREGATING_SAMPLE_COLLECTOR_H_
#define THIRD_PARTY_BLINK_COMMON_PRIVACY_BUDGET_AGGREGATING_SAMPLE_COLLECTOR_H_

#include <cstdint>
#include <unordered_map>
#include <vector>

#include "base/containers/flat_set.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
#include "base/time/time.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "services/metrics/public/mojom/ukm_interface.mojom.h"
#include "third_party/blink/public/common/common_export.h"
#include "third_party/blink/public/common/privacy_budget/identifiability_sample_collector.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"

namespace blink {

// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
//
// NOTE(review): presumably this enumerates the outcomes logged when a privacy
// budget sample is recorded -- confirm against the enumerator list and the
// corresponding histogram definition before adding or changing entries.
enum class PrivacyBudgetRecordedSample {};

// An `IdentifiabilitySampleCollector` that does the following:
//
// * De-duplicates recorded samples so that the same
//   〈IdentifiableSurface,IdentifiableToken〉 tuple doesn't get sent to the
//   `UkmRecorder` more than once per `ukm::SourceId`.
//
// * Caps the number of samples that can be recorded against the same surface
//   per `ukm::SourceId`. Drops samples in excess of
//   kMaxTrackedSamplesPerSurfaces.
//
// * Caps the total number of surfaces that can be tracked for a single process.
//   Drops samples in excess of kMaxTrackedSurfaces.
//
// * Buffers metrics instead of invoking `UkmRecorder::Record` each time
//   a sample arrives.
//
//   * The number of metrics so buffered is capped at kMaxUnsentSamples. If more
//     than this many are to be buffered, then flushes all unsent metrics.
//
//   * The age of metrics so buffered is capped at kMaxUnsentSampleAge. If
//     samples have been sitting in the unsent buffer for longer than that,
//     flushes all unsent metrics.
//
//   * In so buffering, organizes observed metrics into the fewest number of
//     `UkmEntry` instances that are required to record them via `UkmRecorder`.
//
// The goal, obviously, is to prevent the identifiability study from DoSing the
// browser process and the UKM subsystem since there can be lots of metrics
// being recorded.
class BLINK_COMMON_EXPORT_PRIVATE AggregatingSampleCollector
    : public IdentifiabilitySampleCollector {};

namespace internal {
// Accesses the global `AggregatingSampleCollector` instance. On non-test
// targets, or test targets with no `ScopedSwitchSampleCollector` override,
// this is also what's returned by `IdentifiabilitySampleCollector::Get()`.
//
// Returns a pointer to that global instance.
BLINK_COMMON_EXPORT_PRIVATE AggregatingSampleCollector* GetCollectorInstance();
}  // namespace internal

}  // namespace blink
#endif  // THIRD_PARTY_BLINK_COMMON_PRIVACY_BUDGET_AGGREGATING_SAMPLE_COLLECTOR_H_