chromium/chrome/browser/extensions/activity_log/counting_policy.cc

// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// A policy for storing activity log data to a database that performs
// aggregation to reduce the size of the database.  The database layout is
// nearly the same as FullStreamUIPolicy, which stores a complete log, with a
// few changes:
//   - a "count" column is added to track how many log records were merged
//     together into this row
//   - the "time" column measures the most recent time that the current row was
//     updated
// When writing a record, if a row already exists where all other columns
// (extension_id, action_type, api_name, args, urls, etc.) match, and the
// previous time falls within today (the current time), then the count field on
// the old row is incremented.  Otherwise, a new row is written.
//
// For many text columns, repeated strings are compressed by moving string
// storage to a separate table ("string_ids") and storing only an identifier in
// the logging table.  For example, if the api_name_x column contained the
// value 4 and the string_ids table contained a row with primary key 4 and
// value 'tabs.query', then the api_name field should be taken to have the
// value 'tabs.query'.  Each column ending with "_x" is compressed in this way.
// All lookups are to the string_ids table, except for the page_url_x and
// arg_url_x columns, which are converted via the url_ids table (this
// separation of URL values is to help simplify history clearing).
//
// The activitylog_uncompressed view allows for simpler reading of the activity
// log contents with identifiers already translated to string values.

#include "chrome/browser/extensions/activity_log/counting_policy.h"

#include <stddef.h>

#include <map>
#include <string>
#include <vector>

#include "base/files/file_path.h"
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/json/json_reader.h"
#include "base/json/json_string_value_serializer.h"
#include "base/memory/ptr_util.h"
#include "base/memory/scoped_refptr.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "chrome/browser/extensions/activity_log/activity_log_task_runner.h"
#include "chrome/common/chrome_constants.h"
#include "sql/statement.h"
#include "sql/transaction.h"

namespace {

// NOTE(review): this statement looks truncated in this view (possibly a
// using-declaration that brings Action into scope) -- confirm against the
// full file.
Action;

// Delay between cleaning passes (to delete old action records) through the
// database.
// NOTE(review): the initializer is not visible in this view, so the actual
// delay value cannot be stated here.
constexpr base::TimeDelta kCleaningDelay =;

// We should log the arguments to these API calls.  Be careful when
// constructing this allowlist to not keep arguments that might compromise
// privacy by logging too much data to the activity log.
//
// TODO(mvrable): The contents of this allowlist should be reviewed and
// expanded as needed.
// Element type of the kAlwaysLog table.
// NOTE(review): no members are visible in this view -- confirm the field
// layout against the full file.
struct ApiList {};

// Table of ApiList entries; going by its name and the allowlist comment
// preceding ApiList, presumably the API calls whose arguments are always
// logged -- confirm.
// NOTE(review): the entries are not visible in this view.
const ApiList kAlwaysLog[] =;

// Columns in the main database table.  See the file-level comment for a
// discussion of how data is stored and the meanings of the _x columns.
// NOTE(review): the initializer list is not visible in this view.
const char* const kTableContentFields[] =;
// NOTE(review): presumably the SQL column types matching kTableContentFields
// entry-for-entry; the initializer list is not visible in this view -- confirm.
const char* const kTableFieldTypes[] =;

// Miscellaneous SQL commands for initializing the database; these should be
// idempotent.
constexpr char kPolicyMiscSetup[] =
    // Recreate the activitylog_uncompressed view.  It joins each *_x
    // identifier column of activitylog_compressed against string_ids (or
    // url_ids for page_url_x and arg_url_x) so that readers see the resolved
    // values, and exposes the row's rowid as activity_id.
    "DROP VIEW IF EXISTS activitylog_uncompressed;\n"
    "CREATE VIEW activitylog_uncompressed AS\n"
    "SELECT count,\n"
    "    x1.value AS extension_id,\n"
    "    time,\n"
    "    action_type,\n"
    "    x2.value AS api_name,\n"
    "    x3.value AS args,\n"
    "    x4.value AS page_url,\n"
    "    x5.value AS page_title,\n"
    "    x6.value AS arg_url,\n"
    "    x7.value AS other,\n"
    "    activitylog_compressed.rowid AS activity_id\n"
    "FROM activitylog_compressed\n"
    "    LEFT JOIN string_ids AS x1 ON (x1.id = extension_id_x)\n"
    "    LEFT JOIN string_ids AS x2 ON (x2.id = api_name_x)\n"
    "    LEFT JOIN string_ids AS x3 ON (x3.id = args_x)\n"
    "    LEFT JOIN url_ids    AS x4 ON (x4.id = page_url_x)\n"
    "    LEFT JOIN string_ids AS x5 ON (x5.id = page_title_x)\n"
    "    LEFT JOIN url_ids    AS x6 ON (x6.id = arg_url_x)\n"
    "    LEFT JOIN string_ids AS x7 ON (x7.id = other_x);\n"
    // Index over every column other than count and time, i.e. the columns
    // that stay fixed when a row's count is incremented.  It is meant to speed
    // up locating the row to update; in the worst case a handful of rows must
    // still be examined to find the one in the right time range.
    "CREATE INDEX IF NOT EXISTS activitylog_compressed_index\n"
    "ON activitylog_compressed(extension_id_x, action_type, api_name_x,\n"
    "    args_x, page_url_x, page_title_x, arg_url_x, other_x)";

// SQL statements to clean old, unused entries out of the string and URL id
// tables.
// NOTE(review): the statement text is not visible in this view.
static const char kStringTableCleanup[] =;
// NOTE(review): presumably the URL-id-table counterpart of
// kStringTableCleanup; the statement text is not visible in this view --
// confirm.
static const char kUrlTableCleanup[] =;

}  // namespace

namespace extensions {

// NOTE(review): judging by the identifier, the name of the database view used
// when reading the log; the value is not visible in this view -- confirm.
const char CountingPolicy::kReadViewName[] =;

// Constructor taking the Profile* this policy is created for.
// NOTE(review): the member-initializer list is truncated in this view (a bare
// ':' followed by an empty body) -- confirm against the full file.
CountingPolicy::CountingPolicy(Profile* profile)
    :{}

// Destructor; the body shown here is empty.
CountingPolicy::~CountingPolicy() {}

// Takes the sql::Database to operate on and returns a bool.
// NOTE(review): the body is empty in this view even though the function
// returns bool (flowing off the end would be undefined behavior), so the real
// implementation is presumably elided -- confirm against the full file.
bool CountingPolicy::InitDatabase(sql::Database* db) {}

// Accepts a ref-counted Action.
// NOTE(review): the body is empty in this view; what is done with |action|
// cannot be determined from here -- confirm against the full file.
void CountingPolicy::ProcessAction(scoped_refptr<Action> action) {}

// Accepts a ref-counted Action.
// NOTE(review): the body is empty in this view; presumably queues |action|
// for a later database write, going by the name -- confirm.
void CountingPolicy::QueueAction(scoped_refptr<Action> action) {}

// Takes the sql::Database to operate on and returns a bool.
// NOTE(review): the body is empty in this view even though the function
// returns bool (flowing off the end would be undefined behavior), so the real
// implementation is presumably elided -- confirm against the full file.
bool CountingPolicy::FlushDatabase(sql::Database* db) {}

// Returns an owned Action::ActionVector.  The parameters (extension id,
// action type, API name, page URL, argument URL and a day offset) presumably
// act as filters on the rows read -- confirm the exact matching rules.
// NOTE(review): the body is empty in this view even though a value is
// returned, so the real implementation is presumably elided -- confirm.
std::unique_ptr<Action::ActionVector> CountingPolicy::DoReadFilteredData(
    const std::string& extension_id,
    const Action::ActionType type,
    const std::string& api_name,
    const std::string& page_url,
    const std::string& arg_url,
    const int days_ago) {}

// Takes a list of 64-bit action ids.
// NOTE(review): the body is empty in this view; presumably deletes the rows
// with these ids -- confirm against the full file.
void CountingPolicy::DoRemoveActions(const std::vector<int64_t>& action_ids) {}

// Takes a list of GURLs.
// NOTE(review): the body is empty in this view; how |restrict_urls| limits
// the removal (and what an empty list means) cannot be told from here --
// confirm against the full file.
void CountingPolicy::DoRemoveURLs(const std::vector<GURL>& restrict_urls) {}

// Takes an extension id string.
// NOTE(review): the body is empty in this view; presumably removes logged
// data for that extension -- confirm against the full file.
void CountingPolicy::DoRemoveExtensionData(const std::string& extension_id) {}

// NOTE(review): the body is empty in this view; presumably performs the
// actual database deletion -- confirm against the full file.
void CountingPolicy::DoDeleteDatabase() {}

// Takes the same filter parameters as DoReadFilteredData plus a once-callback
// that receives an owned Action::ActionVector (the type DoReadFilteredData
// returns).
// NOTE(review): the body is empty in this view; presumably runs
// DoReadFilteredData and hands the result to |callback| -- confirm, including
// which sequence the callback runs on.
void CountingPolicy::ReadFilteredData(
    const std::string& extension_id,
    const Action::ActionType type,
    const std::string& api_name,
    const std::string& page_url,
    const std::string& arg_url,
    const int days_ago,
    base::OnceCallback<void(std::unique_ptr<Action::ActionVector>)> callback) {}

// Same parameter as DoRemoveActions.
// NOTE(review): the body is empty in this view; presumably forwards to
// DoRemoveActions -- confirm against the full file.
void CountingPolicy::RemoveActions(const std::vector<int64_t>& action_ids) {}

// Same parameter as DoRemoveURLs.
// NOTE(review): the body is empty in this view; presumably forwards to
// DoRemoveURLs -- confirm against the full file.
void CountingPolicy::RemoveURLs(const std::vector<GURL>& restrict_urls) {}

// Same parameter as DoRemoveExtensionData.
// NOTE(review): the body is empty in this view; presumably forwards to
// DoRemoveExtensionData -- confirm against the full file.
void CountingPolicy::RemoveExtensionData(const std::string& extension_id) {}

// NOTE(review): the body is empty in this view; presumably forwards to
// DoDeleteDatabase -- confirm against the full file.
void CountingPolicy::DeleteDatabase() {}

// NOTE(review): presumably a hook invoked when the database fails; the body
// is empty in this view, so any cleanup it performs cannot be described here
// -- confirm against the full file.
void CountingPolicy::OnDatabaseFailure() {}

// NOTE(review): presumably a hook invoked when the database is closed; the
// body is empty in this view -- confirm against the full file.
void CountingPolicy::OnDatabaseClose() {}

// Cleans old records from the activity log database.
// Takes the database to clean and a cutoff time; returns a bool.
// NOTE(review): the body is empty in this view even though the function
// returns bool, so the real implementation is presumably elided; whether the
// cutoff is inclusive and what the return value signals cannot be told from
// here -- confirm against the full file.
bool CountingPolicy::CleanOlderThan(sql::Database* db,
                                    const base::Time& cutoff) {}

// Cleans unused interned strings from the database.  This should be run after
// deleting rows from the main log table to clean out stale values.
// NOTE(review): the body is empty in this view even though the function
// returns bool, so the real implementation is presumably elided -- confirm
// against the full file.
bool CountingPolicy::CleanStringTables(sql::Database* db) {}

// NOTE(review): the body is empty in this view; what shutdown work Close()
// performs (and on which sequence) cannot be told from here -- confirm
// against the full file.
void CountingPolicy::Close() {}

}  // namespace extensions