chromium/components/feedback/redaction_tool/redaction_tool.cc

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "components/feedback/redaction_tool/redaction_tool.h"

#include <algorithm>
#include <set>
#include <string_view>
#include <utility>
#include <vector>

#include "base/files/file_path.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/task/sequenced_task_runner.h"
#include "base/threading/thread_restrictions.h"
#include "build/chromeos_buildflags.h"
#include "components/autofill/core/common/credit_card_number_validation.h"
#include "components/feedback/redaction_tool/ip_address.h"
#include "components/feedback/redaction_tool/pii_types.h"
#ifdef USE_SYSTEM_RE2
#include <re2/re2.h>
#else
#include "third_party/re2/src/re2/re2.h"
#endif  // USE_SYSTEM_RE2

RE2;
IPAddress;

namespace redaction {

namespace features {
COMPONENT_EXPORT(REDACTION_TOOL)
BASE_FEATURE(kEnableCreditCardRedaction,
             "EnableCreditCardRedaction",
             base::FEATURE_ENABLED_BY_DEFAULT);

COMPONENT_EXPORT(REDACTION_TOOL)
BASE_FEATURE(kEnableIbanRedaction,
             "EnableIbanRedaction",
             base::FEATURE_ENABLED_BY_DEFAULT);
}  // namespace features

namespace {

// The |kCustomPatternsWithContext| array defines patterns to match and
// redact. Each pattern needs to define three capturing parentheses groups:
//
// - a group for the pattern before the identifier to be redacted;
// - a group for the identifier to be redacted;
// - a group for the pattern after the identifier to be redacted.
//
// The first and the last capture group are the origin of the "WithContext"
// suffix in the name of this constant.
//
// Every matched identifier (in the context of the whole pattern) is redacted
// by replacing it with an incremental instance identifier. Every different
// pattern defines a separate instance identifier space. See the unit test for
// RedactionToolTest::RedactCustomPatterns for pattern redaction examples.
//
// Useful regular expression syntax:
//
// +? is a non-greedy (lazy) +.
// \b matches a word boundary.
// (?i) turns on case insensitivity for the remainder of the regex.
// (?-s) turns off "dot matches newline" for the remainder of the regex.
// (?:regex) denotes a non-capturing parentheses group.
//
// NOTE(review): the array is declared without `const` even though it is named
// and described as a constant; consider making it const if nothing mutates it.
CustomPatternWithAlias kCustomPatternsWithContext[] =;

// NOTE(review): undocumented. Presumably rewrites an IPv4-mapped IPv6 address
// in |addr| to its plain IPv4 form and returns whether it did so -- confirm
// against the implementation.
bool MaybeUnmapAddress(IPAddress* addr) {}

// NOTE(review): undocumented. Presumably reverses some form of address
// translation applied to |addr| (in place) and returns whether it did so --
// confirm against the implementation.
bool MaybeUntranslateAddress(IPAddress* addr) {}

// If |addr| points to a valid IPv6 address, this function truncates it at /32.
// The result is written back through |addr|.
// NOTE(review): the meaning of the returned bool is not documented; presumably
// true when the address was truncated -- confirm.
bool MaybeTruncateIPv6(IPAddress* addr) {}

// Returns an appropriately scrubbed version of |addr| if applicable.
// NOTE(review): what is returned when scrubbing is not applicable is not
// stated; presumably |addr| unchanged -- confirm.
std::string MaybeScrubIPAddress(const std::string& addr) {}

// Some strings contain pieces that look like IPv4 addresses but are not.
// This function determines whether that was the case by evaluating |skipped|,
// the skipped piece of the input. It returns true if the matched address was
// erroneous and should be skipped instead.
// NOTE(review): |skipped| is presumably the text preceding the match (cf.
// |skipped_input| of FindAndConsumeAndGetSkipped) -- confirm.
bool ShouldSkipIPAddress(std::string_view skipped) {}

// Helper macro: Non-capturing group
#define NCG(x)
// Helper macro: Optional non-capturing group
#define OPT_NCG(x)

//////////////////////////////////////////////////////////////////////////
// Patterns for URLs, or better IRIs, based on RFC 3987 with an artificial
// limitation on the scheme to increase precision. Otherwise anything
// like "ID:" would be considered an IRI.

#define UNRESERVED
#define RESERVED
#define SUB_DELIMS
#define GEN_DELIMS

#define DIGIT
#define HEXDIG

#define PCT_ENCODED

#define DEC_OCTET

#define IPV4ADDRESS

#define H16
#define LS32
#define WB

// clang-format off
#define IPV6ADDRESS
// clang-format on

#define IPVFUTURE

#define IP_LITERAL

#define PORT

// This is a deviation from RFC 3987.
#define SCHEME

#define IPRIVATE

#define UCSCHAR

#define IUNRESERVED

#define IPCHAR
#define IFRAGMENT
#define IQUERY

#define ISEGMENT
#define ISEGMENT_NZ
#define ISEGMENT_NZ_NC

#define IPATH_EMPTY
#define IPATH_ROOTLESS
#define IPATH_NOSCHEME
#define IPATH_ABSOLUTE
#define IPATH_ABEMPTY

#define IPATH

#define IREG_NAME

#define IHOST
#define IUSERINFO
#define IAUTHORITY

#define IRELATIVE_PART

#define IRELATIVE_REF

// RFC 3987 requires IPATH_EMPTY here but it is omitted so that statements
// that end with "Android:" for example are not considered a URL.
#define IHIER_PART

#define ABSOLUTE_IRI

#define IRI

#define IRI_REFERENCE

// TODO(battre): Use http://tools.ietf.org/html/rfc5322 to represent email
// addresses. Capture names as well ("First Lastname" <user@example.com>).

// The |kCustomPatternsWithoutContext| array defines further patterns to match
// and redact. Each pattern consists of a single capturing group.
CustomPatternWithAlias kCustomPatternsWithoutContext[] =;

// Like RE2's FindAndConsume, searches for the first occurrence of |pattern| in
// |input| and consumes the bytes until the end of the pattern matching. Unlike
// FindAndConsume, the bytes skipped before the match of |pattern| are stored
// in |skipped_input|.
//
// |args| receives the captured groups and needs to contain at least one
// element. NOTE(review): |argc| is presumably the number of elements in
// |args| -- confirm.
//
// Returns whether a match was found.
//
// Example: input = "aaabbbc", pattern = "(b+)" leads to skipped_input = "aaa",
// args[0] = "bbb", and the beginning of |input| is moved to the right so that
// it only contains "c".
// Example: input = "aaabbbc", pattern = "(z+)" leads to input = "aaabbbc",
// the args values are not modified and skipped_input is not modified.
bool FindAndConsumeAndGetSkippedN(std::string_view* input,
                                  const re2::RE2& pattern,
                                  std::string_view* skipped_input,
                                  std::string_view* args[],
                                  int argc) {}

// Variadic counterpart of FindAndConsumeAndGetSkippedN: the match groups are
// passed as individual pointer arguments instead of an array plus a count.
// All |match_groups| need to be of type std::string_view*.
// NOTE(review): presumably forwards to FindAndConsumeAndGetSkippedN -- confirm.
template <typename... Arg>
bool FindAndConsumeAndGetSkipped(std::string_view* input,
                                 const re2::RE2& pattern,
                                 std::string_view* skipped_input,
                                 Arg*... match_groups) {}

bool HasRepeatedChar(std::string_view text, char c) {}

// The following MAC addresses will not be redacted as they are not specific
// to a device but have general meanings.
const char* const kUnredactedMacAddresses[] =;
constexpr size_t kNumUnredactedMacs =;

bool IsFeatureEnabled(const base::Feature& feature) {}
}  // namespace

RedactionTool::RedactionTool(const char* const* first_party_extension_ids)
    :{}

RedactionTool::RedactionTool(
    const char* const* first_party_extension_ids,
    std::unique_ptr<RedactionToolMetricsRecorder> metrics_recorder)
    :{}

RedactionTool::~RedactionTool() {}

std::map<PIIType, std::set<std::string>> RedactionTool::Detect(
    const std::string& input) {}

std::string RedactionTool::Redact(const std::string& input,
                                  const base::Location& location) {}

std::string RedactionTool::RedactAndKeepSelected(
    const std::string& input,
    const std::set<PIIType>& pii_types_to_keep,
    const base::Location& location) {}

void RedactionTool::EnableCreditCardRedaction(const bool enabled) {}

RE2* RedactionTool::GetRegExp(const std::string& pattern) {}

std::string RedactionTool::RedactMACAddresses(
    const std::string& input,
    std::map<PIIType, std::set<std::string>>* detected) {}

std::string RedactionTool::RedactHashes(
    const std::string& input,
    std::map<PIIType, std::set<std::string>>* detected) {}

std::string RedactionTool::RedactAndroidAppStoragePaths(
    const std::string& input,
    std::map<PIIType, std::set<std::string>>* detected) {}

std::string RedactionTool::RedactCreditCardNumbers(
    const std::string& input,
    std::map<PIIType, std::set<std::string>>* detected) {}

std::string RedactionTool::RedactIbans(
    const std::string& input,
    std::map<PIIType, std::set<std::string>>* detected) {}

std::string RedactionTool::RedactAndKeepSelectedCustomPatterns(
    std::string input,
    const std::set<PIIType>& pii_types_to_keep) {}

void RedactionTool::DetectWithCustomPatterns(
    std::string input,
    std::map<PIIType, std::set<std::string>>* detected) {}

RedactionToolCaller RedactionTool::GetCaller(const base::Location& location) {}

std::string RedactionTool::RedactCustomPatternWithContext(
    const std::string& input,
    const CustomPatternWithAlias& pattern,
    std::map<PIIType, std::set<std::string>>* detected) {}

// This takes a |url| argument and returns true if the URL is exempt from
// redaction, returns false otherwise.
// NOTE(review): this free function is defined after the anonymous namespace
// has been closed, so it has external linkage in namespace redaction. If it is
// only used within this file, consider moving it into the anonymous namespace
// -- confirm against the header and other callers.
bool IsUrlExempt(std::string_view url,
                 const char* const* first_party_extension_ids) {}

std::string RedactionTool::RedactCustomPatternWithoutContext(
    const std::string& input,
    const CustomPatternWithAlias& pattern,
    std::map<PIIType, std::set<std::string>>* detected) {}

RedactionToolContainer::RedactionToolContainer(
    scoped_refptr<base::SequencedTaskRunner> task_runner,
    const char* const* first_party_extension_ids)
    :{}

RedactionToolContainer::RedactionToolContainer(
    scoped_refptr<base::SequencedTaskRunner> task_runner,
    const char* const* first_party_extension_ids,
    std::unique_ptr<RedactionToolMetricsRecorder> metrics_recorder)
    :{}

RedactionToolContainer::~RedactionToolContainer() {}

RedactionTool* RedactionToolContainer::Get() {}

}  // namespace redaction