// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package reputation;
// One flagged page entry: a URL pattern paired with the type of flag that was
// applied to it.
message FlaggedPage {
  // The kinds of flag that can be attached to a page. UNKNOWN is listed first
  // and is therefore the proto2 default when |type| is unset.
  enum FlagType {
    UNKNOWN = 0;
    BAD_REP = 1;
    YOUNG_DOMAIN = 2;
  }

  // A full URL with the scheme, username, password and port removed, e.g.
  // example.test/test-path-for-safety-tips/test.html.
  optional string pattern = 1;

  // The flag attached to |pattern|.
  optional FlagType type = 2;
}
// A URL pattern entry, optionally tied to one or more cohorts by index.
message UrlPattern {
  // A full URL with the scheme, username, password and port removed, e.g.
  // example.test/test-path-for-safety-tips/test.html. See also the comment on
  // the |allowed_pattern| field.
  optional string pattern = 1;

  // Index (or indices) of the cohort[s] this entry is permitted to spoof.
  // When this field is left unset and the pattern is allowlisted, the pattern
  // may spoof any domain. Carries no meaning for entries in canonical_pattern.
  repeated uint32 cohort_index = 2 [packed = true];
}
// A hostname matcher expressed as a regular expression.
message HostPattern {
  // Regular expression matching allowlisted hostnames. Note that this
  // differs from UrlPattern.pattern, which matches URLs rather than hostnames.
  // IMPORTANT: Dots used as label separators must be escaped.
  optional string regex = 1;
}
// Launch configuration for a single heuristic: which heuristic is being
// launched and on what percentage of sites it is enabled on Stable.
message HeuristicLaunchConfig {
  // Identifies the heuristic being launched with a warning UI.
  //
  // Important: Any change to a heuristic MUST be introduced as a NEW heuristic
  // (e.g. a new enum value suffixed with "_V2"). Rolling out a changed
  // heuristic under its existing value would also enable the buggy version of
  // that heuristic on older versions of Chrome.
  //
  // These values are meant to be temporary and exist only for new heuristic
  // launches. Do not reuse this enum for any other purpose; use the existing
  // LookalikeHeuristic enum in this file instead.
  // NOTE(review): no enum named LookalikeHeuristic is visible in this file --
  // confirm where it is defined.
  enum Heuristic {
    HEURISTIC_UNKNOWN = 0;
    HEURISTIC_CHARACTER_SWAP_ENGAGED_SITES = 1;
    HEURISTIC_CHARACTER_SWAP_TOP_SITES = 2;
    HEURISTIC_COMBO_SQUATTING_TOP_DOMAINS = 3;
    HEURISTIC_COMBO_SQUATTING_ENGAGED_SITES = 4;
  }

  // The heuristic this config applies to.
  optional Heuristic heuristic = 1;

  // Percentage of all sites on which this heuristic is enabled on the Stable
  // Channel. Which sites are enabled is decided from hash prefixes of the
  // sites.
  //   0   -> the heuristic is not enabled on any site on Stable.
  //   100 -> the heuristic is enabled on every site on Stable.
  //
  // Whenever a launch config exists for a heuristic, that heuristic is enabled
  // for 90% of sites on Canary/Dev and 50% of sites on Beta, irrespective of
  // the value stored here.
  optional uint32 launch_percentage = 2;
}
// A group of domains that a given allowlist entry is allowed to spoof.
// An allowed_pattern or canonical_pattern index may be listed in more than one
// Cohort, and several allowed_patterns may point to the same Cohort.
message Cohort {
  // Indices into `allowed_pattern` for the entries belonging to this cohort.
  repeated uint32 allowed_index = 1 [packed = true];

  // Indices into `canonical_pattern` for the entries belonging to this cohort.
  repeated uint32 canonical_index = 2 [packed = true];
}
// Top-level configuration for the safety tips component. Chrome clients
// receive a binary-serialized version of this proto through component updater,
// and that binary holds exactly one instance of this message.
message SafetyTipsConfig {
  optional uint32 version_id = 1;

  // Pages on which the Safety Tip UX should be shown. The list must be sorted.
  // The same pattern may appear more than once when it is flagged with
  // multiple FlagTypes.
  repeated FlaggedPage flagged_page = 2;

  // Patterns that are explicitly allowed. The list must be sorted. It is used
  // to mitigate false positives in both Safety Tips and Lookalike warnings.
  // - Safety tips: the pattern may be a URL or a full suffix/prefix expression
  //   as used for SafeBrowsing. See
  //   https://developers.google.com/safe-browsing/v4/urls-hashing#suffixprefix-expressions.
  // - Lookalike warnings: these operate on eTLD+1, so the pattern only makes
  //   sense as an eTLD+1, such as "google.com/".
  repeated UrlPattern allowed_pattern = 3;

  // Like allowed_pattern, but allowlists the *targets* of some heuristics.
  // - Edit distance: the entry is the matched domain. Suppose edit distance
  //   flags foo1.com, foo2.com, ... as spoofs of foo.com. If we are fairly
  //   sure these are all separate, legitimate sites, allowlisting foo.com is
  //   much easier than allowlisting every fooN.com.
  // - Target embedding: the entry is the embedded target. Some organizations
  //   use lookalike subdomains to proxy popular domains or to customize their
  //   content for those domains, e.g. google-scholar-com.university.edu.
  //   In such cases allowlisting the target is simpler than allowlisting the
  //   embedder.
  repeated HostPattern allowed_target_pattern = 4;

  // Common words, as a *sorted* list. They are merged with the list at
  // components/url_formatter/spoof_checks/common_words, and the merged list is
  // used by some lookalike heuristics to avoid common false positives.
  repeated string common_word = 5;

  // Launch configurations for new heuristics, one config per heuristic being
  // launched. Several heuristics can be enabled simultaneously.
  repeated HeuristicLaunchConfig launch_config = 6;

  // Hostnames that are only ever spoofed and never spoof anything themselves.
  // Each entry is pointed to by one or more Cohort.
  repeated UrlPattern canonical_pattern = 7;

  // Cohorts. Each Cohort is a set of domains that may be spoofed by the
  // allowed_pattern entries pointing to it.
  repeated Cohort cohort = 8;
}