#ifdef UNSAFE_BUFFERS_BUILD
#pragma allow_unsafe_buffers
#endif
#include "components/lookalikes/core/lookalike_url_util.h"
#include <algorithm>
#include <string_view>
#include <utility>
#include "base/containers/contains.h"
#include "base/functional/callback.h"
#include "base/hash/sha1.h"
#include "base/i18n/char_iterator.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/strcat.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/trace_event/trace_event.h"
#include "base/values.h"
#include "build/build_config.h"
#include "components/lookalikes/core/safety_tips_config.h"
#include "components/security_interstitials/core/pref_names.h"
#include "components/url_formatter/spoof_checks/common_words/common_words_util.h"
#include "components/url_formatter/spoof_checks/top_domains/top_bucket_domains.h"
#include "components/url_formatter/spoof_checks/top_domains/top_domain_util.h"
#include "components/url_formatter/url_formatter.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/base/url_util.h"
ComboSquattingParams;
DomainInfo;
GetDomainInfo;
HasOneCharacterSwap;
IsEditDistanceAtMostOne;
LookalikeTargetAllowlistChecker;
LookalikeUrlMatchType;
NavigationSuggestionEvent;
TopBucketDomainsParams;
namespace {
// File-local constants (anonymous namespace). All initializers are elided in
// this view, so the notes below are inferred from names only — confirm each
// against the actual values.

// NOTE(review): presumably the set of ASCII digit characters.
const char kDigitChars[] = …;
// NOTE(review): presumably the minimum e2LD length for a domain to be
// considered by the target-embedding heuristic.
const size_t kMinE2LDLengthForTargetEmbedding = …;
// Array of C strings. NOTE(review): presumably extra "common words" kept
// locally in addition to some other word list — verify.
const char* kLocalAdditionalCommonWords[] = …;
// Array of C strings. NOTE(review): presumably domains that may legitimately
// appear at the end of an embedding hostname — verify.
const char* kDomainsPermittedInEndEmbeddings[] = …;
// NOTE(review): presumably the characters treated as label separators when
// looking for embedded targets.
const char kTargetEmbeddingSeparators[] = …;
// Array of C strings. NOTE(review): presumably private registries that this
// file treats as if they were public ones — verify.
const char* kPrivateRegistriesTreatedAsPublic[] = …;
// Returns a mutable pointer to a TopBucketDomainsParams object; takes no
// arguments.
// NOTE(review): body elided. Presumably this is the instance that
// SetTopBucketDomainsParamsForTesting() and
// ResetTopBucketDomainsParamsForTesting() operate on — confirm, along with
// the pointee's lifetime.
TopBucketDomainsParams* GetTopDomainParams() { … }
// File-local constants (anonymous namespace). All initializers are elided in
// this view, so the notes below are inferred from names only — confirm each
// against the actual values.

// NOTE(review): presumably the minimum e2LD length required before a
// punycode interstitial is shown.
const size_t kMinimumE2LDLengthToShowPunycodeInterstitial = …;
// NOTE(review): presumably default rollout percentages per release channel;
// units/range not visible here.
const int kDefaultLaunchPercentageOnCanaryDev = …;
const int kDefaultLaunchPercentageOnBeta = …;
// Array of (C string, C string) pairs. NOTE(review): "CSQ" presumably stands
// for combo squatting; the meaning of each pair element is not visible here.
constexpr std::pair<const char*, const char*> kBrandNamesForCSQ[] = …;
// Array of C strings. NOTE(review): presumably skeleton forms of popular
// keywords used by the combo squatting heuristic — verify.
const char* kSkeletonsOfPopularKeywordsForCSQ[] = …;
// NOTE(review): presumably the minimum brand-name length considered for
// combo squatting.
const size_t kMinBrandNameLengthForComboSquatting = …;
// Returns a mutable pointer to a ComboSquattingParams object; takes no
// arguments.
// NOTE(review): body elided. Presumably this is the instance that
// SetComboSquattingParamsForTesting() and
// ResetComboSquattingParamsForTesting() operate on — confirm, along with the
// pointee's lifetime.
ComboSquattingParams* GetComboSquattingParams() { … }
// Compares two url_formatter::Skeletons collections and returns a bool.
// NOTE(review): body elided. Presumably returns true when the two
// collections share at least one skeleton — confirm the exact matching rule.
bool SkeletonsMatch(const url_formatter::Skeletons& skeletons1,
const url_formatter::Skeletons& skeletons2) { … }
// Takes the list of engaged sites and the navigated domain; returns a
// std::string by value.
// NOTE(review): body elided. Presumably returns the engaged-site domain that
// matches |navigated_domain|, and an empty string when there is none —
// confirm both the match criterion and the "no match" value.
std::string GetMatchingSiteEngagementDomain(
const std::vector<DomainInfo>& engaged_sites,
const DomainInfo& navigated_domain) { … }
// Returns a bool and takes two non-const pointers, |matched_domain| and
// |match_type|.
// NOTE(review): body elided. Presumably searches the top-bucket domain list
// for a domain similar to |navigated_domain|, skips targets accepted by
// |target_allowlisted|, and on success writes the result through the two
// pointers and returns true. Confirm, including whether the pointers are
// written on failure and whether null is tolerated.
bool GetSimilarDomainFromTopBucket(
const DomainInfo& navigated_domain,
const LookalikeTargetAllowlistChecker& target_allowlisted,
std::string* matched_domain,
LookalikeUrlMatchType* match_type) { … }
// Returns a bool and takes two non-const pointers, |matched_domain| and
// |match_type|.
// NOTE(review): body elided. Presumably searches |engaged_sites| for a
// domain similar to |navigated_domain|, skips targets accepted by
// |target_allowlisted|, and on success writes the result through the two
// pointers and returns true. Confirm, including whether the pointers are
// written on failure and whether null is tolerated.
bool GetSimilarDomainFromEngagedSites(
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& target_allowlisted,
std::string* matched_domain,
LookalikeUrlMatchType* match_type) { … }
// Takes a NavigationSuggestionEvent by value and returns nothing.
// NOTE(review): body elided. Presumably records |event| to a metrics
// histogram (the file includes base/metrics/histogram_macros.h) — confirm
// the histogram name.
void RecordEvent(NavigationSuggestionEvent event) { … }
// Splits |hostname| into tokens and returns them as std::string_view values.
// NOTE(review): body elided, so the separator set is not visible here. If
// the returned views point into |hostname| (likely, given the return type),
// they are only valid while the caller's string is alive and unmodified —
// confirm.
std::vector<std::string_view> SplitDomainIntoTokens(
const std::string& hostname) { … }
// Takes a span of domain labels and a target-allowlist checker; returns a
// bool.
// NOTE(review): body elided. Presumably returns true when some subdomain
// formed from |domain_labels| is accepted by |in_target_allowlist| — confirm
// how the labels are combined into candidate subdomains.
bool ASubdomainIsAllowlisted(
const base::span<const std::string_view>& domain_labels,
const LookalikeTargetAllowlistChecker& in_target_allowlist) { … }
// Takes a candidate target as a std::string_view and returns a std::string
// by value.
// NOTE(review): body elided. Presumably looks up a top domain matching
// |potential_target| once separator characters are ignored, and returns an
// empty string when nothing matches — confirm.
std::string GetMatchingTopDomainWithoutSeparators(
std::string_view potential_target) { … }
// Predicate over a DomainInfo.
// NOTE(review): body elided. Presumably true when |host| is exactly its
// eTLD+1 or differs from it only by a "trivial" subdomain; which subdomains
// count as trivial is not visible here — confirm.
bool IsETLDPlusOneOrTrivialSubdomain(const DomainInfo& host) { … }
// Returns a bool and takes a non-const |embedded_target| pointer.
// NOTE(review): body elided. Presumably checks whether the eTLD+1 of
// |domain| matches a top domain or one of |engaged_sites| and, on a match,
// writes the matched target through |embedded_target|. Confirm, including
// the pointer's state when the function returns false.
bool DoesETLDPlus1MatchTopDomainOrEngagedSite(
const DomainInfo& domain,
const std::vector<DomainInfo>& engaged_sites,
std::string* embedded_target) { … }
// Takes a SafetyTipsConfig pointer and a DomainInfo; returns a bool.
// NOTE(review): body elided. Presumably true when |domain| consists of a
// common dictionary word, using |config_proto| and/or local word lists.
// Whether a null |config_proto| is accepted is not visible here — confirm.
bool UsesCommonWord(const reputation::SafetyTipsConfig* config_proto,
const DomainInfo& domain) { … }
// Takes a span of domain labels and the embedding domain; returns a bool.
// NOTE(review): body elided. Presumably true when the target formed by
// |domain_labels| is the same as |embedding_domain|, i.e. the domain only
// "embeds" itself — confirm the comparison rule.
bool IsEmbeddingItself(const base::span<const std::string_view>& domain_labels,
const std::string& embedding_domain) { … }
// Takes a domain-and-registry string and returns a std::string by value.
// NOTE(review): body elided. Presumably returns the e2LD of
// |domain_and_registry| while treating certain private registries as public
// (see kPrivateRegistriesTreatedAsPublic) — confirm.
std::string GetE2LDWithDeFactoPublicRegistries(
const std::string& domain_and_registry) { … }
// Takes the embedded target's DomainInfo and the embedding domain; returns a
// bool.
// NOTE(review): body elided. Presumably true when |embedded_target| and
// |embedding_domain| are the same name under different TLDs — confirm.
bool IsCrossTLDMatch(const DomainInfo& embedded_target,
const std::string& embedding_domain) { … }
// Takes the embedded target's DomainInfo and the embedding domain; returns a
// bool.
// NOTE(review): body elided. Presumably consults
// kDomainsPermittedInEndEmbeddings to decide whether the embedding ends with
// a permitted domain — confirm which argument is actually tested.
bool EndsWithPermittedDomains(const DomainInfo& embedded_target,
const std::string& embedding_domain) { … }
// Returns a bool. Inputs are the embedded target, the span of subdomain
// labels, a target-allowlist checker, the embedding domain and a
// SafetyTipsConfig pointer.
// NOTE(review): body elided. Presumably true when |embedded_target| may
// legitimately appear inside |embedding_domain| (allowlisted, self-embedding,
// common word, permitted ending, and so on). The actual checks and their
// order are not visible here — confirm. Null-handling of |config_proto| is
// also not visible.
bool IsAllowedToBeEmbedded(
const DomainInfo& embedded_target,
const base::span<const std::string_view>& subdomain_span,
const LookalikeTargetAllowlistChecker& in_target_allowlist,
const std::string& embedding_domain,
const reputation::SafetyTipsConfig* config_proto) { … }
// Takes two strings and returns a single char.
// NOTE(review): body elided. Presumably returns the first character at which
// |str1| and |str2| differ. Which string the character is taken from, and
// the result for equal or different-length inputs, is not visible — confirm.
char GetFirstDifferentChar(const std::string& str1, const std::string& str2) { … }
// Predicate over a brand name.
// NOTE(review): body elided. Presumably applies eligibility rules such as a
// minimum length (see kMinBrandNameLengthForComboSquatting) before |brand|
// is used by the combo squatting heuristic — confirm.
bool IsComboSquattingCandidate(const std::string& brand) { … }
// Builds and returns a vector of (string, string) pairs from |engaged_sites|.
// NOTE(review): body elided. The pair layout presumably mirrors
// kBrandNamesForCSQ, but the meaning of each element is not visible here —
// confirm.
std::vector<std::pair<std::string, std::string>> GetBrandNamesFromEngagedSites(
const std::vector<DomainInfo>& engaged_sites) { … }
// Takes a DomainInfo and returns a std::string by value.
// NOTE(review): body elided. Presumably returns the registry (TLD) part of
// |navigated_domain|; whether a leading dot is included is not visible —
// confirm.
std::string GetRegistry(const DomainInfo& navigated_domain) { … }
// Takes a brand name and the navigated domain; returns a std::string by
// value.
// NOTE(review): body elided. Presumably builds the suggested legitimate
// domain for a hard-coded brand from |brand_name| and the navigated domain's
// registry — confirm, including the "no match" value.
std::string FindMatchedDomainForHardCodedComboSquatting(
const std::string& brand_name,
const DomainInfo& navigated_domain) { … }
// Takes a brand name, the navigated domain and the engaged sites; returns a
// std::string by value.
// NOTE(review): body elided. Presumably finds the entry in |engaged_sites|
// that corresponds to |brand_name| and returns its domain — confirm,
// including the "no match" value.
std::string FindMatchedDomainForSiteEngagementComboSquatting(
const std::string& brand_name,
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites) { … }
// Returns a bool and takes a non-const |matched_domain| pointer.
// NOTE(review): body elided. Presumably tests |navigated_domain| against
// each entry of |brand_names| using |combo_squatting_params|, and on a hit
// writes the matched domain through |matched_domain|. |is_hard_coded|
// presumably selects between the hard-coded and site-engagement lookup
// helpers. Confirm, including the pointer's state on a false return.
bool IsComboSquatting(
const std::vector<std::pair<std::string, std::string>>& brand_names,
const ComboSquattingParams& combo_squatting_params,
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites,
std::string* matched_domain,
bool is_hard_coded) { … }
}
namespace lookalikes {
// Histogram name string defined in namespace lookalikes.
// NOTE(review): value elided in this view; presumably the UMA histogram used
// for lookalike interstitial events — confirm.
const char kInterstitialHistogramName[] = …;
// Takes a PrefRegistrySyncable pointer and returns nothing.
// NOTE(review): body elided. Presumably registers the profile prefs this
// component reads (the file includes
// components/security_interstitials/core/pref_names.h) — confirm which
// prefs and their defaults.
void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry) { … }
// Takes the lookalike URL and a flag; returns a std::string by value.
// NOTE(review): body elided. Presumably builds the developer-console message
// for |lookalike_url|, with wording that depends on |is_new_heuristic| —
// confirm.
std::string GetConsoleMessage(const GURL& lookalike_url,
bool is_new_heuristic) { … }
// Constructs a DomainInfo from six pieces: hostname, domain-and-registry,
// domain without registry, IDN conversion result, and two skeleton sets.
// NOTE(review): the member-initializer list and body are elided. Presumably
// each argument is stored in the correspondingly named member — confirm.
DomainInfo::DomainInfo(
const std::string& arg_hostname,
const std::string& arg_domain_and_registry,
const std::string& arg_domain_without_registry,
const url_formatter::IDNConversionResult& arg_idn_result,
const url_formatter::Skeletons& arg_skeletons,
const url_formatter::Skeletons& arg_domain_without_registry_skeletons)
: … { … }
// Compiler-generated (defaulted) destructor, defined out of line.
DomainInfo::~DomainInfo() = default;
// Compiler-generated (defaulted) copy constructor, defined out of line.
DomainInfo::DomainInfo(const DomainInfo&) = default;
// Builds and returns a DomainInfo for |hostname|; this is the string
// overload.
// NOTE(review): body elided. Presumably derives the eTLD+1, the IDN
// conversion result and the skeletons that DomainInfo's constructor takes —
// confirm, including behaviour for hostnames with no registrable domain.
DomainInfo GetDomainInfo(const std::string& hostname) { … }
// Builds and returns a DomainInfo for |url|; this is the GURL overload.
// NOTE(review): body elided. Presumably forwards the URL's host to the
// std::string overload — confirm, including any special-casing of URL types.
DomainInfo GetDomainInfo(const GURL& url) { … }
// Takes a hostname and returns a std::string by value.
// NOTE(review): body elided. Presumably returns the eTLD+1 of |hostname|
// (the file includes the registry_controlled_domains header). The result for
// hosts without one, and the private-registry policy, are not visible —
// confirm.
std::string GetETLDPlusOne(const std::string& hostname) { … }
// Predicate over two UTF-16 strings.
// NOTE(review): body elided. Presumably true when |str1| and |str2| are
// within edit distance one. Whether the comparison is per UTF-16 code unit
// or per code point is not visible — confirm.
bool IsEditDistanceAtMostOne(const std::u16string& str1,
const std::u16string& str2) { … }
// Takes the navigated and the matched DomainInfo; returns a bool.
// NOTE(review): body elided. Presumably true when an edit-distance match
// between the two domains is likely benign; the specific rules are not
// visible here — confirm.
bool IsLikelyEditDistanceFalsePositive(const DomainInfo& navigated_domain,
const DomainInfo& matched_domain) { … }
// Takes the navigated and the matched DomainInfo; returns a bool.
// NOTE(review): body elided. Presumably true when a character-swap match
// between the two domains is likely benign; the specific rules are not
// visible here — confirm.
bool IsLikelyCharacterSwapFalsePositive(const DomainInfo& navigated_domain,
const DomainInfo& matched_domain) { … }
// Predicate over a DomainInfo.
// NOTE(review): body elided. Presumably true when |domain_info| appears in
// the top-domains list; which DomainInfo field is looked up is not visible —
// confirm.
bool IsTopDomain(const DomainInfo& domain_info) { … }
// Returns a bool and takes two non-const pointers, |matched_domain| and
// |match_type|.
// NOTE(review): body elided. Presumably runs the lookalike heuristics for
// |navigated_domain| against |engaged_sites| and the top domains, honouring
// |in_target_allowlist| and |config_proto|. On a hit it presumably writes
// the matched domain and heuristic type through the pointers and returns
// true. The heuristic order, pointer state on failure and null-handling are
// not visible — confirm.
bool GetMatchingDomain(
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& in_target_allowlist,
const reputation::SafetyTipsConfig* config_proto,
std::string* matched_domain,
LookalikeUrlMatchType* match_type) { … }
// Takes a LookalikeUrlMatchType by value and returns nothing.
// NOTE(review): body elided. Presumably maps |match_type| to a
// NavigationSuggestionEvent and records it (see RecordEvent) — confirm the
// mapping.
void RecordUMAFromMatchType(LookalikeUrlMatchType match_type) { … }
// Returns a TargetEmbeddingType and takes a non-const |safe_hostname|
// pointer.
// NOTE(review): body elided. Presumably classifies whether |hostname| embeds
// another well-known or engaged domain and, if so, writes the embedded
// (safe) hostname through |safe_hostname|. Its relationship to
// SearchForEmbeddings() is not visible here — confirm.
TargetEmbeddingType GetTargetEmbeddingType(
const std::string& hostname,
const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& in_target_allowlist,
const reputation::SafetyTipsConfig* config_proto,
std::string* safe_hostname) { … }
// Returns a TargetEmbeddingType and takes a non-const |safe_hostname|
// pointer. Same inputs as GetTargetEmbeddingType() plus
// |safety_tips_allowed|.
// NOTE(review): body elided. Presumably scans the labels of |hostname| for
// an embedded target, with |safety_tips_allowed| controlling whether a
// safety-tip-level result may be returned. Confirm the flag's semantics and
// when |safe_hostname| is written.
TargetEmbeddingType SearchForEmbeddings(
const std::string& hostname,
const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& in_target_allowlist,
const reputation::SafetyTipsConfig* config_proto,
bool safety_tips_allowed,
std::string* safe_hostname) { … }
// Predicate over a single code point (UChar32).
// NOTE(review): body elided. Presumably true when |codepoint| lies in the
// ASCII range — confirm the exact bounds.
bool IsASCII(UChar32 codepoint) { … }
// Predicate over a single code point (UChar32).
// NOTE(review): body elided. Presumably true for emoji and emoji-related
// code points; which Unicode properties or ranges are used is not visible —
// confirm.
bool IsEmojiRelatedCodepoint(UChar32 codepoint) { … }
// Predicate over UTF-16 text.
// NOTE(review): body elided. Presumably true when every code point in |text|
// satisfies IsASCII() or IsEmojiRelatedCodepoint(). The result for empty
// input, and whether at least one emoji is required, is not visible —
// confirm.
bool IsASCIIAndEmojiOnly(std::u16string_view text) { … }
// Predicate over a DomainInfo.
// NOTE(review): body elided. Presumably true when |domain| qualifies for the
// punycode interstitial, likely involving
// kMinimumE2LDLengthToShowPunycodeInterstitial — confirm the criteria.
bool IsPunycodeInterstitialCandidate(const DomainInfo& domain) { … }
// Predicate over the navigated DomainInfo.
// NOTE(review): body elided. Presumably decides from the domain's IDN
// spoof-check result whether navigation should be blocked; which results
// trigger blocking is not visible — confirm.
bool ShouldBlockBySpoofCheckResult(const DomainInfo& navigated_domain) { … }
// Takes a read-only PrefService pointer and a URL; returns a bool.
// NOTE(review): body elided. Presumably true when |url|'s host is covered by
// an enterprise allowlist pref read from |pref_service|. The pref name,
// matching rule and null-handling are not visible — confirm.
bool IsAllowedByEnterprisePolicy(const PrefService* pref_service,
const GURL& url) { … }
// Test-only helper (per the name). Takes a mutable PrefService pointer and a
// list of hosts; returns nothing.
// NOTE(review): body elided. Presumably writes |hosts| into the pref that
// IsAllowedByEnterprisePolicy() consults — confirm.
void SetEnterpriseAllowlistForTesting(PrefService* pref_service,
const std::vector<std::string>& hosts) { … }
// Predicate over two UTF-16 strings.
// NOTE(review): body elided. Presumably true when |str1| and |str2| differ
// by exactly one swap of adjacent characters. The result for identical or
// different-length inputs is not visible — confirm.
bool HasOneCharacterSwap(const std::u16string& str1,
const std::u16string& str2) { … }
// Test-only helper (per the name). Takes TopBucketDomainsParams by const
// reference and returns nothing.
// NOTE(review): body elided. Presumably overwrites the object returned by
// GetTopDomainParams() with |params| — confirm.
void SetTopBucketDomainsParamsForTesting(const TopBucketDomainsParams& params) { … }
// Test-only helper (per the name). Takes no arguments and returns nothing.
// NOTE(review): body elided. Presumably restores the object returned by
// GetTopDomainParams() to its production defaults — confirm.
void ResetTopBucketDomainsParamsForTesting() { … }
// Returns a bool. Inputs are a SafetyTipsConfig pointer, a heuristic enum
// value, the lookalike's eTLD+1 and the release channel.
// NOTE(review): body elided. Presumably decides whether |heuristic| is
// launched for |lookalike_etld_plus_one| on |channel|. The file includes
// base/hash/sha1.h and defines kDefaultLaunchPercentageOn* constants, so
// this may be a hash-based percentage rollout — confirm the mechanism and
// null-handling of |config_proto|.
bool IsHeuristicEnabledForHostname(
const reputation::SafetyTipsConfig* config_proto,
const reputation::HeuristicLaunchConfig::Heuristic heuristic,
const std::string& lookalike_etld_plus_one,
version_info::Channel channel) { … }
// Test-only helper (per the name). Takes ComboSquattingParams by const
// reference and returns nothing.
// NOTE(review): body elided. Presumably overwrites the object returned by
// GetComboSquattingParams() with |params| — confirm.
void SetComboSquattingParamsForTesting(const ComboSquattingParams& params) { … }
// Test-only helper (per the name). Takes no arguments and returns nothing.
// NOTE(review): body elided. Presumably restores the object returned by
// GetComboSquattingParams() to its production defaults — confirm.
void ResetComboSquattingParamsForTesting() { … }
// Returns a ComboSquattingType and takes a non-const |matched_domain|
// pointer.
// NOTE(review): body elided. Presumably checks |navigated_domain| for combo
// squatting against hard-coded brands and brands derived from
// |engaged_sites|, and on a hit writes the legitimate domain through
// |matched_domain|. Confirm the check order and the pointer's state when
// nothing matches.
ComboSquattingType GetComboSquattingType(
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites,
std::string* matched_domain) { … }
// Predicate over a hostname.
// NOTE(review): body elided. Presumably true when |hostname|'s TLD belongs
// to a set considered safe; the set itself is not visible here — confirm.
bool IsSafeTLD(const std::string& hostname) { … }
// Returns a LookalikeActionType. Inputs are the config, the release channel,
// an eTLD+1 and the match type.
// NOTE(review): body elided. Presumably maps |match_type| to the UI action
// to take, possibly gated per |channel| and |etld_plus_one| via
// IsHeuristicEnabledForHostname() — confirm the mapping and null-handling of
// |config|.
LookalikeActionType GetActionForMatchType(
const reputation::SafetyTipsConfig* config,
version_info::Channel channel,
const std::string& etld_plus_one,
LookalikeUrlMatchType match_type) { … }
// Takes the match type, the navigated URL and the matched hostname; returns
// a GURL by value.
// NOTE(review): body elided. Presumably builds the URL suggested to the user
// from |matched_hostname|. Which parts of |navigated_url| are preserved, and
// how |match_type| affects the result, is not visible — confirm.
GURL GetSuggestedURL(LookalikeUrlMatchType match_type,
const GURL& navigated_url,
const std::string& matched_hostname) { … }
}