// Copyright 2020 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UTIL_H_ #define COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UTIL_H_ #include <string> #include <vector> #include "base/functional/callback.h" #include "base/memory/raw_ptr_exclusion.h" #include "components/lookalikes/core/safety_tips.pb.h" #include "components/pref_registry/pref_registry_syncable.h" #include "components/prefs/pref_service.h" #include "components/url_formatter/url_formatter.h" #include "components/version_info/channel.h" #include "url/gurl.h" namespace lookalikes { // Name of the histogram recorded by the interstitial for lookalike match types. extern const char kInterstitialHistogramName[]; // Register applicable preferences with the provided registry. void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry); // Returns the console message to be shown in devtools when a URL is flagged by // a lookalike heuristic. If is_new_heuristic is true, the message is for a new // heuristic that's not fully launched and it has an extra line about future // behavior of Chrome. std::string GetConsoleMessage(const GURL& lookalike_url, bool is_new_heuristic); // Used for |GetTargetEmbeddingType| return value. It shows if the target // embedding triggers on the input domain, and if it does, what type of warning // should be shown to the user. enum class TargetEmbeddingType { … }; // Used for |GetComboSquattingType| return value. // It shows if the brand name in the flagged domain // comes from the hard-coded brand names or from site engagements. enum class ComboSquattingType { … }; // Used for UKM. There is only a single LookalikeUrlMatchType per navigation. enum class LookalikeUrlMatchType { … }; // Used for UKM. There is only a single LookalikeUrlBlockingPageUserAction per // navigation. enum class LookalikeUrlBlockingPageUserAction { … }; // Used for metrics. Multiple events can occur per navigation. enum class NavigationSuggestionEvent { … }; struct TopBucketDomainsParams { … }; struct ComboSquattingParams { … }; struct DomainInfo { … }; // Returns a DomainInfo instance computed from |hostname|. Will return empty // fields for non-unique hostnames (e.g. site.test), localhost or sites whose // eTLD+1 is empty. DomainInfo GetDomainInfo(const std::string& hostname); // Convenience function for returning GetDomainInfo(url.host()). DomainInfo GetDomainInfo(const GURL& url); // Returns true if the Levenshtein distance between |str1| and |str2| is at most // one. This has O(max(n,m)) complexity as opposed to O(n*m) of the usual edit // distance computation. bool IsEditDistanceAtMostOne(const std::u16string& str1, const std::u16string& str2); // Returns whether |navigated_domain| and |matched_domain| are likely to be edit // distance false positives, and thus the user should *not* be warned. // // Assumes |navigated_domain| and |matched_domain| are edit distance of 1 apart. bool IsLikelyEditDistanceFalsePositive(const DomainInfo& navigated_domain, const DomainInfo& matched_domain); // Returns whether |navigated_domain| and |matched_domain| are likely to be // character swap false positives, and thus the user should *not* be warned. // // Assumes |navigated_domain| and |matched_domain| are within 1 character swap. bool IsLikelyCharacterSwapFalsePositive(const DomainInfo& navigated_domain, const DomainInfo& matched_domain); // Returns true if the domain given by |domain_info| is a top domain. bool IsTopDomain(const DomainInfo& domain_info); // Returns eTLD+1 of |hostname|. This excludes private registries, and returns // "blogspot.com" for "test.blogspot.com" (blogspot.com is listed as a private // registry). We do this to be consistent with url_formatter's top domain list // which doesn't have a notion of private registries. std::string GetETLDPlusOne(const std::string& hostname); // Records an interstitial histogram entry for the given match type. void RecordUMAFromMatchType(LookalikeUrlMatchType match_type); LookalikeTargetAllowlistChecker; // Returns true if a domain is visually similar to the hostname of |url|. The // matching domain can be a top domain or an engaged site. Similarity // check is made using both visual skeleton and edit distance comparison. If // this returns true, match details will be written into |matched_domain|. // Pointer arguments can't be nullptr. bool GetMatchingDomain( const DomainInfo& navigated_domain, const std::vector<DomainInfo>& engaged_sites, const LookalikeTargetAllowlistChecker& in_target_allowlist, const reputation::SafetyTipsConfig* config_proto, std::string* matched_domain, LookalikeUrlMatchType* match_type); // Checks to see if a URL is a target embedding lookalike. This function sets // |safe_hostname| to the url of the embedded target domain. See the unit tests // for what qualifies as target embedding. TargetEmbeddingType GetTargetEmbeddingType( const std::string& hostname, const std::vector<DomainInfo>& engaged_sites, const LookalikeTargetAllowlistChecker& in_target_allowlist, const reputation::SafetyTipsConfig* config_proto, std::string* safe_hostname); // Same as GetTargetEmbeddingType, but explicitly state whether or not a safety // tip is permitted via |safety_tips_allowed|. Safety tips are presently only // used for tail embedding (e.g. "evil-google.com"). This function may return // kSafetyTip preferentially to kInterstitial -- call with !safety_tips_allowed // if you're interested in determining if there's *also* an interstitial. TargetEmbeddingType SearchForEmbeddings( const std::string& hostname, const std::vector<DomainInfo>& engaged_sites, const LookalikeTargetAllowlistChecker& in_target_allowlist, const reputation::SafetyTipsConfig* config_proto, bool safety_tips_allowed, std::string* safe_hostname); // Returns true if a navigation to an IDN should be blocked. bool ShouldBlockBySpoofCheckResult(const DomainInfo& navigated_domain); // Checks whether the given url is allowlisted by enterprise policy, and // thus no warnings should be shown on that host. bool IsAllowedByEnterprisePolicy(const PrefService* pref_service, const GURL& url); // Add the given hosts to the allowlist policy setting. void SetEnterpriseAllowlistForTesting(PrefService* pref_service, const std::vector<std::string>& hosts); // Returns true if |str1| and |str2| are identical except that two adjacent // characters are swapped. E.g. example.com vs exapmle.com. bool HasOneCharacterSwap(const std::u16string& str1, const std::u16string& str2); // Sets information about top bucket domains for testing. void SetTopBucketDomainsParamsForTesting(const TopBucketDomainsParams& params); // Resets information about top bucket domains for testing. void ResetTopBucketDomainsParamsForTesting(); // Returns true if the launch configuration provided by the component updater // enables `heuristic` for the given `etld_plus_one`. bool IsHeuristicEnabledForHostname( const reputation::SafetyTipsConfig* config_proto, reputation::HeuristicLaunchConfig::Heuristic heuristic, const std::string& lookalike_etld_plus_one, version_info::Channel channel); // Set brand names and keywords for testing Combo Squatting heuristic. void SetComboSquattingParamsForTesting(const ComboSquattingParams& params); // Reset brand names and keywords after testing Combo Squatting heuristic. void ResetComboSquattingParamsForTesting(); // Check if |navigated_domain| is Combo Squatting lookalike. // It gets |engaged_sites| to use its brand names in addition to hard coded // brand names. The function sets |matched_domain| to suggest to the user // instead of the Combo Squatting domain. ComboSquattingType GetComboSquattingType( const DomainInfo& navigated_domain, const std::vector<DomainInfo>& engaged_sites, std::string* matched_domain); // Returns true if `etld_plus_one` has a TLD that's considered safe for // lookalike checks, such as government sites. bool IsSafeTLD(const std::string& hostname); // The action to take for a given lookalike match. enum class LookalikeActionType { … }; // Returns the action to take for the given `etld_plus_one` and lookalike // `match_type`. Uses `config` to check whether the heuristic UI is enabled // via gradual rollout. LookalikeActionType GetActionForMatchType( const reputation::SafetyTipsConfig* config, version_info::Channel channel, const std::string& etld_plus_one, LookalikeUrlMatchType match_type); // Returns the suggested URL for the given parameters. Returns an https URL for // top domain matches because it's more likely for top sites to support https. GURL GetSuggestedURL(LookalikeUrlMatchType match_type, const GURL& navigated_url, const std::string& matched_hostname); } // namespace lookalikes #endif // COMPONENTS_LOOKALIKES_CORE_LOOKALIKE_URL_UTIL_H_