chromium/chrome/browser/lookalikes/lookalike_url_navigation_throttle_browsertest.cc

// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/containers/contains.h"
#include "base/functional/bind.h"
#include "base/path_service.h"
#include "base/strings/pattern.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/test/scoped_feature_list.h"
#include "base/test/simple_test_clock.h"
#include "build/build_config.h"
#include "chrome/browser/history/history_service_factory.h"
#include "chrome/browser/history/history_test_utils.h"
#include "chrome/browser/lookalikes/lookalike_test_helper.h"
#include "chrome/browser/lookalikes/lookalike_url_blocking_page.h"
#include "chrome/browser/lookalikes/lookalike_url_navigation_throttle.h"
#include "chrome/browser/lookalikes/lookalike_url_service.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/browser/ui/browser_commands.h"
#include "chrome/common/chrome_features.h"
#include "chrome/common/webui_url_constants.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "chrome/test/base/ui_test_utils.h"
#include "components/lookalikes/core/lookalike_url_util.h"
#include "components/lookalikes/core/safety_tip_test_utils.h"
#include "components/lookalikes/core/safety_tips_config.h"
#include "components/network_session_configurator/common/network_switches.h"
#include "components/omnibox/browser/location_bar_model.h"
#include "components/security_interstitials/content/security_interstitial_page.h"
#include "components/security_interstitials/content/security_interstitial_tab_helper.h"
#include "components/security_interstitials/core/metrics_helper.h"
#include "components/site_engagement/content/site_engagement_score.h"
#include "components/site_engagement/content/site_engagement_service.h"
#include "components/ukm/test_ukm_recorder.h"
#include "components/url_formatter/spoof_checks/top_domains/test_top_bucket_domains.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/content_paths.h"
#include "content/public/test/browser_test.h"
#include "content/public/test/content_mock_cert_verifier.h"
#include "content/public/test/prerender_test_util.h"
#include "content/public/test/signed_exchange_browser_test_helper.h"
#include "content/public/test/test_navigation_observer.h"
#include "content/public/test/url_loader_interceptor.h"
#include "net/cert/mock_cert_verifier.h"
#include "net/cert/test_root_certs.h"
#include "net/dns/mock_host_resolver.h"
#include "net/test/cert_test_util.h"
#include "net/test/embedded_test_server/http_request.h"
#include "net/test/embedded_test_server/http_response.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_source.h"
#include "services/network/public/cpp/network_switches.h"
#include "services/network/public/cpp/simple_url_loader.h"
#include "ui/base/window_open_disposition.h"

namespace {

LookalikeUrlMatchType;
NavigationSuggestionEvent;
MetricsHelper;
SecurityInterstitialCommand;
UkmEntry;
GetDomainInfo;
kInterstitialHistogramName;
LookalikeUrlBlockingPageUserAction;

// An engagement score above MEDIUM.
const int kHighEngagement =;

// An engagement score below MEDIUM.
const int kLowEngagement =;

// The UMA metric names registered by metrics_helper.
const char kInterstitialDecisionMetric[] =;
const char kInterstitialInteractionMetric[] =;

const char kConsoleMessage[] =;

enum class PrewarmLookalike {};

static std::unique_ptr<net::test_server::HttpResponse>
NetworkErrorResponseHandler(const net::test_server::HttpRequest& request) {}

security_interstitials::SecurityInterstitialPage* GetCurrentInterstitial(
    content::WebContents* web_contents) {}

security_interstitials::SecurityInterstitialPage::TypeID GetInterstitialType(
    content::WebContents* web_contents) {}

// Sets the absolute Site Engagement |score| for the testing origin.
void SetEngagementScore(Browser* browser, const GURL& url, double score) {}

bool IsUrlShowing(Browser* browser) {}

// Navigate to |url| and wait for the load to complete before returning.
// Simulates a link click navigation. We don't use
// ui_test_utils::NavigateToURL(const GURL&) because it simulates the user
// typing the URL, causing the site to have a site engagement score of at
// least LOW.
void NavigateToURLSync(Browser* browser, const GURL& url) {}

// Load given URL and verify that it loaded an interstitial and hid the URL.
void LoadAndCheckInterstitialAt(Browser* browser, const GURL& url) {}

void SendInterstitialCommand(content::WebContents* web_contents,
                             SecurityInterstitialCommand command) {}

void SendInterstitialCommandSync(Browser* browser,
                                 SecurityInterstitialCommand command,
                                 bool punycode_interstitial = false) {}

// Verify that no interstitial is shown, regardless of feature state.
void TestInterstitialNotShown(Browser* browser, const GURL& navigated_url) {}

// Add an allowlist with entries that aren't allowlisted for all domains.
void ConfigureAllowlistWithScopes() {}

}  // namespace

class LookalikeUrlNavigationThrottleBrowserTest
    : public InProcessBrowserTest,
      public testing::WithParamInterface<PrewarmLookalike> {};

INSTANTIATE_TEST_SUITE_P();

// Navigating to a non-IDN shouldn't show an interstitial or record metrics.
// TODO(https://crbug.com1207573): re-enable when flakiness is fixed.
#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_WIN) || BUILDFLAG(IS_MAC)
#define MAYBE_NonIdn_NoMatch
#else
#define MAYBE_NonIdn_NoMatch
#endif
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       MAYBE_NonIdn_NoMatch) {}

// Navigating to a domain whose visual representation does not look like a
// top domain shouldn't show an interstitial or record metrics.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       NonTopDomainIdn_NoInterstitial) {}

// If the user has engaged with the domain before, metrics shouldn't be recorded
// and the interstitial shouldn't be shown, even if the domain is visually
// similar to a top domain.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_TopDomain_EngagedSite_NoMatch) {}

// Navigate to a domain whose visual representation looks like a top domain.
// This should record metrics. It should also show a lookalike warning
// interstitial if configured via a feature param.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_TopDomain_Match) {}

// Navigate to a domain that would trigger the warning, but doesn't because it
// fails-safe when the allowlist isn't available.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       NoMatchOnAllowlistMissing) {}

// Embedding a top domain should show an interstitial when enabled. If disabled
// this would trigger safety tips when target embedding feature parameter is
// enabled for safety tips.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_TopDomain_Match) {}

// Embedding a top domain would normally show an interstitial, but shouldn't
// here because it's narrowly allowlisted.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_ScopedAllowlistMatch) {}

// Same as TargetEmbedding_ScopedAllowlistMatch, but the attacker-controlled
// domain is spoofing an unauthorized victim. This should show a warning.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_ScopedAllowlistMatchWrongDomain) {}

// Same as TargetEmbedding_TopDomain_Match, but has a redirect where the first
// and last URLs are both target embedding matches. Should only record
// metrics for the first URL. Regression test for crbug.com/1136296.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_TopDomain_Redirect_Match) {}

// Target embedding should not trigger on allowlisted embedder domains.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_EmbedderAllowlist) {}

// Target embedding should not trigger on allowlisted target domains.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_TargetAllowlist) {}

// Target embedding shouldn't trigger on component-delivered common words.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_ComponentCommonWords) {}

// Navigate to a domain target embedding a domain with no separators, but that
// matches the target allowlist.  Regression test for crbug.com/1127450.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_TargetAllowlistWithNoSeparators) {}

// Similar to Idn_TopDomain_Match but the domain is not in top 500. Should not
// show an interstitial, but should still record metrics.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_TopDomain_Match_Not500) {}

// Same as Idn_TopDomain_Match, but this time the domain contains characters
// from different scripts, failing the checks in IDN spoof checker before
// reaching the top domain check. In this case, the end result is the same, but
// the reason we fall back to punycode is different.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_TopDomainMixedScript_Match) {}

// The navigated domain will fall back to punycode because it fails standard
// ICU spoof checks in the IDN spoof checker. However, no interstitial will be
// shown as the domain name is single character.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Punycode_ShortHostname_NoInterstitial) {}

// Same as Punycode_ShortHostname_NoInterstitial but also has target embedding.
// Should show an interstitial this time.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Punycode_ShortHostname_TargetEmbedding_Interstitial) {}

// The navigated domain will fall back to punycode because it fails spoof checks
// in IDN spoof checker. The heuristic that changes this domain to punycode
// (latin middle dot) is configured to show a punycode interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Punycode_NoSuggestedUrl_Interstitial) {}

// The navigated domain will fall back to punycode because it fails spoof checks
// in IDN spoof checker. The heuristic that changes this domain to punycode
// (latin middle dot) is configured to show a punycode interstitial, but the
// domain is allowlisted.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Punycode_NoSuggestedUrl_Allowlisted) {}

// The navigated domain will fall back to punycode because it fails spoof checks
// in IDN spoof checker. The heuristic that changes this domain to punycode
// (latin middle dot) is configured to show a punycode interstitial. The domain
// is also caught by the target embedding heuristic. Target embedding should
// take priority.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       PunycodeAndTargetEmbedding_NoSuggestedUrl_Interstitial) {}

// The navigated domain itself is a top domain or a subdomain of a top domain.
// Should not record metrics. The top domain list doesn't contain any IDN, so
// this only tests the case where the subdomains are IDNs.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TopDomainIdnSubdomain_NoMatch) {}

// Schemes other than HTTP and HTTPS should be ignored.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TopDomainChromeUrl_NoMatch) {}

// Navigate to a domain within an edit distance of 1 to an engaged domain.
// This should record metrics, but should not show a lookalike warning
// interstitial yet.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       EditDistance_EngagedDomain_Match) {}

// Navigate to a domain within a character swap of 1 to a top domain.
// This should not record interstitial metrics as it'll display a safety tip.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       CharacterSwap_TopDomain_Match_ShouldNotRecordMetrics) {}

// Tests that a hostname on a safe TLD can spoof another hostname without a
// lookalike warning.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_SafeTLD_CanSpoof) {}

// Navigate to a domain within an edit distance of 1 to a top domain.
// This should record metrics, but should not show a lookalike warning
// interstitial yet.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       EditDistance_TopDomain_Match) {}

// Navigate to a domain within an edit distance of 1 to a top domain, but that
// matches the allowlist. This should neither record metrics nor show an
// interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       EditDistance_TopDomain_Target_Allowlist) {}

// Navigate to a domain within an edit distance of 1 to an engaged domain, but
// that matches the allowlist. This should neither record metrics nor show an
// interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       EditDistance_EngagedDomain_Target_Allowlist) {}

// Tests negative examples for the edit distance.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       EditDistance_TopDomain_NoMatch) {}

// Tests negative examples for the edit distance with engaged sites.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       EditDistance_SiteEngagement_NoMatch) {}

// Test that the heuristics are not triggered with net errors.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       NetError_SiteEngagement_Interstitial) {}

// Same as NetError_SiteEngagement_Interstitial, but triggered by a top domain.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       NetError_TopDomain_Interstitial) {}

// TODO(crbug.com/40146482): Enable test when MacOS flake is fixed.
// TODO(crbug.com/40706320): Enable test when Win/Linux flake is fixed.
#if BUILDFLAG(IS_MAC) || BUILDFLAG(IS_WIN) || BUILDFLAG(IS_LINUX)
#define MAYBE_Idn_SiteEngagement_Match
#else
#define MAYBE_Idn_SiteEngagement_Match
#endif

// Navigate to a domain whose visual representation looks like a domain with a
// site engagement score above a certain threshold. This should record metrics.
// It should also show lookalike warning interstitial if configured via
// a feature param.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       MAYBE_Idn_SiteEngagement_Match) {}

// The site redirects to the matched site, this should not show
// an interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_SiteEngagement_SafeRedirect) {}

// The site redirects to the matched site, but the redirect chain has more than
// two redirects.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_SiteEngagement_MidRedirectSpoofsIgnored) {}

// The site is allowed by the component updater.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       AllowedByComponentUpdater) {}

// The site is allowed by enterprise policy.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       AllowedByPolicy) {}

// Tests negative examples for all heuristics.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       NonUniqueDomains_NoMatch) {}

// Navigate to a domain whose visual representation looks both like a domain
// with a site engagement score and also a top domain. This should record
// metrics for a site engagement match because of the order of checks. It should
// also show lookalike warning interstitial if configured via a feature param.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_SiteEngagementAndTopDomain_Match) {}

// Similar to Idn_SiteEngagement_Match, but tests a single domain. Also checks
// that the list of engaged sites in incognito and the main profile don't affect
// each other.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_SiteEngagement_Match_Incognito) {}

// Test that navigations to a site with a high engagement score shouldn't
// record metrics or show interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_SiteEngagement_Match_IgnoreHighlyEngagedSite) {}

// Test that an engaged site with a scheme other than HTTP or HTTPS should be
// ignored.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Idn_SiteEngagement_IgnoreChromeUrl) {}

// IDNs with a single label should be properly handled. There are two cases
// where this might occur:
// 1. The navigated URL is an IDN with a single label.
// 2. One of the engaged sites is an IDN with a single label.
// Neither of these should cause a crash.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       IdnWithSingleLabelShouldNotCauseACrash) {}

// Ensure that dismissing the interstitial works, and the result is remembered
// in the current tab.  This should record metrics on the first visit, but not
// the second.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Interstitial_Dismiss) {}

// Navigate to lookalike domains that redirect to benign domains and ensure that
// we display an interstitial along the way.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       Interstitial_CapturesRedirects) {}

// Verify that a warning, when ignored, applies to the entire eTLD+1, not just
// the navigated origin.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       AllowlistAppliesToETLDPlusOne) {}

// Verify that the user action in UKM is recorded even when we navigate away
// from the interstitial without interacting with it.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       UkmRecordedAfterNavigateAway) {}

// Verify that the user action in UKM is recorded properly when the user accepts
// the navigation suggestion.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       UkmRecordedAfterSuggestionAccepted) {}

// Verify that the user action in UKM is recorded properly when the user ignores
// the navigation suggestion.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       UkmRecordedAfterSuggestionIgnored) {}

// Verify that the URL shows normally on pages after a lookalike interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       UrlShownAfterInterstitial) {}

// Verify that bypassing warnings in the main profile does not affect incognito.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       MainProfileDoesNotAffectIncognito) {}

// Verify that bypassing warnings in incognito does not affect the main profile.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       IncognitoDoesNotAffectMainProfile) {}

// Verify reloading the page does not result in dismissing an interstitial.
// Regression test for crbug/941886.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       RefreshDoesntDismiss) {}

// Navigate to a URL that triggers combo squatting heuristic via the
// hard coded brand name list. This should record metrics but shouldn't show
// an interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       ComboSquatting_ShouldRecordMetricsWithoutUI) {}

// Navigate to a URL that triggers combo squatting heuristic via a
// brand name from engaged sites. This should record metrics but shouldn't show
// an interstitial.
IN_PROC_BROWSER_TEST_P(
    LookalikeUrlNavigationThrottleBrowserTest,
    ComboSquatting_EngagedSites_ShouldRecordMetricsWithoutUI) {}

// Combo Squatting shouldn't trigger on allowlisted sites and no
// UKM should be recorded.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       ComboSquatting_ShouldNotTriggeredForAllowlist) {}

scoped_refptr<net::X509Certificate> LoadCertificate() {}

// Tests for Signed Exchanges.
class LookalikeUrlNavigationThrottleSignedExchangeBrowserTest
    : public LookalikeUrlNavigationThrottleBrowserTest {};

INSTANTIATE_TEST_SUITE_P();

// Navigates to a 127.0.0.1 URL that serves a signed exchange for
// google-com.example.org. This navigation should be blocked by the target
// embedding interstitial. We only test target embedding here because we can
// test it with a subdomain of example.org (which is the domain used by SGX test
// code). Testing an ETLD+1 such as googlé.com would require generating a custom
// cert.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleSignedExchangeBrowserTest,
                       InnerUrlIsLookalike_ShouldBlock) {}

// Navigates to a lookalike URL (google-com.test.com) that serves a signed
// exchange for test.example.org. This should not be blocked.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleSignedExchangeBrowserTest,
                       OuterUrlIsLookalike_ShouldNotBlock) {}

// Navigates to a lookalike URL (google-com.test.com) that serves a signed
// exchange for test.example.org. This should not be blocked.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleSignedExchangeBrowserTest,
                       OuterUrlIsLookalikeButNotSignedExchange_ShouldNotBlock) {}

// Navigates to a lookalike URL (google-com.test.com) that serves a signed
// exchange for google-com.example.org.
// Both the outer URL (i.e. cache) and the inner URL are lookalikes so this
// should be blocked.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleSignedExchangeBrowserTest,
                       InnerAndOuterUrlsAreLookalikes_ShouldBlock) {}

// TODO(meacer): Add a test for a failed SGX response. It should be treated
// as a normal redirect. In fact, InnerAndOuterUrlsLookalikes_ShouldBlock
// is actually testing this right now, fix it.

class LookalikeUrlNavigationThrottlePrerenderBrowserTest
    : public LookalikeUrlNavigationThrottleBrowserTest {};

INSTANTIATE_TEST_SUITE_P();

IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottlePrerenderBrowserTest,
                       ShowInterstitialAfterActivation) {}