idn_spoof_checker_unittest.cc | Explore in Territory

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_formatter/spoof_checks/idn_spoof_checker.h"

#include <stddef.h>
#include <string.h>

#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/scoped_feature_list.h"
#include "build/build_config.h"
#include "components/url_formatter/spoof_checks/skeleton_generator.h"
#include "components/url_formatter/url_formatter.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/icu/source/common/unicode/uvernum.h"
#include "url/gurl.h"
#include "url/url_features.h"

namespace url_formatter {

namespace {

// Expected result of the IDN conversion.
// NOTE(review): the enumerators are collapsed in this view; the kSafe /
// kUnsafe / kInvalid aliases declared below suggest three corresponding
// values -- confirm against the full definition.
enum class Result { … };

// Alias the values to make the tests less verbose.
// These are file-local constants of type Result (they live in the anonymous
// namespace).
const Result kSafe = …;
const Result kUnsafe = …;
const Result kInvalid = …;

// A single IDN conversion test case; kIdnCases below is an array of these.
// NOTE(review): the fields are collapsed in this view -- presumably the input
// hostname, the expected unicode output and the expected Result; confirm
// against the full definition.
struct IDNTestCase { … };

// Table of IDN conversion test cases (an array of IDNTestCase).
//
// These cases MUST be generated with the script
// tools/security/idn_test_case_generator.py.
// See documentation there: you can either run it from the command line or call
// the make_case function directly from the Python shell (which may be easier
// for entering Unicode text).
//
// Do NOT generate these cases by hand.
//
// Q: Why not just do this conversion right here in the test, rather than having
//    a Python script to generate it?
// A: Because then we would have to rely on complex logic (IDNA encoding) in the
//    test itself; the same code we are trying to test. By using Python's IDN
//    encoder to generate the test data, we independently verify that our
//    algorithm is correct.
const IDNTestCase kIdnCases[] = …;

// Textually includes the test-domain trie source inside the nested `test`
// namespace, so everything it declares is scoped under `test::`.
// NOTE(review): the "-trie-inc.cc" suffix suggests a build-generated file that
// is meant to be #included rather than compiled on its own -- confirm.
namespace test {
#include "components/url_formatter/spoof_checks/top_domains/idn_test_domains-trie-inc.cc"
}

// Boolean predicate over a UTF-16 string.
// NOTE(review): the body is collapsed in this view; judging by the name it
// reports whether |s| is in punycode form (presumably by looking for the
// "xn--" ACE prefix) -- confirm against the implementation.
bool IsPunycode(const std::u16string& s) { … }

}  // namespace

// IDNA mode to use in tests. This is the parameter type of the
// IDNSpoofCheckerTest fixture (via ::testing::WithParamInterface<IDNAMode>).
// NOTE(review): the enumerators are collapsed in this view.
enum class IDNAMode { … };

// Value-parameterized test fixture: every TEST_P that uses it runs once per
// IDNAMode value supplied by the suite instantiation.
// NOTE(review): the fixture body is collapsed in this view; it presumably
// configures the IDNA mode under test from GetParam() (this file includes
// base/test/scoped_feature_list.h and url/url_features.h) -- confirm.
class IDNSpoofCheckerTest : public ::testing::Test,
                            public ::testing::WithParamInterface<IDNAMode> { … };

// Instantiates a value-parameterized test suite.
// NOTE(review): the arguments are collapsed in this view; presumably this
// instantiates IDNSpoofCheckerTest over the IDNAMode values -- confirm.
INSTANTIATE_TEST_SUITE_P(…);

// Test that a domain entered as punycode is decoded to unicode if safe, and
// is otherwise left in punycode. Runs once per IDNAMode (TEST_P).
//
// TODO(crbug.com/40664864): This should also check whether a domain entered as
// unicode is correctly decoded or left undecoded. This is important in cases
// where certain unicode characters are canonicalized to other characters.
// E.g. Mathematical Monospace Small A (U+1D68A) is canonicalized to "a" when
// used in a domain name.
TEST_P(IDNSpoofCheckerTest, IDNToUnicode) { … }

// Same as IDNToUnicode but only tests hostnames with deviation characters.
// Runs once per IDNAMode (TEST_P).
// NOTE(review): "deviation characters" presumably refers to the UTS #46
// deviation characters -- confirm against the test body.
TEST_P(IDNSpoofCheckerTest, IDNToUnicodeDeviationCharacters) { … }

// Parameterized test; runs once per IDNAMode.
// NOTE(review): the body is collapsed in this view; judging by the name it
// covers the spoof checker's similar-top-domain lookup -- confirm.
TEST_P(IDNSpoofCheckerTest, GetSimilarTopDomain) { … }

// Parameterized test; runs once per IDNAMode.
// NOTE(review): the body is collapsed in this view. The non-fixture test of
// the same name is documented as using the real top domain list, so this one
// presumably runs against the test-domain trie included above -- confirm.
TEST_P(IDNSpoofCheckerTest, LookupSkeletonInTopDomains) { … }

// Same test as LookupSkeletonInTopDomains but using the real top domain list.
// Declared with plain TEST, so it does not use the IDNSpoofCheckerTest fixture
// and is not parameterized on IDNAMode.
TEST(IDNSpoofCheckerNoFixtureTest, LookupSkeletonInTopDomains) { … }

// Check the unsafe version of IDNToUnicode. Even though the input domain
// matches a top domain, it should still be converted to unicode.
// Declared with plain TEST, so it is not parameterized on IDNAMode.
TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { … }

// Checks that skeletons are properly generated for domains with blocked
// characters after using UnsafeIDNToUnicodeWithDetails.
// Declared with plain TEST, so it is not parameterized on IDNAMode.
TEST(IDNSpoofCheckerNoFixtureTest, Skeletons) { … }

// Non-parameterized test (plain TEST, no fixture).
// NOTE(review): the body is collapsed in this view; judging by the name it
// covers hostnames that yield more than one skeleton -- confirm.
TEST(IDNSpoofCheckerNoFixtureTest, MultipleSkeletons) { … }

// Non-parameterized test (plain TEST, no fixture).
// NOTE(review): the body is collapsed in this view; judging by the name it
// covers generation of alternative skeletons for a hostname -- confirm.
TEST(IDNSpoofCheckerNoFixtureTest, AlternativeSkeletons) { … }

// Non-parameterized test (plain TEST, no fixture).
// NOTE(review): the body is collapsed in this view; judging by the name it
// covers a MaybeRemoveDiacritics helper of the checker -- confirm.
TEST(IDNSpoofCheckerNoFixtureTest, MaybeRemoveDiacritics) { … }

// Non-parameterized test (plain TEST, no fixture).
// NOTE(review): the body is collapsed in this view; judging by the name it
// covers detection of the deviation character in a hostname -- confirm.
TEST(IDNSpoofCheckerNoFixtureTest, GetDeviationCharacter) { … }

}  // namespace url_formatter
chromium/components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc