// Copyright 2015 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/url_formatter/spoof_checks/idn_spoof_checker.h" #include <stddef.h> #include <string.h> #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" #include "base/test/scoped_feature_list.h" #include "build/build_config.h" #include "components/url_formatter/spoof_checks/skeleton_generator.h" #include "components/url_formatter/url_formatter.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/icu/source/common/unicode/uvernum.h" #include "url/gurl.h" #include "url/url_features.h" namespace url_formatter { namespace { // Expected result of the IDN conversion. enum class Result { … }; // Alias the values to make the tests less verbose. const Result kSafe = …; const Result kUnsafe = …; const Result kInvalid = …; struct IDNTestCase { … }; // These cases MUST be generated with the script // tools/security/idn_test_case_generator.py. // See documentation there: you can either run it from the command line or call // the make_case function directly from the Python shell (which may be easier // for entering Unicode text). // // Do NOT generate these cases by hand. // // Q: Why not just do this conversion right here in the test, rather than having // a Python script to generate it? // A: Because then we would have to rely on complex logic (IDNA encoding) in the // test itself; the same code we are trying to test. By using Python's IDN // encoder to generate the test data, we independently verify that our // algorithm is correct. const IDNTestCase kIdnCases[] = …; namespace test { #include "components/url_formatter/spoof_checks/top_domains/idn_test_domains-trie-inc.cc" } bool IsPunycode(const std::u16string& s) { … } } // namespace // IDNA mode to use in tests. 
enum class IDNAMode { … };
// Test fixture shared by the TEST_P cases in this file. It derives from
// ::testing::Test and is parameterized on IDNAMode through
// ::testing::WithParamInterface<IDNAMode>.
class IDNSpoofCheckerTest : public ::testing::Test, public ::testing::WithParamInterface<IDNAMode> { … };
// Instantiates a value-parameterized test suite. NOTE(review): the macro
// arguments are not visible here; presumably this instantiates
// IDNSpoofCheckerTest over the IDNAMode values -- confirm.
INSTANTIATE_TEST_SUITE_P(…); // Test that a domain entered as punycode is decoded to unicode if safe, // otherwise is left in punycode. // // TODO(crbug.com/40664864): This should also check if a domain entered as // unicode is properly decoded or not-decoded. This is important in cases where // certain unicode characters are canonicalized to other characters. // E.g. Mathematical Monospace Small A (U+1D68A) is canonicalized to "a" when // used in a domain name. TEST_P(IDNSpoofCheckerTest, IDNToUnicode) { … } // Same as IDNToUnicode but only tests hostnames with deviation characters. TEST_P(IDNSpoofCheckerTest, IDNToUnicodeDeviationCharacters) { … } TEST_P(IDNSpoofCheckerTest, GetSimilarTopDomain) { … } TEST_P(IDNSpoofCheckerTest, LookupSkeletonInTopDomains) { … } // Same test as LookupSkeletonInTopDomains but using the real top domain list. TEST(IDNSpoofCheckerNoFixtureTest, LookupSkeletonInTopDomains) { … } // Check the unsafe version of IDNToUnicode. Even though the input domain // matches a top domain, it should still be converted to unicode. TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { … } // Checks that skeletons are properly generated for domains with blocked // characters after using UnsafeIDNToUnicodeWithDetails. TEST(IDNSpoofCheckerNoFixtureTest, Skeletons) { … } TEST(IDNSpoofCheckerNoFixtureTest, MultipleSkeletons) { … } TEST(IDNSpoofCheckerNoFixtureTest, AlternativeSkeletons) { … } TEST(IDNSpoofCheckerNoFixtureTest, MaybeRemoveDiacritics) { … } TEST(IDNSpoofCheckerNoFixtureTest, GetDeviationCharacter) { … } } // namespace url_formatter