// Copyright 2013 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // ICU-based IDNA converter. #include <stdint.h> #include <stdlib.h> #include <string.h> #include <ostream> #include "base/check_op.h" #include "base/numerics/safe_conversions.h" #include "third_party/icu/source/common/unicode/uidna.h" #include "third_party/icu/source/common/unicode/utypes.h" #include "url/url_canon_icu.h" #include "url/url_canon_internal.h" // for _itoa_s #include "url/url_features.h" namespace url { namespace { // Use UIDNA, a C pointer to a UTS46/IDNA 2008 handling object opened with // uidna_openUTS46(). // // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned // code points allowed) to IDNA 2008 with backward compatibility in mind. // What it does: // // 1. Use the up-to-date Unicode data. // 2. Define a case folding/mapping with the up-to-date Unicode data as // in IDNA 2003. // 3. If `use_idna_non_transitional` is true, use non-transitional mechanism for // 4 deviation characters (sharp-s, final sigma, ZWJ and ZWNJ) per // url.spec.whatwg.org. // 4. Continue to allow symbols and punctuation. // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules. // 6. Do not apply STD3 rules. // 7. Do not allow unassigned code points. // // It also closely matches what IE 10 does except for the BiDi check ( // http://goo.gl/3XBhqw ). // See http://unicode.org/reports/tr46/ and references therein // for more details. UIDNA* CreateIDNA(bool use_idna_non_transitional) { … } UIDNA* GetUIDNA() { … } } // namespace // Converts the Unicode input representing a hostname to ASCII using IDN rules. // The output must be ASCII, but is represented as wide characters. // // On success, the output will be filled with the ASCII host name and it will // return true. Unlike most other canonicalization functions, this assumes that // the output is empty. 
The beginning of the host will be at offset 0, and // the length of the output will be set to the length of the new host name. // // On error, this will return false. The output in this case is undefined. // TODO(jungshik): use UTF-8/ASCII version of nameToASCII. // Change the function signature and callers accordingly to avoid unnecessary // conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII // version with StringByteSink. That way, we can avoid C wrappers and additional // string conversion. bool IDNToASCII(std::u16string_view src, CanonOutputW* output) { … } } // namespace url