chromium/url/url_idna_icu.cc

// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ICU-based IDNA converter.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <ostream>

#include "base/check_op.h"
#include "base/numerics/safe_conversions.h"
#include "third_party/icu/source/common/unicode/uidna.h"
#include "third_party/icu/source/common/unicode/utypes.h"
#include "url/url_canon_icu.h"
#include "url/url_canon_internal.h"  // for _itoa_s
#include "url/url_features.h"

namespace url {

namespace {

// Use UIDNA, a C pointer to a UTS46/IDNA 2008 handling object opened with
// uidna_openUTS46().
//
// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned
// code points allowed) to IDNA 2008 with the backward compatibility in mind.
// What it does:
//
// 1. Use the up-to-date Unicode data.
// 2. Define a case folding/mapping with the up-to-date Unicode data as
//    in IDNA 2003.
// 3. If `use_idna_non_transitional` is true, use non-transitional mechanism for
//    4 deviation characters (sharp-s, final sigma, ZWJ and ZWNJ) per
//    url.spec.whatwg.org.
// 4. Continue to allow symbols and punctuations.
// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules.
// 6. Do not apply STD3 rules
// 7. Do not allow unassigned code points.
//
// It also closely matches what IE 10 does except for the BiDi check (
// http://goo.gl/3XBhqw ).
// See http://http://unicode.org/reports/tr46/ and references therein
// for more details.
UIDNA* CreateIDNA(bool use_idna_non_transitional) {}

UIDNA* GetUIDNA() {}

}  // namespace

// Converts the Unicode input representing a hostname to ASCII using IDN rules.
// The output must be ASCII, but is represented as wide characters.
//
// On success, the output will be filled with the ASCII host name and it will
// return true. Unlike most other canonicalization functions, this assumes that
// the output is empty. The beginning of the host will be at offset 0, and
// the length of the output will be set to the length of the new host name.
//
// On error, this will return false. The output in this case is undefined.
// TODO(jungshik): use UTF-8/ASCII version of nameToASCII.
// Change the function signature and callers accordingly to avoid unnecessary
// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
// version with StringByteSink. That way, we can avoid C wrappers and additional
// string conversion.
bool IDNToASCII(std::u16string_view src, CanonOutputW* output) {}

}  // namespace url