// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2002-2011, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: punycode.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2002jan31 * created by: Markus W. Scherer */ /* This ICU code derived from: */ /* punycode.c 0.4.0 (2001-Nov-17-Sat) http://www.cs.berkeley.edu/~amc/idn/ Adam M. Costello http://www.nicemice.net/amc/ Disclaimer and license Regarding this entire document or any portion of it (including the pseudocode and C code), the author makes no guarantees and is not responsible for any damage resulting from its use. The author grants irrevocable permission to anyone to use, modify, and distribute it in any way that does not diminish the rights of anyone else to use, modify, and distribute it, provided that redistributed derivative works do not contain misleading author or version information. Derivative works need not be licensed under similar terms. 
*/ /* * ICU modifications: * - ICU data types and coding conventions * - ICU string buffer handling with implicit source lengths * and destination preflighting * - UTF-16 handling */ #include "unicode/utypes.h" #if !UCONFIG_NO_IDNA #include "unicode/ustring.h" #include "unicode/utf.h" #include "unicode/utf16.h" #include "ustr_imp.h" #include "cstring.h" #include "cmemory.h" #include "punycode.h" #include "uassert.h" /* Punycode ----------------------------------------------------------------- */ /* Punycode parameters for Bootstring */ #define BASE … #define TMIN … #define TMAX … #define SKEW … #define DAMP … #define INITIAL_BIAS … #define INITIAL_N … /* "Basic" Unicode/ASCII code points */ #define _HYPHEN … #define DELIMITER … #define _ZERO_ … #define _NINE … #define _SMALL_A … #define _SMALL_Z … #define _CAPITAL_A … #define _CAPITAL_Z … #define IS_BASIC(c) … #define IS_BASIC_UPPERCASE(c) … /** * digitToBasic() returns the basic code point whose value * (when used for representing integers) is digit, which must be in the * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is * nonzero, in which case the uppercase form is used. */ static inline char digitToBasic(int32_t digit, UBool uppercase) { … } /** * @return the numeric value of a basic code point (for use in representing integers) * in the range 0 to BASE-1, or a negative value if cp is invalid. */ static int32_t decodeDigit(int32_t cp) { … } /** * NOTE(review): presumably returns b with its ASCII letter case forced to * uppercase when the uppercase flag is nonzero and to lowercase otherwise, * leaving non-letters unchanged -- confirm against the function body. */ static inline char asciiCaseMap(char b, UBool uppercase) { … } /* Punycode-specific Bootstring code ---------------------------------------- */ /* * The following code omits the {parts} of the pseudo-algorithm in the spec * that are not used with the Punycode parameter set. */ /* Bias adaptation function. NOTE(review): presumably the adapt() step of RFC 3492 section 6.1, with length in the role of the spec's numpoints -- confirm. */ static int32_t adaptBias(int32_t delta, int32_t length, UBool firstTime) { … } namespace { // ICU-13727: Limit input length for n^2 algorithm // where well-formed strings are at most 59 characters long. 
constexpr int32_t ENCODE_MAX_CODE_UNITS= …; constexpr int32_t DECODE_MAX_CHARS= …; } // namespace // encode U_CAPI int32_t u_strToPunycode(const char16_t *src, int32_t srcLength, char16_t *dest, int32_t destCapacity, const UBool *caseFlags, UErrorCode *pErrorCode) { … } // decode U_CAPI int32_t u_strFromPunycode(const char16_t *src, int32_t srcLength, char16_t *dest, int32_t destCapacity, UBool *caseFlags, UErrorCode *pErrorCode) { … } /* ### TODO: check the notes on overflow handling - is it only necessary when not used for IDNA? Are these Punycode functions meant to be public? */ #endif /* #if !UCONFIG_NO_IDNA */