// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (C) 1997-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * * File ULOC.CPP * * Modification History: * * Date Name Description * 04/01/97 aliu Creation. * 08/21/98 stephen JDK 1.2 sync * 12/08/98 rtg New Locale implementation and C API * 03/15/99 damiba overhaul. * 04/06/99 stephen changed setDefault() to realloc and copy * 06/14/99 stephen Changed calls to ures_open for new params * 07/21/99 stephen Modified setDefault() to propagate to C++ * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, * brought canonicalization code into line with spec *****************************************************************************/ /* POSIX's locale format, from putil.c: [no spaces] ll [ _CC ] [ . MM ] [ @ VV] l = lang, C = ctry, M = charmap, V = variant */ #include "unicode/bytestream.h" #include "unicode/errorcode.h" #include "unicode/stringpiece.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/uloc.h" #include "bytesinkutil.h" #include "putilimp.h" #include "ustr_imp.h" #include "ulocimp.h" #include "umutex.h" #include "cstring.h" #include "cmemory.h" #include "locmap.h" #include "uarrsort.h" #include "uenumimp.h" #include "uassert.h" #include "charstr.h" U_NAMESPACE_USE /* ### Declarations **************************************************/ /* Locale stuff from locid.cpp */ U_CFUNC void locale_set_default(const char *id); U_CFUNC const char *locale_get_default(); /* ### Data tables **************************************************/ /** * Table of language codes, both 2- and 3-letter, with preference * given to 2-letter codes where possible. Includes 3-letter codes * that lack a 2-letter equivalent. 
* * This list must be in sorted order. This list is returned directly * to the user by some API. * * This list must be kept in sync with LANGUAGES_3, with corresponding * entries matched. * * This table should be terminated with a nullptr entry, followed by a * second list, and another nullptr entry. The first list is visible to * user code when this array is returned by API. The second list * contains codes we support, but do not expose through user API. * * Notes * * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to * include the revisions up to 2001/7/27 *CWB* * * The 3 character codes are the terminology codes like RFC 3066. This * is compatible with prior ICU codes * * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the * table but now at the end of the table because 3 character codes are * duplicates. This avoids bad searches going from 3 to 2 character * codes. * * The range qaa-qtz is reserved for local use */ /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ /* ISO639 table version is 20150505 */ /* Subsequent hand addition of selected languages */ static const char * const LANGUAGES[] = …; static const char* const DEPRECATED_LANGUAGES[]= …; static const char* const REPLACEMENT_LANGUAGES[]= …; /** * Table of 3-letter language codes. * * This is a lookup table used to convert 3-letter language codes to * their 2-letter equivalent, where possible. It must be kept in sync * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the * same language as LANGUAGES_3[i]. The commented-out lines are * copied from LANGUAGES to make eyeballing this baby easier. * * Where a 3-letter language code has no 2-letter equivalent, the * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. * * This table should be terminated with a nullptr entry, followed by a * second list, and another nullptr entry. The two lists correspond to * the two lists in LANGUAGES. 
*/ /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ /* ISO639 table version is 20150505 */ /* Subsequent hand addition of selected languages */ static const char * const LANGUAGES_3[] = …; /** * Table of 2-letter country codes. * * This list must be in sorted order. This list is returned directly * to the user by some API. * * This list must be kept in sync with COUNTRIES_3, with corresponding * entries matched. * * This table should be terminated with a nullptr entry, followed by a * second list, and another nullptr entry. The first list is visible to * user code when this array is returned by API. The second list * contains codes we support, but do not expose through user API. * * Notes: * * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added * new codes keeping the old ones for compatibility updated to include * 1999/12/03 revisions *CWB* * * RO(ROM) is now RO(ROU) according to * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html */ static const char * const COUNTRIES[] = …; static const char* const DEPRECATED_COUNTRIES[] = …; static const char* const REPLACEMENT_COUNTRIES[] = …; /** * Table of 3-letter country codes. * * This is a lookup table used to convert 3-letter country codes to * their 2-letter equivalent. It must be kept in sync with COUNTRIES. * For all valid i, COUNTRIES[i] must refer to the same country as * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES * to make eyeballing this baby easier. * * This table should be terminated with a nullptr entry, followed by a * second list, and another nullptr entry. The two lists correspond to * the two lists in COUNTRIES. */ static const char * const COUNTRIES_3[] = …; CanonicalizationMap; /** * A map to canonicalize locale IDs. This handles a variety of * different semantic kinds of transformations. 
*/ static const CanonicalizationMap CANONICALIZE_MAP[] = …; /* ### BCP47 Conversion *******************************************/ /* Test if the locale id has BCP47 u extension and does not have '@' */ #define _hasBCP47Extension(id) … /* Gets the size of the shortest subtag in the given localeID. */ static int32_t getShortestSubtagLength(const char *localeID) { … } /* ### Keywords **************************************************/ #define UPRV_ISDIGIT(c) … #define UPRV_ISALPHANUM(c) … /* Punctuation/symbols allowed in legacy key values */ #define UPRV_OK_VALUE_PUNCTUATION(c) … #define ULOC_KEYWORD_BUFFER_LEN … #define ULOC_MAX_NO_KEYWORDS … U_CAPI const char * U_EXPORT2 locale_getKeywordsStart(const char *localeID) { … } /** * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] * @param keywordName incoming name to be canonicalized * @param status return status (keyword too long) * @return length of the keyword name */ static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) { … } KeywordStruct; static int32_t U_CALLCONV compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { … } U_CFUNC void ulocimp_getKeywords(const char *localeID, char prev, ByteSink& sink, UBool valuesToo, UErrorCode *status) { … } U_CAPI int32_t U_EXPORT2 uloc_getKeywordValue(const char* localeID, const char* keywordName, char* buffer, int32_t bufferCapacity, UErrorCode* status) { … } U_CAPI void U_EXPORT2 ulocimp_getKeywordValue(const char* localeID, const char* keywordName, icu::ByteSink& sink, UErrorCode* status) { … } U_CAPI int32_t U_EXPORT2 uloc_setKeywordValue(const char* keywordName, const char* keywordValue, char* buffer, int32_t bufferCapacity, UErrorCode* status) { … } /* ### ID parsing implementation **************************************************/ #define _isPrefixLetter(a) … /*returns true if one of the special prefixes is here (s=string) 'x-' or 'i-' */ #define _isIDPrefix(s) … /* Dot terminates it because 
of POSIX form where dot precedes the codepage * except for variant */ #define _isTerminator(a) … /** * Lookup 'key' in the array 'list'. The array 'list' should contain * a nullptr entry, followed by more entries, and a second nullptr entry. * * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or * COUNTRIES_3. */ static int16_t _findIndex(const char* const* list, const char* key) { … } U_CFUNC const char* uloc_getCurrentCountryID(const char* oldID){ … } U_CFUNC const char* uloc_getCurrentLanguageID(const char* oldID){ … } /* * the internal functions ulocimp_getLanguage(), ulocimp_getScript(), * ulocimp_getCountry() and _getVariant() avoid duplicating code to handle * the earlier locale ID pieces in the functions for the later ones; * those taking a pEnd parameter do so by setting the *pEnd pointer to * where they stopped parsing * * TODO try to use this in Locale */ CharString U_EXPORT2 ulocimp_getLanguage(const char *localeID, const char **pEnd, UErrorCode &status) { … } CharString U_EXPORT2 ulocimp_getScript(const char *localeID, const char **pEnd, UErrorCode &status) { … } CharString U_EXPORT2 ulocimp_getCountry(const char *localeID, const char **pEnd, UErrorCode &status) { … } /** * @param needSeparator if true, then add leading '_' if any variants * are added to 'variant' */ static void _getVariant(const char *localeID, char prev, ByteSink& sink, UBool needSeparator) { … } /* Keyword enumeration */ UKeywordsContext; U_CDECL_BEGIN static void U_CALLCONV uloc_kw_closeKeywords(UEnumeration *enumerator) { … } static int32_t U_CALLCONV uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { … } static const char * U_CALLCONV uloc_kw_nextKeyword(UEnumeration* en, int32_t* resultLength, UErrorCode* /*status*/) { … } static void U_CALLCONV uloc_kw_resetKeywords(UEnumeration* en, UErrorCode* /*status*/) { … } U_CDECL_END static const UEnumeration gKeywordsEnum = …; U_CAPI UEnumeration* U_EXPORT2 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) { … } U_CAPI UEnumeration* 
U_EXPORT2 uloc_openKeywords(const char* localeID, UErrorCode* status) { … } /* bit-flags for 'options' parameter of _canonicalize */ #define _ULOC_STRIP_KEYWORDS … #define _ULOC_CANONICALIZE … #define OPTION_SET(options, mask) … static const char i_default[] = …; #define I_DEFAULT_LENGTH … /** * Canonicalize the given localeID, to level 1 or to level 2, * depending on the options. To specify level 1, pass in options=0. * To specify level 2, pass in options=_ULOC_CANONICALIZE. * * This is the code underlying uloc_getName and uloc_canonicalize. */ static void _canonicalize(const char* localeID, ByteSink& sink, uint32_t options, UErrorCode* err) { … } /* ### ID parsing API **************************************************/ U_CAPI int32_t U_EXPORT2 uloc_getParent(const char* localeID, char* parent, int32_t parentCapacity, UErrorCode* err) { … } U_CAPI void U_EXPORT2 ulocimp_getParent(const char* localeID, icu::ByteSink& sink, UErrorCode* err) { … } U_CAPI int32_t U_EXPORT2 uloc_getLanguage(const char* localeID, char* language, int32_t languageCapacity, UErrorCode* err) { … } U_CAPI int32_t U_EXPORT2 uloc_getScript(const char* localeID, char* script, int32_t scriptCapacity, UErrorCode* err) { … } U_CAPI int32_t U_EXPORT2 uloc_getCountry(const char* localeID, char* country, int32_t countryCapacity, UErrorCode* err) { … } U_CAPI int32_t U_EXPORT2 uloc_getVariant(const char* localeID, char* variant, int32_t variantCapacity, UErrorCode* err) { … } U_CAPI int32_t U_EXPORT2 uloc_getName(const char* localeID, char* name, int32_t nameCapacity, UErrorCode* err) { … } U_CAPI void U_EXPORT2 ulocimp_getName(const char* localeID, ByteSink& sink, UErrorCode* err) { … } U_CAPI int32_t U_EXPORT2 uloc_getBaseName(const char* localeID, char* name, int32_t nameCapacity, UErrorCode* err) { … } U_CAPI void U_EXPORT2 ulocimp_getBaseName(const char* localeID, ByteSink& sink, UErrorCode* err) { … } U_CAPI int32_t U_EXPORT2 uloc_canonicalize(const char* localeID, char* name, int32_t 
nameCapacity, UErrorCode* err) { … } U_CAPI void U_EXPORT2 ulocimp_canonicalize(const char* localeID, ByteSink& sink, UErrorCode* err) { … } U_CAPI const char* U_EXPORT2 uloc_getISO3Language(const char* localeID) { … } U_CAPI const char* U_EXPORT2 uloc_getISO3Country(const char* localeID) { … } U_CAPI uint32_t U_EXPORT2 uloc_getLCID(const char* localeID) { … } U_CAPI int32_t U_EXPORT2 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, UErrorCode *status) { … } /* ### Default locale **************************************************/ U_CAPI const char* U_EXPORT2 uloc_getDefault() { … } U_CAPI void U_EXPORT2 uloc_setDefault(const char* newDefaultLocale, UErrorCode* err) { … } /** * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer * to an array of pointers to arrays of char. All of these pointers are owned * by ICU-- do not delete them, and do not write through them. The array is * terminated with a null pointer. */ U_CAPI const char* const* U_EXPORT2 uloc_getISOLanguages() { … } /** * Returns a list of all 2-letter country codes defined in ISO 3166. This is a * pointer to an array of pointers to arrays of char. All of these pointers are * owned by ICU-- do not delete them, and do not write through them. The array is * terminated with a null pointer. */ U_CAPI const char* const* U_EXPORT2 uloc_getISOCountries() { … } U_CAPI const char* U_EXPORT2 uloc_toUnicodeLocaleKey(const char* keyword) { … } U_CAPI const char* U_EXPORT2 uloc_toUnicodeLocaleType(const char* keyword, const char* value) { … } static UBool isWellFormedLegacyKey(const char* legacyKey) { … } static UBool isWellFormedLegacyType(const char* legacyType) { … } U_CAPI const char* U_EXPORT2 uloc_toLegacyKey(const char* keyword) { … } U_CAPI const char* U_EXPORT2 uloc_toLegacyType(const char* keyword, const char* value) { … } /*eof*/