chromium/third_party/icu/source/common/uloc_tag.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2009-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

#include "unicode/bytestream.h"
#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/localpointer.h"
#include "unicode/putil.h"
#include "unicode/uenum.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uassert.h"


/* struct holding a single variant */
/* NOTE(review): only the type name is visible in this excerpt; the members
   are not shown. The `VariantListEntry **first` parameter of
   _addVariantToList() suggests entries are chained into a list - confirm. */
VariantListEntry;

/*
 * struct holding a single attribute value.
 * Derives publicly from icu::UMemory. No members are declared in the
 * definition visible here.
 */
struct AttributeListEntry : public icu::UMemory {};

/*
 * struct holding a single extension.
 * Derives publicly from icu::UMemory. No members are declared in the
 * definition visible here.
 */
struct ExtensionListEntry : public icu::UMemory {};

/* Presumably the maximum number of extlang subtags kept per parsed tag;
   the macro's value is not visible in this excerpt - TODO confirm. */
#define MAXEXTLANG
/* Parsed-language-tag type taken by ultag_close() and the ultag_get*()
   accessors and returned by ultag_parse(); only the type name is visible
   in this excerpt. */
ULanguageTag;

/*
 * Language-tag syntax constants.
 * NOTE(review): the replacement text of every macro in this group is not
 * visible in this excerpt; the descriptions go by name only - confirm.
 * SEP is presumably the subtag separator; PRIVATEUSE and LDMLEXT are
 * presumably the singletons introducing private-use and LDML extensions.
 */
#define MINLEN
#define SEP
#define PRIVATEUSE
#define LDMLEXT

/* Presumably the separator characters used inside ICU locale IDs (between
   fields, before keywords, between keywords, and between a key and its
   type) - TODO confirm against the actual definitions. */
#define LOCALE_SEP
#define LOCALE_EXT_SEP
#define LOCALE_KEYWORD_SEP
#define LOCALE_KEY_TYPE_SEP

/* Single-argument character-class test macros; by name, letter and digit
   tests on `c` - definitions not visible here. */
#define ISALPHA(c)
#define ISNUMERIC(c)

/*
 * File-local string constants.
 * NOTE(review): the initializers are not visible in this excerpt, so the
 * notes below go by name only - confirm the values against the full source.
 */
static const char EMPTY[] =;
/* Presumably the "undetermined" language subtag. */
static const char LANG_UND[] =;
static const char PRIVATEUSE_KEY[] =;
/* Presumably the POSIX variant spelling and the key/value it maps to. */
static const char _POSIX[] =;
static const char POSIX_KEY[] =;
static const char POSIX_VALUE[] =;
static const char LOCALE_ATTRIBUTE_KEY[] =;
/* Presumably the prefix used by the ICU-specific "lvariant" private-use
   variant handling mentioned at _isPrivateuseVariantSubtag(). */
static const char PRIVUSE_VARIANT_PREFIX[] =;
static const char LOCALE_TYPE_YES[] =;

/* Presumably the length of LANG_UND - value not visible here. */
#define LANG_UND_LEN

/*
 Updated on 2018-09-12 from
 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

 This table has 2 parts. The part for
 legacy language tags (marked as “Type: grandfathered” in BCP 47)
 is generated by the following scripts from the IANA language tag registry.

 curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
 egrep -A 7 'Type: grandfathered' | \
 egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
 awk -n '/Tag/ {printf("    \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
 tr 'A-Z' 'a-z'


 The 2nd part is made of five ICU-specific entries. They are kept for
 backward compatibility for now, even though they have no preferred
 values. They may have to be removed for strict BCP 47 compliance.

*/
/* Flat array of C-string pointers. The generating script in the comment
   above prints each tag immediately followed by its preferred value, so
   entries are presumably consumed in pairs - initializer not visible here. */
static const char* const LEGACY[] =;

/*
 Updated on 2018-09-12 from
 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

 The table lists redundant tags with preferred value in the IANA language tag registry.
 It's generated with the following command:

 curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
 grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
 awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
 tr 'A-Z' 'a-z'

 In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
 the variant subtag sequence 'hepburn-heploc' has the preferred value 'alalc97'.
*/

/* Flat array of C-string pointers. The generating command in the comment
   above prints each redundant tag followed by its preferred value, so
   entries are presumably consumed in pairs - initializer not visible here. */
static const char* const REDUNDANT[] =;

/*
  Updated on 2018-09-12 from
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

  grep 'Type: language' -A 7 language-subtag-registry  | egrep 'Subtag|Prefe' | \
  grep -B1 'Preferred' | grep -v '^--' | \
  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'

  Make sure that 2-letter language subtags come before 3-letter subtags.
*/
/* Array of fixed-size 4-byte char arrays (room for a 3-letter subtag plus
   the terminating NUL). Because the elements are arrays rather than
   pointers, there can be no null-pointer sentinel; iterate by element
   count. Initializer not visible in this excerpt. */
static const char DEPRECATEDLANGS[][4] =;

/*
  Updated on 2018-04-24 from

  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
  grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
  grep -B1 'Preferred' | \
  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
*/
/* Array of fixed-size 3-byte char arrays (room for a 2-character subtag
   plus the terminating NUL). Because the elements are arrays rather than
   pointers, there can be no null-pointer sentinel; iterate by element
   count. Initializer not visible in this excerpt. */
static const char DEPRECATEDREGIONS[][3] =;

/*
* -------------------------------------------------
*
* These ultag_ functions may be exposed as APIs later
*
* -------------------------------------------------
*/

/*
 * Forward declarations of the file-local ultag_* parser and accessors; the
 * definitions appear further down in this file.
 */

/* Parses `tag` (`tagLen` gives its length) into a ULanguageTag; `parsedLen`
   and `status` are out-parameters by signature. Exact semantics are not
   visible in this excerpt. */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

/* Presumably releases a ULanguageTag obtained from ultag_parse() - confirm. */
static void
ultag_close(ULanguageTag* langtag);

static const char*
ultag_getLanguage(const ULanguageTag* langtag);

/* Disabled: compiled out together with its definition below. */
#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

/* Indexed accessors take `idx`; the matching *Size() function presumably
   gives the valid index range - confirm against the definitions. */
static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const ULanguageTag* langtag);

static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

/* Disabled: compiled out together with its definition below. */
#if 0
static const char*
ultag_getLegacy(const ULanguageTag* langtag);
#endif

U_NAMESPACE_BEGIN

/*
* -------------------------------------------------
*
* Language subtag syntax validation functions
*
* -------------------------------------------------
*/

/*
 * Presumably reports whether the `len` characters at `s` are all ASCII
 * letters. NOTE(review): the body is not visible in this excerpt; this is
 * inferred from the name - confirm against the full source.
 */
static UBool
_isAlphaString(const char* s, int32_t len) {}

/*
 * Presumably reports whether the `len` characters at `s` are all ASCII
 * digits. NOTE(review): the body is not visible in this excerpt; this is
 * inferred from the name - confirm against the full source.
 */
static UBool
_isNumericString(const char* s, int32_t len) {}

/*
 * Presumably reports whether the `len` characters at `s` are all ASCII
 * letters or digits. NOTE(review): the body is not visible in this
 * excerpt; this is inferred from the name - confirm.
 */
static UBool
_isAlphaNumericString(const char* s, int32_t len) {}

/*
 * Presumably an alphanumeric check that additionally requires the length to
 * lie between `min` and `max`. NOTE(review): the body is not visible in
 * this excerpt; whether the bounds are inclusive and how a negative `len`
 * is treated must be confirmed against the full source.
 */
static UBool
_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {}

/*
 * Predicate with U_CFUNC linkage (callable from other ICU source files);
 * by name, tests whether `s`/`len` is a well-formed language subtag.
 * NOTE(review): the body is not visible in this excerpt - confirm the exact
 * syntax accepted.
 */
U_CFUNC UBool
ultag_isLanguageSubtag(const char* s, int32_t len) {}

/*
 * File-local predicate; by name, tests whether `s`/`len` is a well-formed
 * extlang subtag. NOTE(review): the body is not visible in this excerpt -
 * confirm.
 */
static UBool
_isExtlangSubtag(const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, tests whether `s`/`len` is a
 * well-formed script subtag. NOTE(review): the body is not visible in this
 * excerpt - confirm.
 */
U_CFUNC UBool
ultag_isScriptSubtag(const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, tests whether `s`/`len` is a
 * well-formed region subtag. NOTE(review): the body is not visible in this
 * excerpt - confirm.
 */
U_CFUNC UBool
ultag_isRegionSubtag(const char* s, int32_t len) {}

/*
 * File-local predicate; by name, tests whether `s`/`len` is a single
 * well-formed variant subtag. NOTE(review): the body is not visible in
 * this excerpt - confirm.
 */
static UBool
_isVariantSubtag(const char* s, int32_t len) {}

/*
 * Takes a per-subtag predicate `test` (same `(const char*, int32_t)` shape
 * as the other validators) together with a string. Presumably splits `s`
 * on the subtag separator and requires `test` to accept every piece.
 * NOTE(review): the body is not visible in this excerpt; handling of empty
 * pieces and of a negative `len` must be confirmed.
 */
static UBool
_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, the plural (separator-joined
 * list) counterpart of _isVariantSubtag(). NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isVariantSubtags(const char* s, int32_t len) {}

// This is for the ICU-specific "lvariant" handling.
// By name, tests whether `s`/`len` is acceptable as a variant subtag carried
// in the private-use section. NOTE(review): the body is not visible in this
// excerpt - confirm the accepted syntax.
static UBool
_isPrivateuseVariantSubtag(const char* s, int32_t len) {}

/*
 * File-local predicate; by name, tests whether `s`/`len` is an extension
 * singleton. NOTE(review): the body is not visible in this excerpt -
 * confirm which characters are accepted.
 */
static UBool
_isExtensionSingleton(const char* s, int32_t len) {}

/*
 * File-local predicate; by name, tests whether `s`/`len` is a single
 * well-formed extension subtag. NOTE(review): the body is not visible in
 * this excerpt - confirm.
 */
static UBool
_isExtensionSubtag(const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, the plural (separator-joined
 * list) counterpart of _isExtensionSubtag(). NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isExtensionSubtags(const char* s, int32_t len) {}

/*
 * File-local predicate; by name, tests whether `s`/`len` is a single
 * well-formed private-use value subtag. NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
static UBool
_isPrivateuseValueSubtag(const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, the plural (separator-joined
 * list) counterpart of _isPrivateuseValueSubtag(). NOTE(review): the body
 * is not visible in this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, tests whether `s`/`len` is a
 * single Unicode locale extension attribute. NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, the plural (list) counterpart of
 * ultag_isUnicodeLocaleAttribute(). NOTE(review): the body is not visible
 * in this excerpt - confirm how the list is delimited.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, tests whether `s`/`len` is a
 * Unicode locale extension key. NOTE(review): the body is not visible in
 * this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len) {}

/*
 * By name, tests whether `s`/`len` is one subtag of a Unicode locale
 * extension type.
 * NOTE(review): unlike the other `_`-prefixed helpers in this file, which
 * are `static`, this one is declared U_CFUNC - confirm whether external
 * linkage is intended. The body is not visible in this excerpt.
 */
U_CFUNC UBool
_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {}

/*
 * Predicate with U_CFUNC linkage; by name, tests whether `s`/`len` is a
 * complete Unicode locale extension type (possibly several subtags).
 * NOTE(review): the body is not visible in this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleType(const char*s, int32_t len) {}

/*
 * File-local predicate; by name, tests whether `s`/`len` is a key of the
 * transformed ("t") extension. NOTE(review): the body is not visible in
 * this excerpt - confirm the accepted syntax.
 */
static UBool
_isTKey(const char* s, int32_t len)
{}

/*
 * Exported (U_CAPI / U_EXPORT2) function taking a NUL-terminated-looking
 * `localeID` (no length parameter) and returning a `const char*`.
 * Presumably returns a pointer to where the first "t"-extension key begins
 * inside `localeID`. NOTE(review): the body is not visible in this excerpt;
 * the not-found return value must be confirmed.
 */
U_CAPI const char * U_EXPORT2
ultag_getTKeyStart(const char *localeID) {}

/*
 * File-local predicate; by name, tests whether `s`/`len` is a value subtag
 * of the transformed ("t") extension. NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
static UBool
_isTValue(const char* s, int32_t len)
{}

/*
 * Stateful per-subtag validator: `state` is passed by non-const reference,
 * so the function can update it between calls. Its shape matches the `test`
 * callback of _isStatefulSepListOf(). Presumably validates one subtag of a
 * "t" extension given what preceded it. NOTE(review): the body and the
 * meaning of the state values are not visible in this excerpt - confirm.
 */
static UBool
_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
{}

/*
 * Stateful per-subtag validator: `state` is passed by non-const reference,
 * so the function can update it between calls. Its shape matches the `test`
 * callback of _isStatefulSepListOf(). Presumably validates one subtag of a
 * "u" extension given what preceded it. NOTE(review): the body and the
 * meaning of the state values are not visible in this excerpt - confirm.
 */
static UBool
_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
{}

/*
 * Stateful variant of _isSepListOf(): the `test` callback additionally
 * receives an `int32_t&` state. Presumably splits `s` on the subtag
 * separator and threads one state value through successive `test` calls.
 * NOTE(review): the body is not visible in this excerpt; the initial state
 * and any end-of-input state check must be confirmed.
 */
static UBool
_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
{}

/*
 * Predicate with U_CFUNC linkage; by name, validates the full subtag
 * sequence of a transformed ("t") extension. NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
{}

/*
 * Predicate with U_CFUNC linkage; by name, validates the full subtag
 * sequence of a Unicode ("u") extension. NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
U_CFUNC UBool
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {}


/*
* -------------------------------------------------
*
* Helper functions
*
* -------------------------------------------------
*/

/*
 * Takes the list head by address (`first`) and one entry `var`, so it can
 * replace the head. Presumably links `var` into the list and reports
 * success via the UBool result. NOTE(review): the body is not visible in
 * this excerpt; ordering and duplicate handling must be confirmed.
 */
static UBool
_addVariantToList(VariantListEntry **first, VariantListEntry *var) {}

/*
 * Takes the list head by address (`first`) and one entry `attr`, so it can
 * replace the head. Presumably links `attr` into the list and reports
 * success via the UBool result. NOTE(review): the body is not visible in
 * this excerpt; ordering and duplicate handling must be confirmed.
 */
static UBool
_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {}


/*
 * Takes the list head by address (`first`) and one entry `ext`, so it can
 * replace the head. `localeToBCP` presumably selects behavior by conversion
 * direction (locale ID to BCP 47 tag versus the reverse).
 * NOTE(review): the body is not visible in this excerpt; ordering and
 * duplicate handling must be confirmed.
 */
static UBool
_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {}

/*
 * Takes a mutable ULanguageTag; by name, resets its fields to an initial
 * state. NOTE(review): the body is not visible in this excerpt - confirm
 * which fields are set and to what.
 */
static void
_initializeULanguageTag(ULanguageTag* langtag) {}

/*
 * One of the `_append*ToLanguageTag` helpers sharing the shape
 * (localeID, sink, strict, ..., status). By name, writes the language
 * portion derived from `localeID` to `sink`. NOTE(review): the body is not
 * visible in this excerpt; the effect of `strict` and the error codes
 * reported through `status` must be confirmed.
 */
static void
_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {}

/*
 * By name, writes the script portion derived from `localeID` to `sink`.
 * NOTE(review): the body is not visible in this excerpt; the effect of
 * `strict` and the error codes reported through `status` must be confirmed.
 */
static void
_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {}

/*
 * By name, writes the region portion derived from `localeID` to `sink`.
 * NOTE(review): the body is not visible in this excerpt; the effect of
 * `strict` and the error codes reported through `status` must be confirmed.
 */
static void
_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {}

/* By name, sorts the variant list starting at `first`. The head is passed
   by value, so the caller's head pointer itself cannot be changed here.
   NOTE(review): the body is not visible in this excerpt - confirm the sort
   key and order. */
static void _sortVariants(VariantListEntry* first) {}

/*
 * By name, writes the variant subtags derived from `localeID` to `sink`.
 * `hadPosix` is an out-pointer here, whereas _appendKeywordsToLanguageTag()
 * and _appendPrivateuseToLanguageTag() take `hadPosix` by value, so this
 * function presumably detects a POSIX variant for them - confirm.
 * NOTE(review): the body is not visible in this excerpt.
 */
static void
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {}

/*
 * By name, writes the extension subtags corresponding to the keywords of
 * `localeID` to `sink`. `hadPosix` is an input flag (passed by value).
 * NOTE(review): the body is not visible in this excerpt; the effect of
 * `strict` and `hadPosix` must be confirmed.
 */
static void
_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {}

/**
 * Append keywords parsed from LDML extension value
 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
 * Note: the signature takes two memory pools rather than a char* buffer:
 * `extPool` (ExtensionListEntry objects) and `kwdBuf` (icu::CharString
 * objects), presumably backing the entries and keyword strings appended to
 * `appendTo` - confirm their lifetime requirements against the body.
 * `posixVariant` is an out-pointer. The body is not visible in this excerpt.
 */
static void
_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, UBool *posixVariant, UErrorCode *status) {}


/*
 * Takes a parsed ULanguageTag rather than a locale ID, so this helper
 * presumably serves the tag-to-locale direction, writing locale keywords
 * for the tag's extensions to `sink`. NOTE(review): the body is not
 * visible in this excerpt - confirm.
 */
static void
_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {}

/*
 * By name, writes the private-use portion derived from `localeID` to
 * `sink`. `hadPosix` is an input flag (passed by value).
 * NOTE(review): the body is not visible in this excerpt; the effect of
 * `strict` and `hadPosix` must be confirmed.
 */
static void
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {}

/*
* -------------------------------------------------
*
* ultag_ functions
*
* -------------------------------------------------
*/

/* Bit flags used by the parser */
/* NOTE(review): the flag values are not visible in this excerpt. By name
   they correspond to the tag sections: language, extlang, script, region,
   variant, extension singleton, extension value and private use - confirm. */
#define LANG
#define EXTL
#define SCRT
#define REGN
#define VART
#define EXTS
#define EXTV
#define PRIV

/**
 * Ticket #12705 - The optimizer in Visual Studio 2015 Update 3 has problems optimizing this function.
 * As a work-around, optimization is disabled for this function on VS2015, VS2017, and VS2019
 * versions below 16.4 (i.e. 1900 <= _MSC_VER < 1924, matching the guard below).
 * This work-around should be removed once the following versions of Visual Studio are no
 * longer supported: All versions of VS2015/VS2017, and versions of VS2019 below 16.4.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", off )
#endif

/*
 * Parser entry point: takes the tag text and `tagLen`, returns a
 * ULanguageTag pointer, and has `parsedLen` and `status` as out-parameters.
 * Presumably the result is released with ultag_close().
 * NOTE(review): the body is not visible in this excerpt; the meaning of a
 * negative `tagLen`, whether `parsedLen` may be null, and the return value
 * on failure must all be confirmed against the full source.
 */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {}

// Ticket #12705 - Turn optimization back on.
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", on )
#endif

/*
 * By name, releases a ULanguageTag and whatever it owns.
 * NOTE(review): the body is not visible in this excerpt; whether a null
 * `langtag` is accepted must be confirmed.
 */
static void
ultag_close(ULanguageTag* langtag) {}

/*
 * Accessor for the language subtag of a parsed tag.
 * NOTE(review): the body is not visible in this excerpt; who owns the
 * returned string and what is returned when no language is present must be
 * confirmed.
 */
static const char*
ultag_getLanguage(const ULanguageTag* langtag) {}

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t i;
    for (i = 0; DEPRECATEDLANGS[i] != nullptr; i += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
            return DEPRECATEDLANGS[i + 1];
        }
    }
    return langtag->language;
}
#endif

/*
 * Indexed accessor for the extlang subtags of a parsed tag.
 * NOTE(review): the body is not visible in this excerpt; presumably `idx`
 * is 0-based and bounded by ultag_getExtlangSize() - confirm, along with
 * the result for an out-of-range index.
 */
static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {}

/*
 * By name, returns how many extlang subtags the parsed tag holds.
 * NOTE(review): the body is not visible in this excerpt - confirm.
 */
static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag) {}

/*
 * Accessor for the script subtag of a parsed tag.
 * NOTE(review): the body is not visible in this excerpt; what is returned
 * when no script is present must be confirmed.
 */
static const char*
ultag_getScript(const ULanguageTag* langtag) {}

/*
 * Accessor for the region subtag of a parsed tag.
 * NOTE(review): the body is not visible in this excerpt; what is returned
 * when no region is present must be confirmed.
 */
static const char*
ultag_getRegion(const ULanguageTag* langtag) {}

/*
 * Indexed accessor for the variant subtags of a parsed tag.
 * NOTE(review): the body is not visible in this excerpt; presumably `idx`
 * is 0-based and bounded by ultag_getVariantsSize() - confirm, along with
 * the result for an out-of-range index.
 */
static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {}

/*
 * By name, returns how many variant subtags the parsed tag holds.
 * NOTE(review): the body is not visible in this excerpt - confirm.
 */
static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag) {}

/*
 * Indexed accessor for the key of an extension of a parsed tag.
 * NOTE(review): the body is not visible in this excerpt; presumably `idx`
 * is 0-based and bounded by ultag_getExtensionsSize() - confirm, along
 * with the result for an out-of-range index.
 */
static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {}

/*
 * Indexed accessor for the value of an extension of a parsed tag;
 * presumably pairs with ultag_getExtensionKey() for the same `idx`.
 * NOTE(review): the body is not visible in this excerpt - confirm the
 * index range and the result for an out-of-range index.
 */
static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {}

/*
 * By name, returns how many extensions the parsed tag holds.
 * NOTE(review): the body is not visible in this excerpt - confirm.
 */
static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag) {}

/*
 * Accessor for the private-use portion of a parsed tag.
 * NOTE(review): the body is not visible in this excerpt; what is returned
 * when no private-use section is present must be confirmed.
 */
static const char*
ultag_getPrivateUse(const ULanguageTag* langtag) {}

#if 0
/*
 * Returns the `legacy` member of the parsed tag as stored, without copying.
 * `langtag` is dereferenced unconditionally, so it must not be null.
 */
static const char*
ultag_getLegacy(const ULanguageTag* langtag) {
    const char* legacyTag = langtag->legacy;
    return legacyTag;
}
#endif


/*
* -------------------------------------------------
*
* Locale/BCP47 conversion APIs, exposed as uloc_*
*
* -------------------------------------------------
*/
/*
 * Exported C API (U_CAPI / U_EXPORT2). Buffer-based form: takes a locale ID
 * and a caller-supplied `langtag` buffer of `langtagCapacity` chars, and
 * returns an int32_t (presumably the length of the resulting tag).
 * Presumably a wrapper over the ByteSink-based ulocimp_toLanguageTag().
 * NOTE(review): the body is not visible in this excerpt; overflow and
 * termination behavior, the effect of `strict`, and the error codes set in
 * `status` must be confirmed (the public contract is presumably documented
 * in unicode/uloc.h).
 */
U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char* localeID,
                   char* langtag,
                   int32_t langtagCapacity,
                   UBool strict,
                   UErrorCode* status) {}


/*
 * ByteSink-based counterpart of uloc_toLanguageTag(): the resulting tag is
 * written to `sink` instead of a fixed-size buffer, and nothing is
 * returned. Its parameters match the `_append*ToLanguageTag` helpers above,
 * which it presumably calls in sequence.
 * NOTE(review): the body is not visible in this excerpt; the effect of
 * `strict` and the error codes set in `status` must be confirmed.
 */
U_CAPI void U_EXPORT2
ulocimp_toLanguageTag(const char* localeID,
                      icu::ByteSink& sink,
                      UBool strict,
                      UErrorCode* status) {}


/*
 * Exported C API (U_CAPI / U_EXPORT2). Buffer-based form: takes a language
 * tag (no length parameter, so presumably NUL-terminated) and a
 * caller-supplied `localeID` buffer of `localeIDCapacity` chars, and
 * returns an int32_t (presumably the length of the resulting locale ID).
 * `parsedLength` is an out-parameter. Presumably a wrapper over the
 * ByteSink-based ulocimp_forLanguageTag().
 * NOTE(review): the body is not visible in this excerpt; overflow and
 * termination behavior, whether `parsedLength` may be null, and the error
 * codes set in `status` must be confirmed (the public contract is
 * presumably documented in unicode/uloc.h).
 */
U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char* langtag,
                    char* localeID,
                    int32_t localeIDCapacity,
                    int32_t* parsedLength,
                    UErrorCode* status) {}


/*
 * ByteSink-based counterpart of uloc_forLanguageTag(): takes the tag with
 * an explicit `tagLen`, writes the resulting locale ID to `sink`, and
 * returns nothing. `parsedLength` is an out-parameter. The parameters line
 * up with ultag_parse() (tag, tagLen, parsedLen, status), which it
 * presumably calls.
 * NOTE(review): the body is not visible in this excerpt; the meaning of a
 * negative `tagLen`, whether `parsedLength` may be null, and the error
 * codes set in `status` must be confirmed.
 */
U_CAPI void U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       icu::ByteSink& sink,
                       int32_t* parsedLength,
                       UErrorCode* status) {}