godot/thirdparty/icu4c/common/uloc_tag.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2009-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

#include <optional>
#include <string_view>
#include <utility>

#include "unicode/bytestream.h"
#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/localpointer.h"
#include "unicode/putil.h"
#include "unicode/uenum.h"
#include "unicode/uloc.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uassert.h"

namespace {

/* struct holding a single variant */
VariantListEntry;

/* struct holding a single attribute value */
struct AttributeListEntry : public icu::UMemory {};

/* struct holding a single extension */
struct ExtensionListEntry : public icu::UMemory {};

#define MAXEXTLANG
ULanguageTag;

#define MINLEN
#define SEP
#define PRIVATEUSE
#define LDMLEXT

#define LOCALE_SEP
#define LOCALE_EXT_SEP
#define LOCALE_KEYWORD_SEP
#define LOCALE_KEY_TYPE_SEP

constexpr auto ISALPHA =;
inline bool ISNUMERIC(char c) {}

constexpr char EMPTY[] =;
constexpr char LANG_UND[] =;
constexpr char PRIVATEUSE_KEY[] =;
constexpr char _POSIX[] =;
constexpr char POSIX_KEY[] =;
constexpr char POSIX_VALUE[] =;
constexpr char LOCALE_ATTRIBUTE_KEY[] =;
constexpr char PRIVUSE_VARIANT_PREFIX[] =;
constexpr char LOCALE_TYPE_YES[] =;

#define LANG_UND_LEN

/*
 Updated on 2018-09-12 from
 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

 This table has 2 parts. The part for
 legacy language tags (marked as “Type: grandfathered” in BCP 47)
 is generated by the following scripts from the IANA language tag registry.

 curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
 egrep -A 7 'Type: grandfathered' | \
 egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
 awk -n '/Tag/ {printf("    \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
 tr 'A-Z' 'a-z'


 The 2nd part is made of five ICU-specific entries. They're kept for
 backward compatibility for now, even though they have no preferred
 values. They may have to be removed for strict BCP 47 compliance.

*/
constexpr const char* LEGACY[] =;

/*
 Updated on 2018-09-12 from
 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

 The table lists redundant tags with preferred value in the IANA language tag registry.
 It's generated with the following command:

 curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
 grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
 awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
 tr 'A-Z' 'a-z'

 In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
 a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
*/

constexpr const char* REDUNDANT[] =;

/*
  Updated on 2018-09-12 from
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .

  grep 'Type: language' -A 7 language-subtag-registry  | egrep 'Subtag|Prefe' | \
  grep -B1 'Preferred' | grep -v '^--' | \
  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'

  Make sure that 2-letter language subtags come before 3-letter subtags.
*/
constexpr char DEPRECATEDLANGS[][4] =;

/*
  Updated on 2018-04-24 from

  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
  grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
  grep -B1 'Preferred' | \
  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
*/
constexpr char DEPRECATEDREGIONS[][3] =;

/*
* -------------------------------------------------
*
* These ultag_ functions may be exposed as APIs later
*
* -------------------------------------------------
*/

ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode& status);

void
ultag_close(ULanguageTag* langtag);

const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

const char*
ultag_getScript(const ULanguageTag* langtag);

const char*
ultag_getRegion(const ULanguageTag* langtag);

const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

#if 0
const char*
ultag_getLegacy(const ULanguageTag* langtag);
#endif

}  // namespace

U_NAMESPACE_BEGIN

/*
* -------------------------------------------------
*
* Language subtag syntax validation functions
*
* -------------------------------------------------
*/

namespace {

bool
_isAlphaString(const char* s, int32_t len) {}

bool
_isNumericString(const char* s, int32_t len) {}

bool
_isAlphaNumericString(const char* s, int32_t len) {}

bool
_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {}

}  // namespace

bool
ultag_isLanguageSubtag(const char* s, int32_t len) {}

namespace {

bool
_isExtlangSubtag(const char* s, int32_t len) {}

}  // namespace

bool
ultag_isScriptSubtag(const char* s, int32_t len) {}

bool
ultag_isRegionSubtag(const char* s, int32_t len) {}

namespace {

bool
_isVariantSubtag(const char* s, int32_t len) {}

bool
_isSepListOf(bool (*test)(const char*, int32_t), const char* s, int32_t len) {}

}  // namespace

bool
ultag_isVariantSubtags(const char* s, int32_t len) {}

namespace {

// This is for the ICU-specific "lvariant" handling.
bool
_isPrivateuseVariantSubtag(const char* s, int32_t len) {}

bool
_isExtensionSingleton(const char* s, int32_t len) {}

bool
_isExtensionSubtag(const char* s, int32_t len) {}

}  // namespace

bool
ultag_isExtensionSubtags(const char* s, int32_t len) {}

namespace {

bool
_isPrivateuseValueSubtag(const char* s, int32_t len) {}

}  // namespace

bool
ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {}

bool
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {}

bool
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {}

bool
ultag_isUnicodeLocaleKey(const char* s, int32_t len) {}

bool
_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {}

bool
ultag_isUnicodeLocaleType(const char*s, int32_t len) {}

namespace {

bool
_isTKey(const char* s, int32_t len)
{}

}  // namespace

const char*
ultag_getTKeyStart(const char *localeID) {}

namespace {

bool
_isTValue(const char* s, int32_t len)
{}

bool
_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
{}

bool
_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
{}

bool
_isStatefulSepListOf(bool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
{}

}  // namespace

bool
ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
{}

bool
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {}

namespace {

/*
* -------------------------------------------------
*
* Helper functions
*
* -------------------------------------------------
*/

bool
_addVariantToList(VariantListEntry **first, icu::LocalPointer<VariantListEntry> var) {}

bool
_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {}

bool
_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, bool localeToBCP) {}

void
_initializeULanguageTag(ULanguageTag* langtag) {}

void
_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {}

void
_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {}

void
_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {}

void _sortVariants(VariantListEntry* first) {}

void
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) {}

void
_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool hadPosix, UErrorCode& status) {}

/**
 * Append keywords parsed from LDML extension value
 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
 * Note: kwdBuf (a pool of icu::CharString) is used for storing keywords
 */
void
_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, bool& posixVariant, UErrorCode& status) {}

void
_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode& status) {}

void
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) {}

/*
* -------------------------------------------------
*
* ultag_ functions
*
* -------------------------------------------------
*/

/* Bit flags used by the parser */
#define LANG
#define EXTL
#define SCRT
#define REGN
#define VART
#define EXTS
#define EXTV
#define PRIV

/**
 * Ticket #12705 - The optimizer in Visual Studio 2015 Update 3 has problems optimizing this function.
 * As a work-around, optimization is disabled for this function on VS2015, VS2017,
 * and VS2019 versions below 16.4 (i.e. 1900 <= _MSC_VER < 1924).
 * This work-around should be removed once the following versions of Visual Studio are no
 * longer supported: All versions of VS2015/VS2017, and versions of VS2019 below 16.4.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", off )
#endif

ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode& status) {}

// Ticket #12705 - Turn optimization back on.
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
#pragma optimize( "", on )
#endif

void
ultag_close(ULanguageTag* langtag) {}

const char*
ultag_getLanguage(const ULanguageTag* langtag) {}

#if 0
/**
 * Looks up the tag's language subtag in the DEPRECATEDLANGS table.
 *
 * The table is scanned as consecutive pairs: the entry at each even index is
 * compared against langtag->language and, on a match, the entry immediately
 * following it is returned.
 *
 * @param langtag the parsed language tag; it is dereferenced unconditionally,
 *                so it must not be null.
 * @return the paired table entry when the language is listed, otherwise
 *         langtag->language unchanged.
 */
const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    // DEPRECATEDLANGS is a fixed-size char[][4] table without a sentinel: each
    // row decays to a non-null pointer, so a "!= nullptr" test can never end
    // the loop and a miss would read past the end of the table. Bound the
    // scan by the table's length instead.
    constexpr int32_t tableLength =
        static_cast<int32_t>(sizeof(DEPRECATEDLANGS) / sizeof(DEPRECATEDLANGS[0]));
    // "i + 1 < tableLength" guarantees the paired entry at i + 1 exists.
    for (int32_t i = 0; i + 1 < tableLength; i += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
            return DEPRECATEDLANGS[i + 1];
        }
    }
    return langtag->language;
}
#endif

const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {}

int32_t
ultag_getExtlangSize(const ULanguageTag* langtag) {}

const char*
ultag_getScript(const ULanguageTag* langtag) {}

const char*
ultag_getRegion(const ULanguageTag* langtag) {}

const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {}

int32_t
ultag_getVariantsSize(const ULanguageTag* langtag) {}

const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {}

const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {}

int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag) {}

const char*
ultag_getPrivateUse(const ULanguageTag* langtag) {}

#if 0
// Accessor: returns the `legacy` member of the given parsed tag as-is.
// langtag is dereferenced unconditionally, so it must not be null.
// NOTE(review): this function is compiled out by the surrounding #if 0, and
// ULanguageTag's definition is not visible here - confirm the `legacy` member
// still exists before re-enabling.
const char*
ultag_getLegacy(const ULanguageTag* langtag) {
    return langtag->legacy;
}
#endif

}  // namespace

/*
* -------------------------------------------------
*
* Locale/BCP47 conversion APIs, exposed as uloc_*
*
* -------------------------------------------------
*/
U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char* localeID,
                   char* langtag,
                   int32_t langtagCapacity,
                   UBool strict,
                   UErrorCode* status) {}

U_EXPORT icu::CharString
ulocimp_toLanguageTag(const char* localeID,
                      bool strict,
                      UErrorCode& status) {}

U_EXPORT void
ulocimp_toLanguageTag(const char* localeID,
                      icu::ByteSink& sink,
                      bool strict,
                      UErrorCode& status) {}


U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char* langtag,
                    char* localeID,
                    int32_t localeIDCapacity,
                    int32_t* parsedLength,
                    UErrorCode* status) {}

U_EXPORT icu::CharString
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       int32_t* parsedLength,
                       UErrorCode& status) {}

U_EXPORT void
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       icu::ByteSink& sink,
                       int32_t* parsedLength,
                       UErrorCode& status) {}