chromium/third_party/icu/source/common/locmap.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (C) 1996-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
 * Provides functionality for mapping between
 * LCID and Posix IDs or ICU locale to codepage
 *
 * Note: All classes and code in this file are
 *       intended for internal use only.
 *
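 * Methods of interest:
 *   uint32_t uprv_convertToLCID(const char* langID, const char* posixID, UErrorCode* status);
 *   int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);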
 *
 * Kathleen Wilson, 4/30/96
 *
 *  Date        Name        Description
 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
 *                          setId() method and safety check against 
 *                          MAX_ID_LENGTH.
 * 04/23/99     stephen     Added C wrapper for convertToPosix.
 * 09/18/00     george      Removed the memory leaks.
 * 08/23/01     george      Convert to C
 */

#include "locmap.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cstring.h"
#include "cmemory.h"
#include "ulocimp.h"
#include "unicode/uloc.h"

#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
#include <windows.h>
#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
#endif

/*
 * Note:
 * The mapping from Win32 locale ID numbers to POSIX locale strings should
 * be the faster one.
 *
 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
 */

/*
////////////////////////////////////////////////
//
// Internal Classes for LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/

/*
 * One LCID <--> POSIX pairing.
 * NOTE(review): the member list is not visible at this point. The
 * commented-out `ber` subtable further down writes each entry as
 * {hostID, "posixID"}, so presumably an element holds a host LCID and a
 * POSIX ID string -- confirm against the full definition.
 */
ILcidPosixElement;

/*
 * The map type used throughout this file: gPosixIDmap is an array of
 * ILcidPosixMap, and getHostID()/getPosixID() each receive one by const
 * pointer.
 * NOTE(review): the member list is not visible at this point; presumably it
 * groups the ILcidPosixElement entries of one language -- confirm.
 */
ILcidPosixMap;


/*
/////////////////////////////////////////////////
//
// Easy macros to make the LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/

/**
 * The standard one language/one country mapping for LCID.
 * The first element must be the language, and the following
 * elements are the language with the country.
 *
 * Invoked once per language in the table below, without a trailing
 * semicolon, e.g. ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA).
 *
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 *
 * NOTE(review): no replacement list is visible for this macro here;
 * confirm what it expands to before relying on the description above.
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID)

/**
 * Define a subtable by ID.
 *
 * Unlike ILCID_POSIX_ELEMENT_ARRAY, each invocation in the table below is
 * followed by a brace-enclosed entry list and a semicolon, e.g.
 * ILCID_POSIX_SUBTABLE(ar) { ... };
 *
 * @param id the POSIX ID, either a language or language_TERRITORY
 *           (the table below uses both forms, e.g. `ar` and `fa_AF`)
 *
 * NOTE(review): no replacement list is visible for this macro here;
 * confirm what declaration it introduces.
 */
#define ILCID_POSIX_SUBTABLE(id)


/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * @param _posixID the full POSIX ID for this entry.
 *
 * NOTE(review): no replacement list and no invocation of this macro are
 * visible here; presumably it is used to build the entries of gPosixIDmap --
 * confirm.
 */
#define ILCID_POSIX_MAP(_posixID)

/*
////////////////////////////////////////////
//
// Create the table of LCID to POSIX Mapping
// None of it should be dynamically created.
//
// Keep static locale variables inside the function so that
// it can be created properly during static init.
//
// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier 
//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
//
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
//       maintained for support of older Windows versions.
//       Update: Windows 7 (091130)
//
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
//       to support other keywords in this mapping data, we must update the implementation.
////////////////////////////////////////////
*/

// TODO: For Windows, ideally this table would be a list of exceptions rather than a complete list, as
// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.

/*
 * One definition per language, in roughly alphabetical order of POSIX ID.
 * ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) is the
 * one language/one country form; ILCID_POSIX_SUBTABLE(id) introduces a
 * hand-written entry list for languages that need more than one entry.
 */

ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)

ILCID_POSIX_SUBTABLE(ar) {};

ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)

ILCID_POSIX_SUBTABLE(az) {};

ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)

/*ILCID_POSIX_SUBTABLE(ber) {
    {0x5f,   "ber"},
    {0x045f, "ber_Arab_DZ"},
    {0x045f, "ber_Arab"},
    {0x085f, "ber_Latn_DZ"},
    {0x085f, "ber_Latn"}
};*/

ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)

ILCID_POSIX_SUBTABLE(bin) {};

ILCID_POSIX_SUBTABLE(bn) {};

ILCID_POSIX_SUBTABLE(bo) {};

ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)

ILCID_POSIX_SUBTABLE(ca) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)

ILCID_POSIX_SUBTABLE(chr) {};

// ICU has chosen different names for these.
ILCID_POSIX_SUBTABLE(ckb) {};

/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
/* NOTE(review): the invocation below still passes `cs` as the languageID; confirm this comment is current. */
ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)

ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)

// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
ILCID_POSIX_SUBTABLE(de) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)

// Windows uses an empty string for 'invariant'
ILCID_POSIX_SUBTABLE(en) {};

ILCID_POSIX_SUBTABLE(en_US_POSIX) {};

// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
ILCID_POSIX_SUBTABLE(es) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)

/* ISO-639 doesn't distinguish between Persian and Dari.*/
ILCID_POSIX_SUBTABLE(fa) {};


/* duplicate for roundtripping */
ILCID_POSIX_SUBTABLE(fa_AF) {};

ILCID_POSIX_SUBTABLE(ff) {};

ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)

ILCID_POSIX_SUBTABLE(fr) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)

ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)

ILCID_POSIX_SUBTABLE(ga) {};

ILCID_POSIX_SUBTABLE(gd) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)

ILCID_POSIX_SUBTABLE(ha) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)

/* This LCID is really four different locales.*/
ILCID_POSIX_SUBTABLE(hr) {};

ILCID_POSIX_SUBTABLE(hsb) {};

ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)

ILCID_POSIX_SUBTABLE(ibb) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)

ILCID_POSIX_SUBTABLE(it) {};

ILCID_POSIX_SUBTABLE(iu) {};

ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)

ILCID_POSIX_SUBTABLE(ko) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)

ILCID_POSIX_SUBTABLE(ks) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */

ILCID_POSIX_SUBTABLE(la) {};

ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)

ILCID_POSIX_SUBTABLE(mn) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)

ILCID_POSIX_SUBTABLE(ms) {};

ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)

ILCID_POSIX_SUBTABLE(ne) {};

ILCID_POSIX_SUBTABLE(nl) {};

/* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
ILCID_POSIX_SUBTABLE(no) {};

ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)

ILCID_POSIX_SUBTABLE(om) {};

/* Declared as or_IN to get around compiler errors. */
/* NOTE(review): presumably because `or` is an alternative operator token in C++ and cannot be used as an identifier -- confirm. */
ILCID_POSIX_SUBTABLE(or_IN) {};

ILCID_POSIX_SUBTABLE(pa) {};

ILCID_POSIX_SUBTABLE(pap) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)

ILCID_POSIX_SUBTABLE(pt) {};

ILCID_POSIX_SUBTABLE(qu) {};

ILCID_POSIX_SUBTABLE(quc) {};

ILCID_POSIX_SUBTABLE(qut) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)

ILCID_POSIX_SUBTABLE(ro) {};

// TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
// (Except that it's not invariant in ICU)
ILCID_POSIX_SUBTABLE(root) {};

ILCID_POSIX_SUBTABLE(ru) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)

ILCID_POSIX_SUBTABLE(sd) {};

ILCID_POSIX_SUBTABLE(se) {};

ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)

ILCID_POSIX_SUBTABLE(so) {};

ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)

ILCID_POSIX_SUBTABLE(sv) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)

ILCID_POSIX_SUBTABLE(ta) {};

ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)

/* Cyrillic based by default */
ILCID_POSIX_SUBTABLE(tg) {};

ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)

ILCID_POSIX_SUBTABLE(ti) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)

ILCID_POSIX_SUBTABLE(tn) {};

ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)

ILCID_POSIX_SUBTABLE(tzm) {};

ILCID_POSIX_SUBTABLE(ug) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)

ILCID_POSIX_SUBTABLE(ur) {};

ILCID_POSIX_SUBTABLE(uz) {};

ILCID_POSIX_SUBTABLE(ve) {};

ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)

ILCID_POSIX_SUBTABLE(yi) {};

ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)

// Windows & ICU tend to use different names for some of these.
// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
ILCID_POSIX_SUBTABLE(zh) {};

ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)

/*
 * The file-wide array of ILcidPosixMap entries.
 * This must be static and grouped by LCID.
 * NOTE(review): the initializer is not visible here; presumably it lists one
 * map per language defined above -- confirm.
 */
static const ILcidPosixMap gPosixIDmap[] =;

/*
 * NOTE(review): the initializer is not visible here; judging by the name this
 * is presumably the number of entries in gPosixIDmap -- confirm.
 */
static const uint32_t gLocaleCount =;

/**
 * Measures how far two locale IDs agree.
 *
 * Walks both strings in lock step and stops at the first position where the
 * characters differ or where the strings end.
 *
 * This is a file-local (static) helper for the lookup code in this file.
 *
 * NOTE(review): the body was previously empty, so the function ran off its
 * end without returning a value. The common-prefix-length contract
 * implemented here should be confirmed against its caller(s).
 *
 * @param id1 first NUL-terminated ID; must not be NULL
 * @param id2 second NUL-terminated ID; must not be NULL
 * @return the number of leading characters the two IDs have in common
 *         (0 when they differ at the first character or either is empty)
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t diffIdx = 0;

    /* Checking *id1 for the terminator is sufficient: if id2 ended first,
     * the equality test has already failed. */
    while (*id1 == *id2 && *id1 != 0) {
        diffIdx++;
        id1++;
        id2++;
    }
    return diffIdx;
}

/**
 * Searches for a Windows LCID
 *
 * @param this_0 the map to search; passed by const pointer.
 *               NOTE(review): presumably one entry of gPosixIDmap -- confirm.
 * @param posixID the Posix style locale id.
 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
 *               no equivalent Windows LCID.
 * @return the LCID
 *
 * NOTE(review): the body is not visible here, so the matching rule and the
 * value returned alongside an error status cannot be documented -- confirm.
 */
static uint32_t
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
{}

/**
 * Counterpart of getHostID(): takes a map and a Windows LCID and yields a
 * POSIX ID string.
 *
 * @param this_0 the map to search; passed by const pointer.
 * @param hostID the Windows LCID to look up.
 * @return a POSIX ID string.
 *
 * NOTE(review): the body is not visible here; what is returned when hostID
 * has no entry in the map, and who owns the returned string, need to be
 * confirmed.
 */
static const char*
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
{}

/*
//////////////////////////////////////
//
// LCID --> POSIX
//
/////////////////////////////////////
*/
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * Only defined when U_PLATFORM_HAS_WIN32_API and
 * UCONFIG_USE_WINDOWS_LCID_MAPPING_API are both enabled.
 * NOTE(review): no parameter list or replacement text is visible for this
 * macro here; confirm how the rewrite is actually performed.
 */
#define FIX_LANGUAGE_ID_TAG

#endif

/**
 * LCID --> POSIX entry point.
 *
 * @param hostid the Windows LCID to convert.
 * @param posixID caller-supplied character buffer.
 *                NOTE(review): presumably receives the POSIX ID -- confirm.
 * @param posixIDCapacity NOTE(review): presumably the capacity of posixID in
 *                        chars -- confirm.
 * @param status error code in/out parameter.
 * @return NOTE(review): the body is not visible here; presumably the length
 *         of the resulting ID -- confirm, including the behavior when the
 *         buffer is too small or the LCID is unknown.
 */
U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
{}

/*
//////////////////////////////////////
//
// POSIX --> LCID
// This should only be called from uloc_getLCID.
// The locale ID must be in canonical form.
//
/////////////////////////////////////
*/
/**
 * POSIX --> LCID conversion, platform variant.
 *
 * @param localeID the locale ID to convert.
 * @param status error code in/out parameter.
 * @return a Windows LCID.
 *
 * NOTE(review): the body is not visible here. Judging by the name and the
 * <winnls.h> include above, this presumably defers to LocaleNameToLCID when
 * the Windows mapping API is enabled -- confirm, along with the value
 * returned on other platforms or on failure.
 */
U_CAPI uint32_t
uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
{}

/**
 * POSIX --> LCID conversion.
 *
 * Intended to be called only from uloc_getLCID, with a locale ID in
 * canonical form; on that path keywords other than collation have already
 * been removed.
 *
 * @param langID NOTE(review): presumably the language part of the locale --
 *               confirm.
 * @param posixID the POSIX-style locale ID; may carry an @collation=XXX
 *                keyword but no other keywords.
 * @param status error code in/out parameter.
 * @return a Windows LCID.
 *
 * NOTE(review): the body is not visible here; the lookup strategy and the
 * value returned when no LCID matches need to be confirmed.
 */
U_CAPI uint32_t
uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
{}