uchar.h | Explore in Territory

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1997-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File UCHAR.H
*
* Modification History:
*
*   Date        Name        Description
*   04/02/97    aliu        Creation.
*   03/29/99    helena      Updated for C APIs.
*   4/15/99     Madhu       Updated for C Implementation and Javadoc
*   5/20/99     Madhu       Added the function u_getVersion()
*   8/19/1999   srl         Upgraded scripts to Unicode 3.0
*   8/27/1999   schererm    UCharDirection constants: U_...
*   11/11/1999  weiv        added u_isalnum(), cleaned comments
*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
******************************************************************************
*/

#ifndef UCHAR_H
#define UCHAR_H

#include <stdbool.h>
#include "unicode/utypes.h"
#include "unicode/stringoptions.h"
#include "unicode/ucpmap.h"

#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)

#define USET_DEFINED

/**
 * USet is the C API type corresponding to C++ class UnicodeSet.
 * It is forward-declared here to avoid including unicode/uset.h file if related
 * APIs are not used.
 *
 * @see ucnv_getUnicodeSet
 * @stable ICU 2.4
 */
USet;

#endif


U_CDECL_BEGIN

/*==========================================================================*/
/* Unicode version number                                                   */
/*==========================================================================*/
/**
 * Unicode version number, default for the current ICU version.
 * The actual Unicode Character Database (UCD) data is stored in uprops.dat
 * and may be generated from UCD files from a different Unicode version.
 * Call u_getUnicodeVersion to get the actual Unicode version of the data.
 *
 * @see u_getUnicodeVersion
 * @stable ICU 2.0
 */
#define U_UNICODE_VERSION …

/**
 * \file
 * \brief C API: Unicode Properties
 *
 * This C API provides low-level access to the Unicode Character Database.
 * In addition to raw property values, some convenience functions calculate
 * derived properties, for example for Java-style programming.
 *
 * Unicode assigns each code point (not just assigned character) values for
 * many properties.
 * Most of them are simple boolean flags, or constants from a small enumerated list.
 * For some properties, values are strings or other relatively more complex types.
 *
 * For more information see
 * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
 * and the ICU User Guide chapter on Properties (https://unicode-org.github.io/icu/userguide/strings/properties).
 *
 * Many properties are accessible via generic functions that take a UProperty selector.
 * - u_hasBinaryProperty() returns a binary value (true/false) per property and code point.
 * - u_getIntPropertyValue() returns an integer value per property and code point.
 *   For each supported enumerated or catalog property, there is
 *   an enum type for all of the property's values, and
 *   u_getIntPropertyValue() returns the numeric values of those constants.
 * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
 *   all code points for which the property is true.
 * - u_getIntPropertyMap() returns a map for each
 *   ICU-supported enumerated/catalog/int-valued property which
 *   maps all Unicode code points to their values for that property.
 *
 * Many functions are designed to match java.lang.Character functions.
 * See the individual function documentation,
 * and see the JDK 1.4 java.lang.Character documentation
 * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
 *
 * There are also functions that provide easy migration from C/POSIX functions
 * like isblank(). Their use is generally discouraged because the C/POSIX
 * standards do not define their semantics beyond the ASCII range, which means
 * that different implementations exhibit very different behavior.
 * Instead, Unicode properties should be used directly.
 *
 * There are also only a few, broad C/POSIX character classes, and they tend
 * to be used for conflicting purposes. For example, the "isalpha()" class
 * is sometimes used to determine word boundaries, while a more sophisticated
 * approach would at least distinguish initial letters from continuation
 * characters (the latter including combining marks).
 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
 * Another example: There is no "istitle()" class for titlecase characters.
 *
 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
 * ICU implements them according to the Standard Recommendations in
 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
 *
 * API access for C/POSIX character classes is as follows:
 * - alpha:     u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
 * - lower:     u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
 * - upper:     u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
 * - punct:     u_ispunct(c)
 * - digit:     u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
 * - xdigit:    u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
 * - alnum:     u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
 * - space:     u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
 * - blank:     u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
 * - cntrl:     u_charType(c)==U_CONTROL_CHAR
 * - graph:     u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
 * - print:     u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
 *
 * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
 * the Standard Recommendations in UTS #18. Instead, they match Java
 * functions according to their API documentation.
 *
 * \htmlonly
 * The C/POSIX character classes are also available in UnicodeSet patterns,
 * using patterns like [:graph:] or \p{graph}.
 * \endhtmlonly
 *
 * Note: There are several ICU whitespace functions.
 * Comparison:
 * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
 *       most of general categories "Z" (separators) + most whitespace ISO controls
 *       (including no-break spaces, but excluding IS1..IS4)
 * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
 * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
 * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
 * - u_isblank: "horizontal spaces" = TAB + Zs
 */

/**
 * Constants.
 */

/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
#define UCHAR_MIN_VALUE …

/**
 * The highest Unicode code point value (scalar value) according to
 * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
 * For a single character, UChar32 is a simple type that can hold any code point value.
 *
 * @see UChar32
 * @stable ICU 2.0
 */
#define UCHAR_MAX_VALUE …

/**
 * Get a single-bit bit set (a flag) from a bit number 0..31.
 * @stable ICU 2.1
 */
#define U_MASK(x) …

/**
 * Selection constants for Unicode properties.
 * These constants are used in functions like u_hasBinaryProperty to select
 * one of the Unicode properties.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 *
 * For details about the properties see
 * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
 *
 * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 * Check u_getUnicodeVersion to be sure.
 *
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 2.1
 */
UProperty;

/**
 * Data for enumerated Unicode general category types.
 * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
 * @stable ICU 2.0
 */
UCharCategory;

/**
 * U_GC_XX_MASK constants are bit flags corresponding to Unicode
 * general category values.
 * For each category, the nth bit is set if the numeric value of the
 * corresponding UCharCategory constant is n.
 *
 * There are also some U_GC_Y_MASK constants for groups of general categories
 * like L for all letter categories.
 *
 * @see u_charType
 * @see U_GET_GC_MASK
 * @see UCharCategory
 * @stable ICU 2.1
 */
#define U_GC_CN_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LU_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LL_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LT_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LM_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LO_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MN_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ME_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MC_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ND_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NL_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NO_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZS_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZL_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZP_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CC_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CF_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CO_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CS_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PD_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PS_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PE_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PC_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PO_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SM_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SC_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SK_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SO_MASK …

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PI_MASK …
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PF_MASK …


/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
#define U_GC_L_MASK …

/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
#define U_GC_LC_MASK …

/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
#define U_GC_M_MASK …

/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
#define U_GC_N_MASK …

/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
#define U_GC_Z_MASK …

/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
#define U_GC_C_MASK …

/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
#define U_GC_P_MASK …

/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
#define U_GC_S_MASK …

/**
 * This specifies the language directional property of a character set.
 * @stable ICU 2.0
 */
UCharDirection;

/**
 * Bidi Paired Bracket Type constants.
 *
 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
 * @stable ICU 52
 */
UBidiPairedBracketType;

/**
 * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
 * @stable ICU 2.0
 */
enum UBlockCode { … };

/** @stable ICU 2.0 */
UBlockCode;

/**
 * East Asian Width constants.
 *
 * @see UCHAR_EAST_ASIAN_WIDTH
 * @see u_getIntPropertyValue
 * @stable ICU 2.2
 */
UEastAsianWidth;

/**
 * Selector constants for u_charName().
 * u_charName() returns the "modern" name of a
 * Unicode character; or the name that was defined in
 * Unicode version 1.0, before the Unicode standard merged
 * with ISO-10646; or an "extended" name that gives each
 * Unicode code point a unique name.
 *
 * @see u_charName
 * @stable ICU 2.0
 */
UCharNameChoice;

/**
 * Selector constants for u_getPropertyName() and
 * u_getPropertyValueName().  These selectors are used to choose which
 * name is returned for a given property or value.  All properties and
 * values have a long name.  Most have a short name, but some do not.
 * Unicode allows for additional names, beyond the long and short
 * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
 * i=1, 2,...
 *
 * @see u_getPropertyName()
 * @see u_getPropertyValueName()
 * @stable ICU 2.4
 */
UPropertyNameChoice;

/**
 * Decomposition Type constants.
 *
 * @see UCHAR_DECOMPOSITION_TYPE
 * @stable ICU 2.2
 */
UDecompositionType;

/**
 * Joining Type constants.
 *
 * @see UCHAR_JOINING_TYPE
 * @stable ICU 2.2
 */
UJoiningType;

/**
 * Joining Group constants.
 *
 * @see UCHAR_JOINING_GROUP
 * @stable ICU 2.2
 */
UJoiningGroup;

/**
 * Grapheme Cluster Break constants.
 *
 * @see UCHAR_GRAPHEME_CLUSTER_BREAK
 * @stable ICU 3.4
 */
UGraphemeClusterBreak;

/**
 * Word Break constants.
 * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
 *
 * @see UCHAR_WORD_BREAK
 * @stable ICU 3.4
 */
UWordBreakValues;

/**
 * Sentence Break constants.
 *
 * @see UCHAR_SENTENCE_BREAK
 * @stable ICU 3.4
 */
USentenceBreak;

/**
 * Line Break constants.
 *
 * @see UCHAR_LINE_BREAK
 * @stable ICU 2.2
 */
ULineBreak;

/**
 * Numeric Type constants.
 *
 * @see UCHAR_NUMERIC_TYPE
 * @stable ICU 2.2
 */
UNumericType;

/**
 * Hangul Syllable Type constants.
 *
 * @see UCHAR_HANGUL_SYLLABLE_TYPE
 * @stable ICU 2.6
 */
UHangulSyllableType;

/**
 * Indic Positional Category constants.
 *
 * @see UCHAR_INDIC_POSITIONAL_CATEGORY
 * @stable ICU 63
 */
UIndicPositionalCategory;

/**
 * Indic Syllabic Category constants.
 *
 * @see UCHAR_INDIC_SYLLABIC_CATEGORY
 * @stable ICU 63
 */
UIndicSyllabicCategory;

/**
 * Vertical Orientation constants.
 *
 * @see UCHAR_VERTICAL_ORIENTATION
 * @stable ICU 63
 */
UVerticalOrientation;

#ifndef U_HIDE_DRAFT_API
/**
 * Identifier Status constants.
 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
 *
 * @see UCHAR_IDENTIFIER_STATUS
 * @draft ICU 75
 */
UIdentifierStatus;

/**
 * Identifier Type constants.
 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
 *
 * @see UCHAR_IDENTIFIER_TYPE
 * @draft ICU 75
 */
UIdentifierType;
#endif  // U_HIDE_DRAFT_API

/**
 * Check a binary Unicode property for a code point.
 *
 * Unicode, especially in version 3.2, defines many more properties than the
 * original set in UnicodeData.txt.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ucd/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Important: If ICU is built with UCD files from Unicode versions below 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 *
 * @param c Code point to test.
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START&lt;=which&lt;UCHAR_BINARY_LIMIT.
 * @return true or false according to the binary Unicode property value for c.
 *         Also false if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all.
 *
 * @see UProperty
 * @see u_getBinaryPropertySet
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which);

/**
 * Returns true if the property is true for the string.
 * Same as u_hasBinaryProperty(single code point, which)
 * if the string contains exactly one code point.
 *
 * Most properties apply only to single code points.
 * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a>
 * defines several properties of strings.
 *
 * @param s String to test.
 * @param length Length of the string, or negative if NUL-terminated.
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START&lt;=which&lt;UCHAR_BINARY_LIMIT.
 * @return true or false according to the binary Unicode property value for the string.
 *         Also false if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getBinaryPropertySet
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 70
 */
U_CAPI UBool U_EXPORT2
u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which);

/**
 * Returns a frozen USet for a binary property.
 * The library retains ownership over the returned object.
 * Sets an error code if the property number is not one for a binary property.
 *
 * The returned set contains all code points for which the property is true.
 *
 * @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the property as a set
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see Unicode::fromUSet
 * @stable ICU 63
 */
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);

/**
 * Check if a code point has the Alphabetic Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
 * This is different from u_isalpha!
 * @param c Code point to test
 * @return true if the code point has the Alphabetic Unicode property, false otherwise
 *
 * @see UCHAR_ALPHABETIC
 * @see u_isalpha
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUAlphabetic(UChar32 c);

/**
 * Check if a code point has the Lowercase Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
 * This is different from u_islower!
 * @param c Code point to test
 * @return true if the code point has the Lowercase Unicode property, false otherwise
 *
 * @see UCHAR_LOWERCASE
 * @see u_islower
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isULowercase(UChar32 c);

/**
 * Check if a code point has the Uppercase Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
 * This is different from u_isupper!
 * @param c Code point to test
 * @return true if the code point has the Uppercase Unicode property, false otherwise
 *
 * @see UCHAR_UPPERCASE
 * @see u_isupper
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUUppercase(UChar32 c);

/**
 * Check if a code point has the White_Space Unicode property.
 * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
 * This is different from both u_isspace and u_isWhitespace!
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * @param c Code point to test
 * @return true if the code point has the White_Space Unicode property, false otherwise.
 *
 * @see UCHAR_WHITE_SPACE
 * @see u_isWhitespace
 * @see u_isspace
 * @see u_isJavaSpaceChar
 * @see u_hasBinaryProperty
 * @stable ICU 2.1
 */
U_CAPI UBool U_EXPORT2
u_isUWhiteSpace(UChar32 c);

/**
 * Get the property value for an enumerated or integer Unicode property for a code point.
 * Also returns binary and mask property values.
 *
 * Unicode, especially in version 3.2, defines many more properties than the
 * original set in UnicodeData.txt.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Sample usage:
 * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
 * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
 *
 * @param c Code point to test.
 * @param which UProperty selector constant, identifies which property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 * @return Numeric value that is directly the property value or,
 *         for enumerated properties, corresponds to the numeric value of the enumerated
 *         constant of the respective property value enumeration type
 *         (cast to enum type if necessary).
 *         Returns 0 or 1 (for false/true) for binary Unicode properties.
 *         Returns a bit-mask for mask properties.
 *         Returns 0 if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all, or not for this code point.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyMinValue
 * @see u_getIntPropertyMaxValue
 * @see u_getIntPropertyMap
 * @see u_getUnicodeVersion
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which);

/**
 * Get the minimum value for an enumerated/integer/binary Unicode property.
 * Can be used together with u_getIntPropertyMaxValue
 * to allocate arrays of UnicodeSet or similar.
 *
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
 * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
 *         0 if the property selector is out of range.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getUnicodeVersion
 * @see u_getIntPropertyMaxValue
 * @see u_getIntPropertyValue
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMinValue(UProperty which);

/**
 * Get the maximum value for an enumerated/integer/binary Unicode property.
 * Can be used together with u_getIntPropertyMinValue
 * to allocate arrays of UnicodeSet or similar.
 *
 * Examples for min/max values (for Unicode 3.2):
 *
 * - UCHAR_BIDI_CLASS:    0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
 * - UCHAR_SCRIPT:        0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
 * - UCHAR_IDEOGRAPHIC:   0/1  (false/true)
 *
 * For undefined UProperty constant values, min/max values will be 0/-1.
 *
 * @param which UProperty selector constant, identifies which binary property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
 * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
 *         <=0 if the property selector is out of range.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getUnicodeVersion
 * @see u_getIntPropertyMaxValue
 * @see u_getIntPropertyValue
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMaxValue(UProperty which);

/**
 * Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
 * The library retains ownership over the returned object.
 * Sets an error code if the property number is not one for an "int property".
 *
 * The returned object maps all Unicode code points to their values for that property.
 * For documentation of the integer values see u_getIntPropertyValue().
 *
 * @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
 * @param pErrorCode an in/out ICU UErrorCode
 * @return the property as a map
 * @see UProperty
 * @see u_getIntPropertyValue
 * @stable ICU 63
 */
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);

/**
 * Get the numeric value for a Unicode code point as defined in the
 * Unicode Character Database.
 *
 * A "double" return type is necessary because
 * some numeric values are fractions, negative, or too large for int32_t.
 *
 * For characters without any numeric values in the Unicode Character Database,
 * this function will return U_NO_NUMERIC_VALUE.
 * Note: This is different from the Unicode Standard which specifies NaN as the default value.
 * (NaN is not available on all platforms.)
 *
 * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
 * also supports negative values, large values, and fractions,
 * while Java's getNumericValue() returns values 10..35 for ASCII letters.
 *
 * @param c Code point to get the numeric value for.
 * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
 *
 * @see U_NO_NUMERIC_VALUE
 * @stable ICU 2.2
 */
U_CAPI double U_EXPORT2
u_getNumericValue(UChar32 c);

/**
 * Special value that is returned by u_getNumericValue when
 * no numeric value is defined for a code point.
 *
 * @see u_getNumericValue
 * @stable ICU 2.2
 */
#define U_NO_NUMERIC_VALUE …

/**
 * Determines whether the specified code point has the general category "Ll"
 * (lowercase letter).
 *
 * Same as java.lang.Character.isLowerCase().
 *
 * This misses some characters that are also lowercase but
 * have a different general category value.
 * In order to include those, use UCHAR_LOWERCASE.
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is an Ll lowercase letter
 *
 * @see UCHAR_LOWERCASE
 * @see u_isupper
 * @see u_istitle
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_islower(UChar32 c);

/**
 * Determines whether the specified code point has the general category "Lu"
 * (uppercase letter).
 *
 * Same as java.lang.Character.isUpperCase().
 *
 * This misses some characters that are also uppercase but
 * have a different general category value.
 * In order to include those, use UCHAR_UPPERCASE.
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is an Lu uppercase letter
 *
 * @see UCHAR_UPPERCASE
 * @see u_islower
 * @see u_istitle
 * @see u_tolower
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isupper(UChar32 c);

/**
 * Determines whether the specified code point is a titlecase letter.
 * True for general category "Lt" (titlecase letter).
 *
 * Same as java.lang.Character.isTitleCase().
 *
 * @param c the code point to be tested
 * @return true if the code point is an Lt titlecase letter
 *
 * @see u_isupper
 * @see u_islower
 * @see u_totitle
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_istitle(UChar32 c);

/**
 * Determines whether the specified code point is a digit character according to Java.
 * True for characters with general category "Nd" (decimal digit numbers).
 * Beginning with Unicode 4, this is the same as
 * testing for the Numeric_Type of Decimal.
 *
 * Same as java.lang.Character.isDigit().
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a digit character according to Character.isDigit()
 *
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isdigit(UChar32 c);

/**
 * Determines whether the specified code point is a letter character.
 * True for general categories "L" (letters).
 *
 * Same as java.lang.Character.isLetter().
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a letter character
 *
 * @see u_isdigit
 * @see u_isalnum
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isalpha(UChar32 c);

/**
 * Determines whether the specified code point is an alphanumeric character
 * (letter or digit) according to Java.
 * True for characters with general categories
 * "L" (letters) and "Nd" (decimal digit numbers).
 *
 * Same as java.lang.Character.isLetterOrDigit().
 *
 * In addition to being equivalent to a Java function, this also serves
 * as a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is an alphanumeric character according to Character.isLetterOrDigit()
 *
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isalnum(UChar32 c);

/**
 * Determines whether the specified code point is a hexadecimal digit.
 * This is equivalent to u_digit(c, 16)>=0.
 * True for characters with general category "Nd" (decimal digit numbers)
 * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
 * (That is, for letters with code points
 * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
 *
 * In order to narrow the definition of hexadecimal digits to only ASCII
 * characters, use (c<=0x7f && u_isxdigit(c)).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a hexadecimal digit
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isxdigit(UChar32 c);

/**
 * Determines whether the specified code point is a punctuation character.
 * True for characters with general categories "P" (punctuation).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a punctuation character
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_ispunct(UChar32 c);

/**
 * Determines whether the specified code point is a "graphic" character
 * (printable, excluding spaces).
 * true for all characters except those with general categories
 * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
 * "Cn" (unassigned), and "Z" (separators).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a "graphic" character
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isgraph(UChar32 c);

/**
 * Determines whether the specified code point is a "blank" or "horizontal space",
 * a character that visibly separates words on a line.
 * The following are equivalent definitions:
 *
 * true for Unicode White_Space characters except for "vertical space controls"
 * where "vertical space controls" are the following characters:
 * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
 *
 * same as
 *
 * true for U+0009 (TAB) and characters with general category "Zs" (space separators).
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a "blank"
 *
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isblank(UChar32 c);

/**
 * Determines whether the specified code point is "defined",
 * which usually means that it is assigned a character.
 * True for general categories other than "Cn" (other, not assigned),
 * i.e., true for all code points mentioned in UnicodeData.txt.
 *
 * Note that non-character code points (e.g., U+FDD0) are not "defined"
 * (they are Cn), but surrogate code points are "defined" (Cs).
 *
 * Same as java.lang.Character.isDefined().
 *
 * @param c the code point to be tested
 * @return true if the code point is assigned a character
 *
 * @see u_isdigit
 * @see u_isalpha
 * @see u_isalnum
 * @see u_isupper
 * @see u_islower
 * @see u_istitle
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isdefined(UChar32 c);

/**
 * Determines if the specified character is a space character or not.
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c    the character to be tested
 * @return  true if the character is a space character; false otherwise.
 *
 * @see u_isJavaSpaceChar
 * @see u_isWhitespace
 * @see u_isUWhiteSpace
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isspace(UChar32 c);

/**
 * Determine if the specified code point is a space character according to Java.
 * True for characters with general categories "Z" (separators),
 * which does not include control codes (e.g., TAB or Line Feed).
 *
 * Same as java.lang.Character.isSpaceChar().
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * @param c the code point to be tested
 * @return true if the code point is a space character according to Character.isSpaceChar()
 *
 * @see u_isspace
 * @see u_isWhitespace
 * @see u_isUWhiteSpace
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isJavaSpaceChar(UChar32 c);

/**
 * Determines if the specified code point is a whitespace character according to Java/ICU.
 * A character is considered to be a Java whitespace character if and only
 * if it satisfies one of the following criteria:
 *
 * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
 *      also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
 * - It is U+0009 HORIZONTAL TABULATION.
 * - It is U+000A LINE FEED.
 * - It is U+000B VERTICAL TABULATION.
 * - It is U+000C FORM FEED.
 * - It is U+000D CARRIAGE RETURN.
 * - It is U+001C FILE SEPARATOR.
 * - It is U+001D GROUP SEPARATOR.
 * - It is U+001E RECORD SEPARATOR.
 * - It is U+001F UNIT SEPARATOR.
 *
 * This API tries to sync with the semantics of Java's
 * java.lang.Character.isWhitespace(), but it may not return
 * the exact same results because of the Unicode version
 * difference.
 *
 * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
 * See http://www.unicode.org/versions/Unicode4.0.1/
 *
 * Note: There are several ICU whitespace functions; please see the uchar.h
 * file documentation for a detailed comparison.
 *
 * @param c the code point to be tested
 * @return true if the code point is a whitespace character according to Java/ICU
 *
 * @see u_isspace
 * @see u_isJavaSpaceChar
 * @see u_isUWhiteSpace
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isWhitespace(UChar32 c);

/**
 * Determines whether the specified code point is a control character
 * (as defined by this function).
 * A control character is one of the following:
 * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
 * - U_CONTROL_CHAR (Cc)
 * - U_FORMAT_CHAR (Cf)
 * - U_LINE_SEPARATOR (Zl)
 * - U_PARAGRAPH_SEPARATOR (Zp)
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a control character
 *
 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
 * @see u_isprint
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_iscntrl(UChar32 c);

/**
 * Determines whether the specified code point is an ISO control code.
 * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
 *
 * Same as java.lang.Character.isISOControl().
 *
 * @param c the code point to be tested
 * @return true if the code point is an ISO control code
 *
 * @see u_iscntrl
 * @stable ICU 2.6
 */
U_CAPI UBool U_EXPORT2
u_isISOControl(UChar32 c);

/**
 * Determines whether the specified code point is a printable character.
 * True for general categories <em>other</em> than "C" (controls).
 *
 * This is a C/POSIX migration function.
 * See the comments about C/POSIX character classification functions in the
 * documentation at the top of this header file.
 *
 * @param c the code point to be tested
 * @return true if the code point is a printable character
 *
 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
 * @see u_iscntrl
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isprint(UChar32 c);

/**
 * Non-standard: Determines whether the specified code point is a base character.
 * True for general categories "L" (letters), "N" (numbers),
 * "Mc" (spacing combining marks), and "Me" (enclosing marks).
 *
 * Note that this is different from the Unicode Standard definition in
 * chapter 3.6, conformance clause D51 “Base character”,
 * which defines base characters as the code points with general categories
 * Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs).
 *
 * @param c the code point to be tested
 * @return true if the code point is a base character according to this function
 *
 * @see u_isalpha
 * @see u_isdigit
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isbase(UChar32 c);

/**
 * Returns the bidirectional category value for the code point,
 * which is used in the Unicode bidirectional algorithm
 * (UAX #9 http://www.unicode.org/reports/tr9/).
 * Note that some <em>unassigned</em> code points have bidi values
 * of R or AL because they are in blocks that are reserved
 * for Right-To-Left scripts.
 *
 * Same as java.lang.Character.getDirectionality()
 *
 * @param c the code point to be tested
 * @return the bidirectional category (UCharDirection) value
 *
 * @see UCharDirection
 * @stable ICU 2.0
 */
U_CAPI UCharDirection U_EXPORT2
u_charDirection(UChar32 c);

/**
 * Determines whether the code point has the Bidi_Mirrored property.
 * This property is set for characters that are commonly used in
 * Right-To-Left contexts and need to be displayed with a "mirrored"
 * glyph.
 *
 * Same as java.lang.Character.isMirrored().
 * Same as UCHAR_BIDI_MIRRORED
 *
 * @param c the code point to be tested
 * @return true if the character has the Bidi_Mirrored property
 *
 * @see UCHAR_BIDI_MIRRORED
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isMirrored(UChar32 c);

/**
 * Maps the specified character to a "mirror-image" character.
 * For characters with the Bidi_Mirrored property, implementations
 * sometimes need a "poor man's" mapping to another Unicode
 * character (code point) such that the default glyph may serve
 * as the mirror-image of the default glyph of the specified
 * character. This is useful for text conversion to and from
 * codepages with visual order, and for displays without glyph
 * selection capabilities.
 *
 * @param c the code point to be mapped
 * @return another Unicode code point that may serve as a mirror-image
 *         substitute, or c itself if there is no such mapping or c
 *         does not have the Bidi_Mirrored property
 *
 * @see UCHAR_BIDI_MIRRORED
 * @see u_isMirrored
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_charMirror(UChar32 c);

/**
 * Maps the specified character to its paired bracket character.
 * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
 * Otherwise c itself is returned.
 * See http://www.unicode.org/reports/tr9/
 *
 * @param c the code point to be mapped
 * @return the paired bracket code point,
 *         or c itself if there is no such mapping
 *         (Bidi_Paired_Bracket_Type=None)
 *
 * @see UCHAR_BIDI_PAIRED_BRACKET
 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
 * @see u_charMirror
 * @stable ICU 52
 */
U_CAPI UChar32 U_EXPORT2
u_getBidiPairedBracket(UChar32 c);

/**
 * Returns the general category value for the code point.
 *
 * Same as java.lang.Character.getType().
 *
 * @param c the code point to be tested
 * @return the general category (UCharCategory) value
 *
 * @see UCharCategory
 * @stable ICU 2.0
 */
U_CAPI int8_t U_EXPORT2
u_charType(UChar32 c);

/**
 * Get a single-bit bit set for the general category of a character.
 * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
 * Same as U_MASK(u_charType(c)).
 *
 * @param c the code point to be tested
 * @return a single-bit mask corresponding to the general category (UCharCategory) value
 *
 * @see u_charType
 * @see UCharCategory
 * @see U_GC_CN_MASK
 * @stable ICU 2.1
 */
#define U_GET_GC_MASK(c) …

/**
 * Callback from u_enumCharTypes(), is called for each contiguous range
 * of code points c (where start<=c<limit)
 * with the same Unicode general category ("character type").
 *
 * The callback function can stop the enumeration by returning false.
 *
 * @param context an opaque pointer, as passed into utrie_enum()
 * @param start the first code point in a contiguous range with value
 * @param limit one past the last code point in a contiguous range with value
 * @param type the general category for all code points in [start..limit[
 * @return false to stop the enumeration
 *
 * @stable ICU 2.1
 * @see UCharCategory
 * @see u_enumCharTypes
 */
UCharEnumTypeRange;

/**
 * Enumerate efficiently all code points with their Unicode general categories.
 *
 * This is useful for building data structures (e.g., UnicodeSet's),
 * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
 *
 * For each contiguous range of code points with a given general category ("character type"),
 * the UCharEnumTypeRange function is called.
 * Adjacent ranges have different types.
 * The Unicode Standard guarantees that the numeric value of the type is 0..31.
 *
 * @param enumRange a pointer to a function that is called for each contiguous range
 *                  of code points with the same general category
 * @param context an opaque pointer that is passed on to the callback function
 *
 * @stable ICU 2.1
 * @see UCharCategory
 * @see UCharEnumTypeRange
 */
U_CAPI void U_EXPORT2
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);

#if !UCONFIG_NO_NORMALIZATION

/**
 * Returns the combining class of the code point as specified in UnicodeData.txt.
 *
 * @param c the code point of the character
 * @return the combining class of the character
 * @stable ICU 2.0
 */
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass(UChar32 c);

#endif

/**
 * Returns the decimal digit value of a decimal digit character.
 * Such characters have the general category "Nd" (decimal digit numbers)
 * and a Numeric_Type of Decimal.
 *
 * Unlike ICU releases before 2.6, no digit values are returned for any
 * Han characters because Han number characters are often used with a special
 * Chinese-style number format (with characters for powers of 10 in between)
 * instead of in decimal-positional notation.
 * Unicode 4 explicitly assigns Han number characters the Numeric_Type
 * Numeric instead of Decimal.
 * See Jitterbug 1483 for more details.
 *
 * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
 * for complete numeric Unicode properties.
 *
 * @param c the code point for which to get the decimal digit value
 * @return the decimal digit value of c,
 *         or -1 if c is not a decimal digit character
 *
 * @see u_getNumericValue
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
u_charDigitValue(UChar32 c);

/**
 * Returns the Unicode allocation block that contains the character.
 *
 * @param c the code point to be tested
 * @return the block value (UBlockCode) for c
 *
 * @see UBlockCode
 * @stable ICU 2.0
 */
U_CAPI UBlockCode U_EXPORT2
ublock_getCode(UChar32 c);

/**
 * Retrieve the name of a Unicode character.
 * Depending on <code>nameChoice</code>, the character name written
 * into the buffer is the "modern" name or the name that was defined
 * in Unicode version 1.0.
 * The name contains only "invariant" characters
 * like A-Z, 0-9, space, and '-'.
 * Unicode 1.0 names are only retrieved if they are different from the modern
 * names and if the data file contains the data for them. gennames may or may
 * not be called with a command line option to include 1.0 names in unames.dat.
 *
 * @param code The character (code point) for which to get the name.
 *             It must be <code>0<=code<=0x10ffff</code>.
 * @param nameChoice Selector for which name to get.
 * @param buffer Destination address for copying the name.
 *               The name will always be zero-terminated.
 *               If there is no name, then the buffer will be set to the empty string.
 * @param bufferLength <code>==sizeof(buffer)</code>
 * @param pErrorCode Pointer to a UErrorCode variable;
 *        check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
 *        returns.
 * @return The length of the name, or 0 if there is no name for this character.
 *         If the bufferLength is less than or equal to the length, then the buffer
 *         contains the truncated name and the returned length indicates the full
 *         length of the name.
 *         The length does not include the zero-termination.
 *
 * @see UCharNameChoice
 * @see u_charFromName
 * @see u_enumCharNames
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
u_charName(UChar32 code, UCharNameChoice nameChoice,
           char *buffer, int32_t bufferLength,
           UErrorCode *pErrorCode);

#ifndef U_HIDE_DEPRECATED_API
/**
 * Returns an empty string.
 * Used to return the ISO 10646 comment for a character.
 * The Unicode ISO_Comment property is deprecated and has no values.
 *
 * @param c The character (code point) for which to get the ISO comment.
 *             It must be <code>0<=c<=0x10ffff</code>.
 * @param dest Destination address for copying the comment.
 *             The comment will be zero-terminated if possible.
 *             If there is no comment, then the buffer will be set to the empty string.
 * @param destCapacity <code>==sizeof(dest)</code>
 * @param pErrorCode Pointer to a UErrorCode variable;
 *        check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
 *        returns.
 * @return 0
 *
 * @deprecated ICU 49
 */
U_DEPRECATED int32_t U_EXPORT2
u_getISOComment(UChar32 c,
                char *dest, int32_t destCapacity,
                UErrorCode *pErrorCode);
#endif  /* U_HIDE_DEPRECATED_API */

/**
 * Find a Unicode character by its name and return its code point value.
 * The name is matched exactly and completely.
 * If the name does not correspond to a code point, <i>pErrorCode</i>
 * is set to <code>U_INVALID_CHAR_FOUND</code>.
 * A Unicode 1.0 name is matched only if it differs from the modern name.
 * Unicode names are all uppercase. Extended names are lowercase followed
 * by an uppercase hexadecimal number, and within angle brackets.
 *
 * @param nameChoice Selector for which name to match.
 * @param name The name to match.
 * @param pErrorCode Pointer to a UErrorCode variable
 * @return The Unicode value of the code point with the given name,
 *         or an undefined value if there is no such code point.
 *
 * @see UCharNameChoice
 * @see u_charName
 * @see u_enumCharNames
 * @stable ICU 1.7
 */
U_CAPI UChar32 U_EXPORT2
u_charFromName(UCharNameChoice nameChoice,
               const char *name,
               UErrorCode *pErrorCode);

/**
 * Type of a callback function for u_enumCharNames() that gets called
 * for each Unicode character with the code point value and
 * the character name.
 * If such a function returns false, then the enumeration is stopped.
 *
 * @param context The context pointer that was passed to u_enumCharNames().
 * @param code The Unicode code point for the character with this name.
 * @param nameChoice Selector for which kind of names is enumerated.
 * @param name The character's name, zero-terminated.
 * @param length The length of the name.
 * @return true if the enumeration should continue, false to stop it.
 *
 * @see UCharNameChoice
 * @see u_enumCharNames
 * @stable ICU 1.7
 */
UEnumCharNamesFn;

/**
 * Enumerate all assigned Unicode characters between the start and limit
 * code points (start inclusive, limit exclusive) and call a function
 * for each, passing the code point value and the character name.
 * For Unicode 1.0 names, only those are enumerated that differ from the
 * modern names.
 *
 * @param start The first code point in the enumeration range.
 * @param limit One more than the last code point in the enumeration range
 *              (the first one after the range).
 * @param fn The function that is to be called for each character name.
 * @param context An arbitrary pointer that is passed to the function.
 * @param nameChoice Selector for which kind of names to enumerate.
 * @param pErrorCode Pointer to a UErrorCode variable
 *
 * @see UCharNameChoice
 * @see UEnumCharNamesFn
 * @see u_charName
 * @see u_charFromName
 * @stable ICU 1.7
 */
U_CAPI void U_EXPORT2
u_enumCharNames(UChar32 start, UChar32 limit,
                UEnumCharNamesFn *fn,
                void *context,
                UCharNameChoice nameChoice,
                UErrorCode *pErrorCode);

/**
 * Return the Unicode name for a given property, as given in the
 * Unicode database file PropertyAliases.txt.
 *
 * In addition, this function maps the property
 * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
 * "General_Category_Mask".  These names are not in
 * PropertyAliases.txt.
 *
 * @param property UProperty selector other than UCHAR_INVALID_CODE.
 *         If out of range, NULL is returned.
 *
 * @param nameChoice selector for which name to get.  If out of range,
 *         NULL is returned.  All properties have a long name.  Most
 *         have a short name, but some do not.  Unicode allows for
 *         additional names; if present these will be returned by
 *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
 *
 * @return a pointer to the name, or NULL if either the
 *         property or the nameChoice is out of range.  If a given
 *         nameChoice returns NULL, then all larger values of
 *         nameChoice will return NULL, with one exception: if NULL is
 *         returned for U_SHORT_PROPERTY_NAME, then
 *         U_LONG_PROPERTY_NAME (and higher) may still return a
 *         non-NULL value.  The returned pointer is valid until
 *         u_cleanup() is called.
 *
 * @see UProperty
 * @see UPropertyNameChoice
 * @stable ICU 2.4
 */
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
                  UPropertyNameChoice nameChoice);

/**
 * Return the UProperty enum for a given property name, as specified
 * in the Unicode database file PropertyAliases.txt.  Short, long, and
 * any other variants are recognized.
 *
 * In addition, this function maps the synthetic names "gcm" /
 * "General_Category_Mask" to the property
 * UCHAR_GENERAL_CATEGORY_MASK.  These names are not in
 * PropertyAliases.txt.
 *
 * @param alias the property name to be matched.  The name is compared
 *         using "loose matching" as described in PropertyAliases.txt.
 *
 * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
 *         does not match any property.
 *
 * @see UProperty
 * @stable ICU 2.4
 */
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias);

/**
 * Return the Unicode name for a given property value, as given in the
 * Unicode database file PropertyValueAliases.txt.
 *
 * Note: Some of the names in PropertyValueAliases.txt can only be
 * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
 * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
 *
 * @param property UProperty selector constant.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 *        If out of range, NULL is returned.
 *
 * @param value selector for a value for the given property.  If out
 *         of range, NULL is returned.  In general, valid values range
 *         from 0 up to some maximum.  There are a few exceptions:
 *         (1.) UCHAR_BLOCK values begin at the non-zero value
 *         UBLOCK_BASIC_LATIN.  (2.)  UCHAR_CANONICAL_COMBINING_CLASS
 *         values are not contiguous and range from 0..240.  (3.)
 *         UCHAR_GENERAL_CATEGORY_MASK values are not values of
 *         UCharCategory, but rather mask values produced by
 *         U_GET_GC_MASK().  This allows grouped categories such as
 *         [:L:] to be represented.  Mask values range
 *         non-contiguously from 1..U_GC_P_MASK.
 *
 * @param nameChoice selector for which name to get.  If out of range,
 *         NULL is returned.  All values have a long name.  Most have
 *         a short name, but some do not.  Unicode allows for
 *         additional names; if present these will be returned by
 *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...

 * @return a pointer to the name, or NULL if either the
 *         property or the nameChoice is out of range.  If a given
 *         nameChoice returns NULL, then all larger values of
 *         nameChoice will return NULL, with one exception: if NULL is
 *         returned for U_SHORT_PROPERTY_NAME, then
 *         U_LONG_PROPERTY_NAME (and higher) may still return a
 *         non-NULL value.  The returned pointer is valid until
 *         u_cleanup() is called.
 *
 * @see UProperty
 * @see UPropertyNameChoice
 * @stable ICU 2.4
 */
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
                       int32_t value,
                       UPropertyNameChoice nameChoice);

/**
 * Return the property value integer for a given value name, as
 * specified in the Unicode database file PropertyValueAliases.txt.
 * Short, long, and any other variants are recognized.
 *
 * Note: Some of the names in PropertyValueAliases.txt will only be
 * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
 * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
 *
 * @param property UProperty selector constant.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 *        If out of range, UCHAR_INVALID_CODE is returned.
 *
 * @param alias the value name to be matched.  The name is compared
 *         using "loose matching" as described in
 *         PropertyValueAliases.txt.
 *
 * @return a value integer or UCHAR_INVALID_CODE if the given name
 *         does not match any value of the given property, or if the
 *         property is invalid.  Note: UCHAR_GENERAL_CATEGORY_MASK values
 *         are not values of UCharCategory, but rather mask values
 *         produced by U_GET_GC_MASK().  This allows grouped
 *         categories such as [:L:] to be represented.
 *
 * @see UProperty
 * @stable ICU 2.4
 */
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
                       const char* alias);

/**
 * Determines if the specified character is permissible as the first character in an identifier
 * according to UAX #31 Unicode Identifier and Pattern Syntax.
 *
 * Same as Unicode ID_Start (UCHAR_ID_START).
 *
 * @param c the code point to be tested
 * @return true if the code point may start an identifier
 *
 * @see UCHAR_ID_START
 * @see u_isalpha
 * @see u_isIDPart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isIDStart(UChar32 c);

/**
 * Determines if the specified character is permissible as a non-initial character of an identifier
 * according to UAX #31 Unicode Identifier and Pattern Syntax.
 *
 * Same as Unicode ID_Continue (UCHAR_ID_CONTINUE).
 *
 * @param c the code point to be tested
 * @return true if the code point may occur as a non-initial character of an identifier
 *
 * @see UCHAR_ID_CONTINUE
 * @see u_isIDStart
 * @see u_isIDIgnorable
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isIDPart(UChar32 c);

#ifndef U_HIDE_DRAFT_API
/**
 * Does the set of Identifier_Type values code point c contain the given type?
 *
 * Used for UTS #39 General Security Profile for Identifiers
 * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
 *
 * Each code point maps to a <i>set</i> of UIdentifierType values.
 *
 * @param c code point
 * @param type Identifier_Type to check
 * @return true if type is in Identifier_Type(c)
 * @draft ICU 75
 */
U_CAPI bool U_EXPORT2
u_hasIDType(UChar32 c, UIdentifierType type);

/**
 * Writes code point c's Identifier_Type as a list of UIdentifierType values
 * to the output types array and returns the number of types.
 *
 * Used for UTS #39 General Security Profile for Identifiers
 * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
 *
 * Each code point maps to a <i>set</i> of UIdentifierType values.
 * There is always at least one type.
 * The order of output values is undefined.
 * Each type is output at most once;
 * there cannot be more output values than UIdentifierType constants.
 * In addition, only some of the types can be combined with others,
 * and usually only a small number of types occur together.
 * Future versions might add additional types.
 * See UTS #39 and its data files for details.
 *
 * If there are more than capacity types to be written, then
 * U_BUFFER_OVERFLOW_ERROR is set and the number of types is returned.
 * (Usual ICU buffer handling behavior.)
 *
 * @param c code point
 * @param types output array
 * @param capacity capacity of the array
 * @param pErrorCode Standard ICU error code. Its input value must
 *                   pass the U_SUCCESS() test, or else the function returns
 *                   immediately. Check for U_FAILURE() on output or use with
 *                   function chaining. (See User Guide for details.)
 * @return number of values in c's Identifier_Type,
 *         written to types unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
 * @draft ICU 75
 */
U_CAPI int32_t U_EXPORT2
u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode);
#endif  // U_HIDE_DRAFT_API

/**
 * Determines if the specified character should be regarded
 * as an ignorable character in an identifier,
 * according to Java.
 * True for characters with general category "Cf" (format controls) as well as
 * non-whitespace ISO controls
 * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
 *
 * Same as java.lang.Character.isIdentifierIgnorable().
 *
 * Note that Unicode just recommends to ignore Cf (format controls).
 *
 * @param c the code point to be tested
 * @return true if the code point is ignorable in identifiers according to Java
 *
 * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
 * @see u_isIDStart
 * @see u_isIDPart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isIDIgnorable(UChar32 c);

/**
 * Determines if the specified character is permissible as the
 * first character in a Java identifier.
 * In addition to u_isIDStart(c), true for characters with
 * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
 *
 * Same as java.lang.Character.isJavaIdentifierStart().
 *
 * @param c the code point to be tested
 * @return true if the code point may start a Java identifier
 *
 * @see     u_isJavaIDPart
 * @see     u_isalpha
 * @see     u_isIDStart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isJavaIDStart(UChar32 c);

/**
 * Determines if the specified character is permissible
 * in a Java identifier.
 * In addition to u_isIDPart(c), true for characters with
 * general category "Sc" (currency symbols).
 *
 * Same as java.lang.Character.isJavaIdentifierPart().
 *
 * @param c the code point to be tested
 * @return true if the code point may occur in a Java identifier
 *
 * @see     u_isIDIgnorable
 * @see     u_isJavaIDStart
 * @see     u_isalpha
 * @see     u_isdigit
 * @see     u_isIDPart
 * @stable ICU 2.0
 */
U_CAPI UBool U_EXPORT2
u_isJavaIDPart(UChar32 c);

/**
 * The given character is mapped to its lowercase equivalent according to
 * UnicodeData.txt; if the character has no lowercase equivalent, the character
 * itself is returned.
 *
 * Same as java.lang.Character.toLowerCase().
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @return the Simple_Lowercase_Mapping of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_tolower(UChar32 c);

/**
 * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
 * if the character has no uppercase equivalent, the character itself is
 * returned.
 *
 * Same as java.lang.Character.toUpperCase().
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @return the Simple_Uppercase_Mapping of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_toupper(UChar32 c);

/**
 * The given character is mapped to its titlecase equivalent
 * according to UnicodeData.txt;
 * if none is defined, the character itself is returned.
 *
 * Same as java.lang.Character.toTitleCase().
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @return the Simple_Titlecase_Mapping of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_totitle(UChar32 c);

/**
 * The given character is mapped to its case folding equivalent according to
 * UnicodeData.txt and CaseFolding.txt;
 * if the character has no case folding equivalent, the character
 * itself is returned.
 *
 * This function only returns the simple, single-code point case mapping.
 * Full case mappings should be used whenever possible because they produce
 * better results by working on whole strings.
 * They take into account the string context and the language and can map
 * to a result string with a different length as appropriate.
 * Full case mappings are applied by the string case mapping functions,
 * see ustring.h and the UnicodeString class.
 * See also the User Guide chapter on C/POSIX migration:
 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
 *
 * @param c the code point to be mapped
 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @return the Simple_Case_Folding of the code point, if any;
 *         otherwise the code point itself.
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_foldCase(UChar32 c, uint32_t options);

/**
 * Returns the decimal digit value of the code point in the
 * specified radix.
 *
 * If the radix is not in the range <code>2<=radix<=36</code> or if the
 * value of <code>c</code> is not a valid digit in the specified
 * radix, <code>-1</code> is returned. A character is a valid digit
 * if at least one of the following is true:
 * <ul>
 * <li>The character has a decimal digit value.
 *     Such characters have the general category "Nd" (decimal digit numbers)
 *     and a Numeric_Type of Decimal.
 *     In this case the value is the character's decimal digit value.</li>
 * <li>The character is one of the uppercase Latin letters
 *     <code>'A'</code> through <code>'Z'</code>.
 *     In this case the value is <code>c-'A'+10</code>.</li>
 * <li>The character is one of the lowercase Latin letters
 *     <code>'a'</code> through <code>'z'</code>.
 *     In this case the value is <code>ch-'a'+10</code>.</li>
 * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
 *     as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
 *     are recognized.</li>
 * </ul>
 *
 * Same as java.lang.Character.digit().
 *
 * @param   ch      the code point to be tested.
 * @param   radix   the radix.
 * @return  the numeric value represented by the character in the
 *          specified radix,
 *          or -1 if there is no value or if the value exceeds the radix.
 *
 * @see     UCHAR_NUMERIC_TYPE
 * @see     u_forDigit
 * @see     u_charDigitValue
 * @see     u_isdigit
 * @stable ICU 2.0
 */
U_CAPI int32_t U_EXPORT2
u_digit(UChar32 ch, int8_t radix);

/**
 * Determines the character representation for a specific digit in
 * the specified radix. If the value of <code>radix</code> is not a
 * valid radix, or the value of <code>digit</code> is not a valid
 * digit in the specified radix, the null character
 * (<code>U+0000</code>) is returned.
 * <p>
 * The <code>radix</code> argument is valid if it is greater than or
 * equal to 2 and less than or equal to 36.
 * The <code>digit</code> argument is valid if
 * <code>0 <= digit < radix</code>.
 * <p>
 * If the digit is less than 10, then
 * <code>'0' + digit</code> is returned. Otherwise, the value
 * <code>'a' + digit - 10</code> is returned.
 *
 * Same as java.lang.Character.forDigit().
 *
 * @param   digit   the number to convert to a character.
 * @param   radix   the radix.
 * @return  the <code>char</code> representation of the specified digit
 *          in the specified radix.
 *
 * @see     u_digit
 * @see     u_charDigitValue
 * @see     u_isdigit
 * @stable ICU 2.0
 */
U_CAPI UChar32 U_EXPORT2
u_forDigit(int32_t digit, int8_t radix);

/**
 * Get the "age" of the code point.
 * The "age" is the Unicode version when the code point was first
 * designated (as a non-character or for Private Use)
 * or assigned a character.
 * This can be useful to avoid emitting code points to receiving
 * processes that do not accept newer characters.
 * The data is from the UCD file DerivedAge.txt.
 *
 * @param c The code point.
 * @param versionArray The Unicode version number array, to be filled in.
 *
 * @stable ICU 2.1
 */
U_CAPI void U_EXPORT2
u_charAge(UChar32 c, UVersionInfo versionArray);

/**
 * Gets the Unicode version information.
 * The version array is filled in with the version information
 * for the Unicode standard that is currently used by ICU.
 * For example, Unicode version 3.1.1 is represented as an array with
 * the values { 3, 1, 1, 0 }.
 *
 * @param versionArray an output array that will be filled in with
 *                     the Unicode version number
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2
u_getUnicodeVersion(UVersionInfo versionArray);

#if !UCONFIG_NO_NORMALIZATION
/**
 * Get the FC_NFKC_Closure property string for a character.
 * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
 * or for "FNC": http://www.unicode.org/reports/tr15/
 *
 * @param c The character (code point) for which to get the FC_NFKC_Closure string.
 *             It must be <code>0<=c<=0x10ffff</code>.
 * @param dest Destination address for copying the string.
 *             The string will be zero-terminated if possible.
 *             If there is no FC_NFKC_Closure string,
 *             then the buffer will be set to the empty string.
 * @param destCapacity <code>==sizeof(dest)</code>
 * @param pErrorCode Pointer to a UErrorCode variable.
 * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
 *         If the destCapacity is less than or equal to the length, then the buffer
 *         contains the truncated name and the returned length indicates the full
 *         length of the name.
 *         The length does not include the zero-termination.
 *
 * @stable ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);

#endif


U_CDECL_END

#endif /*_UCHAR*/
/*eof*/
godot/thirdparty/icu4c/common/unicode/uchar.h