unicodectype.c | Explore in Territory

/*
   Unicode character type helpers.

   Written by Marc-Andre Lemburg ([email protected]).
   Modified for Python 2.0 by Fredrik Lundh ([email protected])

   Copyright (c) Corporation for National Research Initiatives.

*/

#include "Python.h"

#define ALPHA_MASK …
#define DECIMAL_MASK …
#define DIGIT_MASK …
#define LOWER_MASK …
#define TITLE_MASK …
#define UPPER_MASK …
#define XID_START_MASK …
#define XID_CONTINUE_MASK …
#define PRINTABLE_MASK …
#define NUMERIC_MASK …
#define CASE_IGNORABLE_MASK …
#define CASED_MASK …
#define EXTENDED_CASE_MASK …

_PyUnicode_TypeRecord;

#include "unicodetype_db.h"

static const _PyUnicode_TypeRecord *
gettyperecord(Py_UCS4 code)
{ … }

/* Returns the titlecase Unicode character corresponding to ch, or just
   ch if no titlecase mapping is known. */

Py_UCS4 _PyUnicode_ToTitlecase(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters having the category 'Lt', 0
   otherwise. */

int _PyUnicode_IsTitlecase(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters having the XID_Start property, 0
   otherwise. */

int _PyUnicode_IsXidStart(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters having the XID_Continue property,
   0 otherwise. */

int _PyUnicode_IsXidContinue(Py_UCS4 ch)
{ … }

/* Returns the integer decimal (0-9) for Unicode characters having
   this property, -1 otherwise. */

int _PyUnicode_ToDecimalDigit(Py_UCS4 ch)
{ … }

int _PyUnicode_IsDecimalDigit(Py_UCS4 ch)
{ … }

/* Returns the integer digit (0-9) for Unicode characters having
   this property, -1 otherwise. */

int _PyUnicode_ToDigit(Py_UCS4 ch)
{ … }

int _PyUnicode_IsDigit(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters having a numeric value, 0
   otherwise. */

int _PyUnicode_IsNumeric(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters considered printable, and 0 for
   characters to be hex-escaped when repr()ed.
   All characters except those characters defined in the Unicode character
   database as following categories are considered printable.
      * Cc (Other, Control)
      * Cf (Other, Format)
      * Cs (Other, Surrogate)
      * Co (Other, Private Use)
      * Cn (Other, Not Assigned)
      * Zl (Separator, Line) ('\u2028', LINE SEPARATOR)
      * Zp (Separator, Paragraph) ('\u2029', PARAGRAPH SEPARATOR)
      * Zs (Separator, Space) other than ASCII space ('\x20').
*/
int _PyUnicode_IsPrintable(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters having the category 'Ll', 0
   otherwise. */

int _PyUnicode_IsLowercase(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters having the category 'Lu', 0
   otherwise. */

int _PyUnicode_IsUppercase(Py_UCS4 ch)
{ … }

/* Returns the uppercase Unicode character corresponding to ch, or just
   ch if no uppercase mapping is known. */

Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
{ … }

/* Returns the lowercase Unicode character corresponding to ch, or just
   ch if no lowercase mapping is known. */

Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
{ … }

int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
{ … }

int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res)
{ … }

int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
{ … }

int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res)
{ … }

int _PyUnicode_IsCased(Py_UCS4 ch)
{ … }

int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch)
{ … }

/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
   'Lo' or 'Lm',  0 otherwise. */

int _PyUnicode_IsAlpha(Py_UCS4 ch)
{ … }
cpython/Objects/unicodectype.c