// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 1999-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: uinvchar.c * encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * * created on: 2004sep14 * created by: Markus W. Scherer * * Functions for handling invariant characters, moved here from putil.c * for better modularization. */ #include "unicode/utypes.h" #include "unicode/ustring.h" #include "udataswp.h" #include "cstring.h" #include "cmemory.h" #include "uassert.h" #include "uinvchar.h" /* invariant-character handling --------------------------------------------- */ /* * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) * appropriately for most EBCDIC codepages. * * They currently also map most other ASCII graphic characters, * appropriately for codepages 37 and 1047. * Exceptions: The characters for []^ have different codes in 37 & 1047. * Both versions are mapped to ASCII. * * ASCII 37 1047 * [ 5B BA AD * ] 5D BB BD * ^ 5E B0 5F * * There are no mappings for variant characters from Unicode to EBCDIC. * * Currently, C0 control codes are also included in these maps. * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), * but there is no mapping for ASCII LF back to EBCDIC. * * ASCII EBCDIC S/390-OE * LF 0A 25 15 * NEL 85 15 25 * * The maps below explicitly exclude the variant * control and graphical characters that are in ASCII-based * codepages at 0x80 and above. * "No mapping" is expressed by mapping to a 00 byte. * * These tables do not establish a converter or a codepage. 
*/

/*
 * Lookup table indexed by an EBCDIC byte value (256 entries, one per possible byte).
 * Per the description above, an entry of 00 means "no mapping".
 * NOTE(review): the initializer is elided in this view; contents not verified here.
 */
static const uint8_t asciiFromEbcdic[256]= …;

/*
 * Lookup table indexed by an ASCII/Unicode code in the byte range (256 entries).
 * Per the description above, an entry of 00 means "no mapping".
 * NOTE(review): the initializer is elided in this view; contents not verified here.
 */
static const uint8_t ebcdicFromAscii[256]= …;

/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
static const uint8_t lowercaseAsciiFromEbcdic[256]= …;

/*
 * Bit sets indicating which characters of the ASCII repertoire
 * (by ASCII/Unicode code) are "invariant".
 * See utypes.h for more details.
 *
 * As invariant are considered the characters of the ASCII repertoire except
 * for the following:
 * 21 '!' <exclamation mark>
 * 23 '#' <number sign>
 * 24 '$' <dollar sign>
 *
 * 40 '@' <commercial at>
 *
 * 5b '[' <left bracket>
 * 5c '\' <backslash>
 * 5d ']' <right bracket>
 * 5e '^' <circumflex>
 *
 * 60 '`' <grave accent>
 *
 * 7b '{' <left brace>
 * 7c '|' <vertical line>
 * 7d '}' <right brace>
 * 7e '~' <tilde>
 *
 * Four 32-bit words give 128 bits in total.
 * NOTE(review): presumably one bit per code 00..7F; the bit layout is set by the
 * elided initializer and by the macros below - confirm before relying on it.
 */
static const uint32_t invariantChars[4]= …;

/*
 * test unsigned types (or values known to be non-negative) for invariant characters,
 * tests ASCII-family character values
 */
#define UCHAR_IS_INVARIANT(c) …

/* test signed types for invariant characters, adds test for positive values */
#define SCHAR_IS_INVARIANT(c) …

/*
 * Character conversion helpers, selected at compile time by U_CHARSET_FAMILY.
 * Only the ASCII and EBCDIC families are accepted; any other value stops the
 * build with the #error below.
 * NOTE(review): the expansions are elided in this view. The EBCDIC-family
 * variants appear here without a parameter list, unlike the ASCII-family
 * ones - confirm against the full source.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) …
#define UCHAR_TO_CHAR(c) …
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR …
#define UCHAR_TO_CHAR …
#else
# error U_CHARSET_FAMILY is not valid
#endif

/*
 * Takes a read-only char buffer (cs), a writable char16_t buffer (us) and a length.
 * NOTE(review): body elided in this view; presumably converts `length` platform
 * chars from cs into UTF-16 code units in us - confirm against utypes.h/ustring.h docs.
 */
U_CAPI void U_EXPORT2 u_charsToUChars(const char *cs, char16_t *us, int32_t length) { … }

/*
 * Takes a read-only char16_t buffer (us), a writable char buffer (cs) and a length.
 * NOTE(review): body elided in this view; presumably the inverse of
 * u_charsToUChars() - confirm, including what happens to variant characters.
 */
U_CAPI void U_EXPORT2 u_UCharsToChars(const char16_t *us, char *cs, int32_t length) { … }

/*
 * Returns a UBool for the char string s of the given length.
 * NOTE(review): body elided in this view; by its name it presumably reports
 * whether every character of s is invariant. Whether a negative length means
 * "NUL-terminated" cannot be determined from here.
 */
U_CAPI UBool U_EXPORT2 uprv_isInvariantString(const char *s, int32_t length) { … }

/*
 * Returns a UBool for the char16_t string s of the given length.
 * NOTE(review): body elided in this view; presumably the UTF-16 counterpart of
 * uprv_isInvariantString() - confirm the handling of negative length.
 */
U_CAPI UBool U_EXPORT2 uprv_isInvariantUString(const char16_t *s, int32_t length) { … }

/* UDataSwapFn implementations used in udataswp.c ------- */

/*
 * The four functions below share one signature: a swapper (ds), a read-only
 * input buffer with its length, a writable output buffer, and a UErrorCode
 * pointer; each returns an int32_t.
 * NOTE(review): bodies are elided in this view, so the meaning of the return
 * value and the error codes set are not documented here - see udataswp.h.
 */

/* convert ASCII to EBCDIC and verify that all characters are invariant */
U_CAPI int32_t U_EXPORT2 uprv_ebcdicFromAscii(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { … }

/* this function only checks and copies ASCII strings without conversion */
U_CFUNC int32_t uprv_copyAscii(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { … }

/* convert EBCDIC to ASCII and verify that all characters are invariant */
U_CFUNC int32_t uprv_asciiFromEbcdic(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { … }

/* this function only checks and copies EBCDIC strings without conversion */
U_CFUNC int32_t uprv_copyEbcdic(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { … }

/*
 * Returns a UBool for a single char.
 * NOTE(review): body elided in this view; by its name it presumably tests
 * whether c is an EBCDIC encoding of '@'.
 * NOTE(review): unlike the other U_CAPI definitions in this file, this one
 * carries no U_EXPORT2 - confirm that it matches the declaration in the header.
 */
U_CAPI UBool uprv_isEbcdicAtSign(char c) { … }

/*
 * compare invariant strings; variant characters compare less than others and unlike each other
 *
 * Both comparison functions below take a swapper (ds), a char string
 * (outString/outLength) and a char16_t string (localString/localLength),
 * and return an int32_t.
 * NOTE(review): bodies elided in this view; the sign convention of the result
 * and the meaning of negative lengths should be confirmed against the header.
 */
U_CFUNC int32_t uprv_compareInvAscii(const UDataSwapper *ds, const char *outString, int32_t outLength, const char16_t *localString, int32_t localLength) { … }

/*
 * Same signature as uprv_compareInvAscii().
 * NOTE(review): presumably treats outString as EBCDIC rather than ASCII - confirm.
 */
U_CFUNC int32_t uprv_compareInvEbcdic(const UDataSwapper *ds, const char *outString, int32_t outLength, const char16_t *localString, int32_t localLength) { … }

/*
 * Compares two char strings passed without explicit lengths and returns an int32_t.
 * NOTE(review): body elided in this view; by its name it presumably orders
 * EBCDIC strings by their ASCII equivalents. Presumably both inputs must be
 * NUL-terminated, since no length is passed - confirm.
 */
U_CAPI int32_t U_EXPORT2 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { … }

/*
 * Maps one char to one char.
 * NOTE(review): body elided in this view; presumably a lookup of c in
 * asciiFromEbcdic[] - confirm.
 */
U_CAPI char U_EXPORT2 uprv_ebcdicToAscii(char c) { … }

/*
 * Maps one char to one char.
 * NOTE(review): body elided in this view; presumably a lookup of c in
 * lowercaseAsciiFromEbcdic[] - confirm.
 */
U_CAPI char U_EXPORT2 uprv_ebcdicToLowercaseAscii(char c) { … }

/*
 * strncpy-shaped helper: writable dst, read-only src, count n; returns a uint8_t pointer.
 * NOTE(review): body elided in this view; by its name it presumably copies
 * while converting ASCII to EBCDIC. Termination/padding behaviour and the
 * returned pointer are not visible here - confirm before use.
 */
U_CAPI uint8_t* U_EXPORT2 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) { … }

/*
 * strncpy-shaped helper: writable dst, read-only src, count n; returns a uint8_t pointer.
 * NOTE(review): body elided in this view; by its name it presumably copies
 * while converting EBCDIC to ASCII. Termination/padding behaviour and the
 * returned pointer are not visible here - confirm before use.
 */
U_CAPI uint8_t* U_EXPORT2 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) { … }