// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1999-2009, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
 *
 *
 *   ucnv_err.h:
 */

/**
 * \file
 * \brief C API: UConverter predefined error callbacks
 *
 *  <h2>Error Behaviour Functions</h2>
 *  Defines some error behaviour functions called by ucnv_{from,to}Unicode.
 *  These are provided as part of ICU and many are stable, but they
 *  can also be considered only as an example of what can be done with
 *  callbacks.  You may of course write your own.
 *
 *  If you want to write your own, you may also find the functions from
 *  ucnv_cb.h useful when writing your own callbacks.
 *
 *  These functions, although public, should NEVER be called directly.
 *  They should be used as parameters to the ucnv_setFromUCallBack
 *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
 *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
 *
 *  usage example:  'STOP' doesn't need any context, but newContext
 *    could be set to something other than 'NULL' if needed. The available
 *    contexts in this header can modify the default behavior of the callback.
 *
 *  \code
 *  UErrorCode err = U_ZERO_ERROR;
 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
 *  const void *oldContext;
 *  UConverterFromUCallback oldAction;
 *
 *
 *  if (U_SUCCESS(err))
 *  {
 *      ucnv_setFromUCallBack(myConverter,
 *                       UCNV_FROM_U_CALLBACK_STOP,
 *                       NULL,
 *                       &oldAction,
 *                       &oldContext,
 *                       &err);
 *  }
 *  \endcode
 *
 *  The code above tells "myConverter" to stop when it encounters
 *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
 *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
 *  and ucnv_setToUCallBack would need to be called in order to change
 *  that behavior too.
 *
 *  Here is an example with a context:
 *
 *  \code
 *  UErrorCode err = U_ZERO_ERROR;
 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
 *  const void *oldContext;
 *  UConverterToUCallback oldAction;
 *
 *
 *  if (U_SUCCESS(err))
 *  {
 *      ucnv_setToUCallBack(myConverter,
 *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
 *                       UCNV_SUB_STOP_ON_ILLEGAL,
 *                       &oldAction,
 *                       &oldContext,
 *                       &err);
 *  }
 *  \endcode
 *
 *  The code above tells "myConverter" to stop when it encounters
 *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
 *  Codepage -> Unicode. Any unmapped but legal characters will be
 *  replaced with the default substitution character.
 */

#ifndef UCNV_ERR_H
#define UCNV_ERR_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

/** Forward declaring the UConverter structure. @stable ICU 2.0 */
struct UConverter;

/** @stable ICU 2.0 */
UConverter;

/**
 * FROM_U, TO_U context options for sub callback
 * @stable ICU 2.0
 */
#define UCNV_SUB_STOP_ON_ILLEGAL …

/**
 * FROM_U, TO_U context options for skip callback
 * @stable ICU 2.0
 */
#define UCNV_SKIP_STOP_ON_ILLEGAL …

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_ICU …

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_JAVA …

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
 * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_C …

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_XML_DEC …

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_XML_HEX …

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_UNICODE …

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
 * a backslash, 1..6 hex digits, and a space)
 * @stable ICU 4.0
 */
#define UCNV_ESCAPE_CSS2 …

/**
 * The process condition code to be used with the callbacks.
 * Codes which are greater than UCNV_IRREGULAR should be
 * passed on to any chained callbacks.
 * @stable ICU 2.0
 */
UConverterCallbackReason;

/**
 * The structure for the fromUnicode callback function parameter.
 * @stable ICU 2.0
 */
UConverterFromUnicodeArgs;

/**
 * The structure for the toUnicode callback function parameter.
 * @stable ICU 2.0
 */
UConverterToUnicodeArgs;

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 * returning the error code back to the caller immediately.
 *
 * @param context Pointer to the callback's private data
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err This should always be set to a failure status prior to calling.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 * returning the error code back to the caller immediately.
 *
 * @param context Pointer to the callback's private data
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err This should always be set to a failure status prior to calling.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 * simply ignoring those characters.
 *
 * @param context  The function currently recognizes the callback options:
 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Skips any ILLEGAL_SEQUENCE
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
 * UNASSIGNED_SEQUENCE depending on context parameter, with the
 * current substitution string for the converter. This is the default
 * callback.
 *
 * @param context The function currently recognizes the callback options:
 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @see ucnv_setSubstChars
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
 * hexadecimal representation of the illegal codepoints
 *
 * @param context The function currently recognizes the callback options:
 *        <ul>
 *        <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format  %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
 *          In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a 32-bit value encoded as a
 *          surrogate pair, e.g. U+23456) is represented as
 *          %UD84D%UDC56</li>
 *        <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format  \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
 *          In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a 32-bit value encoded as a
 *          surrogate pair, e.g. U+23456) is represented as
 *          \\uD84D\\uDC56</li>
 *        <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format  \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
 *          In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a 32-bit value encoded as a
 *          surrogate pair, e.g. U+23456) is represented as
 *          \\U00023456</li>
 *        <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
 *          representation in the format \htmlonly&#DDDDDDDD; (e.g. "&#65534;&#172;&#51454;")\endhtmlonly.
 *          In the Event the converter doesn't support the characters {&,#}[0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a 32-bit value encoded as a
 *          surrogate pair, e.g. U+23456) is represented as
 *          &#144470; and Zero padding is ignored.</li>
 *        <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format \htmlonly&#xXXXX; (e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly.
 *          In the Event the converter doesn't support the characters {&,#,x}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a 32-bit value encoded as a
 *          surrogate pair, e.g. U+23456) is represented as
 *          \htmlonly&#x23456;\endhtmlonly</li>
 *        </ul>
 *        See also UCNV_ESCAPE_UNICODE and UCNV_ESCAPE_CSS2, which this header
 *        documents as FROM_U_CALLBACK_ESCAPE context options.
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 * simply ignoring those characters.
 *
 * @param context  The function currently recognizes the callback options:
 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Skips any ILLEGAL_SEQUENCE
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
 * UNASSIGNED_SEQUENCE depending on context parameter, with the
 * Unicode substitution character, U+FFFD.
 *
 * @param context  The function currently recognizes the callback options:
 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
 * hexadecimal representation of the illegal bytes
 *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
 *
 * @param context This function currently recognizes the callback options:
 *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
 *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

#endif

#endif

/*UCNV_ERR_H*/