// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2011-2013, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: messagepattern.h * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2011mar14 * created by: Markus W. Scherer */ #ifndef __MESSAGEPATTERN_H__ #define __MESSAGEPATTERN_H__ /** * \file * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. */ #include "unicode/utypes.h" #if U_SHOW_CPLUSPLUS_API #if !UCONFIG_NO_FORMATTING #include "unicode/parseerr.h" #include "unicode/unistr.h" /** * Mode for when an apostrophe starts quoted literal text for MessageFormat output. * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). * <p> * A pair of adjacent apostrophes always results in a single apostrophe in the output, * even when the pair is between two single, text-quoting apostrophes. * <p> * The following table shows examples of desired MessageFormat::format() output * with the pattern strings that yield that output. * <p> * <table> * <tr> * <th>Desired output</th> * <th>DOUBLE_OPTIONAL</th> * <th>DOUBLE_REQUIRED</th> * </tr> * <tr> * <td>I see {many}</td> * <td>I see '{many}'</td> * <td>(same)</td> * </tr> * <tr> * <td>I said {'Wow!'}</td> * <td>I said '{''Wow!''}'</td> * <td>(same)</td> * </tr> * <tr> * <td>I don't know</td> * <td>I don't know OR<br> I don''t know</td> * <td>I don''t know</td> * </tr> * </table> * @stable ICU 4.8 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE */ enum UMessagePatternApostropheMode { … }; /** * @stable ICU 4.8 */ UMessagePatternApostropheMode; /** * MessagePattern::Part type constants. 
* @stable ICU 4.8 */ enum UMessagePatternPartType { … }; /** * @stable ICU 4.8 */ UMessagePatternPartType; /** * Argument type constants. * Returned by Part::getArgType() for ARG_START and ARG_LIMIT parts. * * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, * with a nesting level one greater than the surrounding message. * @stable ICU 4.8 */ enum UMessagePatternArgType { … }; /** * @stable ICU 4.8 */ UMessagePatternArgType; /** * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE * Returns true if the argument type has a plural style part sequence and semantics, * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. * @stable ICU 50 */ #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) … enum { … }; /** * Special value that is returned by getNumericValue(Part) when no * numeric value is defined for a part. * @see MessagePattern::getNumericValue() * @stable ICU 4.8 */ #define UMSGPAT_NO_NUMERIC_VALUE … U_NAMESPACE_BEGIN class MessagePatternDoubleList; class MessagePatternPartsList; /** * Parses and represents ICU MessageFormat patterns. * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. * Used in the implementations of those classes as well as in tools * for message validation, translation and format conversion. * <p> * The parser handles all syntax relevant for identifying message arguments. * This includes "complex" arguments whose style strings contain * nested MessageFormat pattern substrings. * For "simple" arguments (with no nested MessageFormat pattern substrings), * the argument style is not parsed any further. * <p> * The parser handles named and numbered message arguments and allows both in one message. * <p> * Once a pattern has been parsed successfully, iterate through the parsed data * with countParts(), getPart() and related methods. 
* <p> * The data logically represents a parse tree, but is stored and accessed * as a list of "parts" for fast and simple parsing and to minimize object allocations. * Arguments and nested messages are best handled via recursion. * For every _START "part", MessagePattern::getLimitPartIndex() efficiently returns * the index of the corresponding _LIMIT "part". * <p> * List of "parts": * <pre> * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT * argument = noneArg | simpleArg | complexArg * complexArg = choiceArg | pluralArg | selectArg * * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT * * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ * selectStyle = (ARG_SELECTOR message)+ * </pre> * <ul> * <li>Literal output text is not represented directly by "parts" but accessed * between parts of a message, from one part's getLimit() to the next part's getIndex(). * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or * the less-than-or-equal-to sign (U+2264). * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. * The optional numeric Part between each (ARG_SELECTOR, message) pair * is the value of an explicit-number selector like "=2", * otherwise the selector is a non-numeric identifier. * <li>Argument types for which UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE() is true * (for example SELECTORDINAL) have the same plural style part sequence as the pluralArg. * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. * </ul> * <p> * This class is not intended for public subclassing. 
* * @stable ICU 4.8 */ class U_COMMON_API MessagePattern : public UObject { … }; U_NAMESPACE_END #endif // !UCONFIG_NO_FORMATTING #endif /* U_SHOW_CPLUSPLUS_API */ #endif // __MESSAGEPATTERN_H__