// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** * Copyright (C) 1997-2016, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * * File DECIMFMT.H * * Modification History: * * Date Name Description * 02/19/97 aliu Converted from java. * 03/20/97 clhuang Updated per C++ implementation. * 04/03/97 aliu Rewrote parsing and formatting completely, and * cleaned up and debugged. Actually works now. * 04/17/97 aliu Changed DigitCount to int per code review. * 07/10/97 helena Made ParsePosition a class and get rid of the function * hiding problems. * 09/09/97 aliu Ported over support for exponential formats. * 07/20/98 stephen Changed documentation * 01/30/13 emmons Added Scaling methods ******************************************************************************** */ #ifndef DECIMFMT_H #define DECIMFMT_H #include "unicode/utypes.h" #if U_SHOW_CPLUSPLUS_API /** * \file * \brief C++ API: Compatibility APIs for decimal formatting. */ #if !UCONFIG_NO_FORMATTING #include "unicode/dcfmtsym.h" #include "unicode/numfmt.h" #include "unicode/locid.h" #include "unicode/fpositer.h" #include "unicode/stringpiece.h" #include "unicode/curramt.h" #include "unicode/enumset.h" U_NAMESPACE_BEGIN class CurrencyPluralInfo; class CompactDecimalFormat; namespace number { class LocalizedNumberFormatter; namespace impl { class DecimalQuantity; struct DecimalFormatFields; class UFormattedNumberData; } } namespace numparse { namespace impl { class NumberParserImpl; } } /** * **IMPORTANT:** New users are strongly encouraged to see if * numberformatter.h fits their use case. Although not deprecated, this header * is provided for backwards compatibility only. * * DecimalFormat is a concrete subclass of NumberFormat that formats decimal * numbers. 
It has a variety of features designed to make it possible to parse * and format numbers in any locale, including support for Western, Arabic, or * Indic digits. It also supports different flavors of numbers, including * integers ("123"), fixed-point numbers ("123.4"), scientific notation * ("1.23E4"), percentages ("12%"), and currency amounts ("$123", "USD123", * "123 US dollars"). All of these flavors can be easily localized. * * To obtain a NumberFormat for a specific locale (including the default * locale) call one of NumberFormat's factory methods such as * createInstance(). Do not call the DecimalFormat constructors directly, unless * you know what you are doing, since the NumberFormat factory methods may * return subclasses other than DecimalFormat. * * **Example Usage** * * \code * // Normally we would have a GUI with a menu for this * int32_t locCount; * const Locale* locales = NumberFormat::getAvailableLocales(locCount); * * double myNumber = -1234.56; * UErrorCode success = U_ZERO_ERROR; * NumberFormat* form; * * // Print out a number with the localized number, currency and percent * // format for each locale. 
* UnicodeString countryName; * UnicodeString displayName; * UnicodeString str; * UnicodeString pattern; * Formattable fmtable; * for (int32_t j = 0; j < 3; ++j) { * cout << endl << "FORMAT " << j << endl; * for (int32_t i = 0; i < locCount; ++i) { * if (locales[i].getCountry(countryName).size() == 0) { * // skip language-only * continue; * } * switch (j) { * case 0: * form = NumberFormat::createInstance(locales[i], success ); break; * case 1: * form = NumberFormat::createCurrencyInstance(locales[i], success ); break; * default: * form = NumberFormat::createPercentInstance(locales[i], success ); break; * } * if (form) { * str.remove(); * pattern = ((DecimalFormat*)form)->toPattern(pattern); * cout << locales[i].getDisplayName(displayName) << ": " << pattern; * cout << " -> " << form->format(myNumber,str) << endl; * form->parse(form->format(myNumber,str), fmtable, success); * delete form; * } * } * } * \endcode * * **Another example, using createInstance(style)** * * \code * // Print out a number using the localized number, currency, * // percent, scientific, integer, iso currency, and plural currency * // format for each locale * Locale* locale = new Locale("en", "US"); * double myNumber = 1234.56; * UErrorCode success = U_ZERO_ERROR; * UnicodeString str; * Formattable fmtable; * for (int j=NumberFormat::kNumberStyle; * j<=NumberFormat::kPluralCurrencyStyle; * ++j) { * NumberFormat* form = NumberFormat::createInstance(locale, j, success); * str.remove(); * cout << "format result " << form->format(myNumber, str) << endl; * form->parse(form->format(myNumber, str), fmtable, success); * delete form; * } * \endcode * * * <p><strong>Patterns</strong> * * <p>A DecimalFormat consists of a <em>pattern</em> and a set of * <em>symbols</em>. The pattern may be set directly using * applyPattern(), or indirectly using other API methods which * manipulate aspects of the pattern, such as the minimum number of integer * digits. 
The symbols are stored in a DecimalFormatSymbols * object. When using the NumberFormat factory methods, the * pattern and symbols are read from ICU's locale data. * * <p><strong>Special Pattern Characters</strong> * * <p>Many characters in a pattern are taken literally; they are matched during * parsing and output unchanged during formatting. Special characters, on the * other hand, stand for other characters, strings, or classes of characters. * For example, the '#' character is replaced by a localized digit. Often the * replacement character is the same as the pattern character; in the U.S. locale, * the ',' grouping character is replaced by ','. However, the replacement is * still happening, and if the symbols are modified, the grouping character * changes. Some special characters affect the behavior of the formatter by * their presence; for example, if the percent character is seen, then the * value is multiplied by 100 before being displayed. * * <p>To insert a special character in a pattern as a literal, that is, without * any special meaning, the character must be quoted. There are some exceptions to * this which are noted below. * * <p>The characters listed here are used in non-localized patterns. Localized * patterns use the corresponding characters taken from this formatter's * DecimalFormatSymbols object instead, and these characters lose * their special status. Two exceptions are the currency sign and quote, which * are not localized. * * <table border=0 cellspacing=3 cellpadding=0> * <tr bgcolor="#ccccff"> * <td align=left><strong>Symbol</strong> * <td align=left><strong>Location</strong> * <td align=left><strong>Localized?</strong> * <td align=left><strong>Meaning</strong> * <tr valign=top> * <td><code>0</code> * <td>Number * <td>Yes * <td>Digit * <tr valign=top bgcolor="#eeeeff"> * <td><code>1-9</code> * <td>Number * <td>Yes * <td>'1' through '9' indicate rounding. 
* <tr valign=top> * <td><code>\htmlonly@\endhtmlonly</code> <!--doxygen doesn't like @--> * <td>Number * <td>No * <td>Significant digit * <tr valign=top bgcolor="#eeeeff"> * <td><code>#</code> * <td>Number * <td>Yes * <td>Digit, zero shows as absent * <tr valign=top> * <td><code>.</code> * <td>Number * <td>Yes * <td>Decimal separator or monetary decimal separator * <tr valign=top bgcolor="#eeeeff"> * <td><code>-</code> * <td>Number * <td>Yes * <td>Minus sign * <tr valign=top> * <td><code>,</code> * <td>Number * <td>Yes * <td>Grouping separator * <tr valign=top bgcolor="#eeeeff"> * <td><code>E</code> * <td>Number * <td>Yes * <td>Separates mantissa and exponent in scientific notation. * <em>Need not be quoted in prefix or suffix.</em> * <tr valign=top> * <td><code>+</code> * <td>Exponent * <td>Yes * <td>Prefix positive exponents with localized plus sign. * <em>Need not be quoted in prefix or suffix.</em> * <tr valign=top bgcolor="#eeeeff"> * <td><code>;</code> * <td>Subpattern boundary * <td>Yes * <td>Separates positive and negative subpatterns * <tr valign=top> * <td><code>\%</code> * <td>Prefix or suffix * <td>Yes * <td>Multiply by 100 and show as percentage * <tr valign=top bgcolor="#eeeeff"> * <td><code>\\u2030</code> * <td>Prefix or suffix * <td>Yes * <td>Multiply by 1000 and show as per mille * <tr valign=top> * <td><code>\htmlonly¤\endhtmlonly</code> (<code>\\u00A4</code>) * <td>Prefix or suffix * <td>No * <td>Currency sign, replaced by currency symbol. If * doubled, replaced by international currency symbol. * If tripled, replaced by currency plural names, for example, * "US dollar" or "US dollars" for America. * If present in a pattern, the monetary decimal separator * is used instead of the decimal separator. * <tr valign=top bgcolor="#eeeeff"> * <td><code>'</code> * <td>Prefix or suffix * <td>No * <td>Used to quote special characters in a prefix or suffix, * for example, <code>"'#'#"</code> formats 123 to * <code>"#123"</code>. 
To create a single quote * itself, use two in a row: <code>"# o''clock"</code>. * <tr valign=top> * <td><code>*</code> * <td>Prefix or suffix boundary * <td>Yes * <td>Pad escape, precedes pad character * </table> * * <p>A DecimalFormat pattern contains a positive and negative * subpattern, for example, "#,##0.00;(#,##0.00)". Each subpattern has a * prefix, a numeric part, and a suffix. If there is no explicit negative * subpattern, the negative subpattern is the localized minus sign prefixed to the * positive subpattern. That is, "0.00" alone is equivalent to "0.00;-0.00". If there * is an explicit negative subpattern, it serves only to specify the negative * prefix and suffix; the number of digits, minimal digits, and other * characteristics are ignored in the negative subpattern. That means that * "#,##0.0#;(#)" has precisely the same result as "#,##0.0#;(#,##0.0#)". * * <p>The prefixes, suffixes, and various symbols used for infinity, digits, * thousands separators, decimal separators, etc. may be set to arbitrary * values, and they will appear properly during formatting. However, care must * be taken that the symbols and strings do not conflict, or parsing will be * unreliable. For example, either the positive and negative prefixes or the * suffixes must be distinct for parse() to be able * to distinguish positive from negative values. Another example is that the * decimal separator and thousands separator should be distinct characters, or * parsing will be impossible. * * <p>The <em>grouping separator</em> is a character that separates clusters of * integer digits to make large numbers more legible. It is commonly used for * thousands, but in some locales it separates ten-thousands. The <em>grouping * size</em> is the number of digits between the grouping separators, such as 3 * for "100,000,000" or 4 for "1 0000 0000". 
There are actually two different * grouping sizes: One used for the least significant integer digits, the * <em>primary grouping size</em>, and one used for all others, the * <em>secondary grouping size</em>. In most locales these are the same, but * sometimes they are different. For example, if the primary grouping interval * is 3, and the secondary is 2, then this corresponds to the pattern * "#,##,##0", and the number 123456789 is formatted as "12,34,56,789". If a * pattern contains multiple grouping separators, the interval between the last * one and the end of the integer defines the primary grouping size, and the * interval between the last two defines the secondary grouping size. All others * are ignored, so "#,##,###,####" == "###,###,####" == "##,#,###,####". * * <p>Illegal patterns, such as "#.#.#" or "#.###,###", will cause * DecimalFormat to set a failing UErrorCode. * * <p><strong>Pattern BNF</strong> * * <pre> * pattern := subpattern (';' subpattern)? * subpattern := prefix? number exponent? suffix? * number := (integer ('.' fraction)?) | sigDigits * prefix := '\\u0000'..'\\uFFFD' - specialCharacters * suffix := '\\u0000'..'\\uFFFD' - specialCharacters * integer := '#'* '0'* '0' * fraction := '0'* '#'* * sigDigits := '#'* '@' '@'* '#'* * exponent := 'E' '+'? '0'* '0' * padSpec := '*' padChar * padChar := '\\u0000'..'\\uFFFD' - quote * * Notation: * X* 0 or more instances of X * X? 0 or 1 instances of X * X|Y either X or Y * C..D any character from C up to D, inclusive * S-T characters in S, except those in T * </pre> * The first subpattern is for positive numbers. The second (optional) * subpattern is for negative numbers. * * <p>Not indicated in the BNF syntax above: * * <ul><li>The grouping separator ',' can occur inside the integer and * sigDigits elements, between any two pattern characters of that * element, as long as the integer or sigDigits element is not * followed by the exponent element. 
* * <li>Two grouping intervals are recognized: That between the * decimal point and the first grouping symbol, and that * between the first and second grouping symbols. These * intervals are identical in most locales, but in some * locales they differ. For example, the pattern * "#,##,###" formats the number 123456789 as * "12,34,56,789".</li> * * <li>The pad specifier <code>padSpec</code> may appear before the prefix, * after the prefix, before the suffix, after the suffix, or not at all. * * <li>In place of '0', the digits '1' through '9' may be used to * indicate a rounding increment. * </ul> * * <p><strong>Parsing</strong> * * <p>DecimalFormat parses all Unicode characters that represent * decimal digits, as defined by u_charDigitValue(). In addition, * DecimalFormat also recognizes as digits the ten consecutive * characters starting with the localized zero digit defined in the * DecimalFormatSymbols object. During formatting, the * DecimalFormatSymbols-based digits are output. * * <p>During parsing, grouping separators are ignored if in lenient mode; * otherwise, if present, they must be in appropriate positions. * * <p>For currency parsing, the formatter is able to parse every currency * format style, no matter which style the formatter is constructed with. * For example, a formatter instance obtained from * NumberFormat::createInstance(locale, UNUM_CURRENCY, status) can parse * formats such as "USD1.00" and "3.00 US dollars". * * <p>If parse(UnicodeString&,Formattable&,ParsePosition&) * fails to parse a string, it leaves the parse position unchanged. * The convenience method parse(UnicodeString&,Formattable&,UErrorCode&) * indicates parse failure by setting a failing * UErrorCode. * * <p><strong>Formatting</strong> * * <p>Formatting is guided by several parameters, all of which can be * specified either using a pattern or using the API. 
The following * description applies to formats that do not use <a href="#sci">scientific * notation</a> or <a href="#sigdig">significant digits</a>. * * <ul><li>If the number of actual integer digits exceeds the * <em>maximum integer digits</em>, then only the least significant * digits are shown. For example, 1997 is formatted as "97" if the * maximum integer digits is set to 2. * * <li>If the number of actual integer digits is less than the * <em>minimum integer digits</em>, then leading zeros are added. For * example, 1997 is formatted as "01997" if the minimum integer digits * is set to 5. * * <li>If the number of actual fraction digits exceeds the <em>maximum * fraction digits</em>, then rounding is performed to the * maximum fraction digits. For example, 0.125 is formatted as "0.12" * if the maximum fraction digits is 2. This behavior can be changed * by specifying a rounding increment and/or a rounding mode. * * <li>If the number of actual fraction digits is less than the * <em>minimum fraction digits</em>, then trailing zeros are added. * For example, 0.125 is formatted as "0.1250" if the minimum fraction * digits is set to 4. * * <li>Trailing fractional zeros are not displayed if they occur * <em>j</em> positions after the decimal, where <em>j</em> is less * than the maximum fraction digits. For example, 0.10004 is * formatted as "0.1" if the maximum fraction digits is four or less. * </ul> * * <p><strong>Special Values</strong> * * <p><code>NaN</code> is represented as a single character, typically * <code>\\uFFFD</code>. This character is determined by the * DecimalFormatSymbols object. This is the only value for which * the prefixes and suffixes are not used. * * <p>Infinity is represented as a single character, typically * <code>\\u221E</code>, with the positive or negative prefixes and suffixes * applied. The infinity character is determined by the * DecimalFormatSymbols object. 
* * <a name="sci"><strong>Scientific Notation</strong></a> * * <p>Numbers in scientific notation are expressed as the product of a mantissa * and a power of ten, for example, 1234 can be expressed as 1.234 x 10<sup>3</sup>. The * mantissa is typically in the half-open interval [1.0, 10.0) or sometimes [0.0, 1.0), * but it need not be. DecimalFormat supports arbitrary mantissas. * DecimalFormat can be instructed to use scientific * notation through the API or through the pattern. In a pattern, the exponent * character immediately followed by one or more digit characters indicates * scientific notation. Example: "0.###E0" formats the number 1234 as * "1.234E3". * * <ul> * <li>The number of digit characters after the exponent character gives the * minimum exponent digit count. There is no maximum. Negative exponents are * formatted using the localized minus sign, <em>not</em> the prefix and suffix * from the pattern. This allows patterns such as "0.###E0 m/s". To prefix * positive exponents with a localized plus sign, specify '+' between the * exponent and the digits: "0.###E+0" will produce formats "1E+1", "1E+0", * "1E-1", etc. (In localized patterns, use the localized plus sign rather than * '+'.) * * <li>The minimum number of integer digits is achieved by adjusting the * exponent. Example: 0.00123 formatted with "00.###E0" yields "12.3E-4". This * only happens if there is no maximum number of integer digits. If there is a * maximum, then the minimum number of integer digits is fixed at one. * * <li>The maximum number of integer digits, if present, specifies the exponent * grouping. The most common use of this is to generate <em>engineering * notation</em>, in which the exponent is a multiple of three, e.g., * "##0.###E0". The number 12345 is formatted using "##0.####E0" as "12.345E3". * * <li>When using scientific notation, the formatter controls the * digit counts using significant digits logic. 
The maximum number of * significant digits limits the total number of integer and fraction * digits that will be shown in the mantissa; it does not affect * parsing. For example, 12345 formatted with "##0.##E0" is "12.3E3". * See the section on significant digits for more details. * * <li>The number of significant digits shown is determined as * follows: If areSignificantDigitsUsed() returns false, then the * minimum number of significant digits shown is one, and the maximum * number of significant digits shown is the sum of the <em>minimum * integer</em> and <em>maximum fraction</em> digits, and is * unaffected by the maximum integer digits. If this sum is zero, * then all significant digits are shown. If * areSignificantDigitsUsed() returns true, then the significant digit * counts are specified by getMinimumSignificantDigits() and * getMaximumSignificantDigits(). In this case, the number of * integer digits is fixed at one, and there is no exponent grouping. * * <li>Exponential patterns may not contain grouping separators. * </ul> * * <a name="sigdig"><strong>Significant Digits</strong></a> * * <code>DecimalFormat</code> has two ways of controlling how many * digits are shown: (a) significant digit counts, or (b) integer and * fraction digit counts. Integer and fraction digit counts are * described above. When a formatter is using significant digit * counts, the number of integer and fraction digits is not specified * directly, and the formatter settings for these counts are ignored. * Instead, the formatter uses however many integer and fraction * digits are required to display the specified number of significant * digits. 
Examples: * * <table border=0 cellspacing=3 cellpadding=0> * <tr bgcolor="#ccccff"> * <td align=left>Pattern * <td align=left>Minimum significant digits * <td align=left>Maximum significant digits * <td align=left>Number * <td align=left>Output of format() * <tr valign=top> * <td><code>\@\@\@</code> * <td>3 * <td>3 * <td>12345 * <td><code>12300</code> * <tr valign=top bgcolor="#eeeeff"> * <td><code>\@\@\@</code> * <td>3 * <td>3 * <td>0.12345 * <td><code>0.123</code> * <tr valign=top> * <td><code>\@\@##</code> * <td>2 * <td>4 * <td>3.14159 * <td><code>3.142</code> * <tr valign=top bgcolor="#eeeeff"> * <td><code>\@\@##</code> * <td>2 * <td>4 * <td>1.23004 * <td><code>1.23</code> * </table> * * <ul> * <li>Significant digit counts may be expressed using patterns that * specify a minimum and maximum number of significant digits. These * are indicated by the <code>'@'</code> and <code>'#'</code> * characters. The minimum number of significant digits is the number * of <code>'@'</code> characters. The maximum number of significant * digits is the number of <code>'@'</code> characters plus the number * of <code>'#'</code> characters following on the right. For * example, the pattern <code>"@@@"</code> indicates exactly 3 * significant digits. The pattern <code>"@##"</code> indicates from * 1 to 3 significant digits. Trailing zero digits to the right of * the decimal separator are suppressed after the minimum number of * significant digits have been shown. For example, the pattern * <code>"@##"</code> formats the number 0.1203 as * <code>"0.12"</code>. * * <li>If a pattern uses significant digits, it may not contain a * decimal separator, nor the <code>'0'</code> pattern character. * Patterns such as <code>"@00"</code> or <code>"@.###"</code> are * disallowed. * * <li>Any number of <code>'#'</code> characters may be prepended to * the left of the leftmost <code>'@'</code> character. 
These have no * effect on the minimum and maximum significant digits counts, but * may be used to position grouping separators. For example, * <code>"#,#@#"</code> indicates a minimum of one significant digit, * a maximum of two significant digits, and a grouping size of three. * * <li>In order to enable significant digits formatting, use a pattern * containing the <code>'@'</code> pattern character. Alternatively, * call setSignificantDigitsUsed(true). * * <li>In order to disable significant digits formatting, use a * pattern that does not contain the <code>'@'</code> pattern * character. Alternatively, call setSignificantDigitsUsed(false). * * <li>The number of significant digits has no effect on parsing. * * <li>Significant digits may be used together with exponential notation. Such * patterns are equivalent to a normal exponential pattern with a minimum and * maximum integer digit count of one, a minimum fraction digit count of * <code>getMinimumSignificantDigits() - 1</code>, and a maximum fraction digit * count of <code>getMaximumSignificantDigits() - 1</code>. For example, the * pattern <code>"@@###E0"</code> is equivalent to <code>"0.0###E0"</code>. * * <li>If significant digits are in use, then the integer and fraction * digit counts, as set via the API, are ignored. If significant * digits are not in use, then the significant digit counts, as set via * the API, are ignored. * * </ul> * * <p><strong>Padding</strong> * * <p>DecimalFormat supports padding the result of * format() to a specific width. Padding may be specified either * through the API or through the pattern syntax. In a pattern the pad escape * character, followed by a single pad character, causes padding to be parsed * and formatted. The pad escape character is '*' in unlocalized patterns, and * can be localized using DecimalFormatSymbols::setSymbol() with a * DecimalFormatSymbols::kPadEscapeSymbol * selector. 
For example, <code>"$*x#,##0.00"</code> formats 123 to * <code>"$xx123.00"</code>, and 1234 to <code>"$1,234.00"</code>. * * <ul> * <li>When padding is in effect, the width of the positive subpattern, * including prefix and suffix, determines the format width. For example, in * the pattern <code>"* #0 o''clock"</code>, the format width is 10. * * <li>The width is counted in 16-bit code units (char16_ts). * * <li>Some parameters which usually do not matter have meaning when padding is * used, because the pattern width is significant with padding. In the pattern * "* ##,##,#,##0.##", the format width is 14. The initial characters "##,##," * do not affect the grouping size or maximum integer digits, but they do affect * the format width. * * <li>Padding may be inserted at one of four locations: before the prefix, * after the prefix, before the suffix, or after the suffix. If padding is * specified in any other location, applyPattern() * sets a failing UErrorCode. If there is no prefix, * before the prefix and after the prefix are equivalent, likewise for the * suffix. * * <li>When specified in a pattern, the 32-bit code point immediately * following the pad escape is the pad character. This may be any character, * including a special pattern character. That is, the pad escape * <em>escapes</em> the following character. If there is no character after * the pad escape, then the pattern is illegal. * * </ul> * * <p><strong>Rounding</strong> * * <p>DecimalFormat supports rounding to a specific increment. For * example, 1230 rounded to the nearest 50 is 1250. 1.234 rounded to the * nearest 0.65 is 1.3. The rounding increment may be specified through the API * or in a pattern. To specify a rounding increment in a pattern, include the * increment in the pattern itself. "#,#50" specifies a rounding increment of * 50. "#,##0.05" specifies a rounding increment of 0.05. * * <p>In the absence of an explicit rounding increment numbers are * rounded to their formatted width. 
* * <ul> * <li>Rounding only affects the string produced by formatting. It does * not affect parsing or change any numerical values. * * <li>A <em>rounding mode</em> determines how values are rounded; see * DecimalFormat::ERoundingMode. The default rounding mode is * DecimalFormat::kRoundHalfEven. The rounding mode can only be set * through the API; it cannot be set with a pattern. * * <li>Some locales use rounding in their currency formats to reflect the * smallest currency denomination. * * <li>In a pattern, digits '1' through '9' specify rounding, but otherwise * behave identically to digit '0'. * </ul> * * <p><strong>Synchronization</strong> * * <p>DecimalFormat objects are not synchronized. Multiple * threads should not access one formatter concurrently. * * <p><strong>Subclassing</strong> * * <p><em>User subclasses are not supported.</em> While clients may write * subclasses, such code will not necessarily work and will not be * guaranteed to work stably from release to release. */ class U_I18N_API DecimalFormat : public NumberFormat { … }; U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ #endif /* U_SHOW_CPLUSPLUS_API */ #endif // DECIMFMT_H //eof