nfrule.cpp | Explore in Territory

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*   Copyright (C) 1997-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
******************************************************************************
*   file name:  nfrule.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
* Modification history
* Date        Name      Comments
* 10/11/2001  Doug      Ported from ICU4J
*/

#include "nfrule.h"

#if U_HAVE_RBNF

#include "unicode/localpointer.h"
#include "unicode/rbnf.h"
#include "unicode/tblcoll.h"
#include "unicode/plurfmt.h"
#include "unicode/upluralrules.h"
#include "unicode/coleitr.h"
#include "unicode/uchar.h"
#include "nfrs.h"
#include "nfrlist.h"
#include "nfsubs.h"
#include "patternprops.h"
#include "putilimp.h"

U_NAMESPACE_BEGIN

NFRule::NFRule(const RuleBasedNumberFormat* _rbnf, const UnicodeString &_ruleText, UErrorCode &status)
  : … { … }

NFRule::~NFRule()
{ … }

static const char16_t gLeftBracket = …;
static const char16_t gRightBracket = …;
static const char16_t gColon = …;
static const char16_t gZero = …;
static const char16_t gNine = …;
static const char16_t gSpace = …;
static const char16_t gSlash = …;
static const char16_t gGreaterThan = …;
static const char16_t gLessThan = …;
static const char16_t gComma = …;
static const char16_t gDot = …;
static const char16_t gTick = …;
//static const char16_t gMinus = 0x002d;
static const char16_t gSemicolon = …;
static const char16_t gX = …;

static const char16_t gMinusX[] = …;    /* "-x" */
static const char16_t gInf[] = …; /* "Inf" */
static const char16_t gNaN[] = …; /* "NaN" */

static const char16_t gDollarOpenParenthesis[] = …; /* "$(" */
static const char16_t gClosedParenthesisDollar[] = …; /* ")$" */

static const char16_t gLessLess[] = …;    /* "<<" */
static const char16_t gLessPercent[] = …;    /* "<%" */
static const char16_t gLessHash[] = …;    /* "<#" */
static const char16_t gLessZero[] = …;    /* "<0" */
static const char16_t gGreaterGreater[] = …;    /* ">>" */
static const char16_t gGreaterPercent[] = …;    /* ">%" */
static const char16_t gGreaterHash[] = …;    /* ">#" */
static const char16_t gGreaterZero[] = …;    /* ">0" */
static const char16_t gEqualPercent[] = …;    /* "=%" */
static const char16_t gEqualHash[] = …;    /* "=#" */
static const char16_t gEqualZero[] = …;    /* "=0" */
static const char16_t gGreaterGreaterGreater[] = …; /* ">>>" */

static const char16_t * const RULE_PREFIXES[] = …;

void
NFRule::makeRules(UnicodeString& description,
                  NFRuleSet *owner,
                  const NFRule *predecessor,
                  const RuleBasedNumberFormat *rbnf,
                  NFRuleList& rules,
                  UErrorCode& status)
{ … }

/**
 * This function parses the rule's rule descriptor (i.e., the base
 * value and/or other tokens that precede the rule's rule text
 * in the description) and sets the rule's base value, radix, and
 * exponent according to the descriptor.  (If the description doesn't
 * include a rule descriptor, then this function sets everything to
 * default values and the rule set sets the rule's real base value).
 * @param description The rule's description
 * @return If "description" included a rule descriptor, this is
 * "description" with the descriptor and any trailing whitespace
 * stripped off.  Otherwise; it's "descriptor" unchangd.
 */
void
NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
{ … }

/**
* Searches the rule's rule text for the substitution tokens,
* creates the substitutions, and removes the substitution tokens
* from the rule's rule text.
* @param owner The rule set containing this rule
* @param predecessor The rule preseding this one in "owners" rule list
* @param ownersOwner The RuleBasedFormat that owns this rule
*/
void
NFRule::extractSubstitutions(const NFRuleSet* ruleSet,
                             const UnicodeString &ruleText,
                             const NFRule* predecessor,
                             UErrorCode& status)
{ … }

/**
* Searches the rule's rule text for the first substitution token,
* creates a substitution based on it, and removes the token from
* the rule's rule text.
* @param owner The rule set containing this rule
* @param predecessor The rule preceding this one in the rule set's
* rule list
* @param ownersOwner The RuleBasedNumberFormat that owns this rule
* @return The newly-created substitution.  This is never null; if
* the rule text doesn't contain any substitution tokens, this will
* be a NullSubstitution.
*/
NFSubstitution *
NFRule::extractSubstitution(const NFRuleSet* ruleSet,
                            const NFRule* predecessor,
                            UErrorCode& status)
{ … }

/**
 * Sets the rule's base value, and causes the radix and exponent
 * to be recalculated.  This is used during construction when we
 * don't know the rule's base value until after it's been
 * constructed.  It should be used at any other time.
 * @param The new base value for the rule.
 */
void
NFRule::setBaseValue(int64_t newBaseValue, UErrorCode& status)
{ … }

/**
* This calculates the rule's exponent based on its radix and base
* value.  This will be the highest power the radix can be raised to
* and still produce a result less than or equal to the base value.
*/
int16_t
NFRule::expectedExponent() const
{ … }

/**
 * Searches the rule's rule text for any of the specified strings.
 * @return The index of the first match in the rule's rule text
 * (i.e., the first substring in the rule's rule text that matches
 * _any_ of the strings in "strings").  If none of the strings in
 * "strings" is found in the rule's rule text, returns -1.
 */
int32_t
NFRule::indexOfAnyRulePrefix() const
{ … }

//-----------------------------------------------------------------------
// boilerplate
//-----------------------------------------------------------------------

static UBool
util_equalSubstitutions(const NFSubstitution* sub1, const NFSubstitution* sub2)
{ … }

/**
* Tests two rules for equality.
* @param that The rule to compare this one against
* @return True is the two rules are functionally equivalent
*/
bool
NFRule::operator==(const NFRule& rhs) const
{ … }

/**
* Returns a textual representation of the rule.  This won't
* necessarily be the same as the description that this rule
* was created with, but it will produce the same result.
* @return A textual description of the rule
*/
static void util_append64(UnicodeString& result, int64_t n)
{ … }

void
NFRule::_appendRuleText(UnicodeString& result) const
{ … }

int64_t NFRule::getDivisor() const
{ … }


//-----------------------------------------------------------------------
// formatting
//-----------------------------------------------------------------------

/**
* Formats the number, and inserts the resulting text into
* toInsertInto.
* @param number The number being formatted
* @param toInsertInto The string where the resultant text should
* be inserted
* @param pos The position in toInsertInto where the resultant text
* should be inserted
*/
void
NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const
{ … }

/**
* Formats the number, and inserts the resulting text into
* toInsertInto.
* @param number The number being formatted
* @param toInsertInto The string where the resultant text should
* be inserted
* @param pos The position in toInsertInto where the resultant text
* should be inserted
*/
void
NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const
{ … }

/**
* Used by the owning rule set to determine whether to invoke the
* rollback rule (i.e., whether this rule or the one that precedes
* it in the rule set's list should be used to format the number)
* @param The number being formatted
* @return True if the rule set should use the rule that precedes
* this one in its list; false if it should use this rule
*/
UBool
NFRule::shouldRollBack(int64_t number) const
{ … }

//-----------------------------------------------------------------------
// parsing
//-----------------------------------------------------------------------

/**
* Attempts to parse the string with this rule.
* @param text The string being parsed
* @param parsePosition On entry, the value is ignored and assumed to
* be 0. On exit, this has been updated with the position of the first
* character not consumed by matching the text against this rule
* (if this rule doesn't match the text at all, the parse position
* if left unchanged (presumably at 0) and the function returns
* new Long(0)).
* @param isFractionRule True if this rule is contained within a
* fraction rule set.  This is only used if the rule has no
* substitutions.
* @return If this rule matched the text, this is the rule's base value
* combined appropriately with the results of parsing the substitutions.
* If nothing matched, this is new Long(0) and the parse position is
* left unchanged.  The result will be an instance of Long if the
* result is an integer and Double otherwise.  The result is never null.
*/
#ifdef RBNF_DEBUG
#include <stdio.h>

static void dumpUS(FILE* f, const UnicodeString& us) {
  int len = us.length();
  char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
  if (buf != nullptr) {
	  us.extract(0, len, buf);
	  buf[len] = 0;
	  fprintf(f, "%s", buf);
	  uprv_free(buf); //delete[] buf;
  }
}
#endif
UBool
NFRule::doParse(const UnicodeString& text,
                ParsePosition& parsePosition,
                UBool isFractionRule,
                double upperBound,
                uint32_t nonNumericalExecutedRuleMask,
                Formattable& resVal) const
{ … }

/**
* This function is used by parse() to match the text being parsed
* against a possible prefix string.  This function
* matches characters from the beginning of the string being parsed
* to characters from the prospective prefix.  If they match, pp is
* updated to the first character not matched, and the result is
* the unparsed part of the string.  If they don't match, the whole
* string is returned, and pp is left unchanged.
* @param text The string being parsed
* @param prefix The text to match against
* @param pp On entry, ignored and assumed to be 0.  On exit, points
* to the first unmatched character (assuming the whole prefix matched),
* or is unchanged (if the whole prefix didn't match).
* @return If things match, this is the unparsed part of "text";
* if they didn't match, this is "text".
*/
void
NFRule::stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const
{ … }

/**
* Used by parse() to match a substitution and any following text.
* "text" is searched for instances of "delimiter".  For each instance
* of delimiter, the intervening text is tested to see whether it
* matches the substitution.  The longest match wins.
* @param text The string being parsed
* @param startPos The position in "text" where we should start looking
* for "delimiter".
* @param baseValue A partial parse result (often the rule's base value),
* which is combined with the result from matching the substitution
* @param delimiter The string to search "text" for.
* @param pp Ignored and presumed to be 0 on entry.  If there's a match,
* on exit this will point to the first unmatched character.
* @param sub If we find "delimiter" in "text", this substitution is used
* to match the text between the beginning of the string and the
* position of "delimiter."  (If "delimiter" is the empty string, then
* this function just matches against this substitution and updates
* everything accordingly.)
* @param upperBound When matching the substitution, it will only
* consider rules with base values lower than this value.
* @return If there's a match, this is the result of composing
* baseValue with the result of matching the substitution.  Otherwise,
* this is new Long(0).  It's never null.  If the result is an integer,
* this will be an instance of Long; otherwise, it's an instance of
* Double.
*
* !!! note {dlf} in point of fact, in the java code the caller always converts
* the result to a double, so we might as well return one.
*/
double
NFRule::matchToDelimiter(const UnicodeString& text,
                         int32_t startPos,
                         double _baseValue,
                         const UnicodeString& delimiter,
                         ParsePosition& pp,
                         const NFSubstitution* sub,
                         uint32_t nonNumericalExecutedRuleMask,
                         double upperBound) const
{ … }

/**
* Used by stripPrefix() to match characters.  If lenient parse mode
* is off, this just calls startsWith().  If lenient parse mode is on,
* this function uses CollationElementIterators to match characters in
* the strings (only primary-order differences are significant in
* determining whether there's a match).
* @param str The string being tested
* @param prefix The text we're hoping to see at the beginning
* of "str"
* @return If "prefix" is found at the beginning of "str", this
* is the number of characters in "str" that were matched (this
* isn't necessarily the same as the length of "prefix" when matching
* text with a collator).  If there's no match, this is 0.
*/
int32_t
NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErrorCode& status) const
{ … }

/**
* Searches a string for another string.  If lenient parsing is off,
* this just calls indexOf().  If lenient parsing is on, this function
* uses CollationElementIterator to match characters, and only
* primary-order differences are significant in determining whether
* there's a match.
* @param str The string to search
* @param key The string to search "str" for
* @param startingAt The index into "str" where the search is to
* begin
* @return A two-element array of ints.  Element 0 is the position
* of the match, or -1 if there was no match.  Element 1 is the
* number of characters in "str" that matched (which isn't necessarily
* the same as the length of "key")
*/
int32_t
NFRule::findText(const UnicodeString& str,
                 const UnicodeString& key,
                 int32_t startingAt,
                 int32_t* length) const
{ … }

int32_t
NFRule::findTextLenient(const UnicodeString& str,
                 const UnicodeString& key,
                 int32_t startingAt,
                 int32_t* length) const
{ … }

/**
* Checks to see whether a string consists entirely of ignorable
* characters.
* @param str The string to test.
* @return true if the string is empty of consists entirely of
* characters that the number formatter's collator says are
* ignorable at the primary-order level.  false otherwise.
*/
UBool
NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const
{ … }

void
NFRule::setDecimalFormatSymbols(const DecimalFormatSymbols& newSymbols, UErrorCode& status) { … }

U_NAMESPACE_END

/* U_HAVE_RBNF */
#endif
chromium/third_party/icu/source/i18n/nfrule.cpp