// © 2017 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING #include <cstdlib> #include "unicode/simpleformatter.h" #include "unicode/ures.h" #include "ureslocs.h" #include "charstr.h" #include "uresimp.h" #include "measunit_impl.h" #include "number_longnames.h" #include "number_microprops.h" #include <algorithm> #include "cstring.h" #include "util.h" usingnamespaceicu; usingnamespaceicu::number; usingnamespaceicu::number::impl; namespace { /** * Display Name (this format has no placeholder). * * Used as an index into the LongNameHandler::simpleFormats array. Units * resources cover the normal set of PluralRules keys, as well as `dnam` and * `per` forms. */ constexpr int32_t DNAM_INDEX = …; /** * "per" form (e.g. "{0} per day" is day's "per" form). * * Used as an index into the LongNameHandler::simpleFormats array. Units * resources cover the normal set of PluralRules keys, as well as `dnam` and * `per` forms. */ constexpr int32_t PER_INDEX = …; /** * Gender of the word, in languages with grammatical gender. */ constexpr int32_t GENDER_INDEX = …; // Number of keys in the array populated by PluralTableSink. constexpr int32_t ARRAY_LENGTH = …; // TODO(icu-units#28): load this list from resources, after creating a "&set" // function for use in ldml2icu rules. const int32_t GENDER_COUNT = …; const char *gGenders[GENDER_COUNT] = …; // Converts a UnicodeString to a const char*, either pointing to a string in // gGenders, or pointing to an empty string if an appropriate string was not // found. const char *getGenderString(UnicodeString uGender, UErrorCode status) { … } // Returns the array index that corresponds to the given pluralKeyword. static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { … } // Selects a string out of the `strings` array which corresponds to the // specified plural form, with fallback to the OTHER form. // // The `strings` array must have ARRAY_LENGTH items: one corresponding to each // of the plural forms, plus a display name ("dnam") and a "per" form. static UnicodeString getWithPlural( const UnicodeString* strings, StandardPlural::Form plural, UErrorCode& status) { … } enum PlaceholderPosition { … }; /** * Returns three outputs extracted from pattern. * * @param coreUnit is extracted as per Extract(...) in the spec: * https://unicode.org/reports/tr35/tr35-general.html#compound-units * @param PlaceholderPosition indicates where in the string the placeholder was * found. * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar * contains the space character (if any) that separated the placeholder from * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one * space character is considered. */ void extractCorePattern(const UnicodeString &pattern, UnicodeString &coreUnit, PlaceholderPosition &placeholderPosition, char16_t &joinerChar) { … } ////////////////////////// /// BEGIN DATA LOADING /// ////////////////////////// // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty // string both in case of unknown gender and in case of unknown unit. UnicodeString getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) { … } // Loads data from a resource tree with paths matching // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases // and genders. // // An InflectedPluralSink is configured to load data for a specific gender and // case. It loads all plural forms, because selection between plural forms is // dependent upon the value being formatted. // // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at // units/compound/power2: German has case, French has differences for gender, // but no case. // // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the // tree structures are different. After homogenizing the structures, we may be // able to unify the two classes. // // TODO: Spec violation: expects presence of "count" - does not fallback to an // absent "count"! If this fallback were added, getCompoundValue could be // superseded? class InflectedPluralSink : public ResourceSink { … }; // Fetches localised formatting patterns for the given subKey. See documentation // for InflectedPluralSink for details. // // Data is loaded for the appropriate unit width, with missing data filled in // from unitsShort. void getInflectedMeasureData(StringPiece subKey, const Locale &locale, const UNumberUnitWidth &width, const char *gender, const char *caseVariant, UnicodeString *outArray, UErrorCode &status) { … } class PluralTableSink : public ResourceSink { … }; /** * Populates outArray with `locale`-specific values for `unit` through use of * PluralTableSink. Only the set of basic units are supported! * * Reading from resources *unitsNarrow* and *unitsShort* (for width * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units". * * @param unit must be a built-in unit, i.e. must have a type and subtype, * listed in gTypes and gSubTypes in measunit.cpp. * @param unitDisplayCase the empty string and "nominative" are treated the * same. For other cases, strings for the requested case are used if found. * (For any missing case-specific data, we fall back to nominative.) * @param outArray must be of fixed length ARRAY_LENGTH. */ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, const char *unitDisplayCase, UnicodeString *outArray, UErrorCode &status) { … } // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, UErrorCode &status) { … } UnicodeString getCompoundValue(StringPiece compoundKey, const Locale &locale, const UNumberUnitWidth &width, UErrorCode &status) { … } /** * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml. * * Consider a deriveComponent rule that looks like this: * * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/> * * Instantiating an instance as follows: * * DerivedComponents d(loc, "case", "per"); * * Applying the rule in the XML element above, `d.value0("foo")` will be "foo", * and `d.value1("foo")` will be "nominative". * * The values returned by value0(...) and value1(...) are valid only while the * instance exists. In case of any kind of failure, value0(...) and value1(...) * will return "". */ class DerivedComponents { … }; // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding // testsuite support for testing with synthetic data? /** * Loads and returns the value in rules that look like these: * * <deriveCompound feature="gender" structure="per" value="0"/> * <deriveCompound feature="gender" structure="times" value="1"/> * * Currently a fake example, but spec compliant: * <deriveCompound feature="gender" structure="power" value="feminine"/> * * NOTE: If U_FAILURE(status), returns an empty string. */ UnicodeString getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) { … } // Returns the gender string for structures following these rules: // // <deriveCompound feature="gender" structure="per" value="0"/> // <deriveCompound feature="gender" structure="times" value="1"/> // // Fake example: // <deriveCompound feature="gender" structure="power" value="feminine"/> // // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that // correspond to value="0" and value="1". // // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g. // "prefix" doesn't). UnicodeString getDerivedGender(Locale locale, const char *structure, UnicodeString *data0, UnicodeString *data1, UErrorCode &status) { … } //////////////////////// /// END DATA LOADING /// //////////////////////// // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace const char16_t *trimSpaceChars(const char16_t *s, int32_t &length) { … } /** * Calculates the gender of an arbitrary unit: this is the *second* * implementation of an algorithm to do this: * * Gender is also calculated in "processPatternTimes": that code path is "bottom * up", loading the gender for every component of a compound unit (at the same * time as loading the Long Names formatting patterns), even if the gender is * unneeded, then combining the single units' genders into the compound unit's * gender, according to the rules. This algorithm does a lazier "top-down" * evaluation, starting with the compound unit, calculating which single unit's * gender is needed by breaking it down according to the rules, and then loading * only the gender of the one single unit who's gender is needed. * * For future refactorings: * 1. we could drop processPatternTimes' gender calculation and just call this * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very * same table as the formatting patterns, so loading it then may be * efficient. For other unit widths however, it needs to be explicitly looked * up anyway. * 2. alternatively, if CLDR is providing all the genders we need such that we * don't need to calculate them in ICU anymore, we could drop this function * and keep only processPatternTimes' calculation. (And optimise it a bit?) * * @param locale The desired locale. * @param unit The measure unit to calculate the gender for. * @return The gender string for the unit, or an empty string if unknown or * ungendered. */ UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) { … } void maybeCalculateGender(const Locale &locale, const MeasureUnit &unitRef, UnicodeString *outArray, UErrorCode &status) { … } } // namespace void LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const UNumberUnitWidth &width, const char *unitDisplayCase, const PluralRules *rules, const MicroPropsGenerator *parent, LongNameHandler *fillIn, UErrorCode &status) { … } void LongNameHandler::forArbitraryUnit(const Locale &loc, const MeasureUnit &unitRef, const UNumberUnitWidth &width, const char *unitDisplayCase, LongNameHandler *fillIn, UErrorCode &status) { … } void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, Locale loc, const UNumberUnitWidth &width, const char *caseVariant, UnicodeString *outArray, UErrorCode &status) { … } UnicodeString LongNameHandler::getUnitDisplayName( const Locale& loc, const MeasureUnit& unit, UNumberUnitWidth width, UErrorCode& status) { … } UnicodeString LongNameHandler::getUnitPattern( const Locale& loc, const MeasureUnit& unit, UNumberUnitWidth width, StandardPlural::Form pluralForm, UErrorCode& status) { … } LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { … } void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, UErrorCode &status) { … } void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, Field field, UErrorCode &status) { … } void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { … } const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { … } void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit, const UNumberUnitWidth &width, const char *unitDisplayCase, const PluralRules *rules, const MicroPropsGenerator *parent, MixedUnitLongNameHandler *fillIn, UErrorCode &status) { … } void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { … } const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { … } const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form /*plural*/) const { … } LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVector<MeasureUnit> &units, const UNumberUnitWidth &width, const char *unitDisplayCase, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { … } void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { … } #endif /* #if !UCONFIG_NO_FORMATTING */