// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (c) 2002-2014, International Business Machines Corporation * and others. All Rights Reserved. ********************************************************************** * Date Name Description * 01/14/2002 aliu Creation. ********************************************************************** */ #include "unicode/utypes.h" #if !UCONFIG_NO_TRANSLITERATION #include "tridpars.h" #include "hash.h" #include "mutex.h" #include "transreg.h" #include "uassert.h" #include "ucln_in.h" #include "unicode/parsepos.h" #include "unicode/translit.h" #include "unicode/uchar.h" #include "unicode/uniset.h" #include "unicode/unistr.h" #include "unicode/utrans.h" #include "util.h" #include "uvector.h" U_NAMESPACE_BEGIN static const char16_t ID_DELIM = …; // ; static const char16_t TARGET_SEP = …; // - static const char16_t VARIANT_SEP = …; // / static const char16_t OPEN_REV = …; // ( static const char16_t CLOSE_REV = …; // ) //static const char16_t EMPTY[] = {0}; // "" static const char16_t ANY[] = …; // "Any" static const char16_t ANY_NULL[] = …; // "Any-Null" static const int32_t FORWARD = …; static const int32_t REVERSE = …; /** * Table backing the special-inverse relationships described at * registerSpecialInverse(); access is controlled by LOCK. * NOTE(review): the initializer and all users are not visible here; * presumably created by init() through gSpecialInversesInitOnce -- confirm. */ static Hashtable* SPECIAL_INVERSES = …; static UInitOnce gSpecialInversesInitOnce { … }; /** * The mutex controlling access to SPECIAL_INVERSES */ static UMutex LOCK; /** * Construct a Specs from the pieces of a parsed ID. * NOTE(review): body not visible here; presumably s, t and v are the * source, target and variant, sawS records whether a source was seen, * and f is the filter pattern -- confirm against tridpars.h. */ TransliteratorIDParser::Specs::Specs(const UnicodeString& s, const UnicodeString& t, const UnicodeString& v, UBool sawS, const UnicodeString& f) { … } /** * Construct a SingleID. * NOTE(review): body not visible here; presumably c is the canonical ID, * b the basic ID and f the filter (the two-argument overload below omits * the filter) -- confirm against tridpars.h. */ TransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b, const UnicodeString& f) { … } TransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b) { … } Transliterator* TransliteratorIDParser::SingleID::createInstance() { … } /** * Parse a single ID, that is, an ID of the general form * "[f1] 
s1-t1/v1 ([f2] s2-t3/v2)", with the parenthesized element * optional, the filters optional, and the variants optional. * @param id the id to be parsed * @param pos INPUT-OUTPUT parameter. On input, the position of * the first character to parse. On output, the position after * the last character parsed. * @param dir the direction. If the direction is REVERSE then the * SingleID is constructed for the reverse direction. * @return a SingleID object or nullptr */ /* NOTE(review): the 'status' parameter is not described in the comment above; presumably an ICU in/out error code -- confirm against the function body. */ TransliteratorIDParser::SingleID* TransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos, int32_t dir, UErrorCode& status) { … } /** * Parse a filter ID, that is, an ID of the general form * "[f1] s1-t1/v1", with the filters optional, and the variants optional. * @param id the id to be parsed * @param pos INPUT-OUTPUT parameter. On input, the position of * the first character to parse. On output, the position after * the last character parsed. * @return a SingleID object or nullptr if the parse fails */ TransliteratorIDParser::SingleID* TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) { … } /** * Parse a global filter of the form "[f]" or "([f])", depending * on 'withParens'. * @param id the pattern to parse * @param pos INPUT-OUTPUT parameter. On input, the position of * the first character to parse. On output, the position after * the last character parsed. * @param dir the direction. * @param withParens INPUT-OUTPUT parameter. On entry, if * withParens is 0, then parens are disallowed. If it is 1, * then parens are required. If it is -1, then parens are * optional, and the return result will be set to 0 or 1. * @param canonID OUTPUT parameter. The pattern for the filter is * added to the canonID, either at the end, if dir is FORWARD, or * at the start, if dir is REVERSE. The pattern will be enclosed * in parentheses if appropriate, and will be suffixed with an * ID_DELIM character. May be nullptr. * @return a UnicodeSet object or nullptr. 
A non-nullptr results * indicates a successful parse, regardless of whether the filter * applies to the given direction. The caller should discard it * if withParens != (dir == REVERSE). */ UnicodeSet* TransliteratorIDParser::parseGlobalFilter(const UnicodeString& id, int32_t& pos, int32_t dir, int32_t& withParens, UnicodeString* canonID) { … } /* C-linkage (U_CDECL) callbacks taking an untyped object pointer. NOTE(review): bodies not visible here; judging by the names these are presumably element deleters for SingleID and Transliterator objects held in a UVector -- confirm. */ U_CDECL_BEGIN static void U_CALLCONV _deleteSingleID(void* obj) { … } static void U_CALLCONV _deleteTransliteratorTrIDPars(void* obj) { … } U_CDECL_END /** * Parse a compound ID, consisting of an optional forward global * filter, a separator, one or more single IDs delimited by * separators, and an optional reverse global filter. The * separator is a semicolon. The global filters are UnicodeSet * patterns. The reverse global filter must be enclosed in * parentheses. * @param id the pattern to parse * @param dir the direction. * @param canonID OUTPUT parameter that receives the canonical ID, * consisting of canonical IDs for all elements, as returned by * parseSingleID(), separated by semicolons. Previous contents * are discarded. * @param list OUTPUT parameter that receives a list of SingleID * objects representing the parsed IDs. Previous contents are * discarded. * @param globalFilter OUTPUT parameter that receives a pointer to * a newly created global filter for this ID in this direction, or * nullptr if there is none. * @return true if the parse succeeds, that is, if the entire * id is consumed without syntax error. */ UBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t dir, UnicodeString& canonID, UVector& list, UnicodeSet*& globalFilter) { … } /** * Convert the elements of the 'list' vector, which are SingleID * objects, into actual Transliterator objects. In the course of * this, some (or all) entries may be removed. If all entries * are removed, the Null transliterator (presumably "Any-Null"; cf. ANY_NULL) will be added. 
* * Delete entries with empty basicIDs; these are generated by * elements like "(A)" in the forward direction, or "A()" in * the reverse. THIS MAY RESULT IN AN EMPTY VECTOR. Convert * SingleID entries to actual transliterators. * * @param list vector of SingleID objects. On exit, vector * of one or more Transliterators. * @return new value of insertIndex. The index will shift if * there are empty items, like "(Lower)", with indices less than * insertIndex. */ /* NOTE(review): the "@return new value of insertIndex" text above is stale -- this function returns void and has no insertIndex parameter. The 'ec' parameter is not described; presumably an ICU in/out error code -- confirm against the function body. */ void TransliteratorIDParser::instantiateList(UVector& list, UErrorCode& ec) { … } /** * Parse an ID into pieces. Take IDs of the form T, T/V, S-T, * S-T/V, or S/V-T. If the source is missing, return a source of * ANY. * @param id the id string, in any of several forms * @param source OUTPUT parameter that receives the source. If the * source is not present in id, ANY will be given as the source. * @param target OUTPUT parameter that receives the target. The * target may be empty if the id is not well-formed. * @param variant OUTPUT parameter that receives the variant. The * variant may be empty. * @param isSourcePresent OUTPUT parameter that reports whether the * source was present in id (presumably false when ANY was * substituted and true otherwise; the body is not visible here). */ void TransliteratorIDParser::IDtoSTV(const UnicodeString& id, UnicodeString& source, UnicodeString& target, UnicodeString& variant, UBool& isSourcePresent) { … } /** * Given source, target, and variant strings, concatenate them into a * full ID. If the source is empty, then "Any" will be used for the * source, so the ID will always be of the form s-t/v or s-t. */ void TransliteratorIDParser::STVtoID(const UnicodeString& source, const UnicodeString& target, const UnicodeString& variant, UnicodeString& id) { … } /** * Register two targets as being inverses of one another. For * example, calling registerSpecialInverse("NFC", "NFD", true) causes * Transliterator to form the following inverse relationships: * * <pre>NFC => NFD * Any-NFC => Any-NFD * NFD => NFC * Any-NFD => Any-NFC</pre> * * (Without the special inverse registration, the inverse of NFC * would be NFC-Any.) 
Note that NFD is shorthand for Any-NFD, but * that the presence or absence of "Any-" is preserved. * * <p>The relationship is symmetrical; registering (a, b) is * equivalent to registering (b, a). * * <p>The relevant IDs must still be registered separately as * factories or classes. * * <p>Only the targets are specified. Special inverses always * have the form Any-Target1 <=> Any-Target2. The target should * have canonical casing (the casing desired to be produced when * an inverse is formed) and should contain no whitespace or other * extraneous characters. * * @param target the target against which to register the inverse * @param inverseTarget the inverse of target, that is * Any-target.getInverse() => Any-inverseTarget * @param bidirectional if true, register the reverse relation * as well, that is, Any-inverseTarget.getInverse() => Any-target */ /* NOTE(review): the 'status' parameter is not described in the comment above; presumably an ICU in/out error code -- confirm against the function body. */ void TransliteratorIDParser::registerSpecialInverse(const UnicodeString& target, const UnicodeString& inverseTarget, UBool bidirectional, UErrorCode &status) { … } //---------------------------------------------------------------- // Private implementation //---------------------------------------------------------------- /** * Parse an ID into component pieces. Take IDs of the form T, * T/V, S-T, S-T/V, or S/V-T. If the source is missing, return a * source of ANY. * @param id the id string, in any of several forms * @param pos INPUT-OUTPUT parameter. On input, pos is the * offset of the first character to parse in id. On output, * pos is the offset after the last parsed character. If the * parse failed, pos will be unchanged. * @param allowFilter if true, a UnicodeSet pattern is allowed * at any location between specs or delimiters, and is returned * in the Specs object. * @return a Specs object, or nullptr if the parse failed. If * neither source nor target was seen in the parsed id, then the * parse fails. 
If allowFilter is true, then the parsed filter * pattern is returned in the Specs object, otherwise the returned * filter reference is nullptr. If the parse fails for any reason * nullptr is returned. */ TransliteratorIDParser::Specs* TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos, UBool allowFilter) { … } /** * Given a Specs object, convert it to a SingleID object. The * Specs object is a more unprocessed parse result. The SingleID * object contains information about canonical and basic IDs. * @return a SingleID; never returns nullptr. Returned object always * has 'filter' field of nullptr. */ TransliteratorIDParser::SingleID* TransliteratorIDParser::specsToID(const Specs* specs, int32_t dir) { … } /** * Given a Specs object, return a SingleID representing the * special inverse of that ID. If there is no special inverse * then return nullptr. * @return a SingleID or nullptr. Returned object always has * 'filter' field of nullptr. */ TransliteratorIDParser::SingleID* TransliteratorIDParser::specsToSpecialInverse(const Specs& specs, UErrorCode &status) { … } /** * Glue method to get around access problems in C++. This would * ideally be inline but we want to avoid a circular header * dependency. */ Transliterator* TransliteratorIDParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) { … } /** * Initialize static memory. Called through umtx_initOnce only. */ void U_CALLCONV TransliteratorIDParser::init(UErrorCode &status) { … } /** * Free static memory. */ void TransliteratorIDParser::cleanup() { … } U_NAMESPACE_END #endif /* #if !UCONFIG_NO_TRANSLITERATION */ //eof