// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 1999-2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: uniset_props.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2004aug25 * created by: Markus W. Scherer * * Character property dependent functions moved here from uniset.cpp */ #include "unicode/utypes.h" #include "unicode/uniset.h" #include "unicode/parsepos.h" #include "unicode/uchar.h" #include "unicode/uscript.h" #include "unicode/symtable.h" #include "unicode/uset.h" #include "unicode/locid.h" #include "unicode/brkiter.h" #include "uset_imp.h" #include "ruleiter.h" #include "cmemory.h" #include "ucln_cmn.h" #include "util.h" #include "uvector.h" #include "uprops.h" #include "propname.h" #include "normalizer2impl.h" #include "uinvchar.h" #include "uprops.h" #include "charstr.h" #include "cstring.h" #include "mutex.h" #include "umutex.h" #include "uassert.h" #include "hash.h" U_NAMESPACE_USE // Special property set IDs static const char ANY[] = …; // [\u0000-\U0010FFFF] static const char ASCII[] = …; // [\u0000-\u007F] static const char ASSIGNED[] = …; // [:^Cn:] // Unicode name property alias #define NAME_PROP … #define NAME_PROP_LENGTH … // Cached sets ------------------------------------------------------------- *** U_CDECL_BEGIN static UBool U_CALLCONV uset_cleanup(); static UnicodeSet *uni32Singleton; static icu::UInitOnce uni32InitOnce { … }; /** * Cleanup function for UnicodeSet */ static UBool U_CALLCONV uset_cleanup() { … } U_CDECL_END U_NAMESPACE_BEGIN namespace { // Cache some sets for other services -------------------------------------- *** void U_CALLCONV createUni32Set(UErrorCode &errorCode) { … } U_CFUNC UnicodeSet * uniset_getUnicode32Instance(UErrorCode &errorCode) { … } // helper functions for matching of pattern syntax pieces ------------------ *** // these functions are parallel to the PERL_OPEN etc. strings above // using these functions is not only faster than UnicodeString::compare() and // caseCompare(), but they also make UnicodeSet work for simple patterns when // no Unicode properties data is available - when caseCompare() fails inline UBool isPerlOpen(const UnicodeString &pattern, int32_t pos) { … } /*static inline UBool isPerlClose(const UnicodeString &pattern, int32_t pos) { return pattern.charAt(pos)==u'}'; }*/ inline UBool isNameOpen(const UnicodeString &pattern, int32_t pos) { … } inline UBool isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { … } /*static inline UBool isPOSIXClose(const UnicodeString &pattern, int32_t pos) { return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']'; }*/ // TODO memory debugging provided inside uniset.cpp // could be made available here but probably obsolete with use of modern // memory leak checker tools #define _dbgct(me) … } // namespace //---------------------------------------------------------------- // Constructors &c //---------------------------------------------------------------- /** * Constructs a set from the given pattern, optionally ignoring * white space. See the class description for the syntax of the * pattern language. * @param pattern a string specifying what characters are in the set */ UnicodeSet::UnicodeSet(const UnicodeString& pattern, UErrorCode& status) { … } //---------------------------------------------------------------- // Public API //---------------------------------------------------------------- UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, UErrorCode& status) { … } void UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, ParsePosition& pos, const SymbolTable* symbols, UErrorCode& status) { … } /** * Return true if the given position, in the given pattern, appears * to be the start of a UnicodeSet pattern. */ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { … } //---------------------------------------------------------------- // Implementation: Pattern parsing //---------------------------------------------------------------- namespace { /** * A small all-inline class to manage a UnicodeSet pointer. Add * operator->() etc. as needed. */ class UnicodeSetPointer { … }; constexpr int32_t MAX_DEPTH = …; } // namespace /** * Parse the pattern from the given RuleCharacterIterator. The * iterator is advanced over the parsed pattern. * @param chars iterator over the pattern characters. Upon return * it will be advanced to the first character after the parsed * pattern, or the end of the iteration if all characters are * parsed. * @param symbols symbol table to use to parse and dereference * variables, or null if none. * @param rebuiltPat the pattern that was parsed, rebuilt or * copied from the input pattern, as appropriate. * @param options a bit mask of zero or more of the following: * IGNORE_SPACE, CASE. */ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, const SymbolTable* symbols, UnicodeString& rebuiltPat, uint32_t options, UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), int32_t depth, UErrorCode& ec) { … } //---------------------------------------------------------------- // Property set implementation //---------------------------------------------------------------- namespace { UBool numericValueFilter(UChar32 ch, void* context) { … } UBool generalCategoryMaskFilter(UChar32 ch, void* context) { … } UBool versionFilter(UChar32 ch, void* context) { … } IntPropertyContext; UBool intPropertyFilter(UChar32 ch, void* context) { … } UBool scriptExtensionsFilter(UChar32 ch, void* context) { … } UBool idTypeFilter(UChar32 ch, void* context) { … } } // namespace /** * Generic filter-based scanning code for UCD property UnicodeSets. */ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, void* context, const UnicodeSet* inclusions, UErrorCode &status) { … } namespace { UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { … } } // namespace //---------------------------------------------------------------- // Property set API //---------------------------------------------------------------- #define FAIL(ec) … UnicodeSet& UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { … } UnicodeSet& UnicodeSet::applyPropertyAlias(const UnicodeString& prop, const UnicodeString& value, UErrorCode& ec) { … } //---------------------------------------------------------------- // Property set patterns //---------------------------------------------------------------- /** * Return true if the given position, in the given pattern, appears * to be the start of a property set pattern. */ UBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern, int32_t pos) { … } /** * Return true if the given iterator appears to point at a * property pattern. Regardless of the result, return with the * iterator unchanged. * @param chars iterator over the pattern characters. Upon return * it will be unchanged. * @param iterOpts RuleCharacterIterator options */ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars, int32_t iterOpts) { … } /** * Parse the given property pattern at the given parse position. */ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, ParsePosition& ppos, UErrorCode &ec) { … } /** * Parse a property pattern. * @param chars iterator over the pattern characters. Upon return * it will be advanced to the first character after the parsed * pattern, or the end of the iteration if all characters are * parsed. * @param rebuiltPat the pattern that was parsed, rebuilt or * copied from the input pattern, as appropriate. */ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, UnicodeString& rebuiltPat, UErrorCode& ec) { … } U_NAMESPACE_END