// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2004-2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: uregex.cpp */ #include "unicode/utypes.h" #if !UCONFIG_NO_REGULAR_EXPRESSIONS #include "unicode/regex.h" #include "unicode/uregex.h" #include "unicode/unistr.h" #include "unicode/ustring.h" #include "unicode/uchar.h" #include "unicode/uobject.h" #include "unicode/utf16.h" #include "cmemory.h" #include "uassert.h" #include "uhash.h" #include "umutex.h" #include "uvectr32.h" #include "regextxt.h" U_NAMESPACE_BEGIN #define REMAINING_CAPACITY(idx,len) … struct RegularExpression: public UMemory { … }; static const int32_t REXP_MAGIC = …; // "rexp" in ASCII RegularExpression::RegularExpression() { … } RegularExpression::~RegularExpression() { … } U_NAMESPACE_END U_NAMESPACE_USE //---------------------------------------------------------------------------------------- // // validateRE Do boilerplate style checks on API function parameters. // Return true if they look OK. 
//---------------------------------------------------------------------------------------- static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) { … } //---------------------------------------------------------------------------------------- // // uregex_open // //---------------------------------------------------------------------------------------- U_CAPI URegularExpression * U_EXPORT2 uregex_open( const char16_t *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status) { … } //---------------------------------------------------------------------------------------- // // uregex_openUText // //---------------------------------------------------------------------------------------- U_CAPI URegularExpression * U_EXPORT2 uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status) { … } //---------------------------------------------------------------------------------------- // // uregex_close // //---------------------------------------------------------------------------------------- U_CAPI void U_EXPORT2 uregex_close(URegularExpression *re2) { … } //---------------------------------------------------------------------------------------- // // uregex_clone // //---------------------------------------------------------------------------------------- U_CAPI URegularExpression * U_EXPORT2 uregex_clone(const URegularExpression *source2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_pattern // //------------------------------------------------------------------------------ U_CAPI const char16_t * U_EXPORT2 uregex_pattern(const URegularExpression *regexp2, int32_t *patLength, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_patternUText // //------------------------------------------------------------------------------ U_CAPI UText * 
U_EXPORT2 uregex_patternUText(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_flags // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_flags(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setText // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setText(URegularExpression *regexp2, const char16_t *text, int32_t textLength, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setUText // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setUText(URegularExpression *regexp2, UText *text, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_getText // //------------------------------------------------------------------------------ U_CAPI const char16_t * U_EXPORT2 uregex_getText(URegularExpression *regexp2, int32_t *textLength, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_getUText // //------------------------------------------------------------------------------ U_CAPI UText * U_EXPORT2 uregex_getUText(URegularExpression *regexp2, UText *dest, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_refreshUText // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_refreshUText(URegularExpression *regexp2, UText *text, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_matches // 
//------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_matches(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status) { … } U_CAPI UBool U_EXPORT2 uregex_matches64(URegularExpression *regexp2, int64_t startIndex, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_lookingAt // //------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_lookingAt(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status) { … } U_CAPI UBool U_EXPORT2 uregex_lookingAt64(URegularExpression *regexp2, int64_t startIndex, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_find // //------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_find(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status) { … } U_CAPI UBool U_EXPORT2 uregex_find64(URegularExpression *regexp2, int64_t startIndex, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_findNext // //------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_findNext(URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_groupCount // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_groupCount(URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_groupNumberFromName // // NOTE(review): uregex_groupNumberFromName and uregex_groupNumberFromCName are // defined without the U_CAPI / U_EXPORT2 decoration that the other uregex_* // definitions visible in this file carry; presumably the declarations in // unicode/uregex.h supply the linkage - confirm. // //------------------------------------------------------------------------------ int32_t uregex_groupNumberFromName(URegularExpression *regexp2, const char16_t *groupName, int32_t 
nameLength, UErrorCode *status) { … } int32_t uregex_groupNumberFromCName(URegularExpression *regexp2, const char *groupName, int32_t nameLength, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_group // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_group(URegularExpression *regexp2, int32_t groupNum, char16_t *dest, int32_t destCapacity, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_groupUText // //------------------------------------------------------------------------------ U_CAPI UText * U_EXPORT2 uregex_groupUText(URegularExpression *regexp2, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_start // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_start(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { … } U_CAPI int64_t U_EXPORT2 uregex_start64(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_end // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_end(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { … } U_CAPI int64_t U_EXPORT2 uregex_end64(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_reset // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_reset(URegularExpression *regexp2, int32_t index, UErrorCode *status) { … } U_CAPI void U_EXPORT2 uregex_reset64(URegularExpression 
*regexp2, int64_t index, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setRegion // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setRegion(URegularExpression *regexp2, int32_t regionStart, int32_t regionLimit, UErrorCode *status) { … } U_CAPI void U_EXPORT2 uregex_setRegion64(URegularExpression *regexp2, int64_t regionStart, int64_t regionLimit, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setRegionAndStart // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setRegionAndStart(URegularExpression *regexp2, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_regionStart // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_regionStart(const URegularExpression *regexp2, UErrorCode *status) { … } U_CAPI int64_t U_EXPORT2 uregex_regionStart64(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_regionEnd // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_regionEnd(const URegularExpression *regexp2, UErrorCode *status) { … } U_CAPI int64_t U_EXPORT2 uregex_regionEnd64(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_hasTransparentBounds // //------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_hasTransparentBounds(const URegularExpression *regexp2, UErrorCode *status) { … } 
//------------------------------------------------------------------------------ // // uregex_useTransparentBounds // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_useTransparentBounds(URegularExpression *regexp2, UBool b, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_hasAnchoringBounds // //------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_hasAnchoringBounds(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_useAnchoringBounds // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_useAnchoringBounds(URegularExpression *regexp2, UBool b, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_hitEnd // //------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_hitEnd(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_requireEnd // //------------------------------------------------------------------------------ U_CAPI UBool U_EXPORT2 uregex_requireEnd(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setTimeLimit // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setTimeLimit(URegularExpression *regexp2, int32_t limit, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_getTimeLimit // //------------------------------------------------------------------------------ 
U_CAPI int32_t U_EXPORT2 uregex_getTimeLimit(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setStackLimit // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setStackLimit(URegularExpression *regexp2, int32_t limit, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_getStackLimit // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_getStackLimit(const URegularExpression *regexp2, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setMatchCallback // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setMatchCallback(URegularExpression *regexp2, URegexMatchCallback *callback, const void *context, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_getMatchCallback // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_getMatchCallback(const URegularExpression *regexp2, URegexMatchCallback **callback, const void **context, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_setFindProgressCallback // //------------------------------------------------------------------------------ U_CAPI void U_EXPORT2 uregex_setFindProgressCallback(URegularExpression *regexp2, URegexFindProgressCallback *callback, const void *context, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_getFindProgressCallback // //------------------------------------------------------------------------------ U_CAPI void 
U_EXPORT2 uregex_getFindProgressCallback(const URegularExpression *regexp2, URegexFindProgressCallback **callback, const void **context, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_replaceAll // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_replaceAll(URegularExpression *regexp2, const char16_t *replacementText, int32_t replacementLength, char16_t *destBuf, int32_t destCapacity, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_replaceAllUText // //------------------------------------------------------------------------------ U_CAPI UText * U_EXPORT2 uregex_replaceAllUText(URegularExpression *regexp2, UText *replacementText, UText *dest, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_replaceFirst // //------------------------------------------------------------------------------ U_CAPI int32_t U_EXPORT2 uregex_replaceFirst(URegularExpression *regexp2, const char16_t *replacementText, int32_t replacementLength, char16_t *destBuf, int32_t destCapacity, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_replaceFirstUText // //------------------------------------------------------------------------------ U_CAPI UText * U_EXPORT2 uregex_replaceFirstUText(URegularExpression *regexp2, UText *replacementText, UText *dest, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_appendReplacement // //------------------------------------------------------------------------------ U_NAMESPACE_BEGIN // // Dummy class, because these functions need to be friends of class RegexMatcher, // and stand-alone C functions don't work as friends // class RegexCImpl { … }; 
U_NAMESPACE_END static const char16_t BACKSLASH = …; static const char16_t DOLLARSIGN = …; static const char16_t LEFTBRACKET = …; static const char16_t RIGHTBRACKET = …; // // Move a character to an output buffer, with bounds checking on the index. // Index advances even if capacity is exceeded, for preflight size computations. // This little sequence is used a LOT. // static inline void appendToBuf(char16_t c, int32_t *idx, char16_t *buf, int32_t bufCapacity) { … } // // appendReplacement, the actual implementation. // int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, const char16_t *replacementText, int32_t replacementLength, char16_t **destBuf, int32_t *destCapacity, UErrorCode *status) { … } // // appendReplacement the actual API function, // U_CAPI int32_t U_EXPORT2 uregex_appendReplacement(URegularExpression *regexp2, const char16_t *replacementText, int32_t replacementLength, char16_t **destBuf, int32_t *destCapacity, UErrorCode *status) { … } // // uregex_appendReplacementUText...can just use the normal C++ method // U_CAPI void U_EXPORT2 uregex_appendReplacementUText(URegularExpression *regexp2, UText *replText, UText *dest, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // uregex_appendTail // //------------------------------------------------------------------------------ int32_t RegexCImpl::appendTail(RegularExpression *regexp, char16_t **destBuf, int32_t *destCapacity, UErrorCode *status) { … } // // appendTail the actual API function // U_CAPI int32_t U_EXPORT2 uregex_appendTail(URegularExpression *regexp2, char16_t **destBuf, int32_t *destCapacity, UErrorCode *status) { … } // // uregex_appendTailUText...can just use the normal C++ method // U_CAPI UText * U_EXPORT2 uregex_appendTailUText(URegularExpression *regexp2, UText *dest, UErrorCode *status) { … } //------------------------------------------------------------------------------ // // copyString Internal utility to 
copy a string to an output buffer, // while managing buffer overflow and preflight size // computation. NUL termination is added to destination, // and the NUL is counted in the output size. // // Once the buffer is full, copying stops and the index is advanced by the // number of source units that were not copied. // // This definition is currently disabled by the #if 0 / #endif pair around it. // //------------------------------------------------------------------------------ #if 0 static void copyString(char16_t *destBuffer, // Destination buffer. int32_t destCapacity, // Total capacity of dest buffer int32_t *destIndex, // Index into dest buffer. Updated on return. // Update not clipped to destCapacity. const char16_t *srcPtr, // Pointer to source string int32_t srcLen) // Source string len. { int32_t si; int32_t di = *destIndex; char16_t c; for (si=0; si<srcLen; si++) { c = srcPtr[si]; if (di < destCapacity) { destBuffer[di] = c; di++; } else { /* No room left: add the count of uncopied units so the index still reports the full size. */ di += srcLen - si; break; } } if (di<destCapacity) { destBuffer[di] = 0; } /* The terminator is counted even when it could not be stored. */ di++; *destIndex = di; } #endif //------------------------------------------------------------------------------ // // uregex_split // //------------------------------------------------------------------------------ int32_t RegexCImpl::split(RegularExpression *regexp, char16_t *destBuf, int32_t destCapacity, int32_t *requiredCapacity, char16_t *destFields[], int32_t destFieldsCapacity, UErrorCode *status) { … } // // uregex_split The actual API function // U_CAPI int32_t U_EXPORT2 uregex_split(URegularExpression *regexp2, char16_t *destBuf, int32_t destCapacity, int32_t *requiredCapacity, char16_t *destFields[], int32_t destFieldsCapacity, UErrorCode *status) { … } // // uregex_splitUText...can just use the normal C++ method // U_CAPI int32_t U_EXPORT2 uregex_splitUText(URegularExpression *regexp2, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status) { … } #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS