// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // Copyright (C) 2002-2015 International Business Machines Corporation // and others. All rights reserved. // // file: regeximp.h // // ICU Regular Expressions, // Definitions of constant values used in the compiled form of // a regular expression pattern. // #ifndef _REGEXIMP_H #define _REGEXIMP_H #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/uniset.h" #include "unicode/utext.h" #include "cmemory.h" #include "ucase.h" U_NAMESPACE_BEGIN // For debugging, define REGEX_DEBUG // To define with configure, // CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux #ifdef REGEX_DEBUG // // Debugging options. Enable one or more of the three #defines immediately following // //#define REGEX_SCAN_DEBUG #define REGEX_DUMP_DEBUG #define REGEX_RUN_DEBUG // End of #defines intended to be directly set. #include <stdio.h> #endif #ifdef REGEX_SCAN_DEBUG #define REGEX_SCAN_DEBUG_PRINTF … #else #define REGEX_SCAN_DEBUG_PRINTF(a) … #endif // // Opcode types. In the compiled form of the regexp, these are the types, or opcodes, // of the entries. // enum { … }; // Keep this list of opcode names in sync with the above enum // Used for debug printing only. #define URX_OPCODE_NAMES … // // Convenience macros for assembling and disassembling a compiled operation. // #define URX_TYPE(x) … #define URX_VAL(x) … // // Access to Unicode Sets for composite character properties // The sets are accessed by the match engine for things like \w (word characters) // enum { … }; // // Match Engine State Stack Frame Layout. // struct REStackFrame { … }; // number of UVector elements in the header #define RESTACKFRAME_HDRCOUNT … // // Start-Of-Match type. Used by find() to quickly scan to positions where a // match might start before firing up the full match engine.
// enum StartOfMatch { … }; #define START_OF_MATCH_STR(v) … // // 8-bit set, to fast-path Latin-1 set membership tests. // struct Regex8BitSet : public UMemory { … }; inline Regex8BitSet::Regex8BitSet() { … } inline UBool Regex8BitSet::contains(UChar32 c) { … } inline void Regex8BitSet::add(UChar32 c) { … } inline void Regex8BitSet::init(const UnicodeSet *s) { … } inline void Regex8BitSet::operator = (const Regex8BitSet &s) { … } // Case-folded UText Iterator helper class. // Wraps a UText, provides a case-folded enumeration over its contents. // Used in implementing case-insensitive matching constructs. // Implementation in rematch.cpp class CaseFoldingUTextIterator: public UMemory { … }; // Case-folded char16_t * string iterator. // Wraps a char16_t *, provides a case-folded enumeration over its contents. // Used in implementing case-insensitive matching constructs. // Implementation in rematch.cpp class CaseFoldingUCharIterator: public UMemory { … }; U_NAMESPACE_END #endif