// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (C) 2001-2015 IBM and others. All rights reserved. ********************************************************************** * Date Name Description * 08/13/2001 synwee Creation. ********************************************************************** */ #ifndef USRCHIMP_H #define USRCHIMP_H #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "unicode/normalizer2.h" #include "unicode/ucol.h" #include "unicode/ucoleitr.h" #include "unicode/ubrk.h" /* mask off anything but primary order */ #define UCOL_PRIMARYORDERMASK … /* mask off anything but secondary order */ #define UCOL_SECONDARYORDERMASK … /* mask off anything but tertiary order */ #define UCOL_TERTIARYORDERMASK … /* primary order shift */ #define UCOL_PRIMARYORDERSHIFT … /* secondary order shift */ #define UCOL_SECONDARYORDERSHIFT … #define UCOL_IGNORABLE … /* get weights from a CE */ #define UCOL_PRIMARYORDER(order) … #define UCOL_SECONDARYORDER(order) … #define UCOL_TERTIARYORDER(order) … #define UCOL_CONTINUATION_MARKER … #define isContinuation(CE) … /** * This indicates an error has occurred during processing or there are no more CEs * to be returned. */ #define UCOL_PROCESSED_NULLORDER … U_NAMESPACE_BEGIN class CollationElementIterator; class Collator; struct PCEI { … }; struct PCEBuffer { … }; class UCollationPCE : public UMemory { … }; U_NAMESPACE_END #define INITIAL_ARRAY_SIZE_ … struct USearch { … }; struct UPattern { … }; struct UStringSearch { … }; /** * Exact matches without checking for the ends for extra accents. * The match after the position within the collation element iterator is to be * found. * After a match is found the offset in the collation element iterator will be * shifted to the start of the match. 
* Implementation note:
* For tertiary we can't use the collator->tertiaryMask, that is a
* preprocessed mask that takes into account case options. Since we are only
* concerned with exact matches, we don't need that.
* Alternate handling - since only the 16 most significant digits are used,
* we can safely do a compare without masking. If the CE is a variable, we mask
* and get only the primary values; no shifting to quaternary is required since
* all primary values less than variabletop will need to be masked off anyway.
* If the end character is composite and the pattern CE does not match the text
* CE, we skip it until we find a match in the end composite character or when
* it has passed the character. This is so that we can match pattern "a" with
* the text "\u00e6"
* @param strsrch string search data
* @param status error status if any
* @return true if an exact match is found, false otherwise
*/
U_CFUNC UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);

/**
* Canonical matches.
* According to the definition, matches found here will include the whole span
* of beginning and ending accents if it overlaps that region.
* @param strsrch string search data
* @param status error status if any
* @return true if a canonical match is found, false otherwise
*/
U_CFUNC UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);

/**
* Gets the previous match.
* Comments follow from usearch_handleNextExact.
* @param strsrch string search data
* @param status error status if any
* @return true if an exact match is found, false otherwise
*/
U_CFUNC UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);

/**
* Canonical matches.
* According to the definition, matches found here will include the whole span
* of beginning and ending accents if it overlaps that region.
* @param strsrch string search data
* @param status error status if any
* @return true if a canonical match is found, false otherwise
*/
U_CFUNC UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, UErrorCode *status);

#endif /* #if !UCONFIG_NO_COLLATION */

#endif /* USRCHIMP_H */