// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * Copyright (C) 1996-2016, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** */ /** * \file * \brief C++ API: Collation Service. */ /** * File coll.h * * Created by: Helena Shih * * Modification History: * * Date Name Description * 02/5/97 aliu Modified createDefault to load collation data from * binary files when possible. Added related methods * createCollationFromFile, chopLocale, createPathName. * 02/11/97 aliu Added members addToCache, findInCache, and fgCache. * 02/12/97 aliu Modified to create objects from RuleBasedCollator cache. * Moved cache out of Collation class. * 02/13/97 aliu Moved several methods out of this class and into * RuleBasedCollator, with modifications. Modified * createDefault() to call new RuleBasedCollator(Locale&) * constructor. General clean up and documentation. * 02/20/97 helena Added clone, operator==, operator!=, operator=, copy * constructor and getDynamicClassID. * 03/25/97 helena Updated with platform independent data types. * 05/06/97 helena Added memory allocation error detection. * 06/20/97 helena Java class name change. * 09/03/97 helena Added createCollationKeyValues(). * 02/10/98 damiba Added compare() with length as parameter. * 04/23/99 stephen Removed EDecompositionMode, merged with * Normalizer::EMode. * 11/02/99 helena Collator performance enhancements. Eliminates the * UnicodeString construction and special case for NO_OP. * 11/23/99 srl More performance enhancements. Inlining of * critical accessors. * 05/15/00 helena Added version information API. * 01/29/01 synwee Modified into a C++ wrapper which calls C apis * (ucol.h). * 2012-2014 markus Rewritten in C++ again. 
*/ #ifndef COLL_H #define COLL_H #include "unicode/utypes.h" #if U_SHOW_CPLUSPLUS_API #if !UCONFIG_NO_COLLATION #include "unicode/uobject.h" #include "unicode/ucol.h" #include "unicode/unorm.h" #include "unicode/locid.h" #include "unicode/uniset.h" #include "unicode/umisc.h" #include "unicode/uiter.h" #include "unicode/stringpiece.h" U_NAMESPACE_BEGIN class StringEnumeration; #if !UCONFIG_NO_SERVICE /** * @stable ICU 2.6 */ class CollatorFactory; #endif /** * @stable ICU 2.0 */ class CollationKey; /** * The <code>Collator</code> class performs locale-sensitive string * comparison.<br> * You use this class to build searching and sorting routines for natural * language text. * <p> * <code>Collator</code> is an abstract base class. Subclasses implement * specific collation strategies. One subclass, * <code>RuleBasedCollator</code>, is currently provided and is applicable * to a wide set of languages. Other subclasses may be created to handle more * specialized needs. * <p> * Like other locale-sensitive classes, you can use the static factory method, * <code>createInstance</code>, to obtain the appropriate * <code>Collator</code> object for a given locale. You will only need to * look at the subclasses of <code>Collator</code> if you need to * understand the details of a particular collation strategy or if you need to * modify that strategy. * <p> * The following example shows how to compare two strings using the * <code>Collator</code> for the default locale. 
* \htmlonly<blockquote>\endhtmlonly * <pre> * \code * // Compare two strings in the default locale * UErrorCode success = U_ZERO_ERROR; * Collator* myCollator = Collator::createInstance(success); * if (myCollator->compare("abc", "ABC") < 0) * cout << "abc is less than ABC" << endl; * else * cout << "abc is greater than or equal to ABC" << endl; * \endcode * </pre> * \htmlonly</blockquote>\endhtmlonly * <p> * You can set a <code>Collator</code>'s <em>strength</em> attribute to * determine the level of difference considered significant in comparisons. * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>, * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>. * The exact assignment of strengths to language features is locale dependent. * For example, in Czech, "e" and "f" are considered primary differences, * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary * differences and "e" and "e" are identical. The following shows how both case * and accents could be ignored for US English. * \htmlonly<blockquote>\endhtmlonly * <pre> * \code * //Get the Collator for US English and set its strength to PRIMARY * UErrorCode success = U_ZERO_ERROR; * Collator* usCollator = Collator::createInstance(Locale::getUS(), success); * usCollator->setStrength(Collator::PRIMARY); * if (usCollator->compare("abc", "ABC") == 0) * cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl; * \endcode * </pre> * \htmlonly</blockquote>\endhtmlonly * * The <code>getSortKey</code> methods * convert a string to a series of bytes that can be compared bitwise against * other sort keys using <code>strcmp()</code>. Sort keys are written as * zero-terminated byte strings. * * Another set of APIs returns a <code>CollationKey</code> object that wraps * the sort key bytes instead of returning the bytes themselves. 
* </p> * <p> * <strong>Note:</strong> <code>Collator</code>s with different Locale, * and CollationStrength settings will return different sort * orders for the same set of strings. Locales have specific collation rules, * and the way in which secondary and tertiary differences are taken into * account, for example, will result in a different sorting order for same * strings. * </p> * @see RuleBasedCollator * @see CollationKey * @see CollationElementIterator * @see Locale * @see Normalizer2 * @version 2.0 11/15/01 */ class U_I18N_API Collator : public UObject { … }; #if !UCONFIG_NO_SERVICE /** * A factory, used with registerFactory, that creates multiple collators and provides * display names for them. A factory supports some number of locales-- these are the * locales for which it can create collators. The factory can be visible, in which * case the supported locales will be enumerated by getAvailableLocales, or invisible, * in which case they are not. Invisible locales are still supported; they are just not * listed by getAvailableLocales. * <p> * If standard locale display names are sufficient, Collator instances can * be registered using registerInstance instead.</p> * <p> * Note: if the collators are to be used from C APIs, they must be instances * of RuleBasedCollator.</p> * * @stable ICU 2.6 */ class U_I18N_API CollatorFactory : public UObject { … }; #endif /* UCONFIG_NO_SERVICE */ // Collator inline methods ----------------------------------------------- U_NAMESPACE_END #endif /* #if !UCONFIG_NO_COLLATION */ #endif /* U_SHOW_CPLUSPLUS_API */ #endif