// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/

#ifndef __NORMALIZER2_H__
#define __NORMALIZER2_H__

/**
 * \file
 * \brief C++ API: New API for Unicode Normalization.
 */

#include "unicode/utypes.h"

// The declarations below are visible only when the C++ API is shown ...
#if U_SHOW_CPLUSPLUS_API

// ... and only when normalization has not been configured out of the build.
#if !UCONFIG_NO_NORMALIZATION

#include "unicode/stringpiece.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm2.h"

U_NAMESPACE_BEGIN

// Forward declaration only.
class ByteSink;

/**
 * Unicode normalization functionality for standard Unicode normalization or
 * for using custom mapping tables.
 * All instances of this class are unmodifiable/immutable.
 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
 * The Normalizer2 class is not intended for public subclassing.
 *
 * The primary functions are to produce a normalized string and to detect whether
 * a string is already normalized.
 * The most commonly used normalization forms are those defined in
 * UAX #15, Unicode Normalization Forms: https://www.unicode.org/reports/tr15/
 * However, this API supports additional normalization forms for specialized purposes.
 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
 * and can be used in implementations of UTS #46.
 *
 * Not only are the standard compose and decompose modes supplied,
 * but additional modes are provided as documented in the Mode enum.
 *
 * Some of the functions in this class identify normalization boundaries.
 * At a normalization boundary, the portions of the string
 * before it and starting from it do not interact and can be handled independently.
 *
 * spanQuickCheckYes() stops at a normalization boundary.
 * When the goal is a normalized string, the text before the boundary
 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
 *
 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
 * a character is guaranteed to be at a normalization boundary,
 * regardless of context.
 * This is used for moving from one normalization boundary to the next
 * or preceding boundary, and for performing iterative normalization.
 *
 * Iterative normalization is useful when only a small portion of a
 * longer string needs to be processed.
 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
 * (to process only the substring for which sort key bytes are computed).
 *
 * The set of normalization boundaries returned by these functions may not be
 * complete: there may be more boundaries that could be returned.
 * Different functions may return different boundaries.
 * @stable ICU 4.4
 */
class U_COMMON_API Normalizer2 : public UObject { … };

/**
 * Normalization filtered by a UnicodeSet.
 * Normalizes portions of the text contained in the filter set and leaves
 * portions not contained in the filter set unchanged.
 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
 * This class implements all of (and only) the Normalizer2 API.
 * An instance of this class is unmodifiable/immutable but is constructed and
 * must be destructed by the owner.
 * @stable ICU 4.4
 */
class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { … };

U_NAMESPACE_END

#endif  // !UCONFIG_NO_NORMALIZATION

#endif /* U_SHOW_CPLUSPLUS_API */

#endif  // __NORMALIZER2_H__