// chariter.h

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************
*
*   Copyright (C) 1997-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
********************************************************************
*/

#ifndef CHARITER_H
#define CHARITER_H

#include "unicode/utypes.h"

#if U_SHOW_CPLUSPLUS_API

#include "unicode/uobject.h"
#include "unicode/unistr.h"
/**
 * \file
 * \brief C++ API: Character Iterator
 */
 
U_NAMESPACE_BEGIN
/**
 * Abstract class that defines an API for forward-only iteration
 * on text objects.
 * This is a minimal interface for iteration without random access
 * or backwards iteration. It is especially useful for wrapping
 * streams with converters into an object for collation or
 * normalization.
 *
 * <p>Characters can be accessed in two ways: as code units or as
 * code points.
 * Unicode code points are 21-bit integers and are the scalar values
 * of Unicode characters. ICU uses the type UChar32 for them.
 * Unicode code units are the storage units of a given
 * Unicode/UCS Transformation Format (a character encoding scheme).
 * With UTF-16, all code points can be represented with either one
 * or two code units ("surrogates").
 * String storage is typically based on code units, while properties
 * of characters are typically determined using code point values.
 * Some processes may be designed to work with sequences of code units,
 * or it may be known that all characters that are important to an
 * algorithm can be represented with single code units.
 * Other processes will need to use the code point access functions.</p>
 *
 * <p>ForwardCharacterIterator provides nextPostInc() to access
 * a code unit and advance an internal position into the text object,
 * similar to a <code>return text[position++]</code>.<br>
 * It provides next32PostInc() to access a code point and advance an internal
 * position.</p>
 *
 * <p>next32PostInc() assumes that the current position is that of
 * the beginning of a code point, i.e., of its first code unit.
 * After next32PostInc(), this will be true again.
 * In general, access to code units and code points in the same
 * iteration loop should not be mixed. In UTF-16, if the current position
 * is on a second code unit (Low Surrogate), then only that code unit
 * is returned even by next32PostInc().</p>
 *
 * <p>For iteration with either function, there are two ways to
 * check for the end of the iteration. When there are no more
 * characters in the text object:</p>
 * <ul>
 * <li>The hasNext() function returns false.</li>
 * <li>nextPostInc() and next32PostInc() return DONE
 *     when one attempts to read beyond the end of the text object.</li>
 * </ul>
 *
 * Example (the first function iterates over code points and tests hasNext();
 * the second iterates over code units and tests for DONE):
 * \code
 * void function1(ForwardCharacterIterator &it) {
 *     UChar32 c;
 *     while(it.hasNext()) {
 *         c=it.next32PostInc();
 *         // use c
 *     }
 * }
 *
 * void function2(ForwardCharacterIterator &it) {
 *     char16_t c;
 *     while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
 *         // use c
 *     }
 * }
 * \endcode
 *
 * @stable ICU 2.0
 */
class U_COMMON_API ForwardCharacterIterator : public UObject { … };

/**
 * Abstract class that defines an API for iteration
 * on text objects.
 * This is an interface for forward and backward iteration
 * and random access into a text object.
 *
 * <p>The API provides backward compatibility to the Java and older ICU
 * CharacterIterator classes but extends them significantly:</p>
 * <ol>
 * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
 * <li>While the old API functions provided forward iteration with
 *     "pre-increment" semantics, the new one also provides functions
 *     with "post-increment" semantics. They are more efficient and should
 *     be the preferred iterator functions for new implementations.
 *     The backward iteration always had "pre-decrement" semantics, which
 *     are efficient.</li>
 * <li>Just like ForwardCharacterIterator, it provides access to
 *     both code units and code points. Code point access versions are available
 *     for the old and the new iteration semantics.</li>
 * <li>There are new functions for setting and moving the current position
 *     without returning a character, for efficiency.</li>
 * </ol>
 *
 * <p>See ForwardCharacterIterator for examples for using the new forward iteration
 * functions. For backward iteration, there is also a hasPrevious() function
 * that can be used analogously to hasNext().
 * The old functions work as before and are shown below.</p>
 *
 * <p>Examples for some of the new functions:</p>
 *
 * Forward iteration with hasNext():
 * \code
 * void forward1(CharacterIterator &it) {
 *     UChar32 c;
 *     for(it.setToStart(); it.hasNext();) {
 *         c=it.next32PostInc();
 *         // use c
 *     }
 * }
 * \endcode
 * Forward iteration more similar to loops with the old forward iteration,
 * showing a way to convert simple for() loops:
 * \code
 * void forward2(CharacterIterator &it) {
 *     char16_t c;
 *     for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
 *         // use c
 *     }
 * }
 * \endcode
 * Backward iteration with setToEnd() and hasPrevious():
 * \code
 * void backward1(CharacterIterator &it) {
 *     UChar32 c;
 *     for(it.setToEnd(); it.hasPrevious();) {
 *         c=it.previous32();
 *         // use c
 *     }
 * }
 * \endcode
 * Backward iteration with a more traditional for() loop:
 * \code
 * void backward2(CharacterIterator &it) {
 *     char16_t c;
 *     for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
 *         // use c
 *     }
 * }
 * \endcode
 *
 * Example for random access:
 * \code
 *  void random(CharacterIterator &it) {
 *      // set to the third code point from the beginning
 *      it.move32(3, CharacterIterator::kStart);
 *      // get a code point from here without moving the position
 *      UChar32 c=it.current32();
 *      // get the position
 *      int32_t pos=it.getIndex();
 *      // get the previous code unit
 *      char16_t u=it.previous();
 *      // move back one more code unit
 *      it.move(-1, CharacterIterator::kCurrent);
 *      // set the position back to where it was
 *      // and read the same code point c and move beyond it
 *      it.setIndex(pos);
 *      if(c!=it.next32PostInc()) {
 *          exit(1); // CharacterIterator inconsistent
 *      }
 *  }
 * \endcode
 *
 * <p>Examples, especially for the old API:</p>
 *
 * Function processing characters, in this example simple output
 * <pre>
 * \code
 *  void processChar( char16_t c )
 *  {
 *      cout << " " << c;
 *  }
 * \endcode
 * </pre>
 * Traverse the text from start to finish
 * <pre>
 * \code
 *  void traverseForward(CharacterIterator& iter)
 *  {
 *      for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
 *          processChar(c);
 *      }
 *  }
 * \endcode
 * </pre>
 * Traverse the text backwards, from end to start
 * <pre>
 * \code
 *  void traverseBackward(CharacterIterator& iter)
 *  {
 *      for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
 *          processChar(c);
 *      }
 *  }
 * \endcode
 * </pre>
 * Traverse both forward and backward from a given position in the text.
 * The calls to Unicode::isLetter() and Unicode::isDigit() in this example
 * represent some additional stopping criteria.
 * <pre>
 * \code
 *  void traverseOut(CharacterIterator& iter, int32_t pos)
 *  {
 *      char16_t c;
 *      for (c = iter.setIndex(pos);
 *          c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
 *          c = iter.next()) {}
 *      int32_t end = iter.getIndex();
 *      for (c = iter.setIndex(pos);
 *          c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
 *          c = iter.previous()) {}
 *      int32_t start = iter.getIndex() + 1;
 *
 *      cout << "start: " << start << " end: " << end << endl;
 *      for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
 *          processChar(c);
 *      }
 *  }
 * \endcode
 * </pre>
 * Creating a StringCharacterIterator and calling the test functions
 * <pre>
 * \code
 *  void CharacterIterator_Example( void )
 *  {
 *      cout << endl << "===== CharacterIterator_Example: =====" << endl;
 *      UnicodeString text("Ein kleiner Satz.");
 *      StringCharacterIterator iterator(text);
 *      cout << "----- traverseForward: -----------" << endl;
 *      traverseForward( iterator );
 *      cout << endl << endl << "----- traverseBackward: ----------" << endl;
 *      traverseBackward( iterator );
 *      cout << endl << endl << "----- traverseOut: ---------------" << endl;
 *      traverseOut( iterator, 7 );
 *      cout << endl << endl << "-----" << endl;
 *  }
 * \endcode
 * </pre>
 *
 * @stable ICU 2.0
 */
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator { … };

// Inequality comparison between this iterator and `that`.
// NOTE(review): presumably the logical negation of operator== — confirm
// against the definition.
inline bool
ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const { … }

// Positions the iterator for forward iteration without returning a character,
// as in `for(it.setToStart(); it.hasNext();)`.
// NOTE(review): the int32_t result is presumably the new index — confirm.
inline int32_t
CharacterIterator::setToStart() { … }

// Positions the iterator for backward iteration without returning a character,
// as in `for(it.setToEnd(); it.hasPrevious();)`.
// NOTE(review): the int32_t result is presumably the new index — confirm.
inline int32_t
CharacterIterator::setToEnd() { … }

// Const accessor returning an int32_t index.
// NOTE(review): presumably the index at which the iteration range begins — confirm.
inline int32_t
CharacterIterator::startIndex() const { … }

// Const accessor returning an int32_t index.
// NOTE(review): presumably the index at which the iteration range ends — confirm
// whether that bound is inclusive or exclusive.
inline int32_t
CharacterIterator::endIndex() const { … }

// Returns the iterator's current position without moving it; a value saved
// from getIndex() can later be passed to setIndex() to restore the position.
inline int32_t
CharacterIterator::getIndex() const { … }

// Const accessor returning an int32_t length.
// NOTE(review): presumably the length of the underlying text in code units — confirm.
inline int32_t
CharacterIterator::getLength() const { … }

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */

#endif
// godot/thirdparty/icu4c/common/unicode/chariter.h