/* * Copyright (C) 2006 Lars Knoll <[email protected]> * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public License * along with this library; see the file COPYING.LIB. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. * */ #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/351564777): Remove this and convert code to safer constructs. #pragma allow_unsafe_buffers #endif #ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_TEXT_BREAK_ITERATOR_H_ #define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_TEXT_BREAK_ITERATOR_H_ #include <unicode/brkiter.h> #include <memory> #include <type_traits> #include "base/check_op.h" #include "base/containers/span.h" #include "third_party/blink/renderer/platform/platform_export.h" #include "third_party/blink/renderer/platform/text/character.h" #include "third_party/blink/renderer/platform/text/layout_locale.h" #include "third_party/blink/renderer/platform/wtf/text/atomic_string.h" #include "third_party/blink/renderer/platform/wtf/text/character_names.h" #include "third_party/blink/renderer/platform/wtf/text/wtf_uchar.h" namespace blink { TextBreakIterator; // NOTE(review): presumably the deleter used by `PooledBreakIterator` to hand // an iterator back to the pool instead of deleting it - confirm. struct PLATFORM_EXPORT ReturnBreakIteratorToPool { … }; // // Line break iterators are stocked in a pool to save construction time. // `PooledBreakIterator`, when destructed, returns the instance to the pool // instead of deleting it. 
// PooledBreakIterator; // // Returns a new instance from a pool, or creates a new one if the pool is empty. // PLATFORM_EXPORT PooledBreakIterator AcquireLineBreakIterator(StringView, const AtomicString& locale); // Note: The returned iterator is good only until you get another iterator, with // the exception of AcquireLineBreakIterator. // This is similar to the character break iterator in most cases, but is subject to // platform UI conventions. One notable example where this can be different // from the character break iterator is Thai prepend characters; see bug 24342. // Use this for insertion point and selection manipulations. PLATFORM_EXPORT TextBreakIterator* CursorMovementIterator( base::span<const UChar>); PLATFORM_EXPORT TextBreakIterator* WordBreakIterator(const String&, int start, int length); PLATFORM_EXPORT TextBreakIterator* WordBreakIterator(base::span<const UChar>); PLATFORM_EXPORT TextBreakIterator* SentenceBreakIterator( base::span<const UChar>); // Before calling this, check that the iterator is not at the end. Otherwise, // it may not work as expected. // See https://ssl.icu-project.org/trac/ticket/13447 . PLATFORM_EXPORT bool IsWordTextBreak(TextBreakIterator*); const int kTextBreakDone = …; // A Unicode Line Break Word Identifier (key "lw"). // https://www.unicode.org/reports/tr35/#UnicodeLineBreakWordIdentifier enum class LineBreakType : uint8_t { … }; // Determines break opportunities around collapsible space characters (space, // newline, and tabulation characters). 
enum class BreakSpaceType : uint8_t { … }; PLATFORM_EXPORT std::ostream& operator<<(std::ostream&, LineBreakType); PLATFORM_EXPORT std::ostream& operator<<(std::ostream&, BreakSpaceType); class PLATFORM_EXPORT LazyLineBreakIterator final { … }; inline const AtomicString& LazyLineBreakIterator::LocaleWithKeyword() const { … } inline void LazyLineBreakIterator::InvalidateLocaleWithKeyword() { … } inline void LazyLineBreakIterator::SetBreakType(LineBreakType break_type) { … } inline void LazyLineBreakIterator::SetStrictness( LineBreakStrictness strictness) { … } // Iterates over "extended grapheme clusters", as defined in UAX #29. // Note that platform implementations may be less sophisticated - e.g. ICU prior // to version 4.0 only supports "legacy grapheme clusters". Use this for // general text processing, e.g. string truncation. class PLATFORM_EXPORT NonSharedCharacterBreakIterator final { … }; // Counts the number of grapheme clusters. A surrogate pair, or a sequence // of a non-combining character and following combining characters, is // counted as one grapheme cluster. PLATFORM_EXPORT unsigned NumGraphemeClusters(const String&); // Returns the number of code units that the next grapheme cluster is made of. // NOTE(review): the unnamed second parameter defaults to 0; presumably the // offset at which the cluster starts - confirm. PLATFORM_EXPORT unsigned LengthOfGraphemeCluster(const String&, unsigned = 0); // Produces the list of grapheme clusters at each character, using character // break rules. NOTE(review): the function returns void, so the result is // presumably written to `graphemes` - confirm. PLATFORM_EXPORT void GraphemesClusterList(const StringView& text, Vector<unsigned>* graphemes); } // namespace blink #endif // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_TEXT_BREAK_ITERATOR_H_