/* * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All * rights reserved. * Copyright (C) 2005 Alexey Proskuryakov. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/351564777): Remove this and convert code to safer constructs. 
#pragma allow_unsafe_buffers #endif #include "third_party/blink/renderer/platform/text/unicode_utilities.h" #include <unicode/normalizer2.h> #include <unicode/utf16.h> #include "third_party/blink/renderer/platform/wtf/text/character_names.h" #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h" namespace blink { enum VoicedSoundMarkType { … }; template <typename CharType> static inline CharType FoldQuoteMarkOrSoftHyphen(CharType c) { … } void FoldQuoteMarksAndSoftHyphens(UChar* data, size_t length) { … } void FoldQuoteMarksAndSoftHyphens(String& s) { … } static bool IsNonLatin1Separator(UChar32 character) { … } bool IsSeparator(UChar32 character) { … } bool ContainsOnlySeparatorsOrEmpty(const String& pattern) { … } // ICU's search ignores the distinction between small kana letters and ones // that are not small, and also characters that differ only in the voicing // marks when considering only primary collation strength differences. // This is not helpful for end users, since these differences make words // distinct, so for our purposes we need these to be considered. // The Unicode folks do not think the collation algorithm should be // changed. To work around this, we would like to tailor the ICU searcher, // but we can't get that to work yet. So instead, we check for cases where // these differences occur, and skip those matches. // We refer to the above technique as the "kana workaround". The next few // functions are helper functions for the kana workaround. bool IsKanaLetter(UChar character) { … } bool IsSmallKanaLetter(UChar character) { … } static inline VoicedSoundMarkType ComposedVoicedSoundMark(UChar character) { … } static inline bool IsCombiningVoicedSoundMark(UChar character) { … } bool ContainsKanaLetters(const String& pattern) { … } void NormalizeCharactersIntoNFCForm(const UChar* characters, unsigned length, Vector<UChar>& buffer) { … } // This function returns kNotFound if |first| and |second| contain different // Kana letters. 
If |first| and |second| contain the same Kana letter then the // function returns the offset in characters from |first|. // Pointers to both strings increase simultaneously so it is possible to use // one offset value. static inline size_t CompareKanaLetterAndComposedVoicedSoundMarks( const UChar* first, const UChar* first_end, const UChar* second, const UChar* second_end) { … } bool CheckOnlyKanaLettersInStrings(const UChar* first_data, unsigned first_length, const UChar* second_data, unsigned second_length) { … } bool CheckKanaStringsEqual(const UChar* first_data, unsigned first_length, const UChar* second_data, unsigned second_length) { … } } // namespace blink