chromium/third_party/blink/renderer/platform/text/unicode_utilities.cc

/*
 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All
 * rights reserved.
 * Copyright (C) 2005 Alexey Proskuryakov.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/351564777): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "third_party/blink/renderer/platform/text/unicode_utilities.h"

#include <unicode/normalizer2.h>
#include <unicode/utf16.h>

#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
#include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"

namespace blink {

// Return type of ComposedVoicedSoundMark().
// NOTE(review): no enumerators are visible in this view, so the set of
// voiced-sound-mark categories cannot be documented here — confirm against the
// full source.
enum VoicedSoundMarkType {};

// File-local, per-character helper templated on the character type: takes one
// character |c| and returns a value of the same type.
// NOTE(review): the body is empty in this view, so the actual mapping is not
// shown. Judging by the name it presumably folds quotation-mark variants and
// the soft hyphen to canonical characters — confirm. As written, a
// value-returning function with no return statement is undefined behavior if
// called.
template <typename CharType>
static inline CharType FoldQuoteMarkOrSoftHyphen(CharType c) {}

// Overload taking a mutable UChar buffer |data| and its |length|.
// NOTE(review): the body is empty in this view, so as shown this is a no-op.
// Presumably it applies FoldQuoteMarkOrSoftHyphen() in place to each of the
// |length| characters — confirm against the full source.
void FoldQuoteMarksAndSoftHyphens(UChar* data, size_t length) {}

// Overload taking a String by non-const reference.
// NOTE(review): the body is empty in this view, so as shown |s| is left
// untouched. Presumably |s| is rewritten with quotation marks and soft hyphens
// folded — confirm against the full source.
void FoldQuoteMarksAndSoftHyphens(String& s) {}

// File-local predicate over a single code point.
// NOTE(review): the body is empty in this view. Presumably this classifies
// code points outside the Latin-1 range on behalf of IsSeparator() — confirm.
// As written, a value-returning function with no return statement is undefined
// behavior if called.
static bool IsNonLatin1Separator(UChar32 character) {}

// Predicate over a single code point |character|.
// NOTE(review): the body is empty in this view, so which characters count as
// separators is not shown — confirm against the full source. As written, a
// value-returning function with no return statement is undefined behavior if
// called.
bool IsSeparator(UChar32 character) {}

// Predicate over a whole string |pattern|.
// NOTE(review): the body is empty in this view. Judging by the name it
// presumably returns true when |pattern| is empty or every character satisfies
// IsSeparator() — confirm. As written, a value-returning function with no
// return statement is undefined behavior if called.
bool ContainsOnlySeparatorsOrEmpty(const String& pattern) {}

// ICU's search ignores the distinction between small kana letters and ones
// that are not small, and also characters that differ only in the voicing
// marks when considering only primary collation strength differences.
// This is not helpful for end users, since these differences make words
// distinct, so for our purposes we need these to be considered.
// The Unicode folks do not think the collation algorithm should be
// changed. To work around this, we would like to tailor the ICU searcher,
// but we can't get that to work yet. So instead, we check for cases where
// these differences occur, and skip those matches.

// We refer to the above technique as the "kana workaround". The next few
// functions are helper functions for the kana workaround.

// Predicate over a single UChar.
// NOTE(review): the body is empty in this view, so the exact code-point ranges
// treated as kana letters are not shown — confirm against the full source. As
// written, a value-returning function with no return statement is undefined
// behavior if called.
bool IsKanaLetter(UChar character) {}

// Predicate over a single UChar.
// NOTE(review): the body is empty in this view, so the set of characters
// treated as small kana letters is not shown — confirm against the full
// source. As written, a value-returning function with no return statement is
// undefined behavior if called.
bool IsSmallKanaLetter(UChar character) {}

// File-local helper mapping a single UChar to a VoicedSoundMarkType.
// NOTE(review): the body is empty in this view. Presumably it reports which
// voiced sound mark, if any, is already composed into |character| — confirm.
// As written, a value-returning function with no return statement is undefined
// behavior if called.
static inline VoicedSoundMarkType ComposedVoicedSoundMark(UChar character) {}

// File-local predicate over a single UChar.
// NOTE(review): the body is empty in this view, so the code points treated as
// combining voiced sound marks are not shown — confirm against the full
// source. As written, a value-returning function with no return statement is
// undefined behavior if called.
static inline bool IsCombiningVoicedSoundMark(UChar character) {}

// Predicate over a whole string |pattern|.
// NOTE(review): the body is empty in this view. Judging by the name it
// presumably returns true when any character of |pattern| satisfies
// IsKanaLetter() — confirm. As written, a value-returning function with no
// return statement is undefined behavior if called.
bool ContainsKanaLetters(const String& pattern) {}

// Takes |length| UChars starting at |characters| and a caller-provided
// |buffer| passed by non-const reference.
// NOTE(review): the body is empty in this view, so as shown |buffer| is left
// untouched. Presumably |buffer| receives the NFC-normalized form of the
// input, computed via ICU's Normalizer2 (this file includes
// <unicode/normalizer2.h>) — confirm against the full source.
void NormalizeCharactersIntoNFCForm(const UChar* characters,
                                    unsigned length,
                                    Vector<UChar>& buffer) {}

// This function returns kNotFound if |first| and |second| contain different
// Kana letters. If |first| and |second| contain the same Kana letter, the
// function returns the offset in characters from |first|.
// Pointers to both strings advance simultaneously, so it is possible to use
// a single offset value.
// NOTE(review): the body is empty in this view, so the comparison itself is
// not shown; |first_end| and |second_end| are presumably one-past-the-end
// bounds for the two inputs — confirm. As written, a value-returning function
// with no return statement is undefined behavior if called.
static inline size_t CompareKanaLetterAndComposedVoicedSoundMarks(
    const UChar* first,
    const UChar* first_end,
    const UChar* second,
    const UChar* second_end) {}

// Takes two UChar buffers, each given as a pointer plus a length, and returns
// a bool.
// NOTE(review): the body is empty in this view, so the condition under which
// this returns true is not shown. Presumably it is part of the kana
// workaround and compares only the kana letters of the two strings — confirm
// against the full source. As written, a value-returning function with no
// return statement is undefined behavior if called.
bool CheckOnlyKanaLettersInStrings(const UChar* first_data,
                                   unsigned first_length,
                                   const UChar* second_data,
                                   unsigned second_length) {}

// Takes two UChar buffers, each given as a pointer plus a length, and returns
// a bool.
// NOTE(review): the body is empty in this view, so the equality criterion is
// not shown. Presumably it is part of the kana workaround and treats small
// versus regular kana and differing voiced sound marks as unequal — confirm
// against the full source. As written, a value-returning function with no
// return statement is undefined behavior if called.
bool CheckKanaStringsEqual(const UChar* first_data,
                           unsigned first_length,
                           const UChar* second_data,
                           unsigned second_length) {}

}  // namespace blink