spellcheck_worditerator_unittest.cc

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "components/spellcheck/renderer/spellcheck_worditerator.h"

#include <stddef.h>

#include <string>
#include <vector>

#include "base/format_macros.h"
#include "base/i18n/break_iterator.h"
#include "base/strings/string_split.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"

BreakIterator;
WordIteratorStatus;

namespace {

struct TestCase { … };

std::u16string GetRulesForLanguage(const std::string& language) { … }

WordIteratorStatus GetNextNonSkippableWord(SpellcheckWordIterator* iterator,
                                           std::u16string* word_string,
                                           size_t* word_start,
                                           size_t* word_length) { … }

}  // namespace

// Tests whether or not our SpellcheckWordIterator can extract words used by the
// specified language from a multi-language text.
TEST(SpellcheckWordIteratorTest, SplitWord) { … }

// Tests whether our SpellcheckWordIterator extracts an empty word without
// getting stuck in an infinite loop when inputting a Khmer text. (This is a
// regression test for Issue 46278.)
TEST(SpellcheckWordIteratorTest, RuleSetConsistency) { … }

// Vertify our SpellcheckWordIterator can treat ASCII numbers as word characters
// on LTR languages. On the other hand, it should not treat ASCII numbers as
// word characters on RTL languages because they change the text direction from
// RTL to LTR.
TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { … }

// Verify SpellcheckWordIterator treats typographical apostrophe as a part of
// the word.
TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) { … }

TEST(SpellcheckWordIteratorTest, Initialization) { … }

// This test uses English rules to check that different character set
// combinations properly find word breaks and skippable characters.
TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) { … }

// This test uses Russian rules to check that different character set
// combinations properly find word breaks and skippable characters.
TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) { … }

// This test uses Khmer rules to check that different character set combinations
// properly find word breaks and skippable characters. Khmer does not use spaces
// between words and uses a dictionary to determine word breaks instead.
TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) { … }

TEST(SpellcheckCharAttributeTest, IsTextInSameScript) { … }
chromium/components/spellcheck/renderer/spellcheck_worditerator_unittest.cc