chromium/third_party/blink/renderer/platform/fonts/script_run_iterator_test.cc

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/351564777): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "third_party/blink/renderer/platform/fonts/script_run_iterator.h"

#include "base/logging.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
#include "third_party/blink/renderer/platform/wtf/threading.h"

namespace blink {

// Record type named for a test input run.
// NOTE(review): no members are declared in this view — confirm the intended
// fields before relying on this type.
struct ScriptTestRun {};

// Record type named for an expected output run.
// NOTE(review): no members are declared in this view — confirm the intended
// fields.
struct ScriptExpectedRun {};

// Stream insertion for ScriptExpectedRun so a run can be sent to an
// std::ostream (e.g. in failure messages).
//
// ScriptExpectedRun declares no fields, so nothing is written; the stream is
// returned unchanged so that insertions can be chained. The explicit return
// matters: flowing off the end of a non-void function is undefined behaviour.
std::ostream& operator<<(std::ostream& output, const ScriptExpectedRun& run) {
  return output;
}

// ScriptData subclass used as a mock. Its static member kTable is defined
// out-of-line later in this file.
// NOTE(review): no members are declared in this view — confirm which
// ScriptData methods are overridden.
class MockScriptData : public ScriptData {};

// Integer constants named after Latin, Han and Greek, in "2" and "3" flavours,
// followed by the out-of-line definition of MockScriptData::kTable.
// NOTE(review): the initializers are not visible in this view — confirm what
// the 2/3 suffixes encode and how kTable uses these constants.
static const int kLatin2 =;
static const int kHan2 =;
static const int kGreek2 =;
static const int kLatin3 =;
static const int kHan3 =;
static const int kGreek3 =;
const int MockScriptData::kTable[] =;

// GoogleTest fixture for the TEST_F(ScriptRunIteratorTest, ...) cases in this
// file. Also the base class of JapaneseMixedScriptTest.
class ScriptRunIteratorTest : public testing::Test {};

// Empty-input case, judging by the test name.
// NOTE(review): expectations are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, Empty) {}

// Some of our compilers cannot initialize a vector from an array yet.
// Variadic helper macro. NOTE(review): the replacement list is empty in this
// view — presumably it declares the vector of runs from its arguments; confirm.
#define DECLARE_SCRIPT_RUNSVECTOR(...)

// Variadic helper macro. NOTE(review): the replacement list is empty in this
// view — presumably it checks the given runs against the real script data;
// confirm.
#define CHECK_SCRIPT_RUNS(...)

// Variadic helper macro. NOTE(review): the replacement list is empty in this
// view — presumably the MockScriptData counterpart of CHECK_SCRIPT_RUNS;
// confirm.
#define CHECK_MOCK_SCRIPT_RUNS(...)

// Whitespace case, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, Whitespace) {}

// Common-script case, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, Common) {}

// Combining-circle case, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CombiningCircle) {}

// Latin-text case, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, Latin) {}

// Chinese-text case, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, Chinese) {}

// Parameter type of JapaneseMixedScriptTest, declared together with the
// japanese_mixed_scripts array of such parameters.
// NOTE(review): neither the struct members nor the array contents are visible
// in this view — confirm.
struct JapaneseMixedScript {} japanese_mixed_scripts[] =;

// Value-parameterized fixture: reuses ScriptRunIteratorTest and receives a
// JapaneseMixedScript as its test parameter.
class JapaneseMixedScriptTest
    : public ScriptRunIteratorTest,
      public testing::WithParamInterface<JapaneseMixedScript> {};

// Instantiates a value-parameterized test suite.
// NOTE(review): the arguments are not visible in this view — presumably
// JapaneseMixedScriptTest over japanese_mixed_scripts; confirm.
INSTANTIATE_TEST_SUITE_P();

// Parameterized case, run once per JapaneseMixedScript parameter.
// NOTE(review): the checks performed are not visible here — confirm.
TEST_P(JapaneseMixedScriptTest, Data) {}

// A close bracket that has no matching open bracket is ignored.
TEST_F(ScriptRunIteratorTest, UnbalancedParens1) {}

// An open bracket with no matching close is popped when it lies inside a
// matched bracket pair that gets closed, so it cannot match a later close
// bracket.
TEST_F(ScriptRunIteratorTest, UnbalancedParens2) {}

// The space goes with the leading script (judging by the test name, Latin
// leads and Han follows).
TEST_F(ScriptRunIteratorTest, LatinHan) {}

// The space goes with the leading script (judging by the test name, Han leads
// and Latin follows).
TEST_F(ScriptRunIteratorTest, HanLatin) {}

// Empty pair of parentheses, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, ParenEmptyParen) {}

// Chinese text enclosed in parentheses, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, ParenChineseParen) {}

// Latin text enclosed in parentheses, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, ParenLatinParen) {}

// The open paren takes the leading script.
TEST_F(ScriptRunIteratorTest, LatinParenChineseParen) {}

// When there is no leading script, the open paren takes the first trailing
// script.
TEST_F(ScriptRunIteratorTest, ParenChineseParenLatin) {}

// Leading common characters and the open paren take the first trailing script.
// TODO(dougfelt): we don't do quote matching, but probably should figure out
// something better than doing nothing.
TEST_F(ScriptRunIteratorTest, QuoteParenChineseParenLatinQuote) {}

// CJK consecutive-parentheses scenario, variant 1 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens1) {}

// CJK consecutive-parentheses scenario, variant 2 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens2) {}

// CJK consecutive-parentheses scenario, variant 3 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens3) {}

// CJK consecutive-parentheses scenario, variant 4 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens4) {}

// CJK consecutive-parentheses scenario, variant 5 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens5) {}

// CJK consecutive-parentheses scenario, variant 6 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens6) {}

// CJK consecutive-parentheses scenario, variant 7 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens7) {}

// CJK consecutive-parentheses scenario, variant 8 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens8) {}

// CJK consecutive-parentheses scenario, variant 9 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens9) {}

// CJK consecutive-parentheses scenario, variant 10 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens10) {}

// CJK consecutive-parentheses scenario involving Latin, variant 1 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParensLatin1) {}

// CJK consecutive-parentheses scenario involving Latin, variant 2 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParensLatin2) {}

// CJK consecutive-parentheses scenario involving Latin, variant 3 (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CJKConsecutiveParensLatin3) {}

// Emoji are resolved to the leading script.
TEST_F(ScriptRunIteratorTest, EmojiCommon) {}

// An unmatched close brace takes the leading context.
TEST_F(ScriptRunIteratorTest, UnmatchedClose) {}

// Up to 32 bracket pairs are matched.
TEST_F(ScriptRunIteratorTest, Match32Brackets) {}

// Only the 32 most recent bracket pairs are matched. With more than that, we
// revert to the surrounding script.
TEST_F(ScriptRunIteratorTest, Match32MostRecentBrackets) {}

// A character with multiple scripts that match both the leading and the
// trailing context takes the leading context.
TEST_F(ScriptRunIteratorTest, ExtensionsPreferLeadingContext) {}

// A character with multiple scripts that match only the trailing context takes
// the trailing context.
TEST_F(ScriptRunIteratorTest, ExtensionsMatchTrailingContext) {}

// The first established priority script is retained. <lhg><gh> produces the
// script set <gh> with g as priority: of the two priority scripts l and g,
// only g remains. Then <gh><hgl> keeps g as priority: of the two priority
// scripts g and h that remain, g was encountered first.
TEST_F(ScriptRunIteratorTest, ExtensionsRetainFirstPriorityScript) {}

// Parens can carry scripts of their own, and those can break script runs.
TEST_F(ScriptRunIteratorTest, ExtensionsParens) {}

// The close paren may be encountered before the open paren's script has been
// established. When that happens the current script set is still valid, so the
// close paren neither changes it nor breaks the run.
TEST_F(ScriptRunIteratorTest, ExtensionsParens2) {}

// A common-script character with a single script extension should be treated
// as common, but with the extended script as a default. If anything other than
// common is encountered, that takes priority. If other common-script
// characters with a single extension are encountered, the current priority
// remains.
TEST_F(ScriptRunIteratorTest, CommonWithPriority) {}

// Second common-with-priority scenario (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CommonWithPriority2) {}

// Third common-with-priority scenario (by name).
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CommonWithPriority3) {}

// Udatta U+0951 (\xE0\xA5\x91) is inherited, with LATIN, DEVANAGARI, BENGALI
// and other Indic scripts. Because it has LATIN, and the dotted circle U+25CC
// (\xE2\x97\x8C) is COMMON and has already adopted the preceding LATIN, the
// Udatta gets LATIN as well. This is the standard behavior.
TEST_F(ScriptRunIteratorTest, LatinDottedCircleUdatta) {}

// In this situation, Udatta U+0951 (\xE0\xA5\x91) doesn't share a script with
// the value inherited by the dotted circle U+25CC (\xE2\x97\x8C). It captures
// the preceding dotted circle and breaks it out of the run it would normally
// have been in. U+0951 is used in multiple scripts (DEVA, BENG, LATN, etc.)
// and has multiple values for the Script_Extensions property. At the moment,
// getScripts() treats the script with the lowest script code as the 'true'
// primary, and BENG comes before DEVA in the script enum, so we get BENGALI.
// Taking the Unicode block into account and returning DEVANAGARI would be
// slightly better.
TEST_F(ScriptRunIteratorTest, HanDottedCircleUdatta) {}

// Tatweel is \xD9\x80 (Lm), Fathatan is \xD9\x8B (Mn). The script of Tatweel
// is common, that of Fathatan is inherited. The script extensions for Fathatan
// are Arabic and Syriac. The Syriac script is 34 in ICU, Arabic is 2, so the
// preferred script for Fathatan is Arabic according to Behdad's heuristic.
// This is exactly analogous to the Udatta tests above, except that Tatweel is
// Lm. But we don't take properties into account, only scripts.
TEST_F(ScriptRunIteratorTest, LatinTatweelFathatan) {}

// Another case where, if the mark accepts a script that was inherited by the
// preceding common-script character, they both continue in that script.
// SYRIAC LETTER NUN \xDC\xA2
// ARABIC TATWEEL \xD9\x80
// ARABIC FATHATAN \xD9\x8B
TEST_F(ScriptRunIteratorTest, SyriacTatweelFathatan) {}

// The Udatta U+0951 (\xE0\xA5\x91) is inherited, so it shares a run with
// anything that is not common.
TEST_F(ScriptRunIteratorTest, HanUdatta) {}

// The Udatta U+0951 (\xE0\xA5\x91) is inherited, and will capture the space
// and turn it into Bengali because SCRIPT_BENGALI is 4 and SCRIPT_DEVANAGARI
// is 10. See TODO comment for |getScripts| and HanDottedCircleUdatta.
TEST_F(ScriptRunIteratorTest, HanSpaceUdatta) {}

// Corresponds to one test in RunSegmenter, where the orientation of the space
// character is sideways in vertical.
TEST_F(ScriptRunIteratorTest, Hangul) {}

// Corresponds to one test in RunSegmenter, which checks that the punctuation
// characters mixed in are actually sideways in vertical. The ScriptIterator
// should report one run, whereas the RunSegmenter should report three, with
// the middle one rotated sideways.
TEST_F(ScriptRunIteratorTest, HiraganaMixedPunctuation) {}

// Make sure the mock code works too.
// NOTE(review): presumably driven by MockScriptData rather than the real
// script data — confirm.
TEST_F(ScriptRunIteratorTest, MockHanInheritedGL) {}

// Mock-data scenario named for Han, common, inherited and "GL" inputs.
// NOTE(review): the specific input is not visible here — confirm.
TEST_F(ScriptRunIteratorTest, MockHanCommonInheritedGL) {}

// Leading inherited characters act just like common ones, except that there
// is no preferred script.
TEST_F(ScriptRunIteratorTest, MockLeadingInherited) {}

// Leading inherited characters act just like common ones, except that there
// is no preferred script.
TEST_F(ScriptRunIteratorTest, MockLeadingInherited2) {}

// Leading inherited character followed by Han, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, LeadingInheritedHan) {}

// Second leading-inherited-then-Han scenario, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, LeadingInheritedHan2) {}

// Unusual Latin string, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, OddLatinString) {}

// Common-script characters combined with Malayalam, judging by the test name.
// NOTE(review): input and expected runs are not visible here — confirm.
TEST_F(ScriptRunIteratorTest, CommonMalayalam) {}

// GoogleTest fixture for the TEST_F(ScriptRunIteratorICUDataTest, ...) cases
// in this file.
class ScriptRunIteratorICUDataTest : public testing::Test {};

// Validates that ICU never returns more script extensions than our maximum
// expected number.
TEST_F(ScriptRunIteratorICUDataTest, ValidateICUMaxScriptExtensions) {}

// Checks that ICUScriptData returns all of a character's scripts.
// This checks only one likely character; it does not cover all cases.
TEST_F(ScriptRunIteratorICUDataTest, ICUDataGetScriptsReturnsAllExtensions) {}

// Named for the expectation that common-script characters have at most one
// script extension.
// NOTE(review): the checks performed are not visible here — confirm.
TEST_F(ScriptRunIteratorICUDataTest, CommonHaveNoMoreThanOneExtension) {}

// ZWJ is \u200D Cf (Format, other) and its script is inherited.  I'm going to
// ignore this for now, as I think it shouldn't matter which run it ends up
// in. HarfBuzz needs to be able to use it as context and shape each
// neighboring character appropriately no matter what run it got assigned to.

}  // namespace blink