// Copyright 2015 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/351564777): Remove this and convert code to safer constructs. #pragma allow_unsafe_buffers #endif #include "third_party/blink/renderer/platform/fonts/script_run_iterator.h" #include "base/logging.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/blink/renderer/platform/wtf/text/string_builder.h" #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h" #include "third_party/blink/renderer/platform/wtf/threading.h" namespace blink { struct ScriptTestRun { … }; struct ScriptExpectedRun { … }; std::ostream& operator<<(std::ostream& output, const ScriptExpectedRun& run) { … } class MockScriptData : public ScriptData { … }; static const int kLatin2 = …; static const int kHan2 = …; static const int kGreek2 = …; static const int kLatin3 = …; static const int kHan3 = …; static const int kGreek3 = …; const int MockScriptData::kTable[] = …; class ScriptRunIteratorTest : public testing::Test { … }; TEST_F(ScriptRunIteratorTest, Empty) { … } // Some of our compilers cannot initialize a vector from an array yet. #define DECLARE_SCRIPT_RUNSVECTOR(...) … #define CHECK_SCRIPT_RUNS(...) … #define CHECK_MOCK_SCRIPT_RUNS(...) 
… TEST_F(ScriptRunIteratorTest, Whitespace) { … } TEST_F(ScriptRunIteratorTest, Common) { … } TEST_F(ScriptRunIteratorTest, CombiningCircle) { … } TEST_F(ScriptRunIteratorTest, Latin) { … } TEST_F(ScriptRunIteratorTest, Chinese) { … } struct JapaneseMixedScript { … } japanese_mixed_scripts[] = …; class JapaneseMixedScriptTest : public ScriptRunIteratorTest, public testing::WithParamInterface<JapaneseMixedScript> { … }; INSTANTIATE_TEST_SUITE_P(…); TEST_P(JapaneseMixedScriptTest, Data) { … } // Close bracket without matching open is ignored TEST_F(ScriptRunIteratorTest, UnbalancedParens1) { … } // Open bracket without matching close is popped when inside // matching close brackets, so doesn't match later close. TEST_F(ScriptRunIteratorTest, UnbalancedParens2) { … } // space goes with leading script TEST_F(ScriptRunIteratorTest, LatinHan) { … } // space goes with leading script TEST_F(ScriptRunIteratorTest, HanLatin) { … } TEST_F(ScriptRunIteratorTest, ParenEmptyParen) { … } TEST_F(ScriptRunIteratorTest, ParenChineseParen) { … } TEST_F(ScriptRunIteratorTest, ParenLatinParen) { … } // open paren gets leading script TEST_F(ScriptRunIteratorTest, LatinParenChineseParen) { … } // open paren gets first trailing script if no leading script TEST_F(ScriptRunIteratorTest, ParenChineseParenLatin) { … } // leading common and open paren get first trailing script. // TODO(dougfelt): we don't do quote matching, but probably should figure out // something better than doing nothing. 
TEST_F(ScriptRunIteratorTest, QuoteParenChineseParenLatinQuote) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens1) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens2) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens3) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens4) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens5) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens6) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens7) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens8) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens9) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParens10) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParensLatin1) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParensLatin2) { … } TEST_F(ScriptRunIteratorTest, CJKConsecutiveParensLatin3) { … } // Emoji are resolved to the leading script. TEST_F(ScriptRunIteratorTest, EmojiCommon) { … } // Unmatched close brace gets leading context TEST_F(ScriptRunIteratorTest, UnmatchedClose) { … } // Match up to 32 bracket pairs TEST_F(ScriptRunIteratorTest, Match32Brackets) { … } // Matches 32 most recent bracket pairs. More than that, and we revert to // surrounding script. TEST_F(ScriptRunIteratorTest, Match32MostRecentBrackets) { … } // A char with multiple scripts that match both leading and trailing context // gets the leading context. TEST_F(ScriptRunIteratorTest, ExtensionsPreferLeadingContext) { … } // A char with multiple scripts that only match trailing context gets the // trailing context. TEST_F(ScriptRunIteratorTest, ExtensionsMatchTrailingContext) { … } // Retain first established priority script. <lhg><gh> produce the script <gh> // with g as priority, because of the two priority scripts l and g, only g // remains. Then <gh><hgl> retains g as priority, because of the two priority // scripts g and h that remain, g was encountered first. 
TEST_F(ScriptRunIteratorTest, ExtensionsRetainFirstPriorityScript) { … } // Parens can have scripts that break script runs. TEST_F(ScriptRunIteratorTest, ExtensionsParens) { … } // The close paren might be encountered before we've established the open // paren's script, but when this is the case the current set is still valid, so // this doesn't affect it nor break the run. TEST_F(ScriptRunIteratorTest, ExtensionsParens2) { … } // A common script with a single extension should be treated as common, but // with the extended script as a default. If we encounter anything other than // common, that takes priority. If we encounter other common scripts with a // single extension, the current priority remains. TEST_F(ScriptRunIteratorTest, CommonWithPriority) { … } TEST_F(ScriptRunIteratorTest, CommonWithPriority2) { … } TEST_F(ScriptRunIteratorTest, CommonWithPriority3) { … } // UDatta (\xE0\xA5\x91) is inherited with LATIN, DEVANAGARI, BENGALI and // other Indic scripts. Since it has LATIN, and the // dotted circle U+25CC (\xE2\x97\x8C) is COMMON and has adopted the // preceding LATIN, it gets the LATIN. This is standard. TEST_F(ScriptRunIteratorTest, LatinDottedCircleUdatta) { … } // In this situation, UDatta U+0951 (\xE0\xA5\x91) doesn't share a script // with the value inherited by the dotted circle U+25CC (\xE2\x97\x8C). // It captures the preceding dotted circle and breaks it from the run it would // normally have been in. U+0951 is used in multiple scripts (DEVA, BENG, LATN, // etc) and has multiple values for Script_Extension property. At the moment, // getScripts() treats the script with the lowest script code as 'true' primary, // and BENG comes before DEVA in the script enum so that we get BENGALI. // Taking into account a Unicode block and returning DEVANAGARI would be // slightly better. TEST_F(ScriptRunIteratorTest, HanDottedCircleUdatta) { … } // Tatweel is \xD9\x80 Lm, Fathatan is \xD9\x8B Mn. 
The script of tatweel is // common, that of Fathatan is inherited. The script extensions for Fathatan // are Arabic and Syriac. The Syriac script is 34 in ICU, Arabic is 2. So the // preferred script for Fathatan is Arabic, according to Behdad's // heuristic. This is exactly analogous to the Udatta tests above, except // Tatweel is Lm. But we don't take properties into account, only scripts. TEST_F(ScriptRunIteratorTest, LatinTatweelFathatan) { … } // Another case where if the mark accepts a script that was inherited by the // preceding common-script character, they both continue in that script. // SYRIAC LETTER NUN \xDC\xA2 // ARABIC TATWEEL \xD9\x80 // ARABIC FATHATAN \xD9\x8B TEST_F(ScriptRunIteratorTest, SyriacTatweelFathatan) { … } // The Udatta (\xE0\xA5\x91) is inherited, so will share runs with anything that // is not common. TEST_F(ScriptRunIteratorTest, HanUdatta) { … } // The Udatta U+0951 (\xE0\xA5\x91) is inherited, and will capture the space // and turn it into Bengali because SCRIPT_BENGALI is 4 and SCRIPT_DEVANAGARI // is 10. See TODO comment for |getScripts| and HanDottedCircleUdatta. TEST_F(ScriptRunIteratorTest, HanSpaceUdatta) { … } // Corresponds to one test in RunSegmenter, where orientation of the // space character is sideways in vertical. TEST_F(ScriptRunIteratorTest, Hangul) { … } // Corresponds to one test in RunSegmenter, which tests that the punctuation // characters mixed in are actually sideways in vertical. The ScriptIterator // should report one run, but the RunSegmenter should report three, with the // middle one rotated sideways. TEST_F(ScriptRunIteratorTest, HiraganaMixedPunctuation) { … } // Make sure Mock code works too. TEST_F(ScriptRunIteratorTest, MockHanInheritedGL) { … } TEST_F(ScriptRunIteratorTest, MockHanCommonInheritedGL) { … } // Leading inherited just act like common, except there's no preferred script. 
TEST_F(ScriptRunIteratorTest, MockLeadingInherited) { … } // Leading inherited just act like common, except there's no preferred script. TEST_F(ScriptRunIteratorTest, MockLeadingInherited2) { … } TEST_F(ScriptRunIteratorTest, LeadingInheritedHan) { … } TEST_F(ScriptRunIteratorTest, LeadingInheritedHan2) { … } TEST_F(ScriptRunIteratorTest, OddLatinString) { … } TEST_F(ScriptRunIteratorTest, CommonMalayalam) { … } class ScriptRunIteratorICUDataTest : public testing::Test { … }; // Validate that ICU never returns more than our maximum expected number of // script extensions. TEST_F(ScriptRunIteratorICUDataTest, ValidateICUMaxScriptExtensions) { … } // Check that ICUScriptData returns all of a character's scripts. // This only checks one likely character, but doesn't check all cases. TEST_F(ScriptRunIteratorICUDataTest, ICUDataGetScriptsReturnsAllExtensions) { … } TEST_F(ScriptRunIteratorICUDataTest, CommonHaveNoMoreThanOneExtension) { … } // ZWJ is \u200D Cf (Format, other) and its script is inherited. I'm going to // ignore this for now, as I think it shouldn't matter which run it ends up // in. HarfBuzz needs to be able to use it as context and shape each // neighboring character appropriately no matter what run it got assigned to. } // namespace blink