/* * Copyright (C) 2007 Apple Computer, Inc. * * Portions are Copyright (C) 1998 Netscape Communications Corporation. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * Alternatively, the contents of this file may be used under the terms * of either the Mozilla Public License Version 1.1, found at * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html * (the "GPL"), in which case the provisions of the MPL or the GPL are * applicable instead of those above. If you wish to allow use of your * version of this file only under the terms of one of those two * licenses (the MPL or the GPL) and not to allow others to use your * version of this file under the LGPL, indicate your decision by * deletingthe provisions above and replace them with the notice and * other provisions required by the MPL or the GPL, as the case may be. * If you do not delete the provisions above, a recipient may use your * version of this file under any of the LGPL, the MPL or the GPL. */ #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/351564777): Remove this and convert code to safer constructs. 
#pragma allow_unsafe_buffers #endif #include "third_party/blink/renderer/platform/text/unicode_range.h" namespace blink { /********************************************************************** * Unicode subranges as defined in Unicode 3.0 * x-western, x-central-euro, tr, x-baltic -> latin * 0000 - 036f * 1e00 - 1eff * 2000 - 206f (general punctuation) * 20a0 - 20cf (currency symbols) * 2100 - 214f (letterlike symbols) * 2150 - 218f (Number Forms) * el -> greek * 0370 - 03ff * 1f00 - 1fff * x-cyrillic -> cyrillic * 0400 - 04ff * he -> hebrew * 0590 - 05ff * ar -> arabic * 0600 - 06ff * fb50 - fdff (arabic presentation forms) * fe70 - feff (arabic presentation forms b) * th -> thai * 0e00 - 0e7f * ko -> korean * ac00 - d7af (hangul Syllables) * 1100 - 11ff (jamo) * 3130 - 318f (hangul compatibility jamo) * ja * 3040 - 309f (hiragana) * 30a0 - 30ff (katakana) * zh-CN * zh-TW * * CJK * 3100 - 312f (bopomofo) * 31a0 - 31bf (bopomofo extended) * 3000 - 303f (CJK Symbols and Punctuation) * 2e80 - 2eff (CJK radicals supplement) * 2f00 - 2fdf (Kangxi Radicals) * 2ff0 - 2fff (Ideographic Description Characters) * 3190 - 319f (kanbun) * 3200 - 32ff (Enclosed CJK letters and Months) * 3300 - 33ff (CJK compatibility) * 3400 - 4dbf (CJK Unified Ideographs Extension A) * 4e00 - 9faf (CJK Unified Ideographs) * f900 - fa5f (CJK Compatibility Ideographs) * fe30 - fe4f (CJK compatibility Forms) * ff00 - ffef (halfwidth and fullwidth forms) * * Armenian * 0530 - 058f * Syriac * 0700 - 074f * Thaana * 0780 - 07bf * Devanagari * 0900 - 097f * Bengali * 0980 - 09ff * Gurmukhi * 0a00 - 0a7f * Gujarati * 0a80 - 0aff * Oriya * 0b00 - 0b7f * Tamil * 0b80 - 0bff * Telugu * 0c00 - 0c7f * Kannada * 0c80 - 0cff * Malayalam * 0d00 - 0d7f * Sinhala * 0d80 - 0def * Lao * 0e80 - 0eff * Tibetan * 0f00 - 0fbf * Myanmar * 1000 - 109f * Georgian * 10a0 - 10ff * Ethiopic * 1200 - 137f * Cherokee * 13a0 - 13ff * Canadian Aboriginal Syllabics * 1400 - 167f * Ogham * 1680 - 169f * Runic * 16a0 - 16ff * Khmer 
* 1780 - 17ff * Mongolian * 1800 - 18af * Misc - superscripts and subscripts * 2070 - 209f * Misc - Combining Diacritical Marks for Symbols * 20d0 - 20ff * Misc - Arrows * 2190 - 21ff * Misc - Mathematical Operators * 2200 - 22ff * Misc - Miscellaneous Technical * 2300 - 23ff * Misc - Control Pictures * 2400 - 243f * Misc - Optical character recognition * 2440 - 2450 * Misc - Enclosed Alphanumerics * 2460 - 24ff * Misc - Box Drawing * 2500 - 257f * Misc - Block Elements * 2580 - 259f * Misc - Geometric Shapes * 25a0 - 25ff * Misc - Miscellaneous Symbols * 2600 - 267f * Misc - Dingbats * 2700 - 27bf * Misc - Braille Patterns * 2800 - 28ff * Yi Syllables * a000 - a48f * Yi radicals * a490 - a4cf * Alphabetic Presentation Forms * fb00 - fb4f * Misc - Combining half Marks * fe20 - fe2f * Misc - small form variants * fe50 - fe6f * Misc - Specials * fff0 - ffff *********************************************************************/ static const unsigned kCNumSubTables = …; static const unsigned kCSubTableSize = …; static const unsigned char kGUnicodeSubrangeTable[kCNumSubTables][kCSubTableSize] = …; // Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80) // code points so that the number of entries in the tertiary range // table for that range is obtained by dividing (0x1700 - 0x0700) by 128. // Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal // syllabaries take multiple chunks and Ogham and Runic share a single chunk. static const unsigned kCTertiaryTableSize = …; static const unsigned char kGUnicodeTertiaryRangeTable[kCTertiaryTableSize] = …; // A two level index is almost enough for locating a range, with the // exception of u03xx and u05xx. Since we don't really care about range for // combining diacritical marks in our font application, they are // not discriminated further. Future adoption of this method for other use // should be aware of this limitation. The implementation can be extended if // there is such a need. 
// For Indic, Southeast Asian scripts and some other scripts between // U+0700 and U+16FF, it's extended to the third level. unsigned FindCharUnicodeRange(UChar32 ch) { … } } // namespace blink