# Generates Rust tables that define Unicode "script classes" for the purposes
# of autohinting.
#
# For performance, we want to link various pieces of data by index. For ease of
# modification and to avoid errors, we want to define those links symbolically
# by name. Thus, this script exists which converts symbolic references to
# indices when generating code.
#
# The bottom of this file contains the Rust generation code.
#
# In relation to FreeType, this combines the AF_ScriptClass,
# AF_Script_UniRangeRec and AF_BlueStringset.
# Script definitions: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afscript.h
# Unicode ranges: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afranges.c
# Blues: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afblue.h
SCRIPT_CLASSES = [
{
"name": "Adlam",
"tag": "ADLM",
"hint_top_to_bottom": False,
"std_chars": ['𞤌', '𞤮'], # 𞤌 𞤮
"base_ranges": [
(0x1E900, 0x1E95F), # Adlam
],
"non_base_ranges": [
(0x1D944, 0x1E94A),
],
"blues": [
(['𞤌', '𞤅', '𞤈', '𞤏', '𞤔', '𞤚'], "LATIN_TOP"),
(['𞤂', '𞤖'], "0"),
(['𞤬', '𞤮', '𞤻', '𞤼', '𞤾'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['𞤤', '𞤨', '𞤩', '𞤭', '𞤴', '𞤸', '𞤺', '𞥀'], "0"),
],
},
{
"name": "Arabic",
"tag": "ARAB",
"hint_top_to_bottom": False,
"std_chars": ['ل', 'ح', 'ـ'], # ل ح ـ
"base_ranges": [
(0x0600, 0x06FF), # Arabic
(0x0750, 0x07FF), # Arabic Supplement
(0x08A0, 0x08FF), # Arabic Extended-A
(0xFB50, 0xFDFF), # Arabic Presentation Forms-A
(0xFE70, 0xFEFF), # Arabic Presentation Forms-B
(0x1EE00, 0x1EEFF), # Arabic Mathematical Alphabetic Symbols
],
"non_base_ranges": [
(0x0600, 0x0605),
(0x0610, 0x061A),
(0x064B, 0x065F),
(0x0670, 0x0670),
(0x06D6, 0x06DC),
(0x06DF, 0x06E4),
(0x06E7, 0x06E8),
(0x06EA, 0x06ED),
(0x08D4, 0x08E1),
(0x08D3, 0x08FF),
(0xFBB2, 0xFBC1),
(0xFE70, 0xFE70),
(0xFE72, 0xFE72),
(0xFE74, 0xFE74),
(0xFE76, 0xFE76),
(0xFE78, 0xFE78),
(0xFE7A, 0xFE7A),
(0xFE7C, 0xFE7C),
(0xFE7E, 0xFE7E),
],
"blues": [
(['ا', 'إ', 'ل', 'ك', 'ط', 'ظ'], "LATIN_TOP"),
(['ت', 'ث', 'ط', 'ظ', 'ك'], "0"),
(['ـ'], "LATIN_NEUTRAL"),
],
},
{
"name": "Armenian",
"tag": "ARMN",
"hint_top_to_bottom": False,
"std_chars": ['ս', 'Ս'], # ս Ս
"base_ranges": [
(0x0530, 0x058F), # Armenian
(0xFB13, 0xFB17), # Alphab. Present. Forms (Armenian)
],
"non_base_ranges": [
(0x0559, 0x055F),
],
"blues": [
(['Ա', 'Մ', 'Ւ', 'Ս', 'Բ', 'Գ', 'Դ', 'Օ'], "LATIN_TOP"),
(['Ւ', 'Ո', 'Դ', 'Ճ', 'Շ', 'Ս', 'Տ', 'Օ'], "0"),
(['ե', 'է', 'ի', 'մ', 'վ', 'ֆ', 'ճ'], "LATIN_TOP"),
(['ա', 'յ', 'ւ', 'ս', 'գ', 'շ', 'ր', 'օ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['հ', 'ո', 'ճ', 'ա', 'ե', 'ծ', 'ս', 'օ'], "0"),
(['բ', 'ը', 'ի', 'լ', 'ղ', 'պ', 'փ', 'ց'], "0"),
],
},
{
"name": "Avestan",
"tag": "AVST",
"hint_top_to_bottom": False,
"std_chars": ['𐬚'], # 𐬚
"base_ranges": [
(0x10B00, 0x10B3F), # Avestan
],
"non_base_ranges": [
(0x10B39, 0x10B3F),
],
"blues": [
(['𐬀', '𐬁', '𐬐', '𐬛'], "LATIN_TOP"),
(['𐬀', '𐬁'], "0"),
],
},
{
"name": "Bamum",
"tag": "BAMU",
"hint_top_to_bottom": False,
"std_chars": ['ꛁ', 'ꛯ'], # ꛁ ꛯ
"base_ranges": [
(0xA6A0, 0xA6FF), # Bamum
# https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afranges.c#L139
# "The characters in the Bamum supplement are pictograms, not (directly) related to the syllabic Bamum script"
# (0x16800, 0x16A3F), # Bamum Supplement
],
"non_base_ranges": [
(0xA6F0, 0xA6F1),
],
"blues": [
(['ꚧ', 'ꚨ', 'ꛛ', 'ꛉ', 'ꛁ', 'ꛈ', 'ꛫ', 'ꛯ'], "LATIN_TOP"),
(['ꚭ', 'ꚳ', 'ꚶ', 'ꛬ', 'ꚢ', 'ꚽ', 'ꛯ', '꛲'], "0"),
],
},
{
"name": "Bengali",
"tag": "BENG",
"hint_top_to_bottom": True,
"std_chars": ['০', '৪'], # ০ ৪
"base_ranges": [
(0x0980, 0x09FF), # Bengali
],
"non_base_ranges": [
(0x0981, 0x0981),
(0x09BC, 0x09BC),
(0x09C1, 0x09C4),
(0x09CD, 0x09CD),
(0x09E2, 0x09E3),
(0x09FE, 0x09FE),
],
"blues": [
(['ই', 'ট', 'ঠ', 'ি', 'ী', 'ৈ', 'ৗ'], "LATIN_TOP"),
(['ও', 'এ', 'ড', 'ত', 'ন', 'ব', 'ল', 'ক'], "LATIN_TOP"),
(['অ', 'ড', 'ত', 'ন', 'ব', 'ভ', 'ল', 'ক'], "LATIN_TOP | LATIN_NEUTRAL | LATIN_X_HEIGHT"),
(['অ', 'ড', 'ত', 'ন', 'ব', 'ভ', 'ল', 'ক'], "0"),
],
},
{
"name": "Buhid",
"tag": "BUHD",
"hint_top_to_bottom": False,
"std_chars": ['ᝋ', 'ᝏ'], # ᝋ ᝏ
"base_ranges": [
(0x1740, 0x175F), # Buhid
],
"non_base_ranges": [
(0x1752, 0x1753),
],
"blues": [
(['ᝐ', 'ᝈ'], "LATIN_TOP"),
(['ᝅ', 'ᝊ', 'ᝎ'], "LATIN_TOP"),
(['ᝂ', 'ᝃ', 'ᝉ', 'ᝌ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ᝀ', 'ᝃ', 'ᝆ', 'ᝉ', 'ᝋ', 'ᝏ', 'ᝑ'], "0"),
],
},
{
"name": "Chakma",
"tag": "CAKM",
"hint_top_to_bottom": False,
"std_chars": ['𑄤', '𑄉', '𑄛'], # 𑄤 𑄉 𑄛
"base_ranges": [
(0x11100, 0x1114F), # Chakma
],
"non_base_ranges": [
(0x11100, 0x11102),
(0x11127, 0x11134),
(0x11146, 0x11146),
],
"blues": [
(['𑄃', '𑄅', '𑄉', '𑄙', '𑄗'], "LATIN_TOP"),
(['𑄅', '𑄛', '𑄝', '𑄗', '𑄓'], "0"),
(['𑄖', '𑄘', '𑄙', '𑄤', '𑄥'], "0"),
],
},
{
"name": "Canadian Syllabics",
"tag": "CANS",
"hint_top_to_bottom": False,
"std_chars": ['ᑌ', 'ᓚ'], # ᑌ ᓚ
"base_ranges": [
(0x1400, 0x167F), # Unified Canadian Aboriginal Syllabics
(0x18B0, 0x18FF), # Unified Canadian Aboriginal Syllabics Extended
],
"non_base_ranges": [
],
"blues": [
(['ᗜ', 'ᖴ', 'ᐁ', 'ᒣ', 'ᑫ', 'ᑎ', 'ᔑ', 'ᗰ'], "LATIN_TOP"),
(['ᗶ', 'ᖵ', 'ᒧ', 'ᐃ', 'ᑌ', 'ᒍ', 'ᔑ', 'ᗢ'], "0"),
(['ᓓ', 'ᓕ', 'ᓀ', 'ᓂ', 'ᓄ', 'ᕄ', 'ᕆ', 'ᘣ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ᕃ', 'ᓂ', 'ᓀ', 'ᕂ', 'ᓗ', 'ᓚ', 'ᕆ', 'ᘣ'], "0"),
(['ᐪ', 'ᙆ', 'ᣘ', 'ᐢ', 'ᒾ', 'ᣗ', 'ᔆ'], "LATIN_TOP"),
(['ᙆ', 'ᗮ', 'ᒻ', 'ᐞ', 'ᔆ', 'ᒡ', 'ᒢ', 'ᓑ'], "0"),
],
},
{
"name": "Carian",
"tag": "CARI",
"hint_top_to_bottom": False,
"std_chars": ['𐊫', '𐋉'], # 𐊫 𐋉
"base_ranges": [
(0x102A0, 0x102DF), # Carian
],
"non_base_ranges": [
],
"blues": [
(['𐊧', '𐊫', '𐊬', '𐊭', '𐊱', '𐊺', '𐊼', '𐊿'], "LATIN_TOP"),
(['𐊣', '𐊧', '𐊷', '𐋀', '𐊫', '𐊸', '𐋉'], "0"),
],
},
{
"name": "Cherokee",
"tag": "CHER",
"hint_top_to_bottom": False,
"std_chars": ['Ꭴ', 'Ꮕ', 'ꮕ'], # Ꭴ Ꮕ ꮕ
"base_ranges": [
(0x13A0, 0x13FF), # Cherokee
(0xAB70, 0xABBF), # Cherokee Supplement
],
"non_base_ranges": [
],
"blues": [
(['Ꮖ', 'Ꮋ', 'Ꭼ', 'Ꮓ', 'Ꭴ', 'Ꮳ', 'Ꭶ', 'Ꮥ'], "LATIN_TOP"),
(['Ꮖ', 'Ꮋ', 'Ꭼ', 'Ꮓ', 'Ꭴ', 'Ꮳ', 'Ꭶ', 'Ꮥ'], "0"),
(['ꮒ', 'ꮤ', 'ꮶ', 'ꭴ', 'ꭾ', 'ꮗ', 'ꮝ', 'ꮿ'], "LATIN_TOP"),
(['ꮖ', 'ꭼ', 'ꮓ', 'ꮠ', 'ꮳ', 'ꭶ', 'ꮥ', 'ꮻ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ꮖ', 'ꭼ', 'ꮓ', 'ꮠ', 'ꮳ', 'ꭶ', 'ꮥ', 'ꮻ'], "0"),
(['ᏸ', 'ꮐ', 'ꭹ', 'ꭻ'], "0"),
],
},
{
"name": "Coptic",
"tag": "COPT",
"hint_top_to_bottom": False,
"std_chars": ['Ⲟ', 'ⲟ'], # Ⲟ ⲟ
"base_ranges": [
(0x2C80, 0x2CFF), # Coptic
],
"non_base_ranges": [
(0x2CEF, 0x2CF1),
],
"blues": [
(['Ⲍ', 'Ⲏ', 'Ⲡ', 'Ⳟ', 'Ⲟ', 'Ⲑ', 'Ⲥ', 'Ⳋ'], "LATIN_TOP"),
(['Ⳑ', 'Ⳙ', 'Ⳟ', 'Ⲏ', 'Ⲟ', 'Ⲑ', 'Ⳝ', 'Ⲱ'], "0"),
(['ⲍ', 'ⲏ', 'ⲡ', 'ⳟ', 'ⲟ', 'ⲑ', 'ⲥ', 'ⳋ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ⳑ', 'ⳙ', 'ⳟ', 'ⲏ', 'ⲟ', 'ⲑ', 'ⳝ', 'Ⳓ'], "0"),
],
},
{
"name": "Cypriot",
"tag": "CPRT",
"hint_top_to_bottom": False,
"std_chars": ['𐠅', '𐠣'], # 𐠅 𐠣
"base_ranges": [
(0x10800, 0x1083F), # Cypriot
],
"non_base_ranges": [
],
"blues": [
(['𐠍', '𐠙', '𐠳', '𐠱', '𐠅', '𐠓', '𐠣', '𐠦'], "LATIN_TOP"),
(['𐠃', '𐠊', '𐠛', '𐠣', '𐠳', '𐠵', '𐠐'], "0"),
(['𐠈', '𐠏', '𐠖'], "LATIN_TOP"),
(['𐠈', '𐠏', '𐠖'], "0"),
],
},
{
"name": "Cyrillic",
"tag": "CYRL",
"hint_top_to_bottom": False,
"std_chars": ['о', 'О'], # о О
"base_ranges": [
(0x0400, 0x04FF), # Cyrillic
(0x0500, 0x052F), # Cyrillic Supplement
(0x2DE0, 0x2DFF), # Cyrillic Extended-A
(0xA640, 0xA69F), # Cyrillic Extended-B
(0x1C80, 0x1C8F), # Cyrillic Extended-C
],
"non_base_ranges": [
(0x0483, 0x0489),
(0x2DE0, 0x2DFF),
(0xA66F, 0xA67F),
(0xA69E, 0xA69F),
],
"blues": [
(['Б', 'В', 'Е', 'П', 'З', 'О', 'С', 'Э'], "LATIN_TOP"),
(['Б', 'В', 'Е', 'Ш', 'З', 'О', 'С', 'Э'], "0"),
(['х', 'п', 'н', 'ш', 'е', 'з', 'о', 'с'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['х', 'п', 'н', 'ш', 'е', 'з', 'о', 'с'], "0"),
(['р', 'у', 'ф'], "0"),
],
},
{
"name": "Devanagari",
"tag": "DEVA",
"hint_top_to_bottom": True,
"std_chars": ['ठ', 'व', 'ट'], # ठ व ट
"base_ranges": [
(0x0900, 0x093B), # Devanagari
(0x093D, 0x0950), # ... continued
(0x0953, 0x0963), # ... continued
(0x0966, 0x097F), # ... continued
(0x20B9, 0x20B9), # (new) Rupee sign
(0xA8E0, 0xA8FF), # Devanagari Extended
],
"non_base_ranges": [
(0x0900, 0x0902),
(0x093A, 0x093A),
(0x0941, 0x0948),
(0x094D, 0x094D),
(0x0953, 0x0957),
(0x0962, 0x0963),
(0xA8E0, 0xA8F1),
(0xA8FF, 0xA8FF),
],
"blues": [
(['ई', 'ऐ', 'ओ', 'औ', 'ि', 'ी', 'ो', 'ौ'], "LATIN_TOP"),
(['क', 'म', 'अ', 'आ', 'थ', 'ध', 'भ', 'श'], "LATIN_TOP"),
(['क', 'न', 'म', 'उ', 'छ', 'ट', 'ठ', 'ड'], "LATIN_TOP | LATIN_NEUTRAL | LATIN_X_HEIGHT"),
(['क', 'न', 'म', 'उ', 'छ', 'ट', 'ठ', 'ड'], "0"),
(['ु', 'ृ'], "0"),
],
},
{
"name": "Deseret",
"tag": "DSRT",
"hint_top_to_bottom": False,
"std_chars": ['𐐄', '𐐬'], # 𐐄 𐐬
"base_ranges": [
(0x10400, 0x1044F), # Deseret
],
"non_base_ranges": [
],
"blues": [
(['𐐂', '𐐄', '𐐋', '𐐗', '𐐑'], "LATIN_TOP"),
(['𐐀', '𐐂', '𐐄', '𐐗', '𐐛'], "0"),
(['𐐪', '𐐬', '𐐳', '𐐿', '𐐹'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['𐐨', '𐐪', '𐐬', '𐐿', '𐑃'], "0"),
],
},
{
"name": "Ethiopic",
"tag": "ETHI",
"hint_top_to_bottom": False,
"std_chars": ['ዐ'], # ዐ
"base_ranges": [
(0x1200, 0x137F), # Ethiopic
(0x1380, 0x139F), # Ethiopic Supplement
(0x2D80, 0x2DDF), # Ethiopic Extended
(0xAB00, 0xAB2F), # Ethiopic Extended-A
],
"non_base_ranges": [
(0x135D, 0x135F),
],
"blues": [
(['ሀ', 'ሃ', 'ዘ', 'ፐ', 'ማ', 'በ', 'ዋ', 'ዐ'], "LATIN_TOP"),
(['ለ', 'ሐ', 'በ', 'ዘ', 'ሀ', 'ሪ', 'ዐ', 'ጨ'], "0"),
],
},
{
"name": "Georgian (Mkhedruli)",
"tag": "GEOR",
"hint_top_to_bottom": False,
"std_chars": ['ი', 'ე', 'ა', 'Ჿ'], # ი ე ა Ი
"base_ranges": [
(0x10D0, 0x10FF), # Georgian (Mkhedruli)
(0x1C90, 0x1CBF), # Georgian Extended (Mtavruli)
],
"non_base_ranges": [
],
"blues": [
(['გ', 'დ', 'ე', 'ვ', 'თ', 'ი', 'ო', 'ღ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ა', 'ზ', 'მ', 'ს', 'შ', 'ძ', 'ხ', 'პ'], "0"),
(['ს', 'ხ', 'ქ', 'ზ', 'მ', 'შ', 'ჩ', 'წ'], "LATIN_TOP"),
(['ე', 'ვ', 'ჟ', 'ტ', 'უ', 'ფ', 'ქ', 'ყ'], "0"),
(['Ნ', 'Ჟ', 'Ჳ', 'Ჸ', 'Გ', 'Ე', 'Ო', 'Ჴ'], "LATIN_TOP"),
(['Ი', 'Ჲ', 'Ო', 'Ჩ', 'Მ', 'Შ', 'Ჯ', 'Ჽ'], "0"),
],
},
{
"name": "Georgian (Khutsuri)",
"tag": "GEOK",
"hint_top_to_bottom": False,
"std_chars": ['Ⴖ', 'Ⴑ', 'ⴙ'], # Ⴖ Ⴑ ⴙ
"base_ranges": [
(0x10A0, 0x10CD), # Georgian (Asomtavruli)
(0x2D00, 0x2D2D), # Georgian Supplement (Nuskhuri)
],
"non_base_ranges": [
],
"blues": [
(['Ⴑ', 'Ⴇ', 'Ⴙ', 'Ⴜ', 'Ⴄ', 'Ⴅ', 'Ⴓ', 'Ⴚ'], "LATIN_TOP"),
(['Ⴄ', 'Ⴅ', 'Ⴇ', 'Ⴈ', 'Ⴆ', 'Ⴑ', 'Ⴊ', 'Ⴋ'], "0"),
(['ⴁ', 'ⴗ', 'ⴂ', 'ⴄ', 'ⴅ', 'ⴇ', 'ⴔ', 'ⴖ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ⴈ', 'ⴌ', 'ⴖ', 'ⴎ', 'ⴃ', 'ⴆ', 'ⴋ', 'ⴢ'], "0"),
(['ⴐ', 'ⴑ', 'ⴓ', 'ⴕ', 'ⴙ', 'ⴛ', 'ⴡ', 'ⴣ'], "LATIN_TOP"),
(['ⴄ', 'ⴅ', 'ⴔ', 'ⴕ', 'ⴁ', 'ⴂ', 'ⴘ', 'ⴝ'], "0"),
],
},
{
"name": "Glagolitic",
"tag": "GLAG",
"hint_top_to_bottom": False,
"std_chars": ['Ⱅ', 'ⱅ'], # Ⱅ ⱅ
"base_ranges": [
(0x2C00, 0x2C5F), # Glagolitic
(0x1E000, 0x1E02F), # Glagolitic Supplement
],
"non_base_ranges": [
(0x1E000, 0x1E02F),
],
"blues": [
(['Ⰵ', 'Ⱄ', 'Ⱚ', 'Ⰴ', 'Ⰲ', 'Ⰺ', 'Ⱛ', 'Ⰻ'], "LATIN_TOP"),
(['Ⰵ', 'Ⰴ', 'Ⰲ', 'Ⱚ', 'Ⱎ', 'Ⱑ', 'Ⰺ', 'Ⱄ'], "0"),
(['ⰵ', 'ⱄ', 'ⱚ', 'ⰴ', 'ⰲ', 'ⰺ', 'ⱛ', 'ⰻ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ⰵ', 'ⰴ', 'ⰲ', 'ⱚ', 'ⱎ', 'ⱑ', 'ⰺ', 'ⱄ'], "0"),
],
},
{
"name": "Gothic",
"tag": "GOTH",
"hint_top_to_bottom": True,
"std_chars": ['𐌴', '𐌾', '𐍃'], # 𐌴 𐌾 𐍃
"base_ranges": [
(0x10330, 0x1034F), # Gothic
],
"non_base_ranges": [
],
"blues": [
(['𐌲', '𐌶', '𐍀', '𐍄', '𐌴', '𐍃', '𐍈', '𐌾'], "LATIN_TOP"),
(['𐌶', '𐌴', '𐍃', '𐍈'], "0"),
],
},
{
"name": "Greek",
"tag": "GREK",
"hint_top_to_bottom": False,
"std_chars": ['ο', 'Ο'], # ο Ο
"base_ranges": [
(0x0370, 0x03FF), # Greek and Coptic
(0x1F00, 0x1FFF), # Greek Extended
],
"non_base_ranges": [
(0x037A, 0x037A),
(0x0384, 0x0385),
(0x1FBD, 0x1FC1),
(0x1FCD, 0x1FCF),
(0x1FDD, 0x1FDF),
(0x1FED, 0x1FEF),
(0x1FFD, 0x1FFE),
],
"blues": [
(['Γ', 'Β', 'Ε', 'Ζ', 'Θ', 'Ο', 'Ω'], "LATIN_TOP"),
(['Β', 'Δ', 'Ζ', 'Ξ', 'Θ', 'Ο'], "0"),
(['β', 'θ', 'δ', 'ζ', 'λ', 'ξ'], "LATIN_TOP"),
(['α', 'ε', 'ι', 'ο', 'π', 'σ', 'τ', 'ω'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['α', 'ε', 'ι', 'ο', 'π', 'σ', 'τ', 'ω'], "0"),
(['β', 'γ', 'η', 'μ', 'ρ', 'φ', 'χ', 'ψ'], "0"),
],
},
{
"name": "Gujarati",
"tag": "GUJR",
"hint_top_to_bottom": False,
"std_chars": ['ટ', '૦'], # ટ ૦
"base_ranges": [
(0x0A80, 0x0AFF), # Gujarati
],
"non_base_ranges": [
(0x0A81, 0x0A82),
(0x0ABC, 0x0ABC),
(0x0AC1, 0x0AC8),
(0x0ACD, 0x0ACD),
(0x0AE2, 0x0AE3),
(0x0AFA, 0x0AFF),
],
"blues": [
(['ત', 'ન', 'ઋ', 'ઌ', 'છ', 'ટ', 'ર', '૦'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ખ', 'ગ', 'ઘ', 'ઞ', 'ઇ', 'ઈ', 'ઠ', 'જ'], "0"),
(['ઈ', 'ઊ', 'િ', 'ી', 'લ', 'શ', 'જ', 'સ'], "LATIN_TOP"),
(['ુ', 'ૃ', 'ૄ', 'ખ', 'છ', 'છ'], "0"),
(['૦', '૧', '૨', '૩', '૭'], "LATIN_TOP"),
],
},
{
"name": "Gurmukhi",
"tag": "GURU",
"hint_top_to_bottom": True,
"std_chars": ['ਠ', 'ਰ', '੦'], # ਠ ਰ ੦
"base_ranges": [
(0x0A00, 0x0A7F), # Gurmukhi
],
"non_base_ranges": [
(0x0A01, 0x0A02),
(0x0A3C, 0x0A3C),
(0x0A41, 0x0A51),
(0x0A70, 0x0A71),
(0x0A75, 0x0A75),
],
"blues": [
(['ਇ', 'ਈ', 'ਉ', 'ਏ', 'ਓ', 'ੳ', 'ਿ', 'ੀ'], "LATIN_TOP"),
(['ਕ', 'ਗ', 'ਙ', 'ਚ', 'ਜ', 'ਤ', 'ਧ', 'ਸ'], "LATIN_TOP"),
(['ਕ', 'ਗ', 'ਙ', 'ਚ', 'ਜ', 'ਤ', 'ਧ', 'ਸ'], "LATIN_TOP | LATIN_NEUTRAL | LATIN_X_HEIGHT"),
(['ਅ', 'ਏ', 'ਓ', 'ਗ', 'ਜ', 'ਠ', 'ਰ', 'ਸ'], "0"),
(['੦', '੧', '੨', '੩', '੭'], "LATIN_TOP"),
],
},
{
"name": "Hebrew",
"tag": "HEBR",
"hint_top_to_bottom": False,
"std_chars": ['ם'], # ם
"base_ranges": [
(0x0590, 0x05FF), # Hebrew
(0xFB1D, 0xFB4F), # Alphab. Present. Forms (Hebrew)
],
"non_base_ranges": [
(0x0591, 0x05BF),
(0x05C1, 0x05C2),
(0x05C4, 0x05C5),
(0x05C7, 0x05C7),
(0xFB1E, 0xFB1E),
],
"blues": [
(['ב', 'ד', 'ה', 'ח', 'ך', 'כ', 'ם', 'ס'], "LATIN_TOP | LATIN_LONG"),
(['ב', 'ט', 'כ', 'ם', 'ס', 'צ'], "0"),
(['ק', 'ך', 'ן', 'ף', 'ץ'], "0"),
],
},
{
"name": "Kayah Li",
"tag": "KALI",
"hint_top_to_bottom": False,
"std_chars": ['ꤍ', '꤀'], # ꤍ ꤀
"base_ranges": [
(0xA900, 0xA92F), # Kayah Li
],
"non_base_ranges": [
(0xA926, 0xA92D),
],
"blues": [
(['꤅', 'ꤏ', '꤁', 'ꤋ', '꤀', 'ꤍ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['꤈', 'ꤘ', '꤀', 'ꤍ', 'ꤢ'], "0"),
(['ꤖ', 'ꤡ'], "LATIN_TOP"),
(['ꤑ', 'ꤜ', 'ꤞ'], "0"),
(['ꤑ', 'ꤜ', 'ꤔ'], "0"),
],
},
{
"name": "Khmer",
"tag": "KHMR",
"hint_top_to_bottom": False,
"std_chars": ['០'], # ០
"base_ranges": [
(0x1780, 0x17FF), # Khmer
],
"non_base_ranges": [
(0x17B7, 0x17BD),
(0x17C6, 0x17C6),
(0x17C9, 0x17D3),
(0x17DD, 0x17DD),
],
"blues": [
(['ខ', 'ទ', 'ន', 'ឧ', 'ឩ', 'ា'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ក', 'ក', 'ក', 'ក'], "LATIN_SUB_TOP"),
(['ខ', 'ឃ', 'ច', 'ឋ', 'ប', 'ម', 'យ', 'ឲ'], "0"),
(['ត', 'រ', 'ឲ', 'អ'], "0"),
(['ន', 'ង', 'ក', 'ច', 'ន', 'ល'], "0"),
],
},
{
"name": "Khmer Symbols",
"tag": "KHMS",
"hint_top_to_bottom": False,
"std_chars": ['᧡', '᧪'], # ᧡ ᧪
"base_ranges": [
(0x19E0, 0x19FF), # Khmer Symbols
],
"non_base_ranges": [
],
"blues": [
(['᧠', '᧡'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['᧶', '᧹'], "0"),
],
},
{
"name": "Kannada",
"tag": "KNDA",
"hint_top_to_bottom": False,
"std_chars": ['೦', 'ಬ'], # ೦ ಬ
"base_ranges": [
(0x0C80, 0x0CFF), # Kannada
],
"non_base_ranges": [
(0x0C81, 0x0C81),
(0x0CBC, 0x0CBC),
(0x0CBF, 0x0CBF),
(0x0CC6, 0x0CC6),
(0x0CCC, 0x0CCD),
(0x0CE2, 0x0CE3),
],
"blues": [
(['ಇ', 'ಊ', 'ಐ', 'ಣ', 'ಸ', 'ನ', 'ದ', 'ರ'], "LATIN_TOP"),
(['ಅ', 'ಉ', 'ಎ', 'ಲ', '೦', '೨', '೬', '೭'], "0"),
],
},
{
"name": "Lao",
"tag": "LAOO",
"hint_top_to_bottom": False,
"std_chars": ['໐'], # ໐
"base_ranges": [
(0x0E80, 0x0EFF), # Lao
],
"non_base_ranges": [
(0x0EB1, 0x0EB1),
(0x0EB4, 0x0EBC),
(0x0EC8, 0x0ECD),
],
"blues": [
(['າ', 'ດ', 'ອ', 'ມ', 'ລ', 'ວ', 'ຣ', 'ງ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['າ', 'ອ', 'ບ', 'ຍ', 'ຣ', 'ຮ', 'ວ', 'ຢ'], "0"),
(['ປ', 'ຢ', 'ຟ', 'ຝ'], "LATIN_TOP"),
(['ໂ', 'ໄ', 'ໃ'], "LATIN_TOP"),
(['ງ', 'ຊ', 'ຖ', 'ຽ', 'ໆ', 'ຯ'], "0"),
],
},
{
"name": "Latin",
"tag": "LATN",
"hint_top_to_bottom": False,
"std_chars": ['o', 'O', '0'],
"base_ranges": [
(0x0020, 0x007F), # Basic Latin (no control chars)
(0x00A0, 0x00A9), # Latin-1 Supplement (no control chars)
(0x00AB, 0x00B1), # ... continued
(0x00B4, 0x00B8), # ... continued
(0x00BB, 0x00FF), # ... continued
(0x0100, 0x017F), # Latin Extended-A
(0x0180, 0x024F), # Latin Extended-B
(0x0250, 0x02AF), # IPA Extensions
(0x02B9, 0x02DF), # Spacing Modifier Letters
(0x02E5, 0x02FF), # ... continued
(0x0300, 0x036F), # Combining Diacritical Marks
(0x1AB0, 0x1ABE), # Combining Diacritical Marks Extended
(0x1D00, 0x1D2B), # Phonetic Extensions
(0x1D6B, 0x1D77), # ... continued
(0x1D79, 0x1D7F), # ... continued
(0x1D80, 0x1D9A), # Phonetic Extensions Supplement
(0x1DC0, 0x1DFF), # Combining Diacritical Marks Supplement
(0x1E00, 0x1EFF), # Latin Extended Additional
(0x2000, 0x206F), # General Punctuation
(0x20A0, 0x20B8), # Currency Symbols ...
(0x20BA, 0x20CF), # ... except new Rupee sign
(0x2150, 0x218F), # Number Forms
(0x2C60, 0x2C7B), # Latin Extended-C
(0x2C7E, 0x2C7F), # ... continued
(0x2E00, 0x2E7F), # Supplemental Punctuation
(0xA720, 0xA76F), # Latin Extended-D
(0xA771, 0xA7F7), # ... continued
(0xA7FA, 0xA7FF), # ... continued
(0xAB30, 0xAB5B), # Latin Extended-E
(0xAB60, 0xAB6F), # ... continued
(0xFB00, 0xFB06), # Alphab. Present. Forms (Latin Ligs)
(0x1D400, 0x1D7FF), # Mathematical Alphanumeric Symbols
],
"non_base_ranges": [
(0x005E, 0x0060),
(0x007E, 0x007E),
(0x00A8, 0x00A9),
(0x00AE, 0x00B0),
(0x00B4, 0x00B4),
(0x00B8, 0x00B8),
(0x00BC, 0x00BE),
(0x02B9, 0x02DF),
(0x02E5, 0x02FF),
(0x0300, 0x036F),
(0x1AB0, 0x1ABE),
(0x1DC0, 0x1DFF),
(0x2017, 0x2017),
(0x203E, 0x203E),
(0xA788, 0xA788),
(0xA7F8, 0xA7FA),
],
"blues": [
(['T', 'H', 'E', 'Z', 'O', 'C', 'Q', 'S'], "LATIN_TOP"),
(['H', 'E', 'Z', 'L', 'O', 'C', 'U', 'S'], "0"),
(['f', 'i', 'j', 'k', 'd', 'b', 'h'], "LATIN_TOP"),
(['u', 'v', 'x', 'z', 'o', 'e', 's', 'c'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['n', 'r', 'x', 'z', 'o', 'e', 's', 'c'], "0"),
(['p', 'q', 'g', 'j', 'y'], "0"),
],
},
{
"name": "Latin Subscript Fallback",
"tag": "LATB",
"hint_top_to_bottom": False,
"std_chars": ['ₒ', '₀'], # ₒ ₀
"base_ranges": [
(0x1D62, 0x1D6A), # some small subscript letters
(0x2080, 0x209C), # subscript digits and letters
(0x2C7C, 0x2C7C), # latin subscript small letter j
],
"non_base_ranges": [
],
"blues": [
(['₀', '₃', '₅', '₇', '₈'], "LATIN_TOP"),
(['₀', '₁', '₂', '₃', '₈'], "0"),
(['ᵢ', 'ⱼ', 'ₕ', 'ₖ', 'ₗ'], "LATIN_TOP"),
(['ₐ', 'ₑ', 'ₒ', 'ₓ', 'ₙ', 'ₛ', 'ᵥ', 'ᵤ', 'ᵣ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ₐ', 'ₑ', 'ₒ', 'ₓ', 'ₙ', 'ₛ', 'ᵥ', 'ᵤ', 'ᵣ'], "0"),
(['ᵦ', 'ᵧ', 'ᵨ', 'ᵩ', 'ₚ'], "0"),
],
},
{
"name": "Latin Superscript Fallback",
"tag": "LATP",
"hint_top_to_bottom": False,
"std_chars": ['ᵒ', 'ᴼ', '⁰'], # ᵒ ᴼ ⁰
"base_ranges": [
(0x00AA, 0x00AA), # feminine ordinal indicator
(0x00B2, 0x00B3), # superscript two and three
(0x00B9, 0x00BA), # superscript one, masc. ord. indic.
(0x02B0, 0x02B8), # some latin superscript mod. letters
(0x02E0, 0x02E4), # some IPA modifier letters
(0x1D2C, 0x1D61), # latin superscript modifier letters
(0x1D78, 0x1D78), # modifier letter cyrillic en
(0x1D9B, 0x1DBF), # more modifier letters
(0x2070, 0x207F), # superscript digits and letters
(0x2C7D, 0x2C7D), # modifier letter capital v
(0xA770, 0xA770), # modifier letter us
(0xA7F8, 0xA7F9), # more modifier letters
(0xAB5C, 0xAB5F), # more modifier letters
],
"non_base_ranges": [
],
"blues": [
(['⁰', '³', '⁵', '⁷', 'ᵀ', 'ᴴ', 'ᴱ', 'ᴼ'], "LATIN_TOP"),
(['⁰', '¹', '²', '³', 'ᴱ', 'ᴸ', 'ᴼ', 'ᵁ'], "0"),
(['ᵇ', 'ᵈ', 'ᵏ', 'ʰ', 'ʲ', 'ᶠ', 'ⁱ'], "LATIN_TOP"),
(['ᵉ', 'ᵒ', 'ʳ', 'ˢ', 'ˣ', 'ᶜ', 'ᶻ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ᵉ', 'ᵒ', 'ʳ', 'ˢ', 'ˣ', 'ᶜ', 'ᶻ'], "0"),
(['ᵖ', 'ʸ', 'ᵍ'], "0"),
],
},
{
"name": "Lisu",
"tag": "LISU",
"hint_top_to_bottom": False,
"std_chars": ['ꓳ'], # ꓳ
"base_ranges": [
(0xA4D0, 0xA4FF), # Lisu
],
"non_base_ranges": [
],
"blues": [
(['ꓡ', 'ꓧ', 'ꓱ', 'ꓶ', 'ꓩ', 'ꓚ', 'ꓵ', 'ꓳ'], "LATIN_TOP"),
(['ꓕ', 'ꓜ', 'ꓞ', 'ꓡ', 'ꓛ', 'ꓢ', 'ꓳ', 'ꓴ'], "0"),
],
},
{
"name": "Malayalam",
"tag": "MLYM",
"hint_top_to_bottom": False,
"std_chars": ['ഠ', 'റ'], # ഠ റ
"base_ranges": [
(0x0D00, 0x0D7F), # Malayalam
],
"non_base_ranges": [
(0x0D00, 0x0D01),
(0x0D3B, 0x0D3C),
(0x0D4D, 0x0D4E),
(0x0D62, 0x0D63),
],
"blues": [
(['ഒ', 'ട', 'ഠ', 'റ', 'ച', 'പ', 'ച', 'പ'], "LATIN_TOP"),
(['ട', 'ഠ', 'ധ', 'ശ', 'ഘ', 'ച', 'ഥ', 'ല'], "0"),
],
},
{
"name": "Medefaidrin",
"tag": "MEDF",
"hint_top_to_bottom": False,
"std_chars": ['𖹡', '𖹛', '𖹯'], # 𖹡 𖹛 𖹯
"base_ranges": [
(0x16E40, 0x16E9F), # Medefaidrin
],
"non_base_ranges": [
],
"blues": [
(['𖹀', '𖹁', '𖹂', '𖹃', '𖹏', '𖹚', '𖹟'], "LATIN_TOP"),
(['𖹀', '𖹁', '𖹂', '𖹃', '𖹏', '𖹚', '𖹒', '𖹓'], "0"),
(['𖹤', '𖹬', '𖹧', '𖹴', '𖹶', '𖹾'], "LATIN_TOP"),
(['𖹠', '𖹡', '𖹢', '𖹹', '𖹳', '𖹮'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['𖹠', '𖹡', '𖹢', '𖹳', '𖹭', '𖹽'], "0"),
(['𖹥', '𖹨', '𖹩'], "0"),
(['𖺀', '𖺅', '𖺈', '𖺄', '𖺍'], "LATIN_TOP"),
],
},
{
"name": "Mongolian",
"tag": "MONG",
"hint_top_to_bottom": True,
"std_chars": ['ᡂ', 'ᠪ'], # ᡂ ᠪ
"base_ranges": [
(0x1800, 0x18AF), # Mongolian
(0x11660, 0x1167F), # Mongolian Supplement
],
"non_base_ranges": [
(0x1885, 0x1886),
(0x18A9, 0x18A9),
],
"blues": [
(['ᠳ', 'ᠴ', 'ᠶ', 'ᠽ', 'ᡂ', 'ᡊ', '', ''], "LATIN_TOP"),
(['ᡃ'], "0"),
],
},
{
"name": "Myanmar",
"tag": "MYMR",
"hint_top_to_bottom": False,
"std_chars": ['ဝ', 'င', 'ဂ'], # ဝ င ဂ
"base_ranges": [
(0x1000, 0x109F), # Myanmar
(0xA9E0, 0xA9FF), # Myanmar Extended-B
(0xAA60, 0xAA7F), # Myanmar Extended-A
],
"non_base_ranges": [
(0x102D, 0x1030),
(0x1032, 0x1037),
(0x103A, 0x103A),
(0x103D, 0x103E),
(0x1058, 0x1059),
(0x105E, 0x1060),
(0x1071, 0x1074),
(0x1082, 0x1082),
(0x1085, 0x1086),
(0x108D, 0x108D),
(0xA9E5, 0xA9E5),
(0xAA7C, 0xAA7C),
],
"blues": [
(['ခ', 'ဂ', 'င', 'ဒ', 'ဝ', 'ၥ', '၊', '။'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['င', 'ဎ', 'ဒ', 'ပ', 'ဗ', 'ဝ', '၊', '။'], "0"),
(['ဩ', 'ြ', '၍', '၏', '၆', 'ါ', 'ိ'], "LATIN_TOP"),
(['ဉ', 'ည', 'ဥ', 'ဩ', 'ဨ', '၂', '၅', '၉'], "0"),
],
},
{
"name": "N'Ko",
"tag": "NKOO",
"hint_top_to_bottom": False,
"std_chars": ['ߋ', '߀'], # ߋ ߀
"base_ranges": [
(0x07C0, 0x07FF), # N'Ko
],
"non_base_ranges": [
(0x07EB, 0x07F5),
(0x07FD, 0x07FD),
],
"blues": [
(['ߐ', '߉', 'ߒ', 'ߟ', 'ߖ', 'ߜ', 'ߠ', 'ߥ'], "LATIN_TOP"),
(['߀', 'ߘ', 'ߡ', 'ߠ', 'ߥ'], "0"),
(['ߏ', 'ߛ', 'ߋ'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['ߎ', 'ߏ', 'ߛ', 'ߋ'], "0"),
],
},
{
"name": "no script",
"tag": "NONE",
"hint_top_to_bottom": False,
"std_chars": [],
"base_ranges": [
],
"non_base_ranges": [
],
"blues": [
],
},
{
"name": "Ol Chiki",
"tag": "OLCK",
"hint_top_to_bottom": False,
"std_chars": ['ᱛ'], # ᱛ
"base_ranges": [
(0x1C50, 0x1C7F), # Ol Chiki
],
"non_base_ranges": [
],
"blues": [
(['ᱛ', 'ᱜ', 'ᱝ', 'ᱡ', 'ᱢ', 'ᱥ'], "LATIN_TOP"),
(['ᱛ', 'ᱜ', 'ᱝ', 'ᱡ', 'ᱢ', 'ᱥ'], "0"),
],
},
{
"name": "Old Turkic",
"tag": "ORKH",
"hint_top_to_bottom": False,
"std_chars": ['𐰗'], # 𐰗
"base_ranges": [
(0x10C00, 0x10C4F), # Old Turkic
],
"non_base_ranges": [
],
"blues": [
(['𐰗', '𐰘', '𐰧'], "LATIN_TOP"),
(['𐰉', '𐰗', '𐰦', '𐰧'], "0"),
],
},
{
"name": "Osage",
"tag": "OSGE",
"hint_top_to_bottom": False,
"std_chars": ['𐓂', '𐓪'], # 𐓂 𐓪
"base_ranges": [
(0x104B0, 0x104FF), # Osage
],
"non_base_ranges": [
],
"blues": [
(['𐒾', '𐓍', '𐓒', '𐓓', '𐒻', '𐓂', '𐒵', '𐓆'], "LATIN_TOP"),
(['𐒰', '𐓍', '𐓂', '𐒿', '𐓎', '𐒹'], "0"),
(['𐒼', '𐒽', '𐒾'], "0"),
(['𐓵', '𐓶', '𐓺', '𐓻', '𐓝', '𐓣', '𐓪', '𐓮'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['𐓘', '𐓚', '𐓣', '𐓵', '𐓡', '𐓧', '𐓪', '𐓶'], "0"),
(['𐓤', '𐓦', '𐓸', '𐓹', '𐓛'], "LATIN_TOP"),
(['𐓤', '𐓥', '𐓦'], "0"),
],
},
{
"name": "Osmanya",
"tag": "OSMA",
"hint_top_to_bottom": False,
"std_chars": ['𐒆', '𐒠'], # 𐒆 𐒠
"base_ranges": [
(0x10480, 0x104AF), # Osmanya
],
"non_base_ranges": [
],
"blues": [
(['𐒆', '𐒉', '𐒐', '𐒒', '𐒘', '𐒛', '𐒠', '𐒣'], "LATIN_TOP"),
(['𐒀', '𐒂', '𐒆', '𐒈', '𐒊', '𐒒', '𐒠', '𐒩'], "0"),
],
},
{
"name": "Hanifi Rohingya",
"tag": "ROHG",
"hint_top_to_bottom": False,
"std_chars": ['𐴰'], # 𐴰
"base_ranges": [
(0x10D00, 0x10D3F), # Hanifi Rohingya
],
"non_base_ranges": [
],
"blues": [
(['𐴃', '𐴀', '𐴆', '𐴖', '𐴕'], "LATIN_TOP"),
(['𐴔', '𐴖', '𐴕', '𐴑', '𐴐'], "0"),
(['ـ'], "LATIN_NEUTRAL"),
],
},
{
"name": "Saurashtra",
"tag": "SAUR",
"hint_top_to_bottom": False,
"std_chars": ['ꢝ', '꣐'], # ꢝ ꣐
"base_ranges": [
(0xA880, 0xA8DF), # Saurashtra
],
"non_base_ranges": [
(0xA880, 0xA881),
(0xA8B4, 0xA8C5),
],
"blues": [
(['ꢜ', 'ꢞ', 'ꢳ', 'ꢂ', 'ꢖ', 'ꢒ', 'ꢝ', 'ꢛ'], "LATIN_TOP"),
(['ꢂ', 'ꢨ', 'ꢺ', 'ꢤ', 'ꢎ'], "0"),
],
},
{
"name": "Shavian",
"tag": "SHAW",
"hint_top_to_bottom": False,
"std_chars": ['𐑴'], # 𐑴
"base_ranges": [
(0x10450, 0x1047F), # Shavian
],
"non_base_ranges": [
],
"blues": [
(['𐑕', '𐑙'], "LATIN_TOP"),
(['𐑔', '𐑖', '𐑗', '𐑹', '𐑻'], "0"),
(['𐑟', '𐑣'], "0"),
(['𐑱', '𐑲', '𐑳', '𐑴', '𐑸', '𐑺', '𐑼'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['𐑴', '𐑻', '𐑹'], "0"),
],
},
{
"name": "Sinhala",
"tag": "SINH",
"hint_top_to_bottom": False,
"std_chars": ['ට'], # ට
"base_ranges": [
(0x0D80, 0x0DFF), # Sinhala
],
"non_base_ranges": [
(0x0DCA, 0x0DCA),
(0x0DD2, 0x0DD6),
],
"blues": [
(['ඉ', 'ක', 'ඝ', 'ඳ', 'ප', 'ය', 'ල', 'ෆ'], "LATIN_TOP"),
(['එ', 'ඔ', 'ඝ', 'ජ', 'ට', 'ථ', 'ධ', 'ර'], "0"),
(['ද', 'ඳ', 'උ', 'ල', 'ත', 'ත', 'බ', 'ද'], "0"),
],
},
{
"name": "Sundanese",
"tag": "SUND",
"hint_top_to_bottom": False,
"std_chars": ['᮰'], # ᮰
"base_ranges": [
(0x1B80, 0x1BBF), # Sundanese
(0x1CC0, 0x1CCF), # Sundanese Supplement
],
"non_base_ranges": [
(0x1B80, 0x1B82),
(0x1BA1, 0x1BAD),
],
"blues": [
(['ᮋ', 'ᮞ', 'ᮮ', 'ᮽ', '᮰', 'ᮈ'], "LATIN_TOP"),
(['ᮄ', 'ᮔ', 'ᮕ', 'ᮗ', '᮰', 'ᮆ', 'ᮈ', 'ᮉ'], "0"),
(['ᮼ', '᳄'], "0"),
],
},
{
"name": "Tamil",
"tag": "TAML",
"hint_top_to_bottom": False,
"std_chars": ['௦'], # ௦
"base_ranges": [
(0x0B80, 0x0BFF), # Tamil
],
"non_base_ranges": [
(0x0B82, 0x0B82),
(0x0BC0, 0x0BC2),
(0x0BCD, 0x0BCD),
],
"blues": [
(['உ', 'ஒ', 'ஓ', 'ற', 'ஈ', 'க', 'ங', 'ச'], "LATIN_TOP"),
(['க', 'ச', 'ல', 'ஶ', 'உ', 'ங', 'ட', 'ப'], "0"),
],
},
{
"name": "Tai Viet",
"tag": "TAVT",
"hint_top_to_bottom": False,
"std_chars": ['ꪒ', 'ꪫ'], # ꪒ ꪫ
"base_ranges": [
(0xAA80, 0xAADF), # Tai Viet
],
"non_base_ranges": [
(0xAAB0, 0xAAB0),
(0xAAB2, 0xAAB4),
(0xAAB7, 0xAAB8),
(0xAABE, 0xAABF),
(0xAAC1, 0xAAC1),
],
"blues": [
(['ꪆ', 'ꪔ', 'ꪒ', 'ꪖ', 'ꪫ'], "LATIN_TOP"),
(['ꪉ', 'ꪫ', 'ꪮ'], "0"),
],
},
{
"name": "Telugu",
"tag": "TELU",
"hint_top_to_bottom": False,
"std_chars": ['౦', '౧'], # ౦ ౧
"base_ranges": [
(0x0C00, 0x0C7F), # Telugu
],
"non_base_ranges": [
(0x0C00, 0x0C00),
(0x0C04, 0x0C04),
(0x0C3E, 0x0C40),
(0x0C46, 0x0C56),
(0x0C62, 0x0C63),
],
"blues": [
(['ఇ', 'ఌ', 'ఙ', 'ఞ', 'ణ', 'ఱ', '౯'], "LATIN_TOP"),
(['అ', 'క', 'చ', 'ర', 'ఽ', '౨', '౬'], "0"),
],
},
{
"name": "Tifinagh",
"tag": "TFNG",
"hint_top_to_bottom": False,
"std_chars": ['ⵔ'], # ⵔ
"base_ranges": [
(0x2D30, 0x2D7F), # Tifinagh
],
"non_base_ranges": [
],
"blues": [
(['ⵔ', 'ⵙ', 'ⵛ', 'ⵞ', 'ⴵ', 'ⴼ', 'ⴹ', 'ⵎ'], "LATIN_TOP"),
(['ⵔ', 'ⵙ', 'ⵛ', 'ⵞ', 'ⴵ', 'ⴼ', 'ⴹ', 'ⵎ'], "0"),
],
},
{
"name": "Thai",
"tag": "THAI",
"hint_top_to_bottom": False,
"std_chars": ['า', 'ๅ', '๐'], # า ๅ ๐
"base_ranges": [
(0x0E00, 0x0E7F), # Thai
],
"non_base_ranges": [
(0x0E31, 0x0E31),
(0x0E34, 0x0E3A),
(0x0E47, 0x0E4E),
],
"blues": [
(['บ', 'เ', 'แ', 'อ', 'ก', 'า'], "LATIN_TOP | LATIN_X_HEIGHT"),
(['บ', 'ป', 'ษ', 'ฯ', 'อ', 'ย', 'ฮ'], "0"),
(['ป', 'ฝ', 'ฟ'], "LATIN_TOP"),
(['โ', 'ใ', 'ไ'], "LATIN_TOP"),
(['ฎ', 'ฏ', 'ฤ', 'ฦ'], "0"),
(['ญ', 'ฐ'], "0"),
(['๐', '๑', '๓'], "0"),
],
},
{
"name": "Vai",
"tag": "VAII",
"hint_top_to_bottom": False,
"std_chars": ['ꘓ', 'ꖜ', 'ꖴ'], # ꘓ ꖜ ꖴ
"base_ranges": [
(0xA500, 0xA63F), # Vai
],
"non_base_ranges": [
],
"blues": [
(['ꗍ', 'ꘖ', 'ꘙ', 'ꘜ', 'ꖜ', 'ꖝ', 'ꔅ', 'ꕢ'], "LATIN_TOP"),
(['ꗍ', 'ꘖ', 'ꘙ', 'ꗞ', 'ꔅ', 'ꕢ', 'ꖜ', 'ꔆ'], "0"),
],
},
{
"name": "Limbu",
"tag": "LIMB",
"hint_top_to_bottom": False,
"std_chars": ['o'], # XXX
"base_ranges": [
(0x1900, 0x194F), # Limbu
],
"non_base_ranges": [
(0x1920, 0x1922),
(0x1927, 0x1934),
(0x1937, 0x193B),
],
"blues": [],
},
{
"name": "Oriya",
"tag": "ORYA",
"hint_top_to_bottom": False,
"std_chars": ['o'], # XXX
"base_ranges": [
(0x0B00, 0x0B7F), # Oriya
],
"non_base_ranges": [
(0x0B01, 0x0B02),
(0x0B3C, 0x0B3C),
(0x0B3F, 0x0B3F),
(0x0B41, 0x0B44),
(0x0B4D, 0x0B56),
(0x0B62, 0x0B63),
],
"blues": [],
},
{
"name": "Syloti Nagri",
"tag": "SYLO",
"hint_top_to_bottom": False,
"std_chars": ['o'], # XXX
"base_ranges": [
(0xA800, 0xA82F), # Syloti Nagri
],
"non_base_ranges": [
(0xA802, 0xA802),
(0xA806, 0xA806),
(0xA80B, 0xA80B),
(0xA825, 0xA826),
],
"blues": [],
},
{
"name": "Tibetan",
"tag": "TIBT",
"hint_top_to_bottom": False,
"std_chars": ['o'], # XXX
"base_ranges": [
(0x0F00, 0x0FFF), # Tibetan
],
"non_base_ranges": [
(0x0F18, 0x0F19),
(0x0F35, 0x0F35),
(0x0F37, 0x0F37),
(0x0F39, 0x0F39),
(0x0F3E, 0x0F3F),
(0x0F71, 0x0F7E),
(0x0F80, 0x0F84),
(0x0F86, 0x0F87),
(0x0F8D, 0x0FBC),
],
"blues": [],
},
{
"name": "CJKV ideographs",
"tag": "HANI",
"hint_top_to_bottom": False,
"std_chars": ['田', '囗'], # 田 囗
"base_ranges": [
(0x1100, 0x11FF), # Hangul Jamo
(0x2E80, 0x2EFF), # CJK Radicals Supplement
(0x2F00, 0x2FDF), # Kangxi Radicals
(0x2FF0, 0x2FFF), # Ideographic Description Characters
(0x3000, 0x303F), # CJK Symbols and Punctuation
(0x3040, 0x309F), # Hiragana
(0x30A0, 0x30FF), # Katakana
(0x3100, 0x312F), # Bopomofo
(0x3130, 0x318F), # Hangul Compatibility Jamo
(0x3190, 0x319F), # Kanbun
(0x31A0, 0x31BF), # Bopomofo Extended
(0x31C0, 0x31EF), # CJK Strokes
(0x31F0, 0x31FF), # Katakana Phonetic Extensions
(0x3300, 0x33FF), # CJK Compatibility
(0x3400, 0x4DBF), # CJK Unified Ideographs Extension A
(0x4DC0, 0x4DFF), # Yijing Hexagram Symbols
(0x4E00, 0x9FFF), # CJK Unified Ideographs
(0xA960, 0xA97F), # Hangul Jamo Extended-A
(0xAC00, 0xD7AF), # Hangul Syllables
(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
(0xF900, 0xFAFF), # CJK Compatibility Ideographs
(0xFE10, 0xFE1F), # Vertical forms
(0xFE30, 0xFE4F), # CJK Compatibility Forms
(0xFF00, 0xFFEF), # Halfwidth and Fullwidth Forms
(0x1B000, 0x1B0FF), # Kana Supplement
(0x1B100, 0x1B12F), # Kana Extended-A
(0x1D300, 0x1D35F), # Tai Xuan Hing Symbols
(0x20000, 0x2A6DF), # CJK Unified Ideographs Extension B
(0x2A700, 0x2B73F), # CJK Unified Ideographs Extension C
(0x2B740, 0x2B81F), # CJK Unified Ideographs Extension D
(0x2B820, 0x2CEAF), # CJK Unified Ideographs Extension E
(0x2CEB0, 0x2EBEF), # CJK Unified Ideographs Extension F
(0x2F800, 0x2FA1F), # CJK Compatibility Ideographs Supplement
],
"non_base_ranges": [
(0x302A, 0x302F),
(0x3190, 0x319F),
],
"blues": [
(['他', '们', '你', '來', '們', '到', '和', '地', '对', '對', '就', '席', '我', '时', '時', '會', '来', '為', '能', '舰', '說', '说', '这', '這', '齊', '|', '军', '同', '已', '愿', '既', '星', '是', '景', '民', '照', '现', '現', '理', '用', '置', '要', '軍', '那', '配', '里', '開', '雷', '露', '面', '顾'], "CJK_TOP"),
(['个', '为', '人', '他', '以', '们', '你', '來', '個', '們', '到', '和', '大', '对', '對', '就', '我', '时', '時', '有', '来', '為', '要', '說', '说', '|', '主', '些', '因', '它', '想', '意', '理', '生', '當', '看', '着', '置', '者', '自', '著', '裡', '过', '还', '进', '進', '過', '道', '還', '里', '面'], "0"),
(['些', '们', '你', '來', '們', '到', '和', '地', '她', '将', '將', '就', '年', '得', '情', '最', '样', '樣', '理', '能', '說', '说', '这', '這', '通', '|', '即', '吗', '吧', '听', '呢', '品', '响', '嗎', '师', '師', '收', '断', '斷', '明', '眼', '間', '间', '际', '陈', '限', '除', '陳', '随', '際', '隨'], "CJK_HORIZ"),
(['事', '前', '學', '将', '將', '情', '想', '或', '政', '斯', '新', '样', '樣', '民', '沒', '没', '然', '特', '现', '現', '球', '第', '經', '谁', '起', '|', '例', '別', '别', '制', '动', '動', '吗', '嗎', '增', '指', '明', '朝', '期', '构', '物', '确', '种', '調', '调', '費', '费', '那', '都', '間', '间'], "CJK_HORIZ | CJK_RIGHT"),
],
},
]
def generate() -> str:
buf = ""
buf += "// THIS FILE IS AUTOGENERATED.\n"
buf += "// Any changes to this file will be overwritten.\n"
buf += "// Use ../scripts/gen_autohint_scripts.py to regenerate.\n\n"
char_map = {}
buf += "#[rustfmt::skip]\n"
buf += "pub(super) const SCRIPT_CLASSES: &[ScriptClass] = &[\n"
for i, script in enumerate(SCRIPT_CLASSES):
std_chars = script["std_chars"]
blues = script["blues"]
buf += " ScriptClass {\n"
buf += " name: \"{}\",\n".format(script["name"])
buf += " tag: Tag::new(b\"{}\"),\n".format(script["tag"])
buf += " index: {},\n".format(i)
buf += " hint_top_to_bottom: {},\n".format(str(script["hint_top_to_bottom"]).lower())
# standard characters
buf += " std_chars: &["
if len(std_chars) != 0:
for std_char in std_chars:
buf += "'{}', ".format(std_char)
buf += "],\n"
# blue characters
buf += " blues: &["
if len(blues) != 0:
buf += "\n";
for blue in blues:
buf += " (&["
for ch in blue[0]:
buf += "'{}', ".format(ch)
buf += "], {}),\n".format(blue[1])
buf += " ],\n"
else:
buf += "],\n"
buf += " },\n"
bases = set()
# build a char -> (script_ix, is_non_base) map for all ranges
for char_range in script["base_ranges"]:
first = char_range[0]
last = char_range[1]
# inclusive range
for ch in range(first, last + 1):
# Note: FT has overlapping ranges but we choose to keep
# the first one to match behavior
if not ch in char_map:
char_map[ch] = (i, False)
bases.add(ch)
for char_range in script["non_base_ranges"]:
first = char_range[0]
last = char_range[1]
# inclusive range
for ch in range(first, last + 1):
if ch in bases:
char_map[ch] = (i, True) # True for non-base character
buf += "];\n\n"
# Add some symbolic indices for each script so they can be
# referenced by ScriptClass::LATN for example
buf += "impl ScriptClass {\n"
for i, script in enumerate(SCRIPT_CLASSES):
buf += " pub const {}: usize = {};\n".format(script["tag"], i)
buf += "}\n\n"
# build a sorted list from the map
char_list = []
for ch in char_map:
char_list.append((ch, char_map[ch]))
char_list.sort(key=lambda entry: entry[0])
# and merge into ranges
ranges = []
for entry in char_list:
ch = entry[0]
props = entry[1]
if len(ranges) != 0:
last = ranges[-1];
# we can merge if same props and this character extends the range
# by 1
if ch == last[1] + 1 and last[2] == props:
ranges[-1] = (last[0], ch, props)
continue
ranges.append((ch, ch, props))
# and finally output the ranges
buf += "#[rustfmt::skip]\n"
buf += "pub(super) const SCRIPT_RANGES: &[ScriptRange] = &[\n"
for char_range in ranges:
first = char_range[0]
last = char_range[1]
props = char_range[2]
tag = SCRIPT_CLASSES[props[0]]["tag"]
kind = "base_range"
if props[1]:
kind = "non_base_range"
buf += " {}({}, {}, ScriptClass::{}),\n".format(kind, first, last, tag)
buf += "];\n\n"
return buf
if __name__ == "__main__":
data = generate()
with open("../generated/generated_autohint_scripts.rs", "w", encoding="utf-8") as f:
f.write(data)