# gen_autohint_scripts.py

# Generates Rust tables that define Unicode "script classes" for the purposes
# of autohinting.
#
# For performance, we want to link various pieces of data by index. For ease of
# modification and to avoid errors, we want to define those links symbolically
# by name. This script therefore converts the symbolic references to indices
# when generating code.
#
# The bottom of this file contains the Rust generation code.
#
# Relative to FreeType, this combines the data from AF_ScriptClass,
# AF_Script_UniRangeRec and AF_BlueStringset.
# Script definitions: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afscript.h
# Unicode ranges: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afranges.c
# Blues: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afblue.h
SCRIPT_CLASSES = [
    {
        "name": "Adlam",
        "tag": "ADLM",
        "hint_top_to_bottom": False,
        "std_chars": ['𞤌', '𞤮'], # 𞤌 𞤮
        "base_ranges": [
            (0x1E900, 0x1E95F), # Adlam
        ],
        "non_base_ranges": [
            (0x1D944, 0x1E94A),
        ],
        "blues": [
            (['𞤌', '𞤅', '𞤈', '𞤏', '𞤔', '𞤚'], "LATIN_TOP"),
            (['𞤂', '𞤖'], "0"),
            (['𞤬', '𞤮', '𞤻', '𞤼', '𞤾'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['𞤤', '𞤨', '𞤩', '𞤭', '𞤴', '𞤸', '𞤺', '𞥀'], "0"),
        ],
    },
    {
        "name": "Arabic",
        "tag": "ARAB",
        "hint_top_to_bottom": False,
        "std_chars": ['ل', 'ح', 'ـ'], # ل ح ـ
        "base_ranges": [
            (0x0600, 0x06FF), # Arabic
            (0x0750, 0x07FF), # Arabic Supplement
            (0x08A0, 0x08FF), # Arabic Extended-A
            (0xFB50, 0xFDFF), # Arabic Presentation Forms-A
            (0xFE70, 0xFEFF), # Arabic Presentation Forms-B
            (0x1EE00, 0x1EEFF), # Arabic Mathematical Alphabetic Symbols
        ],
        "non_base_ranges": [
            (0x0600, 0x0605),
            (0x0610, 0x061A),
            (0x064B, 0x065F),
            (0x0670, 0x0670),
            (0x06D6, 0x06DC),
            (0x06DF, 0x06E4),
            (0x06E7, 0x06E8),
            (0x06EA, 0x06ED),
            (0x08D4, 0x08E1),
            (0x08D3, 0x08FF),
            (0xFBB2, 0xFBC1),
            (0xFE70, 0xFE70),
            (0xFE72, 0xFE72),
            (0xFE74, 0xFE74),
            (0xFE76, 0xFE76),
            (0xFE78, 0xFE78),
            (0xFE7A, 0xFE7A),
            (0xFE7C, 0xFE7C),
            (0xFE7E, 0xFE7E),
        ],
        "blues": [
            (['ا', 'إ', 'ل', 'ك', 'ط', 'ظ'], "LATIN_TOP"),
            (['ت', 'ث', 'ط', 'ظ', 'ك'], "0"),
            (['ـ'], "LATIN_NEUTRAL"),
        ],
    },
    {
        "name": "Armenian",
        "tag": "ARMN",
        "hint_top_to_bottom": False,
        "std_chars": ['ս', 'Ս'], # ս Ս
        "base_ranges": [
            (0x0530, 0x058F), # Armenian
            (0xFB13, 0xFB17), # Alphab. Present. Forms (Armenian)
        ],
        "non_base_ranges": [
            (0x0559, 0x055F),
        ],
        "blues": [
            (['Ա', 'Մ', 'Ւ', 'Ս', 'Բ', 'Գ', 'Դ', 'Օ'], "LATIN_TOP"),
            (['Ւ', 'Ո', 'Դ', 'Ճ', 'Շ', 'Ս', 'Տ', 'Օ'], "0"),
            (['ե', 'է', 'ի', 'մ', 'վ', 'ֆ', 'ճ'], "LATIN_TOP"),
            (['ա', 'յ', 'ւ', 'ս', 'գ', 'շ', 'ր', 'օ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['հ', 'ո', 'ճ', 'ա', 'ե', 'ծ', 'ս', 'օ'], "0"),
            (['բ', 'ը', 'ի', 'լ', 'ղ', 'պ', 'փ', 'ց'], "0"),
        ],
    },
    {
        "name": "Avestan",
        "tag": "AVST",
        "hint_top_to_bottom": False,
        "std_chars": ['𐬚'], # 𐬚
        "base_ranges": [
            (0x10B00, 0x10B3F), # Avestan
        ],
        "non_base_ranges": [
            (0x10B39, 0x10B3F),
        ],
        "blues": [
            (['𐬀', '𐬁', '𐬐', '𐬛'], "LATIN_TOP"),
            (['𐬀', '𐬁'], "0"),
        ],
    },
    {
        "name": "Bamum",
        "tag": "BAMU",
        "hint_top_to_bottom": False,
        "std_chars": ['ꛁ', 'ꛯ'], # ꛁ ꛯ
        "base_ranges": [
            (0xA6A0, 0xA6FF), # Bamum
            # https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afranges.c#L139
            # "The characters in the Bamum supplement are pictograms, not (directly) related to the syllabic Bamum script"
            # (0x16800, 0x16A3F), # Bamum Supplement
        ],
        "non_base_ranges": [
            (0xA6F0, 0xA6F1),
        ],
        "blues": [
            (['ꚧ', 'ꚨ', 'ꛛ', 'ꛉ', 'ꛁ', 'ꛈ', 'ꛫ', 'ꛯ'], "LATIN_TOP"),
            (['ꚭ', 'ꚳ', 'ꚶ', 'ꛬ', 'ꚢ', 'ꚽ', 'ꛯ', '꛲'], "0"),
        ],
    },
    {
        "name": "Bengali",
        "tag": "BENG",
        "hint_top_to_bottom": True,
        "std_chars": ['০', '৪'], # ০ ৪
        "base_ranges": [
            (0x0980, 0x09FF), # Bengali
        ],
        "non_base_ranges": [
            (0x0981, 0x0981),
            (0x09BC, 0x09BC),
            (0x09C1, 0x09C4),
            (0x09CD, 0x09CD),
            (0x09E2, 0x09E3),
            (0x09FE, 0x09FE),
        ],
        "blues": [
            (['ই', 'ট', 'ঠ', 'ি', 'ী', 'ৈ', 'ৗ'], "LATIN_TOP"),
            (['ও', 'এ', 'ড', 'ত', 'ন', 'ব', 'ল', 'ক'], "LATIN_TOP"),
            (['অ', 'ড', 'ত', 'ন', 'ব', 'ভ', 'ল', 'ক'], "LATIN_TOP | LATIN_NEUTRAL | LATIN_X_HEIGHT"),
            (['অ', 'ড', 'ত', 'ন', 'ব', 'ভ', 'ল', 'ক'], "0"),
        ],
    },
    {
        "name": "Buhid",
        "tag": "BUHD",
        "hint_top_to_bottom": False,
        "std_chars": ['ᝋ', 'ᝏ'], # ᝋ ᝏ
        "base_ranges": [
            (0x1740, 0x175F), # Buhid
        ],
        "non_base_ranges": [
            (0x1752, 0x1753),
        ],
        "blues": [
            (['ᝐ', 'ᝈ'], "LATIN_TOP"),
            (['ᝅ', 'ᝊ', 'ᝎ'], "LATIN_TOP"),
            (['ᝂ', 'ᝃ', 'ᝉ', 'ᝌ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ᝀ', 'ᝃ', 'ᝆ', 'ᝉ', 'ᝋ', 'ᝏ', 'ᝑ'], "0"),
        ],
    },
    {
        "name": "Chakma",
        "tag": "CAKM",
        "hint_top_to_bottom": False,
        "std_chars": ['𑄤', '𑄉', '𑄛'], # 𑄤 𑄉 𑄛
        "base_ranges": [
            (0x11100, 0x1114F), # Chakma
        ],
        "non_base_ranges": [
            (0x11100, 0x11102),
            (0x11127, 0x11134),
            (0x11146, 0x11146),
        ],
        "blues": [
            (['𑄃', '𑄅', '𑄉', '𑄙', '𑄗'], "LATIN_TOP"),
            (['𑄅', '𑄛', '𑄝', '𑄗', '𑄓'], "0"),
            (['𑄖', '𑄘', '𑄙', '𑄤', '𑄥'], "0"),
        ],
    },
    {
        "name": "Canadian Syllabics",
        "tag": "CANS",
        "hint_top_to_bottom": False,
        "std_chars": ['ᑌ', 'ᓚ'], # ᑌ ᓚ
        "base_ranges": [
            (0x1400, 0x167F), # Unified Canadian Aboriginal Syllabics
            (0x18B0, 0x18FF), # Unified Canadian Aboriginal Syllabics Extended
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['ᗜ', 'ᖴ', 'ᐁ', 'ᒣ', 'ᑫ', 'ᑎ', 'ᔑ', 'ᗰ'], "LATIN_TOP"),
            (['ᗶ', 'ᖵ', 'ᒧ', 'ᐃ', 'ᑌ', 'ᒍ', 'ᔑ', 'ᗢ'], "0"),
            (['ᓓ', 'ᓕ', 'ᓀ', 'ᓂ', 'ᓄ', 'ᕄ', 'ᕆ', 'ᘣ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ᕃ', 'ᓂ', 'ᓀ', 'ᕂ', 'ᓗ', 'ᓚ', 'ᕆ', 'ᘣ'], "0"),
            (['ᐪ', 'ᙆ', 'ᣘ', 'ᐢ', 'ᒾ', 'ᣗ', 'ᔆ'], "LATIN_TOP"),
            (['ᙆ', 'ᗮ', 'ᒻ', 'ᐞ', 'ᔆ', 'ᒡ', 'ᒢ', 'ᓑ'], "0"),
        ],
    },
    {
        "name": "Carian",
        "tag": "CARI",
        "hint_top_to_bottom": False,
        "std_chars": ['𐊫', '𐋉'], # 𐊫 𐋉
        "base_ranges": [
            (0x102A0, 0x102DF), # Carian
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐊧', '𐊫', '𐊬', '𐊭', '𐊱', '𐊺', '𐊼', '𐊿'], "LATIN_TOP"),
            (['𐊣', '𐊧', '𐊷', '𐋀', '𐊫', '𐊸', '𐋉'], "0"),
        ],
    },
    {
        "name": "Cherokee",
        "tag": "CHER",
        "hint_top_to_bottom": False,
        "std_chars": ['Ꭴ', 'Ꮕ', 'ꮕ'], # Ꭴ Ꮕ ꮕ
        "base_ranges": [
            (0x13A0, 0x13FF), # Cherokee
            (0xAB70, 0xABBF), # Cherokee Supplement
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['Ꮖ', 'Ꮋ', 'Ꭼ', 'Ꮓ', 'Ꭴ', 'Ꮳ', 'Ꭶ', 'Ꮥ'], "LATIN_TOP"),
            (['Ꮖ', 'Ꮋ', 'Ꭼ', 'Ꮓ', 'Ꭴ', 'Ꮳ', 'Ꭶ', 'Ꮥ'], "0"),
            (['ꮒ', 'ꮤ', 'ꮶ', 'ꭴ', 'ꭾ', 'ꮗ', 'ꮝ', 'ꮿ'], "LATIN_TOP"),
            (['ꮖ', 'ꭼ', 'ꮓ', 'ꮠ', 'ꮳ', 'ꭶ', 'ꮥ', 'ꮻ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ꮖ', 'ꭼ', 'ꮓ', 'ꮠ', 'ꮳ', 'ꭶ', 'ꮥ', 'ꮻ'], "0"),
            (['ᏸ', 'ꮐ', 'ꭹ', 'ꭻ'], "0"),
        ],
    },
    {
        "name": "Coptic",
        "tag": "COPT",
        "hint_top_to_bottom": False,
        "std_chars": ['Ⲟ', 'ⲟ'], # Ⲟ ⲟ
        "base_ranges": [
            (0x2C80, 0x2CFF), # Coptic
        ],
        "non_base_ranges": [
            (0x2CEF, 0x2CF1),
        ],
        "blues": [
            (['Ⲍ', 'Ⲏ', 'Ⲡ', 'Ⳟ', 'Ⲟ', 'Ⲑ', 'Ⲥ', 'Ⳋ'], "LATIN_TOP"),
            (['Ⳑ', 'Ⳙ', 'Ⳟ', 'Ⲏ', 'Ⲟ', 'Ⲑ', 'Ⳝ', 'Ⲱ'], "0"),
            (['ⲍ', 'ⲏ', 'ⲡ', 'ⳟ', 'ⲟ', 'ⲑ', 'ⲥ', 'ⳋ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ⳑ', 'ⳙ', 'ⳟ', 'ⲏ', 'ⲟ', 'ⲑ', 'ⳝ', 'Ⳓ'], "0"),
        ],
    },
    {
        "name": "Cypriot",
        "tag": "CPRT",
        "hint_top_to_bottom": False,
        "std_chars": ['𐠅', '𐠣'], # 𐠅 𐠣
        "base_ranges": [
            (0x10800, 0x1083F), # Cypriot
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐠍', '𐠙', '𐠳', '𐠱', '𐠅', '𐠓', '𐠣', '𐠦'], "LATIN_TOP"),
            (['𐠃', '𐠊', '𐠛', '𐠣', '𐠳', '𐠵', '𐠐'], "0"),
            (['𐠈', '𐠏', '𐠖'], "LATIN_TOP"),
            (['𐠈', '𐠏', '𐠖'], "0"),
        ],
    },
    {
        "name": "Cyrillic",
        "tag": "CYRL",
        "hint_top_to_bottom": False,
        "std_chars": ['о', 'О'], # о О
        "base_ranges": [
            (0x0400, 0x04FF), # Cyrillic
            (0x0500, 0x052F), # Cyrillic Supplement
            (0x2DE0, 0x2DFF), # Cyrillic Extended-A
            (0xA640, 0xA69F), # Cyrillic Extended-B
            (0x1C80, 0x1C8F), # Cyrillic Extended-C
        ],
        "non_base_ranges": [
            (0x0483, 0x0489),
            (0x2DE0, 0x2DFF),
            (0xA66F, 0xA67F),
            (0xA69E, 0xA69F),
        ],
        "blues": [
            (['Б', 'В', 'Е', 'П', 'З', 'О', 'С', 'Э'], "LATIN_TOP"),
            (['Б', 'В', 'Е', 'Ш', 'З', 'О', 'С', 'Э'], "0"),
            (['х', 'п', 'н', 'ш', 'е', 'з', 'о', 'с'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['х', 'п', 'н', 'ш', 'е', 'з', 'о', 'с'], "0"),
            (['р', 'у', 'ф'], "0"),
        ],
    },
    {
        "name": "Devanagari",
        "tag": "DEVA",
        "hint_top_to_bottom": True,
        "std_chars": ['ठ', 'व', 'ट'], # ठ व ट
        "base_ranges": [
            (0x0900, 0x093B), # Devanagari
            (0x093D, 0x0950), # ... continued
            (0x0953, 0x0963), # ... continued
            (0x0966, 0x097F), # ... continued
            (0x20B9, 0x20B9), # (new) Rupee sign
            (0xA8E0, 0xA8FF), # Devanagari Extended
        ],
        "non_base_ranges": [
            (0x0900, 0x0902),
            (0x093A, 0x093A),
            (0x0941, 0x0948),
            (0x094D, 0x094D),
            (0x0953, 0x0957),
            (0x0962, 0x0963),
            (0xA8E0, 0xA8F1),
            (0xA8FF, 0xA8FF),
        ],
        "blues": [
            (['ई', 'ऐ', 'ओ', 'औ', 'ि', 'ी', 'ो', 'ौ'], "LATIN_TOP"),
            (['क', 'म', 'अ', 'आ', 'थ', 'ध', 'भ', 'श'], "LATIN_TOP"),
            (['क', 'न', 'म', 'उ', 'छ', 'ट', 'ठ', 'ड'], "LATIN_TOP | LATIN_NEUTRAL | LATIN_X_HEIGHT"),
            (['क', 'न', 'म', 'उ', 'छ', 'ट', 'ठ', 'ड'], "0"),
            (['ु', 'ृ'], "0"),
        ],
    },
    {
        "name": "Deseret",
        "tag": "DSRT",
        "hint_top_to_bottom": False,
        "std_chars": ['𐐄', '𐐬'], # 𐐄 𐐬
        "base_ranges": [
            (0x10400, 0x1044F), # Deseret
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐐂', '𐐄', '𐐋', '𐐗', '𐐑'], "LATIN_TOP"),
            (['𐐀', '𐐂', '𐐄', '𐐗', '𐐛'], "0"),
            (['𐐪', '𐐬', '𐐳', '𐐿', '𐐹'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['𐐨', '𐐪', '𐐬', '𐐿', '𐑃'], "0"),
        ],
    },
    {
        "name": "Ethiopic",
        "tag": "ETHI",
        "hint_top_to_bottom": False,
        "std_chars": ['ዐ'], # ዐ
        "base_ranges": [
            (0x1200, 0x137F), # Ethiopic
            (0x1380, 0x139F), # Ethiopic Supplement
            (0x2D80, 0x2DDF), # Ethiopic Extended
            (0xAB00, 0xAB2F), # Ethiopic Extended-A
        ],
        "non_base_ranges": [
            (0x135D, 0x135F),
        ],
        "blues": [
            (['ሀ', 'ሃ', 'ዘ', 'ፐ', 'ማ', 'በ', 'ዋ', 'ዐ'], "LATIN_TOP"),
            (['ለ', 'ሐ', 'በ', 'ዘ', 'ሀ', 'ሪ', 'ዐ', 'ጨ'], "0"),
        ],
    },
    {
        "name": "Georgian (Mkhedruli)",
        "tag": "GEOR",
        "hint_top_to_bottom": False,
        "std_chars": ['ი', 'ე', 'ა', 'Ჿ'], # ი ე ა Ი
        "base_ranges": [
            (0x10D0, 0x10FF), # Georgian (Mkhedruli)
            (0x1C90, 0x1CBF), # Georgian Extended (Mtavruli)
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['გ', 'დ', 'ე', 'ვ', 'თ', 'ი', 'ო', 'ღ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ა', 'ზ', 'მ', 'ს', 'შ', 'ძ', 'ხ', 'პ'], "0"),
            (['ს', 'ხ', 'ქ', 'ზ', 'მ', 'შ', 'ჩ', 'წ'], "LATIN_TOP"),
            (['ე', 'ვ', 'ჟ', 'ტ', 'უ', 'ფ', 'ქ', 'ყ'], "0"),
            (['Ნ', 'Ჟ', 'Ჳ', 'Ჸ', 'Გ', 'Ე', 'Ო', 'Ჴ'], "LATIN_TOP"),
            (['Ი', 'Ჲ', 'Ო', 'Ჩ', 'Მ', 'Შ', 'Ჯ', 'Ჽ'], "0"),
        ],
    },
    {
        "name": "Georgian (Khutsuri)",
        "tag": "GEOK",
        "hint_top_to_bottom": False,
        "std_chars": ['Ⴖ', 'Ⴑ', 'ⴙ'], # Ⴖ Ⴑ ⴙ
        "base_ranges": [
            (0x10A0, 0x10CD), # Georgian (Asomtavruli)
            (0x2D00, 0x2D2D), # Georgian Supplement (Nuskhuri)
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['Ⴑ', 'Ⴇ', 'Ⴙ', 'Ⴜ', 'Ⴄ', 'Ⴅ', 'Ⴓ', 'Ⴚ'], "LATIN_TOP"),
            (['Ⴄ', 'Ⴅ', 'Ⴇ', 'Ⴈ', 'Ⴆ', 'Ⴑ', 'Ⴊ', 'Ⴋ'], "0"),
            (['ⴁ', 'ⴗ', 'ⴂ', 'ⴄ', 'ⴅ', 'ⴇ', 'ⴔ', 'ⴖ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ⴈ', 'ⴌ', 'ⴖ', 'ⴎ', 'ⴃ', 'ⴆ', 'ⴋ', 'ⴢ'], "0"),
            (['ⴐ', 'ⴑ', 'ⴓ', 'ⴕ', 'ⴙ', 'ⴛ', 'ⴡ', 'ⴣ'], "LATIN_TOP"),
            (['ⴄ', 'ⴅ', 'ⴔ', 'ⴕ', 'ⴁ', 'ⴂ', 'ⴘ', 'ⴝ'], "0"),
        ],
    },
    {
        "name": "Glagolitic",
        "tag": "GLAG",
        "hint_top_to_bottom": False,
        "std_chars": ['Ⱅ', 'ⱅ'], # Ⱅ ⱅ
        "base_ranges": [
            (0x2C00, 0x2C5F), # Glagolitic
            (0x1E000, 0x1E02F), # Glagolitic Supplement
        ],
        "non_base_ranges": [
            (0x1E000, 0x1E02F),
        ],
        "blues": [
            (['Ⰵ', 'Ⱄ', 'Ⱚ', 'Ⰴ', 'Ⰲ', 'Ⰺ', 'Ⱛ', 'Ⰻ'], "LATIN_TOP"),
            (['Ⰵ', 'Ⰴ', 'Ⰲ', 'Ⱚ', 'Ⱎ', 'Ⱑ', 'Ⰺ', 'Ⱄ'], "0"),
            (['ⰵ', 'ⱄ', 'ⱚ', 'ⰴ', 'ⰲ', 'ⰺ', 'ⱛ', 'ⰻ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ⰵ', 'ⰴ', 'ⰲ', 'ⱚ', 'ⱎ', 'ⱑ', 'ⰺ', 'ⱄ'], "0"),
        ],
    },
    {
        "name": "Gothic",
        "tag": "GOTH",
        "hint_top_to_bottom": True,
        "std_chars": ['𐌴', '𐌾', '𐍃'], # 𐌴 𐌾 𐍃
        "base_ranges": [
            (0x10330, 0x1034F), # Gothic
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐌲', '𐌶', '𐍀', '𐍄', '𐌴', '𐍃', '𐍈', '𐌾'], "LATIN_TOP"),
            (['𐌶', '𐌴', '𐍃', '𐍈'], "0"),
        ],
    },
    {
        "name": "Greek",
        "tag": "GREK",
        "hint_top_to_bottom": False,
        "std_chars": ['ο', 'Ο'], # ο Ο
        "base_ranges": [
            (0x0370, 0x03FF), # Greek and Coptic
            (0x1F00, 0x1FFF), # Greek Extended
        ],
        "non_base_ranges": [
            (0x037A, 0x037A),
            (0x0384, 0x0385),
            (0x1FBD, 0x1FC1),
            (0x1FCD, 0x1FCF),
            (0x1FDD, 0x1FDF),
            (0x1FED, 0x1FEF),
            (0x1FFD, 0x1FFE),
        ],
        "blues": [
            (['Γ', 'Β', 'Ε', 'Ζ', 'Θ', 'Ο', 'Ω'], "LATIN_TOP"),
            (['Β', 'Δ', 'Ζ', 'Ξ', 'Θ', 'Ο'], "0"),
            (['β', 'θ', 'δ', 'ζ', 'λ', 'ξ'], "LATIN_TOP"),
            (['α', 'ε', 'ι', 'ο', 'π', 'σ', 'τ', 'ω'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['α', 'ε', 'ι', 'ο', 'π', 'σ', 'τ', 'ω'], "0"),
            (['β', 'γ', 'η', 'μ', 'ρ', 'φ', 'χ', 'ψ'], "0"),
        ],
    },
    {
        "name": "Gujarati",
        "tag": "GUJR",
        "hint_top_to_bottom": False,
        "std_chars": ['ટ', '૦'], # ટ ૦
        "base_ranges": [
            (0x0A80, 0x0AFF), # Gujarati
        ],
        "non_base_ranges": [
            (0x0A81, 0x0A82),
            (0x0ABC, 0x0ABC),
            (0x0AC1, 0x0AC8),
            (0x0ACD, 0x0ACD),
            (0x0AE2, 0x0AE3),
            (0x0AFA, 0x0AFF),
        ],
        "blues": [
            (['ત', 'ન', 'ઋ', 'ઌ', 'છ', 'ટ', 'ર', '૦'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ખ', 'ગ', 'ઘ', 'ઞ', 'ઇ', 'ઈ', 'ઠ', 'જ'], "0"),
            (['ઈ', 'ઊ', 'િ', 'ી', 'લ', 'શ', 'જ', 'સ'], "LATIN_TOP"),
            (['ુ', 'ૃ', 'ૄ', 'ખ', 'છ', 'છ'], "0"),
            (['૦', '૧', '૨', '૩', '૭'], "LATIN_TOP"),
        ],
    },
    {
        "name": "Gurmukhi",
        "tag": "GURU",
        "hint_top_to_bottom": True,
        "std_chars": ['ਠ', 'ਰ', '੦'], # ਠ ਰ ੦
        "base_ranges": [
            (0x0A00, 0x0A7F), # Gurmukhi
        ],
        "non_base_ranges": [
            (0x0A01, 0x0A02),
            (0x0A3C, 0x0A3C),
            (0x0A41, 0x0A51),
            (0x0A70, 0x0A71),
            (0x0A75, 0x0A75),
        ],
        "blues": [
            (['ਇ', 'ਈ', 'ਉ', 'ਏ', 'ਓ', 'ੳ', 'ਿ', 'ੀ'], "LATIN_TOP"),
            (['ਕ', 'ਗ', 'ਙ', 'ਚ', 'ਜ', 'ਤ', 'ਧ', 'ਸ'], "LATIN_TOP"),
            (['ਕ', 'ਗ', 'ਙ', 'ਚ', 'ਜ', 'ਤ', 'ਧ', 'ਸ'], "LATIN_TOP | LATIN_NEUTRAL | LATIN_X_HEIGHT"),
            (['ਅ', 'ਏ', 'ਓ', 'ਗ', 'ਜ', 'ਠ', 'ਰ', 'ਸ'], "0"),
            (['੦', '੧', '੨', '੩', '੭'], "LATIN_TOP"),
        ],
    },
    {
        "name": "Hebrew",
        "tag": "HEBR",
        "hint_top_to_bottom": False,
        "std_chars": ['ם'], # ם
        "base_ranges": [
            (0x0590, 0x05FF), # Hebrew
            (0xFB1D, 0xFB4F), # Alphab. Present. Forms (Hebrew)
        ],
        "non_base_ranges": [
            (0x0591, 0x05BF),
            (0x05C1, 0x05C2),
            (0x05C4, 0x05C5),
            (0x05C7, 0x05C7),
            (0xFB1E, 0xFB1E),
        ],
        "blues": [
            (['ב', 'ד', 'ה', 'ח', 'ך', 'כ', 'ם', 'ס'], "LATIN_TOP | LATIN_LONG"),
            (['ב', 'ט', 'כ', 'ם', 'ס', 'צ'], "0"),
            (['ק', 'ך', 'ן', 'ף', 'ץ'], "0"),
        ],
    },
    {
        "name": "Kayah Li",
        "tag": "KALI",
        "hint_top_to_bottom": False,
        "std_chars": ['ꤍ', '꤀'], # ꤍ ꤀
        "base_ranges": [
            (0xA900, 0xA92F), # Kayah Li
        ],
        "non_base_ranges": [
            (0xA926, 0xA92D),
        ],
        "blues": [
            (['꤅', 'ꤏ', '꤁', 'ꤋ', '꤀', 'ꤍ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['꤈', 'ꤘ', '꤀', 'ꤍ', 'ꤢ'], "0"),
            (['ꤖ', 'ꤡ'], "LATIN_TOP"),
            (['ꤑ', 'ꤜ', 'ꤞ'], "0"),
            (['ꤑ', 'ꤜ', 'ꤔ'], "0"),
        ],
    },
    {
        "name": "Khmer",
        "tag": "KHMR",
        "hint_top_to_bottom": False,
        "std_chars": ['០'], # ០
        "base_ranges": [
            (0x1780, 0x17FF), # Khmer
        ],
        "non_base_ranges": [
            (0x17B7, 0x17BD),
            (0x17C6, 0x17C6),
            (0x17C9, 0x17D3),
            (0x17DD, 0x17DD),
        ],
        "blues": [
            (['ខ', 'ទ', 'ន', 'ឧ', 'ឩ', 'ា'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ក', 'ក', 'ក', 'ក'], "LATIN_SUB_TOP"),
            (['ខ', 'ឃ', 'ច', 'ឋ', 'ប', 'ម', 'យ', 'ឲ'], "0"),
            (['ត', 'រ', 'ឲ', 'អ'], "0"),
            (['ន', 'ង', 'ក', 'ច', 'ន', 'ល'], "0"),
        ],
    },
    {
        "name": "Khmer Symbols",
        "tag": "KHMS",
        "hint_top_to_bottom": False,
        "std_chars": ['᧡', '᧪'], # ᧡ ᧪
        "base_ranges": [
            (0x19E0, 0x19FF), # Khmer Symbols
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['᧠', '᧡'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['᧶', '᧹'], "0"),
        ],
    },
    {
        "name": "Kannada",
        "tag": "KNDA",
        "hint_top_to_bottom": False,
        "std_chars": ['೦', 'ಬ'], # ೦ ಬ
        "base_ranges": [
            (0x0C80, 0x0CFF), # Kannada
        ],
        "non_base_ranges": [
            (0x0C81, 0x0C81),
            (0x0CBC, 0x0CBC),
            (0x0CBF, 0x0CBF),
            (0x0CC6, 0x0CC6),
            (0x0CCC, 0x0CCD),
            (0x0CE2, 0x0CE3),
        ],
        "blues": [
            (['ಇ', 'ಊ', 'ಐ', 'ಣ', 'ಸ', 'ನ', 'ದ', 'ರ'], "LATIN_TOP"),
            (['ಅ', 'ಉ', 'ಎ', 'ಲ', '೦', '೨', '೬', '೭'], "0"),
        ],
    },
    {
        "name": "Lao",
        "tag": "LAOO",
        "hint_top_to_bottom": False,
        "std_chars": ['໐'], # ໐
        "base_ranges": [
            (0x0E80, 0x0EFF), # Lao
        ],
        "non_base_ranges": [
            (0x0EB1, 0x0EB1),
            (0x0EB4, 0x0EBC),
            (0x0EC8, 0x0ECD),
        ],
        "blues": [
            (['າ', 'ດ', 'ອ', 'ມ', 'ລ', 'ວ', 'ຣ', 'ງ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['າ', 'ອ', 'ບ', 'ຍ', 'ຣ', 'ຮ', 'ວ', 'ຢ'], "0"),
            (['ປ', 'ຢ', 'ຟ', 'ຝ'], "LATIN_TOP"),
            (['ໂ', 'ໄ', 'ໃ'], "LATIN_TOP"),
            (['ງ', 'ຊ', 'ຖ', 'ຽ', 'ໆ', 'ຯ'], "0"),
        ],
    },
    {
        "name": "Latin",
        "tag": "LATN",
        "hint_top_to_bottom": False,
        "std_chars": ['o', 'O', '0'],
        "base_ranges": [
            (0x0020, 0x007F), # Basic Latin (no control chars)
            (0x00A0, 0x00A9), # Latin-1 Supplement (no control chars)
            (0x00AB, 0x00B1), # ... continued
            (0x00B4, 0x00B8), # ... continued
            (0x00BB, 0x00FF), # ... continued
            (0x0100, 0x017F), # Latin Extended-A
            (0x0180, 0x024F), # Latin Extended-B
            (0x0250, 0x02AF), # IPA Extensions
            (0x02B9, 0x02DF), # Spacing Modifier Letters
            (0x02E5, 0x02FF), # ... continued
            (0x0300, 0x036F), # Combining Diacritical Marks
            (0x1AB0, 0x1ABE), # Combining Diacritical Marks Extended
            (0x1D00, 0x1D2B), # Phonetic Extensions
            (0x1D6B, 0x1D77), # ... continued
            (0x1D79, 0x1D7F), # ... continued
            (0x1D80, 0x1D9A), # Phonetic Extensions Supplement
            (0x1DC0, 0x1DFF), # Combining Diacritical Marks Supplement
            (0x1E00, 0x1EFF), # Latin Extended Additional
            (0x2000, 0x206F), # General Punctuation
            (0x20A0, 0x20B8), # Currency Symbols ...
            (0x20BA, 0x20CF), # ... except new Rupee sign
            (0x2150, 0x218F), # Number Forms
            (0x2C60, 0x2C7B), # Latin Extended-C
            (0x2C7E, 0x2C7F), # ... continued
            (0x2E00, 0x2E7F), # Supplemental Punctuation
            (0xA720, 0xA76F), # Latin Extended-D
            (0xA771, 0xA7F7), # ... continued
            (0xA7FA, 0xA7FF), # ... continued
            (0xAB30, 0xAB5B), # Latin Extended-E
            (0xAB60, 0xAB6F), # ... continued
            (0xFB00, 0xFB06), # Alphab. Present. Forms (Latin Ligs)
            (0x1D400, 0x1D7FF), # Mathematical Alphanumeric Symbols
        ],
        "non_base_ranges": [
            (0x005E, 0x0060),
            (0x007E, 0x007E),
            (0x00A8, 0x00A9),
            (0x00AE, 0x00B0),
            (0x00B4, 0x00B4),
            (0x00B8, 0x00B8),
            (0x00BC, 0x00BE),
            (0x02B9, 0x02DF),
            (0x02E5, 0x02FF),
            (0x0300, 0x036F),
            (0x1AB0, 0x1ABE),
            (0x1DC0, 0x1DFF),
            (0x2017, 0x2017),
            (0x203E, 0x203E),
            (0xA788, 0xA788),
            (0xA7F8, 0xA7FA),
        ],
        "blues": [
            (['T', 'H', 'E', 'Z', 'O', 'C', 'Q', 'S'], "LATIN_TOP"),
            (['H', 'E', 'Z', 'L', 'O', 'C', 'U', 'S'], "0"),
            (['f', 'i', 'j', 'k', 'd', 'b', 'h'], "LATIN_TOP"),
            (['u', 'v', 'x', 'z', 'o', 'e', 's', 'c'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['n', 'r', 'x', 'z', 'o', 'e', 's', 'c'], "0"),
            (['p', 'q', 'g', 'j', 'y'], "0"),
        ],
    },
    {
        "name": "Latin Subscript Fallback",
        "tag": "LATB",
        "hint_top_to_bottom": False,
        "std_chars": ['ₒ', '₀'], # ₒ ₀
        "base_ranges": [
            (0x1D62, 0x1D6A), # some small subscript letters
            (0x2080, 0x209C), # subscript digits and letters
            (0x2C7C, 0x2C7C), # latin subscript small letter j
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['₀', '₃', '₅', '₇', '₈'], "LATIN_TOP"),
            (['₀', '₁', '₂', '₃', '₈'], "0"),
            (['ᵢ', 'ⱼ', 'ₕ', 'ₖ', 'ₗ'], "LATIN_TOP"),
            (['ₐ', 'ₑ', 'ₒ', 'ₓ', 'ₙ', 'ₛ', 'ᵥ', 'ᵤ', 'ᵣ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ₐ', 'ₑ', 'ₒ', 'ₓ', 'ₙ', 'ₛ', 'ᵥ', 'ᵤ', 'ᵣ'], "0"),
            (['ᵦ', 'ᵧ', 'ᵨ', 'ᵩ', 'ₚ'], "0"),
        ],
    },
    {
        "name": "Latin Superscript Fallback",
        "tag": "LATP",
        "hint_top_to_bottom": False,
        "std_chars": ['ᵒ', 'ᴼ', '⁰'], # ᵒ ᴼ ⁰
        "base_ranges": [
            (0x00AA, 0x00AA), # feminine ordinal indicator
            (0x00B2, 0x00B3), # superscript two and three
            (0x00B9, 0x00BA), # superscript one, masc. ord. indic.
            (0x02B0, 0x02B8), # some latin superscript mod. letters
            (0x02E0, 0x02E4), # some IPA modifier letters
            (0x1D2C, 0x1D61), # latin superscript modifier letters
            (0x1D78, 0x1D78), # modifier letter cyrillic en
            (0x1D9B, 0x1DBF), # more modifier letters
            (0x2070, 0x207F), # superscript digits and letters
            (0x2C7D, 0x2C7D), # modifier letter capital v
            (0xA770, 0xA770), # modifier letter us
            (0xA7F8, 0xA7F9), # more modifier letters
            (0xAB5C, 0xAB5F), # more modifier letters
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['⁰', '³', '⁵', '⁷', 'ᵀ', 'ᴴ', 'ᴱ', 'ᴼ'], "LATIN_TOP"),
            (['⁰', '¹', '²', '³', 'ᴱ', 'ᴸ', 'ᴼ', 'ᵁ'], "0"),
            (['ᵇ', 'ᵈ', 'ᵏ', 'ʰ', 'ʲ', 'ᶠ', 'ⁱ'], "LATIN_TOP"),
            (['ᵉ', 'ᵒ', 'ʳ', 'ˢ', 'ˣ', 'ᶜ', 'ᶻ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ᵉ', 'ᵒ', 'ʳ', 'ˢ', 'ˣ', 'ᶜ', 'ᶻ'], "0"),
            (['ᵖ', 'ʸ', 'ᵍ'], "0"),
        ],
    },
    {
        "name": "Lisu",
        "tag": "LISU",
        "hint_top_to_bottom": False,
        "std_chars": ['ꓳ'], # ꓳ
        "base_ranges": [
            (0xA4D0, 0xA4FF), # Lisu
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['ꓡ', 'ꓧ', 'ꓱ', 'ꓶ', 'ꓩ', 'ꓚ', 'ꓵ', 'ꓳ'], "LATIN_TOP"),
            (['ꓕ', 'ꓜ', 'ꓞ', 'ꓡ', 'ꓛ', 'ꓢ', 'ꓳ', 'ꓴ'], "0"),
        ],
    },
    {
        "name": "Malayalam",
        "tag": "MLYM",
        "hint_top_to_bottom": False,
        "std_chars": ['ഠ', 'റ'], # ഠ റ
        "base_ranges": [
            (0x0D00, 0x0D7F), # Malayalam
        ],
        "non_base_ranges": [
            (0x0D00, 0x0D01),
            (0x0D3B, 0x0D3C),
            (0x0D4D, 0x0D4E),
            (0x0D62, 0x0D63),
        ],
        "blues": [
            (['ഒ', 'ട', 'ഠ', 'റ', 'ച', 'പ', 'ച', 'പ'], "LATIN_TOP"),
            (['ട', 'ഠ', 'ധ', 'ശ', 'ഘ', 'ച', 'ഥ', 'ല'], "0"),
        ],
    },
    {
        "name": "Medefaidrin",
        "tag": "MEDF",
        "hint_top_to_bottom": False,
        "std_chars": ['𖹡', '𖹛', '𖹯'], # 𖹡 𖹛 𖹯
        "base_ranges": [
            (0x16E40, 0x16E9F), # Medefaidrin
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𖹀', '𖹁', '𖹂', '𖹃', '𖹏', '𖹚', '𖹟'], "LATIN_TOP"),
            (['𖹀', '𖹁', '𖹂', '𖹃', '𖹏', '𖹚', '𖹒', '𖹓'], "0"),
            (['𖹤', '𖹬', '𖹧', '𖹴', '𖹶', '𖹾'], "LATIN_TOP"),
            (['𖹠', '𖹡', '𖹢', '𖹹', '𖹳', '𖹮'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['𖹠', '𖹡', '𖹢', '𖹳', '𖹭', '𖹽'], "0"),
            (['𖹥', '𖹨', '𖹩'], "0"),
            (['𖺀', '𖺅', '𖺈', '𖺄', '𖺍'], "LATIN_TOP"),
        ],
    },
    {
        "name": "Mongolian",
        "tag": "MONG",
        "hint_top_to_bottom": True,
        "std_chars": ['ᡂ', 'ᠪ'], # ᡂ ᠪ
        "base_ranges": [
            (0x1800, 0x18AF), # Mongolian
            (0x11660, 0x1167F), # Mongolian Supplement
        ],
        "non_base_ranges": [
            (0x1885, 0x1886),
            (0x18A9, 0x18A9),
        ],
        "blues": [
            (['ᠳ', 'ᠴ', 'ᠶ', 'ᠽ', 'ᡂ', 'ᡊ', '‍', '‍'], "LATIN_TOP"),
            (['ᡃ'], "0"),
        ],
    },
    {
        "name": "Myanmar",
        "tag": "MYMR",
        "hint_top_to_bottom": False,
        "std_chars": ['ဝ', 'င', 'ဂ'], # ဝ င ဂ
        "base_ranges": [
            (0x1000, 0x109F), # Myanmar
            (0xA9E0, 0xA9FF), # Myanmar Extended-B
            (0xAA60, 0xAA7F), # Myanmar Extended-A
        ],
        "non_base_ranges": [
            (0x102D, 0x1030),
            (0x1032, 0x1037),
            (0x103A, 0x103A),
            (0x103D, 0x103E),
            (0x1058, 0x1059),
            (0x105E, 0x1060),
            (0x1071, 0x1074),
            (0x1082, 0x1082),
            (0x1085, 0x1086),
            (0x108D, 0x108D),
            (0xA9E5, 0xA9E5),
            (0xAA7C, 0xAA7C),
        ],
        "blues": [
            (['ခ', 'ဂ', 'င', 'ဒ', 'ဝ', 'ၥ', '၊', '။'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['င', 'ဎ', 'ဒ', 'ပ', 'ဗ', 'ဝ', '၊', '။'], "0"),
            (['ဩ', 'ြ', '၍', '၏', '၆', 'ါ', 'ိ'], "LATIN_TOP"),
            (['ဉ', 'ည', 'ဥ', 'ဩ', 'ဨ', '၂', '၅', '၉'], "0"),
        ],
    },
    {
        "name": "N'Ko",
        "tag": "NKOO",
        "hint_top_to_bottom": False,
        "std_chars": ['ߋ', '߀'], # ߋ ߀
        "base_ranges": [
            (0x07C0, 0x07FF), # N'Ko
        ],
        "non_base_ranges": [
            (0x07EB, 0x07F5),
            (0x07FD, 0x07FD),
        ],
        "blues": [
            (['ߐ', '߉', 'ߒ', 'ߟ', 'ߖ', 'ߜ', 'ߠ', 'ߥ'], "LATIN_TOP"),
            (['߀', 'ߘ', 'ߡ', 'ߠ', 'ߥ'], "0"),
            (['ߏ', 'ߛ', 'ߋ'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['ߎ', 'ߏ', 'ߛ', 'ߋ'], "0"),
        ],
    },
    {
        "name": "no script",
        "tag": "NONE",
        "hint_top_to_bottom": False,
        "std_chars": [],
        "base_ranges": [
        ],
        "non_base_ranges": [
        ],
        "blues": [
        ],
    },
    {
        "name": "Ol Chiki",
        "tag": "OLCK",
        "hint_top_to_bottom": False,
        "std_chars": ['ᱛ'], # ᱛ
        "base_ranges": [
            (0x1C50, 0x1C7F), # Ol Chiki
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['ᱛ', 'ᱜ', 'ᱝ', 'ᱡ', 'ᱢ', 'ᱥ'], "LATIN_TOP"),
            (['ᱛ', 'ᱜ', 'ᱝ', 'ᱡ', 'ᱢ', 'ᱥ'], "0"),
        ],
    },
    {
        "name": "Old Turkic",
        "tag": "ORKH",
        "hint_top_to_bottom": False,
        "std_chars": ['𐰗'], # 𐰗
        "base_ranges": [
            (0x10C00, 0x10C4F), # Old Turkic
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐰗', '𐰘', '𐰧'], "LATIN_TOP"),
            (['𐰉', '𐰗', '𐰦', '𐰧'], "0"),
        ],
    },
    {
        "name": "Osage",
        "tag": "OSGE",
        "hint_top_to_bottom": False,
        "std_chars": ['𐓂', '𐓪'], # 𐓂 𐓪
        "base_ranges": [
            (0x104B0, 0x104FF), # Osage
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐒾', '𐓍', '𐓒', '𐓓', '𐒻', '𐓂', '𐒵', '𐓆'], "LATIN_TOP"),
            (['𐒰', '𐓍', '𐓂', '𐒿', '𐓎', '𐒹'], "0"),
            (['𐒼', '𐒽', '𐒾'], "0"),
            (['𐓵', '𐓶', '𐓺', '𐓻', '𐓝', '𐓣', '𐓪', '𐓮'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['𐓘', '𐓚', '𐓣', '𐓵', '𐓡', '𐓧', '𐓪', '𐓶'], "0"),
            (['𐓤', '𐓦', '𐓸', '𐓹', '𐓛'], "LATIN_TOP"),
            (['𐓤', '𐓥', '𐓦'], "0"),
        ],
    },
    {
        "name": "Osmanya",
        "tag": "OSMA",
        "hint_top_to_bottom": False,
        "std_chars": ['𐒆', '𐒠'], # 𐒆 𐒠
        "base_ranges": [
            (0x10480, 0x104AF), # Osmanya
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐒆', '𐒉', '𐒐', '𐒒', '𐒘', '𐒛', '𐒠', '𐒣'], "LATIN_TOP"),
            (['𐒀', '𐒂', '𐒆', '𐒈', '𐒊', '𐒒', '𐒠', '𐒩'], "0"),
        ],
    },
    {
        "name": "Hanifi Rohingya",
        "tag": "ROHG",
        "hint_top_to_bottom": False,
        "std_chars": ['𐴰'], # 𐴰
        "base_ranges": [
            (0x10D00, 0x10D3F), # Hanifi Rohingya
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐴃', '𐴀', '𐴆', '𐴖', '𐴕'], "LATIN_TOP"),
            (['𐴔', '𐴖', '𐴕', '𐴑', '𐴐'], "0"),
            (['ـ'], "LATIN_NEUTRAL"),
        ],
    },
    {
        "name": "Saurashtra",
        "tag": "SAUR",
        "hint_top_to_bottom": False,
        "std_chars": ['ꢝ', '꣐'], # ꢝ ꣐
        "base_ranges": [
            (0xA880, 0xA8DF), # Saurashtra
        ],
        "non_base_ranges": [
            (0xA880, 0xA881),
            (0xA8B4, 0xA8C5),
        ],
        "blues": [
            (['ꢜ', 'ꢞ', 'ꢳ', 'ꢂ', 'ꢖ', 'ꢒ', 'ꢝ', 'ꢛ'], "LATIN_TOP"),
            (['ꢂ', 'ꢨ', 'ꢺ', 'ꢤ', 'ꢎ'], "0"),
        ],
    },
    {
        "name": "Shavian",
        "tag": "SHAW",
        "hint_top_to_bottom": False,
        "std_chars": ['𐑴'], # 𐑴
        "base_ranges": [
            (0x10450, 0x1047F), # Shavian
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['𐑕', '𐑙'], "LATIN_TOP"),
            (['𐑔', '𐑖', '𐑗', '𐑹', '𐑻'], "0"),
            (['𐑟', '𐑣'], "0"),
            (['𐑱', '𐑲', '𐑳', '𐑴', '𐑸', '𐑺', '𐑼'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['𐑴', '𐑻', '𐑹'], "0"),
        ],
    },
    {
        "name": "Sinhala",
        "tag": "SINH",
        "hint_top_to_bottom": False,
        "std_chars": ['ට'], # ට
        "base_ranges": [
            (0x0D80, 0x0DFF), # Sinhala
        ],
        "non_base_ranges": [
            (0x0DCA, 0x0DCA),
            (0x0DD2, 0x0DD6),
        ],
        "blues": [
            (['ඉ', 'ක', 'ඝ', 'ඳ', 'ප', 'ය', 'ල', 'ෆ'], "LATIN_TOP"),
            (['එ', 'ඔ', 'ඝ', 'ජ', 'ට', 'ථ', 'ධ', 'ර'], "0"),
            (['ද', 'ඳ', 'උ', 'ල', 'ත', 'ත', 'බ', 'ද'], "0"),
        ],
    },
    {
        "name": "Sundanese",
        "tag": "SUND",
        "hint_top_to_bottom": False,
        "std_chars": ['᮰'], # ᮰
        "base_ranges": [
            (0x1B80, 0x1BBF), # Sundanese
            (0x1CC0, 0x1CCF), # Sundanese Supplement
        ],
        "non_base_ranges": [
            (0x1B80, 0x1B82),
            (0x1BA1, 0x1BAD),
        ],
        "blues": [
            (['ᮋ', 'ᮞ', 'ᮮ', 'ᮽ', '᮰', 'ᮈ'], "LATIN_TOP"),
            (['ᮄ', 'ᮔ', 'ᮕ', 'ᮗ', '᮰', 'ᮆ', 'ᮈ', 'ᮉ'], "0"),
            (['ᮼ', '᳄'], "0"),
        ],
    },
    {
        "name": "Tamil",
        "tag": "TAML",
        "hint_top_to_bottom": False,
        "std_chars": ['௦'], # ௦
        "base_ranges": [
            (0x0B80, 0x0BFF), # Tamil
        ],
        "non_base_ranges": [
            (0x0B82, 0x0B82),
            (0x0BC0, 0x0BC2),
            (0x0BCD, 0x0BCD),
        ],
        "blues": [
            (['உ', 'ஒ', 'ஓ', 'ற', 'ஈ', 'க', 'ங', 'ச'], "LATIN_TOP"),
            (['க', 'ச', 'ல', 'ஶ', 'உ', 'ங', 'ட', 'ப'], "0"),
        ],
    },
    {
        "name": "Tai Viet",
        "tag": "TAVT",
        "hint_top_to_bottom": False,
        "std_chars": ['ꪒ', 'ꪫ'], # ꪒ ꪫ
        "base_ranges": [
            (0xAA80, 0xAADF), # Tai Viet
        ],
        "non_base_ranges": [
            (0xAAB0, 0xAAB0),
            (0xAAB2, 0xAAB4),
            (0xAAB7, 0xAAB8),
            (0xAABE, 0xAABF),
            (0xAAC1, 0xAAC1),
        ],
        "blues": [
            (['ꪆ', 'ꪔ', 'ꪒ', 'ꪖ', 'ꪫ'], "LATIN_TOP"),
            (['ꪉ', 'ꪫ', 'ꪮ'], "0"),
        ],
    },
    {
        "name": "Telugu",
        "tag": "TELU",
        "hint_top_to_bottom": False,
        "std_chars": ['౦', '౧'], # ౦ ౧
        "base_ranges": [
            (0x0C00, 0x0C7F), # Telugu
        ],
        "non_base_ranges": [
            (0x0C00, 0x0C00),
            (0x0C04, 0x0C04),
            (0x0C3E, 0x0C40),
            (0x0C46, 0x0C56),
            (0x0C62, 0x0C63),
        ],
        "blues": [
            (['ఇ', 'ఌ', 'ఙ', 'ఞ', 'ణ', 'ఱ', '౯'], "LATIN_TOP"),
            (['అ', 'క', 'చ', 'ర', 'ఽ', '౨', '౬'], "0"),
        ],
    },
    {
        "name": "Tifinagh",
        "tag": "TFNG",
        "hint_top_to_bottom": False,
        "std_chars": ['ⵔ'], # ⵔ
        "base_ranges": [
            (0x2D30, 0x2D7F), # Tifinagh
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['ⵔ', 'ⵙ', 'ⵛ', 'ⵞ', 'ⴵ', 'ⴼ', 'ⴹ', 'ⵎ'], "LATIN_TOP"),
            (['ⵔ', 'ⵙ', 'ⵛ', 'ⵞ', 'ⴵ', 'ⴼ', 'ⴹ', 'ⵎ'], "0"),
        ],
    },
    {
        "name": "Thai",
        "tag": "THAI",
        "hint_top_to_bottom": False,
        "std_chars": ['า', 'ๅ', '๐'], # า ๅ ๐
        "base_ranges": [
            (0x0E00, 0x0E7F), # Thai
        ],
        "non_base_ranges": [
            (0x0E31, 0x0E31),
            (0x0E34, 0x0E3A),
            (0x0E47, 0x0E4E),
        ],
        "blues": [
            (['บ', 'เ', 'แ', 'อ', 'ก', 'า'], "LATIN_TOP | LATIN_X_HEIGHT"),
            (['บ', 'ป', 'ษ', 'ฯ', 'อ', 'ย', 'ฮ'], "0"),
            (['ป', 'ฝ', 'ฟ'], "LATIN_TOP"),
            (['โ', 'ใ', 'ไ'], "LATIN_TOP"),
            (['ฎ', 'ฏ', 'ฤ', 'ฦ'], "0"),
            (['ญ', 'ฐ'], "0"),
            (['๐', '๑', '๓'], "0"),
        ],
    },
    {
        "name": "Vai",
        "tag": "VAII",
        "hint_top_to_bottom": False,
        "std_chars": ['ꘓ', 'ꖜ', 'ꖴ'], # ꘓ ꖜ ꖴ
        "base_ranges": [
            (0xA500, 0xA63F), # Vai
        ],
        "non_base_ranges": [
        ],
        "blues": [
            (['ꗍ', 'ꘖ', 'ꘙ', 'ꘜ', 'ꖜ', 'ꖝ', 'ꔅ', 'ꕢ'], "LATIN_TOP"),
            (['ꗍ', 'ꘖ', 'ꘙ', 'ꗞ', 'ꔅ', 'ꕢ', 'ꖜ', 'ꔆ'], "0"),
        ],
    },
    {
        "name": "Limbu",
        "tag": "LIMB",
        "hint_top_to_bottom": False,
        "std_chars": ['o'], # XXX
        "base_ranges": [
            (0x1900, 0x194F), # Limbu
        ],
        "non_base_ranges": [
            (0x1920, 0x1922),
            (0x1927, 0x1934),
            (0x1937, 0x193B),
        ],
        "blues": [],
    },
    {
        "name": "Oriya",
        "tag": "ORYA",
        "hint_top_to_bottom": False,
        "std_chars": ['o'], # XXX
        "base_ranges": [
            (0x0B00, 0x0B7F), # Oriya
        ],
        "non_base_ranges": [
            (0x0B01, 0x0B02),
            (0x0B3C, 0x0B3C),
            (0x0B3F, 0x0B3F),
            (0x0B41, 0x0B44),
            (0x0B4D, 0x0B56),
            (0x0B62, 0x0B63),
        ],
        "blues": [],
    },
    {
        "name": "Syloti Nagri",
        "tag": "SYLO",
        "hint_top_to_bottom": False,
        "std_chars": ['o'], # XXX
        "base_ranges": [
            (0xA800, 0xA82F), # Syloti Nagri
        ],
        "non_base_ranges": [
            (0xA802, 0xA802),
            (0xA806, 0xA806),
            (0xA80B, 0xA80B),
            (0xA825, 0xA826),
        ],
        "blues": [],
    },
    {
        "name": "Tibetan",
        "tag": "TIBT",
        "hint_top_to_bottom": False,
        "std_chars": ['o'], # XXX
        "base_ranges": [
            (0x0F00, 0x0FFF), # Tibetan
        ],
        "non_base_ranges": [
            (0x0F18, 0x0F19),
            (0x0F35, 0x0F35),
            (0x0F37, 0x0F37),
            (0x0F39, 0x0F39),
            (0x0F3E, 0x0F3F),
            (0x0F71, 0x0F7E),
            (0x0F80, 0x0F84),
            (0x0F86, 0x0F87),
            (0x0F8D, 0x0FBC),
        ],
        "blues": [],
    },
    {
        "name": "CJKV ideographs",
        "tag": "HANI",
        "hint_top_to_bottom": False,
        "std_chars": ['田', '囗'], # 田 囗
        "base_ranges": [
            (0x1100, 0x11FF), # Hangul Jamo
            (0x2E80, 0x2EFF), # CJK Radicals Supplement
            (0x2F00, 0x2FDF), # Kangxi Radicals
            (0x2FF0, 0x2FFF), # Ideographic Description Characters
            (0x3000, 0x303F), # CJK Symbols and Punctuation
            (0x3040, 0x309F), # Hiragana
            (0x30A0, 0x30FF), # Katakana
            (0x3100, 0x312F), # Bopomofo
            (0x3130, 0x318F), # Hangul Compatibility Jamo
            (0x3190, 0x319F), # Kanbun
            (0x31A0, 0x31BF), # Bopomofo Extended
            (0x31C0, 0x31EF), # CJK Strokes
            (0x31F0, 0x31FF), # Katakana Phonetic Extensions
            (0x3300, 0x33FF), # CJK Compatibility
            (0x3400, 0x4DBF), # CJK Unified Ideographs Extension A
            (0x4DC0, 0x4DFF), # Yijing Hexagram Symbols
            (0x4E00, 0x9FFF), # CJK Unified Ideographs
            (0xA960, 0xA97F), # Hangul Jamo Extended-A
            (0xAC00, 0xD7AF), # Hangul Syllables
            (0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
            (0xF900, 0xFAFF), # CJK Compatibility Ideographs
            (0xFE10, 0xFE1F), # Vertical forms
            (0xFE30, 0xFE4F), # CJK Compatibility Forms
            (0xFF00, 0xFFEF), # Halfwidth and Fullwidth Forms
            (0x1B000, 0x1B0FF), # Kana Supplement
            (0x1B100, 0x1B12F), # Kana Extended-A
            (0x1D300, 0x1D35F), # Tai Xuan Hing Symbols
            (0x20000, 0x2A6DF), # CJK Unified Ideographs Extension B
            (0x2A700, 0x2B73F), # CJK Unified Ideographs Extension C
            (0x2B740, 0x2B81F), # CJK Unified Ideographs Extension D
            (0x2B820, 0x2CEAF), # CJK Unified Ideographs Extension E
            (0x2CEB0, 0x2EBEF), # CJK Unified Ideographs Extension F
            (0x2F800, 0x2FA1F), # CJK Compatibility Ideographs Supplement
        ],
        "non_base_ranges": [
            (0x302A, 0x302F),
            (0x3190, 0x319F),
        ],
        "blues": [
            (['他', '们', '你', '來', '們', '到', '和', '地', '对', '對', '就', '席', '我', '时', '時', '會', '来', '為', '能', '舰', '說', '说', '这', '這', '齊', '|', '军', '同', '已', '愿', '既', '星', '是', '景', '民', '照', '现', '現', '理', '用', '置', '要', '軍', '那', '配', '里', '開', '雷', '露', '面', '顾'], "CJK_TOP"),
            (['个', '为', '人', '他', '以', '们', '你', '來', '個', '們', '到', '和', '大', '对', '對', '就', '我', '时', '時', '有', '来', '為', '要', '說', '说', '|', '主', '些', '因', '它', '想', '意', '理', '生', '當', '看', '着', '置', '者', '自', '著', '裡', '过', '还', '进', '進', '過', '道', '還', '里', '面'], "0"),
            (['些', '们', '你', '來', '們', '到', '和', '地', '她', '将', '將', '就', '年', '得', '情', '最', '样', '樣', '理', '能', '說', '说', '这', '這', '通', '|', '即', '吗', '吧', '听', '呢', '品', '响', '嗎', '师', '師', '收', '断', '斷', '明', '眼', '間', '间', '际', '陈', '限', '除', '陳', '随', '際', '隨'], "CJK_HORIZ"),
            (['事', '前', '學', '将', '將', '情', '想', '或', '政', '斯', '新', '样', '樣', '民', '沒', '没', '然', '特', '现', '現', '球', '第', '經', '谁', '起', '|', '例', '別', '别', '制', '动', '動', '吗', '嗎', '增', '指', '明', '朝', '期', '构', '物', '确', '种', '調', '调', '費', '费', '那', '都', '間', '间'], "CJK_HORIZ | CJK_RIGHT"),
        ],
    },
]

def _rust_char_literal(ch: str) -> str:
    """Return *ch* formatted as a Rust ``char`` literal.

    A backslash or a single quote is backslash-escaped so the emitted literal
    stays valid Rust; every other character is emitted verbatim.
    """
    if ch in ("\\", "'"):
        ch = "\\" + ch
    return "'{}'".format(ch)


def _rust_str_body(text: str) -> str:
    """Escape backslashes and double quotes in *text* for a Rust string literal."""
    return text.replace("\\", "\\\\").replace('"', '\\"')


def _script_class_entry(index: int, script: dict) -> str:
    """Render the ``ScriptClass { ... }`` array element for one script.

    *index* is the position of *script* in ``SCRIPT_CLASSES``; it is written
    out as the ``index`` field.
    """
    # Each element keeps its trailing ", " (the output is marked
    # #[rustfmt::skip], so the trailing separator is left as-is).
    std_chars = "".join(
        _rust_char_literal(ch) + ", " for ch in script["std_chars"]
    )
    parts = [
        "    ScriptClass {\n",
        "        name: \"{}\",\n".format(_rust_str_body(script["name"])),
        "        tag: Tag::new(b\"{}\"),\n".format(script["tag"]),
        "        index: {},\n".format(index),
        # Python's True/False become Rust's true/false.
        "        hint_top_to_bottom: {},\n".format(
            str(script["hint_top_to_bottom"]).lower()
        ),
        "        std_chars: &[{}],\n".format(std_chars),
    ]

    blues = script["blues"]
    if blues:
        # One line per blue entry: (characters, flags expression).
        parts.append("        blues: &[\n")
        for blue in blues:
            blue_chars = "".join(
                _rust_char_literal(ch) + ", " for ch in blue[0]
            )
            parts.append("            (&[{}], {}),\n".format(blue_chars, blue[1]))
        parts.append("        ],\n")
    else:
        parts.append("        blues: &[],\n")

    parts.append("    },\n")
    return "".join(parts)


def generate() -> str:
    """Render the Rust source text for the autohinter script tables.

    Reads the module-level ``SCRIPT_CLASSES`` list and emits, in order:

    * a ``SCRIPT_CLASSES`` array with one ``ScriptClass`` per list entry,
    * an ``impl ScriptClass`` block with one ``usize`` index constant per
      script tag,
    * a ``SCRIPT_RANGES`` array of inclusive code point ranges, sorted by
      code point, where adjacent code points with the same script and the
      same base/non-base kind are merged into a single range.

    Returns:
        The complete contents of the generated Rust file.
    """
    out = [
        "// THIS FILE IS AUTOGENERATED.\n",
        "// Any changes to this file will be overwritten.\n",
        "// Use ../scripts/gen_autohint_scripts.py to regenerate.\n\n",
    ]

    # code point -> (script index, is_non_base)
    char_map = {}

    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const SCRIPT_CLASSES: &[ScriptClass] = &[\n")
    for i, script in enumerate(SCRIPT_CLASSES):
        out.append(_script_class_entry(i, script))

        # Code points this script actually claimed (i.e. that were not
        # already owned by an earlier script).
        claimed = set()
        for char_range in script["base_ranges"]:
            first, last = char_range[0], char_range[1]
            # inclusive range
            for ch in range(first, last + 1):
                # Note: FT has overlapping ranges but we choose to keep
                # the first one to match behavior
                if ch not in char_map:
                    char_map[ch] = (i, False)
                    claimed.add(ch)
        for char_range in script["non_base_ranges"]:
            first, last = char_range[0], char_range[1]
            # inclusive range
            for ch in range(first, last + 1):
                # Only code points claimed by this script's base ranges can
                # be flagged; anything else in a non-base range is ignored.
                if ch in claimed:
                    char_map[ch] = (i, True)  # True for non-base character
    out.append("];\n\n")

    # Add some symbolic indices for each script so they can be
    # referenced by ScriptClass::LATN for example
    out.append("impl ScriptClass {\n")
    for i, script in enumerate(SCRIPT_CLASSES):
        out.append("    pub const {}: usize = {};\n".format(script["tag"], i))
    out.append("}\n\n")

    # Walk the map in code point order and merge into ranges. Keys are
    # unique, so sorting the items orders purely by code point.
    ranges = []
    for ch, props in sorted(char_map.items()):
        if ranges:
            first, last, last_props = ranges[-1]
            # we can merge if same props and this character extends the
            # range by 1
            if ch == last + 1 and last_props == props:
                ranges[-1] = (first, ch, props)
                continue
        ranges.append((ch, ch, props))

    # and finally output the ranges
    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const SCRIPT_RANGES: &[ScriptRange] = &[\n")
    for first, last, props in ranges:
        tag = SCRIPT_CLASSES[props[0]]["tag"]
        kind = "non_base_range" if props[1] else "base_range"
        out.append(
            "    {}({}, {}, ScriptClass::{}),\n".format(kind, first, last, tag)
        )
    out.append("];\n\n")
    return "".join(out)

if __name__ == "__main__":
    # Script entry point: regenerate the Rust tables and write them to the
    # "generated" directory that sits next to this script's directory.
    import os

    # Anchor the output path to this file's location instead of the current
    # working directory, so the script writes to the same place no matter
    # where it is launched from.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    out_path = os.path.join(
        script_dir, "..", "generated", "generated_autohint_scripts.rs"
    )
    data = generate()
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(data)
# chromium/third_party/rust/chromium_crates_io/vendor/skrifa-0.20.0/scripts/gen_autohint_scripts.py