chromium/third_party/mediapipe/src/mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/runetypebody.h

/* Copyright 2023 The MediaPipe Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifdef RUNETYPEBODY

/*
 * Range table consulted first by utf_isalpharune(): a flat list of
 * (first, last) rune pairs. utf_isalpharune() searches it with an entry
 * width of 2 and returns 1 when first <= c <= last for the entry found, so
 * both bounds are inclusive.
 *
 * The pairs are listed in ascending order. NOTE(review): rbsearch is not
 * defined in this header; it presumably relies on that ordering -- confirm
 * before inserting or reordering entries.
 */
static Rune __isalphar[] = {
    0x0041,  0x005a,  0x0061,  0x007a,  0x00c0,  0x00d6,  0x00d8,  0x00f6,
    0x00f8,  0x02c1,  0x02c6,  0x02d1,  0x02e0,  0x02e4,  0x0370,  0x0374,
    0x0376,  0x0377,  0x037a,  0x037d,  0x0388,  0x038a,  0x038e,  0x03a1,
    0x03a3,  0x03f5,  0x03f7,  0x0481,  0x048a,  0x0527,  0x0531,  0x0556,
    0x0561,  0x0587,  0x05d0,  0x05ea,  0x05f0,  0x05f2,  0x0620,  0x064a,
    0x066e,  0x066f,  0x0671,  0x06d3,  0x06e5,  0x06e6,  0x06ee,  0x06ef,
    0x06fa,  0x06fc,  0x0712,  0x072f,  0x074d,  0x07a5,  0x07ca,  0x07ea,
    0x07f4,  0x07f5,  0x0800,  0x0815,  0x0840,  0x0858,  0x08a2,  0x08ac,
    0x0904,  0x0939,  0x0958,  0x0961,  0x0971,  0x0977,  0x0979,  0x097f,
    0x0985,  0x098c,  0x098f,  0x0990,  0x0993,  0x09a8,  0x09aa,  0x09b0,
    0x09b6,  0x09b9,  0x09dc,  0x09dd,  0x09df,  0x09e1,  0x09f0,  0x09f1,
    0x0a05,  0x0a0a,  0x0a0f,  0x0a10,  0x0a13,  0x0a28,  0x0a2a,  0x0a30,
    0x0a32,  0x0a33,  0x0a35,  0x0a36,  0x0a38,  0x0a39,  0x0a59,  0x0a5c,
    0x0a72,  0x0a74,  0x0a85,  0x0a8d,  0x0a8f,  0x0a91,  0x0a93,  0x0aa8,
    0x0aaa,  0x0ab0,  0x0ab2,  0x0ab3,  0x0ab5,  0x0ab9,  0x0ae0,  0x0ae1,
    0x0b05,  0x0b0c,  0x0b0f,  0x0b10,  0x0b13,  0x0b28,  0x0b2a,  0x0b30,
    0x0b32,  0x0b33,  0x0b35,  0x0b39,  0x0b5c,  0x0b5d,  0x0b5f,  0x0b61,
    0x0b85,  0x0b8a,  0x0b8e,  0x0b90,  0x0b92,  0x0b95,  0x0b99,  0x0b9a,
    0x0b9e,  0x0b9f,  0x0ba3,  0x0ba4,  0x0ba8,  0x0baa,  0x0bae,  0x0bb9,
    0x0c05,  0x0c0c,  0x0c0e,  0x0c10,  0x0c12,  0x0c28,  0x0c2a,  0x0c33,
    0x0c35,  0x0c39,  0x0c58,  0x0c59,  0x0c60,  0x0c61,  0x0c85,  0x0c8c,
    0x0c8e,  0x0c90,  0x0c92,  0x0ca8,  0x0caa,  0x0cb3,  0x0cb5,  0x0cb9,
    0x0ce0,  0x0ce1,  0x0cf1,  0x0cf2,  0x0d05,  0x0d0c,  0x0d0e,  0x0d10,
    0x0d12,  0x0d3a,  0x0d60,  0x0d61,  0x0d7a,  0x0d7f,  0x0d85,  0x0d96,
    0x0d9a,  0x0db1,  0x0db3,  0x0dbb,  0x0dc0,  0x0dc6,  0x0e01,  0x0e30,
    0x0e32,  0x0e33,  0x0e40,  0x0e46,  0x0e81,  0x0e82,  0x0e87,  0x0e88,
    0x0e94,  0x0e97,  0x0e99,  0x0e9f,  0x0ea1,  0x0ea3,  0x0eaa,  0x0eab,
    0x0ead,  0x0eb0,  0x0eb2,  0x0eb3,  0x0ec0,  0x0ec4,  0x0edc,  0x0edf,
    0x0f40,  0x0f47,  0x0f49,  0x0f6c,  0x0f88,  0x0f8c,  0x1000,  0x102a,
    0x1050,  0x1055,  0x105a,  0x105d,  0x1065,  0x1066,  0x106e,  0x1070,
    0x1075,  0x1081,  0x10a0,  0x10c5,  0x10d0,  0x10fa,  0x10fc,  0x1248,
    0x124a,  0x124d,  0x1250,  0x1256,  0x125a,  0x125d,  0x1260,  0x1288,
    0x128a,  0x128d,  0x1290,  0x12b0,  0x12b2,  0x12b5,  0x12b8,  0x12be,
    0x12c2,  0x12c5,  0x12c8,  0x12d6,  0x12d8,  0x1310,  0x1312,  0x1315,
    0x1318,  0x135a,  0x1380,  0x138f,  0x13a0,  0x13f4,  0x1401,  0x166c,
    0x166f,  0x167f,  0x1681,  0x169a,  0x16a0,  0x16ea,  0x1700,  0x170c,
    0x170e,  0x1711,  0x1720,  0x1731,  0x1740,  0x1751,  0x1760,  0x176c,
    0x176e,  0x1770,  0x1780,  0x17b3,  0x1820,  0x1877,  0x1880,  0x18a8,
    0x18b0,  0x18f5,  0x1900,  0x191c,  0x1950,  0x196d,  0x1970,  0x1974,
    0x1980,  0x19ab,  0x19c1,  0x19c7,  0x1a00,  0x1a16,  0x1a20,  0x1a54,
    0x1b05,  0x1b33,  0x1b45,  0x1b4b,  0x1b83,  0x1ba0,  0x1bae,  0x1baf,
    0x1bba,  0x1be5,  0x1c00,  0x1c23,  0x1c4d,  0x1c4f,  0x1c5a,  0x1c7d,
    0x1ce9,  0x1cec,  0x1cee,  0x1cf1,  0x1cf5,  0x1cf6,  0x1d00,  0x1dbf,
    0x1e00,  0x1f15,  0x1f18,  0x1f1d,  0x1f20,  0x1f45,  0x1f48,  0x1f4d,
    0x1f50,  0x1f57,  0x1f5f,  0x1f7d,  0x1f80,  0x1fb4,  0x1fb6,  0x1fbc,
    0x1fc2,  0x1fc4,  0x1fc6,  0x1fcc,  0x1fd0,  0x1fd3,  0x1fd6,  0x1fdb,
    0x1fe0,  0x1fec,  0x1ff2,  0x1ff4,  0x1ff6,  0x1ffc,  0x2090,  0x209c,
    0x210a,  0x2113,  0x2119,  0x211d,  0x212a,  0x212d,  0x212f,  0x2139,
    0x213c,  0x213f,  0x2145,  0x2149,  0x2183,  0x2184,  0x2c00,  0x2c2e,
    0x2c30,  0x2c5e,  0x2c60,  0x2ce4,  0x2ceb,  0x2cee,  0x2cf2,  0x2cf3,
    0x2d00,  0x2d25,  0x2d30,  0x2d67,  0x2d80,  0x2d96,  0x2da0,  0x2da6,
    0x2da8,  0x2dae,  0x2db0,  0x2db6,  0x2db8,  0x2dbe,  0x2dc0,  0x2dc6,
    0x2dc8,  0x2dce,  0x2dd0,  0x2dd6,  0x2dd8,  0x2dde,  0x3005,  0x3006,
    0x3031,  0x3035,  0x303b,  0x303c,  0x3041,  0x3096,  0x309d,  0x309f,
    0x30a1,  0x30fa,  0x30fc,  0x30ff,  0x3105,  0x312d,  0x3131,  0x318e,
    0x31a0,  0x31ba,  0x31f0,  0x31ff,  0x3400,  0x4db5,  0x4e00,  0x9fcc,
    0xa000,  0xa48c,  0xa4d0,  0xa4fd,  0xa500,  0xa60c,  0xa610,  0xa61f,
    0xa62a,  0xa62b,  0xa640,  0xa66e,  0xa67f,  0xa697,  0xa6a0,  0xa6e5,
    0xa717,  0xa71f,  0xa722,  0xa788,  0xa78b,  0xa78e,  0xa790,  0xa793,
    0xa7a0,  0xa7aa,  0xa7f8,  0xa801,  0xa803,  0xa805,  0xa807,  0xa80a,
    0xa80c,  0xa822,  0xa840,  0xa873,  0xa882,  0xa8b3,  0xa8f2,  0xa8f7,
    0xa90a,  0xa925,  0xa930,  0xa946,  0xa960,  0xa97c,  0xa984,  0xa9b2,
    0xaa00,  0xaa28,  0xaa40,  0xaa42,  0xaa44,  0xaa4b,  0xaa60,  0xaa76,
    0xaa80,  0xaaaf,  0xaab5,  0xaab6,  0xaab9,  0xaabd,  0xaadb,  0xaadd,
    0xaae0,  0xaaea,  0xaaf2,  0xaaf4,  0xab01,  0xab06,  0xab09,  0xab0e,
    0xab11,  0xab16,  0xab20,  0xab26,  0xab28,  0xab2e,  0xabc0,  0xabe2,
    0xac00,  0xd7a3,  0xd7b0,  0xd7c6,  0xd7cb,  0xd7fb,  0xf900,  0xfa6d,
    0xfa70,  0xfad9,  0xfb00,  0xfb06,  0xfb13,  0xfb17,  0xfb1f,  0xfb28,
    0xfb2a,  0xfb36,  0xfb38,  0xfb3c,  0xfb40,  0xfb41,  0xfb43,  0xfb44,
    0xfb46,  0xfbb1,  0xfbd3,  0xfd3d,  0xfd50,  0xfd8f,  0xfd92,  0xfdc7,
    0xfdf0,  0xfdfb,  0xfe70,  0xfe74,  0xfe76,  0xfefc,  0xff21,  0xff3a,
    0xff41,  0xff5a,  0xff66,  0xffbe,  0xffc2,  0xffc7,  0xffca,  0xffcf,
    0xffd2,  0xffd7,  0xffda,  0xffdc,  0x10000, 0x1000b, 0x1000d, 0x10026,
    0x10028, 0x1003a, 0x1003c, 0x1003d, 0x1003f, 0x1004d, 0x10050, 0x1005d,
    0x10080, 0x100fa, 0x10280, 0x1029c, 0x102a0, 0x102d0, 0x10300, 0x1031e,
    0x10330, 0x10340, 0x10342, 0x10349, 0x10380, 0x1039d, 0x103a0, 0x103c3,
    0x103c8, 0x103cf, 0x10400, 0x1049d, 0x10800, 0x10805, 0x1080a, 0x10835,
    0x10837, 0x10838, 0x1083f, 0x10855, 0x10900, 0x10915, 0x10920, 0x10939,
    0x10980, 0x109b7, 0x109be, 0x109bf, 0x10a10, 0x10a13, 0x10a15, 0x10a17,
    0x10a19, 0x10a33, 0x10a60, 0x10a7c, 0x10b00, 0x10b35, 0x10b40, 0x10b55,
    0x10b60, 0x10b72, 0x10c00, 0x10c48, 0x11003, 0x11037, 0x11083, 0x110af,
    0x110d0, 0x110e8, 0x11103, 0x11126, 0x11183, 0x111b2, 0x111c1, 0x111c4,
    0x11680, 0x116aa, 0x12000, 0x1236e, 0x13000, 0x1342e, 0x16800, 0x16a38,
    0x16f00, 0x16f44, 0x16f93, 0x16f9f, 0x1b000, 0x1b001, 0x1d400, 0x1d454,
    0x1d456, 0x1d49c, 0x1d49e, 0x1d49f, 0x1d4a5, 0x1d4a6, 0x1d4a9, 0x1d4ac,
    0x1d4ae, 0x1d4b9, 0x1d4bd, 0x1d4c3, 0x1d4c5, 0x1d505, 0x1d507, 0x1d50a,
    0x1d50d, 0x1d514, 0x1d516, 0x1d51c, 0x1d51e, 0x1d539, 0x1d53b, 0x1d53e,
    0x1d540, 0x1d544, 0x1d54a, 0x1d550, 0x1d552, 0x1d6a5, 0x1d6a8, 0x1d6c0,
    0x1d6c2, 0x1d6da, 0x1d6dc, 0x1d6fa, 0x1d6fc, 0x1d714, 0x1d716, 0x1d734,
    0x1d736, 0x1d74e, 0x1d750, 0x1d76e, 0x1d770, 0x1d788, 0x1d78a, 0x1d7a8,
    0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1ee00, 0x1ee03, 0x1ee05, 0x1ee1f,
    0x1ee21, 0x1ee22, 0x1ee29, 0x1ee32, 0x1ee34, 0x1ee37, 0x1ee4d, 0x1ee4f,
    0x1ee51, 0x1ee52, 0x1ee61, 0x1ee62, 0x1ee67, 0x1ee6a, 0x1ee6c, 0x1ee72,
    0x1ee74, 0x1ee77, 0x1ee79, 0x1ee7c, 0x1ee80, 0x1ee89, 0x1ee8b, 0x1ee9b,
    0x1eea1, 0x1eea3, 0x1eea5, 0x1eea9, 0x1eeab, 0x1eebb, 0x20000, 0x2a6d6,
    0x2a700, 0x2b734, 0x2b740, 0x2b81d, 0x2f800, 0x2fa1d,
};

/*
 * Singleton table consulted by utf_isalpharune() after the range table
 * misses: each element is one rune. utf_isalpharune() searches it with an
 * entry width of 1 and returns 1 only on an exact match (c == entry).
 *
 * Entries are listed in ascending order. NOTE(review): rbsearch is not
 * defined in this header; it presumably relies on that ordering -- confirm
 * before inserting or reordering entries.
 */
static Rune __isalphas[] = {
    0x00aa,  0x00b5,  0x00ba,  0x02ec,  0x02ee,  0x0386,  0x038c,  0x0559,
    0x06d5,  0x06ff,  0x0710,  0x07b1,  0x07fa,  0x081a,  0x0824,  0x0828,
    0x08a0,  0x093d,  0x0950,  0x09b2,  0x09bd,  0x09ce,  0x0a5e,  0x0abd,
    0x0ad0,  0x0b3d,  0x0b71,  0x0b83,  0x0b9c,  0x0bd0,  0x0c3d,  0x0cbd,
    0x0cde,  0x0d3d,  0x0d4e,  0x0dbd,  0x0e84,  0x0e8a,  0x0e8d,  0x0ea5,
    0x0ea7,  0x0ebd,  0x0ec6,  0x0f00,  0x103f,  0x1061,  0x108e,  0x10c7,
    0x10cd,  0x1258,  0x12c0,  0x17d7,  0x17dc,  0x18aa,  0x1aa7,  0x1f59,
    0x1f5b,  0x1f5d,  0x1fbe,  0x2071,  0x207f,  0x2102,  0x2107,  0x2115,
    0x2124,  0x2126,  0x2128,  0x214e,  0x2d27,  0x2d2d,  0x2d6f,  0x2e2f,
    0xa8fb,  0xa9cf,  0xaa7a,  0xaab1,  0xaac0,  0xaac2,  0xfb1d,  0xfb3e,
    0x10808, 0x1083c, 0x10a00, 0x16f50, 0x1d4a2, 0x1d4bb, 0x1d546, 0x1ee24,
    0x1ee27, 0x1ee39, 0x1ee3b, 0x1ee42, 0x1ee47, 0x1ee49, 0x1ee4b, 0x1ee54,
    0x1ee57, 0x1ee59, 0x1ee5b, 0x1ee5d, 0x1ee5f, 0x1ee64, 0x1ee7e,
};

/*
 * Reports whether rune c is covered by the alphabetic tables.
 *
 * Lookup order:
 *   1. __isalphar -- (first, last) pairs; a hit requires first <= c <= last.
 *   2. __isalphas -- single runes; a hit requires an exact match.
 *
 * Returns 1 on a hit in either table, 0 otherwise.
 *
 * NOTE(review): rbsearch is defined outside this header. The checks below
 * assume it returns either a null pointer or a pointer to a candidate entry
 * that still has to be validated against c -- confirm against its definition.
 */
int utf_isalpharune(Rune c) {
  Rune *range;
  Rune *single;

  /* Two Runes per entry, hence the halved element count and stride of 2. */
  range = rbsearch(c, __isalphar, nelem(__isalphar) / 2, 2);
  if (range != 0 && range[0] <= c && c <= range[1]) {
    return 1;
  }

  /* One Rune per entry; only consulted when the range table did not match. */
  single = rbsearch(c, __isalphas, nelem(__isalphas), 1);
  if (single != 0 && single[0] == c) {
    return 1;
  }

  return 0;
}

/*
 * Range table consulted first by utf_tolowerrune(): a flat list of
 * (first, last, biased_delta) triples. For a rune c with
 * first <= c <= last, utf_tolowerrune() returns c + biased_delta - 1048576,
 * so the third field is the signed offset plus 1048576. For example, the
 * first triple (0x0041, 0x005a, 1048608) maps each rune in that range to
 * c + 32.
 *
 * Triples are listed in ascending order of `first`. NOTE(review): rbsearch
 * is not defined in this header; it presumably relies on that ordering --
 * confirm before inserting or reordering entries.
 */
static Rune __tolowerr[] = {
    0x0041, 0x005a, 1048608, 0x00c0,  0x00d6,  1048608, 0x00d8, 0x00de, 1048608,
    0x0189, 0x018a, 1048781, 0x01b1,  0x01b2,  1048793, 0x0388, 0x038a, 1048613,
    0x038e, 0x038f, 1048639, 0x0391,  0x03a1,  1048608, 0x03a3, 0x03ab, 1048608,
    0x03fd, 0x03ff, 1048446, 0x0400,  0x040f,  1048656, 0x0410, 0x042f, 1048608,
    0x0531, 0x0556, 1048624, 0x10a0,  0x10c5,  1055840, 0x1f08, 0x1f0f, 1048568,
    0x1f18, 0x1f1d, 1048568, 0x1f28,  0x1f2f,  1048568, 0x1f38, 0x1f3f, 1048568,
    0x1f48, 0x1f4d, 1048568, 0x1f68,  0x1f6f,  1048568, 0x1f88, 0x1f8f, 1048568,
    0x1f98, 0x1f9f, 1048568, 0x1fa8,  0x1faf,  1048568, 0x1fb8, 0x1fb9, 1048568,
    0x1fba, 0x1fbb, 1048502, 0x1fc8,  0x1fcb,  1048490, 0x1fd8, 0x1fd9, 1048568,
    0x1fda, 0x1fdb, 1048476, 0x1fe8,  0x1fe9,  1048568, 0x1fea, 0x1feb, 1048464,
    0x1ff8, 0x1ff9, 1048448, 0x1ffa,  0x1ffb,  1048450, 0x2160, 0x216f, 1048592,
    0x24b6, 0x24cf, 1048602, 0x2c00,  0x2c2e,  1048624, 0x2c7e, 0x2c7f, 1037761,
    0xff21, 0xff3a, 1048608, 0x10400, 0x10427, 1048616,
};

/*
 * Alternating-range table consulted second by utf_tolowerrune(): a flat list
 * of (first, last, biased_delta) triples. Unlike __tolowerr, an entry only
 * applies when first <= c <= last AND (c - first) is even, i.e. to every
 * other rune starting at `first`. The mapped value is
 * c + biased_delta - 1048576; most entries store 1048577, which maps c to
 * c + 1.
 *
 * Triples are listed in ascending order of `first`. NOTE(review): rbsearch
 * is not defined in this header; it presumably relies on that ordering --
 * confirm before inserting or reordering entries.
 */
static Rune __tolowerp[] = {
    0x0100, 0x012e, 1048577, 0x0132, 0x0136, 1048577, 0x0139, 0x0147, 1048577,
    0x014a, 0x0176, 1048577, 0x017b, 0x017d, 1048577, 0x01a2, 0x01a4, 1048577,
    0x01b3, 0x01b5, 1048577, 0x01cd, 0x01db, 1048577, 0x01de, 0x01ee, 1048577,
    0x01f8, 0x021e, 1048577, 0x0222, 0x0232, 1048577, 0x0248, 0x024e, 1048577,
    0x0370, 0x0372, 1048577, 0x03d8, 0x03ee, 1048577, 0x0460, 0x0480, 1048577,
    0x048a, 0x04be, 1048577, 0x04c3, 0x04cd, 1048577, 0x04d0, 0x0526, 1048577,
    0x1e00, 0x1e94, 1048577, 0x1ea0, 0x1efe, 1048577, 0x1f59, 0x1f5f, 1048568,
    0x2c67, 0x2c6b, 1048577, 0x2c80, 0x2ce2, 1048577, 0x2ceb, 0x2ced, 1048577,
    0xa640, 0xa66c, 1048577, 0xa680, 0xa696, 1048577, 0xa722, 0xa72e, 1048577,
    0xa732, 0xa76e, 1048577, 0xa779, 0xa77b, 1048577, 0xa780, 0xa786, 1048577,
    0xa790, 0xa792, 1048577, 0xa7a0, 0xa7a8, 1048577,
};

/*
 * Singleton table consulted last by utf_tolowerrune(): a flat list of
 * (rune, biased_delta) pairs. An entry applies only on an exact match
 * (c == rune); the mapped value is c + biased_delta - 1048576, so the
 * second field is the signed offset plus 1048576 (values below 1048576
 * therefore map to a smaller rune).
 *
 * Pairs are listed in ascending order of `rune`. NOTE(review): rbsearch is
 * not defined in this header; it presumably relies on that ordering --
 * confirm before inserting or reordering entries.
 */
static Rune __tolowers[] = {
    0x0130, 1048377, 0x0178, 1048455, 0x0179, 1048577, 0x0181, 1048786,
    0x0182, 1048577, 0x0184, 1048577, 0x0186, 1048782, 0x0187, 1048577,
    0x018b, 1048577, 0x018e, 1048655, 0x018f, 1048778, 0x0190, 1048779,
    0x0191, 1048577, 0x0193, 1048781, 0x0194, 1048783, 0x0196, 1048787,
    0x0197, 1048785, 0x0198, 1048577, 0x019c, 1048787, 0x019d, 1048789,
    0x019f, 1048790, 0x01a0, 1048577, 0x01a6, 1048794, 0x01a7, 1048577,
    0x01a9, 1048794, 0x01ac, 1048577, 0x01ae, 1048794, 0x01af, 1048577,
    0x01b7, 1048795, 0x01b8, 1048577, 0x01bc, 1048577, 0x01c4, 1048578,
    0x01c5, 1048577, 0x01c7, 1048578, 0x01c8, 1048577, 0x01ca, 1048578,
    0x01cb, 1048577, 0x01f1, 1048578, 0x01f2, 1048577, 0x01f4, 1048577,
    0x01f6, 1048479, 0x01f7, 1048520, 0x0220, 1048446, 0x023a, 1059371,
    0x023b, 1048577, 0x023d, 1048413, 0x023e, 1059368, 0x0241, 1048577,
    0x0243, 1048381, 0x0244, 1048645, 0x0245, 1048647, 0x0246, 1048577,
    0x0376, 1048577, 0x0386, 1048614, 0x038c, 1048640, 0x03cf, 1048584,
    0x03f4, 1048516, 0x03f7, 1048577, 0x03f9, 1048569, 0x03fa, 1048577,
    0x04c0, 1048591, 0x04c1, 1048577, 0x10c7, 1055840, 0x10cd, 1055840,
    0x1e9e, 1040961, 0x1fbc, 1048567, 0x1fcc, 1048567, 0x1fec, 1048569,
    0x1ffc, 1048567, 0x2126, 1041059, 0x212a, 1040193, 0x212b, 1040314,
    0x2132, 1048604, 0x2183, 1048577, 0x2c60, 1048577, 0x2c62, 1037833,
    0x2c63, 1044762, 0x2c64, 1037849, 0x2c6d, 1037796, 0x2c6e, 1037827,
    0x2c6f, 1037793, 0x2c70, 1037794, 0x2c72, 1048577, 0x2c75, 1048577,
    0x2cf2, 1048577, 0xa77d, 1013244, 0xa77e, 1048577, 0xa78b, 1048577,
    0xa78d, 1006296, 0xa7aa, 1006268,
};

/*
 * Maps rune c through the to-lower tables and returns the result; a rune
 * that matches no table entry is returned unchanged.
 *
 * Lookup order (first match wins):
 *   1. __tolowerr -- (first, last, delta) triples covering every rune in
 *      the inclusive range.
 *   2. __tolowerp -- (first, last, delta) triples covering only runes at an
 *      even distance from `first`.
 *   3. __tolowers -- (rune, delta) pairs requiring an exact match.
 *
 * NOTE(review): rbsearch is defined outside this header. The checks below
 * assume it returns either a null pointer or a pointer to a candidate entry
 * that still has to be validated against c -- confirm against its definition.
 */
Rune utf_tolowerrune(Rune c) {
  /* Table deltas are stored with this bias added; remove it when applying. */
  enum { kDeltaBias = 1048576 };
  Rune *hit;

  /* Contiguous ranges: three Runes per entry. */
  hit = rbsearch(c, __tolowerr, nelem(__tolowerr) / 3, 3);
  if (hit != 0 && hit[0] <= c && c <= hit[1]) {
    return c + hit[2] - kDeltaBias;
  }

  /* Alternating ranges: only runes at an even offset from the start map. */
  hit = rbsearch(c, __tolowerp, nelem(__tolowerp) / 3, 3);
  if (hit != 0 && hit[0] <= c && c <= hit[1] && ((c - hit[0]) & 1) == 0) {
    return c + hit[2] - kDeltaBias;
  }

  /* Individual runes: two Runes per entry, exact match required. */
  hit = rbsearch(c, __tolowers, nelem(__tolowers) / 2, 2);
  if (hit != 0 && hit[0] == c) {
    return c + hit[1] - kDeltaBias;
  }

  return c;
}

#endif