// chromium/third_party/blink/web_tests/external/wpt/encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js

// Reverse lookup table for the jis0208 index, used by iso2022jpEncoder:
// the array position is a Unicode code point and the stored value is the
// index pointer for that code point. The array is sparse.
var jis0208CPs = [];
for (var p = 0; p < jis0208.length; p++) {
    // pointer with no code point assigned: nothing to record
    if (jis0208[p] == null) continue;
    // code point already seen: keep the first (lowest) pointer
    if (jis0208CPs[jis0208[p]] != null) continue;
    jis0208CPs[jis0208[p]] = p;
}

// Katakana mapping table used by iso2022jpEncoder to replace the code points
// U+FF61..U+FF9F: the entry at position (cp - 0xFF61) is the replacement
// Unicode code point, written in decimal.
// The values were copied from the JSON version of the index that accompanies
// the Encoding spec.
var iso2022jpkatakana = [
    12290, 12300, 12301, 12289, 12539, 12530, 12449, 12451, // pointers 0-7
    12453, 12455, 12457, 12515, 12517, 12519, 12483, 12540, // pointers 8-15
    12450, 12452, 12454, 12456, 12458, 12459, 12461, 12463, // pointers 16-23
    12465, 12467, 12469, 12471, 12473, 12475, 12477, 12479, // pointers 24-31
    12481, 12484, 12486, 12488, 12490, 12491, 12492, 12493, // pointers 32-39
    12494, 12495, 12498, 12501, 12504, 12507, 12510, 12511, // pointers 40-47
    12512, 12513, 12514, 12516, 12518, 12520, 12521, 12522, // pointers 48-55
    12523, 12524, 12525, 12527, 12531, 12443, 12444 // pointers 56-62
];

/**
 * Converts a JavaScript string into an array of Unicode code point values.
 *
 * JavaScript strings are sequences of UTF-16 code units, so a supplementary
 * character occupies two units (a high surrogate followed by a low surrogate).
 * This function combines each such pair into a single code point.
 *
 * Ill-formed input is handled as follows:
 *  - a high surrogate that is not followed by a low surrogate (including one
 *    at the very end of the string) is reported through alert() and dropped;
 *  - a low surrogate with no preceding high surrogate is passed through as-is.
 *
 * @param {string} chars - a string of Unicode characters
 * @returns {number[]} the code points of the string, in order
 */
function chars2cps(chars) {
    var out = [];
    var haut = 0; // pending high surrogate; 0 means "none pending"

    for (var i = 0; i < chars.length; i++) {
        // charCodeAt always yields 0..0xFFFF for an in-range index, so no
        // range validation is needed here.
        var b = chars.charCodeAt(i);

        if (haut != 0) {
            if (0xdc00 <= b && b <= 0xdfff) {
                // complete surrogate pair: combine into one code point
                out.push(0x10000 + ((haut - 0xd800) << 10) + (b - 0xdc00));
                haut = 0;
                continue;
            }
            // the pending high surrogate has no partner: report and drop it,
            // then fall through to handle the current unit on its own
            alert(
                "Error in chars2cps: surrogate out of range " +
                    haut.toString(16) +
                    "!"
            );
            haut = 0;
        }

        if (0xd800 <= b && b <= 0xdbff) {
            haut = b; // wait for the low surrogate
        } else {
            out.push(b);
        }
    }

    if (haut != 0) {
        // The string ended on a high surrogate. Report it the same way as an
        // unpaired surrogate in the middle of the string instead of dropping
        // it silently.
        alert(
            "Error in chars2cps: surrogate out of range " +
                haut.toString(16) +
                "!"
        );
    }
    return out;
}

/**
 * Encodes a string as ISO-2022-JP and returns the bytes as text.
 *
 * The encoder keeps one of three states ("ascii", "roman", "jis0208") and
 * emits an escape sequence whenever the state has to change. When a state
 * change is emitted, the current code point is processed again in the new
 * state.
 *
 * @param {string} stream - the text to encode
 * @returns {?string} the encoded bytes as space-separated, upper-case,
 *     unpadded hex values, or null if a code point cannot be encoded
 */
function iso2022jpEncoder(stream) {
    const ESC_TO_ASCII = " 1B 28 42";
    const ESC_TO_ROMAN = " 1B 28 4A";
    const ESC_TO_JIS0208 = " 1B 24 42";
    const hex = (value) => value.toString(16).toUpperCase();

    const codePoints = chars2cps(stream);
    let encoded = "";
    let state = "ascii";
    let pos = 0; // index of the code point currently being processed

    for (;;) {
        // End of input: make sure the output ends in the ASCII state.
        if (pos >= codePoints.length) {
            if (state === "ascii") {
                break;
            }
            state = "ascii";
            encoded += ESC_TO_ASCII;
            continue;
        }

        let cp = codePoints[pos];
        const isAsciiCp = cp >= 0x00 && cp <= 0x7f;

        // SO, SI and ESC cannot be encoded in the single-byte states.
        if (
            (state === "ascii" || state === "roman") &&
            (cp === 0x0e || cp === 0x0f || cp === 0x1b)
        ) {
            return null;
        }

        if (state === "ascii" && isAsciiCp) {
            encoded += " " + hex(cp);
            pos++;
            continue;
        }

        if (state === "roman") {
            // Roman is ASCII except that 0x5C is the yen sign and 0x7E the overline.
            if (isAsciiCp && cp !== 0x5c && cp !== 0x7e) {
                encoded += " " + hex(cp);
                pos++;
                continue;
            }
            if (cp === 0xa5) {
                encoded += " 5C";
                pos++;
                continue;
            }
            if (cp === 0x203e) {
                encoded += " 7E";
                pos++;
                continue;
            }
        }

        // An ASCII code point reaching this line needs the ASCII state first.
        if (state !== "ascii" && isAsciiCp) {
            state = "ascii";
            encoded += ESC_TO_ASCII;
            continue; // same code point again, now in the ASCII state
        }

        // Yen sign and overline need the Roman state first.
        if (state !== "roman" && (cp === 0xa5 || cp === 0x203e)) {
            state = "roman";
            encoded += ESC_TO_ROMAN;
            continue; // same code point again, now in the Roman state
        }

        // Substitutions applied before the jis0208 lookup.
        if (cp === 0x2212) {
            cp = 0xff0d;
        }
        if (cp >= 0xff61 && cp <= 0xff9f) {
            cp = iso2022jpkatakana[cp - 0xff61];
        }

        const pointer = jis0208CPs[cp];
        if (pointer == null) {
            return null; // not representable
        }

        if (state !== "jis0208") {
            state = "jis0208";
            encoded += ESC_TO_JIS0208;
            continue; // same code point again, now in the jis0208 state
        }

        const lead = Math.floor(pointer / 94) + 0x21;
        const trail = (pointer % 94) + 0x21;
        encoded += " " + hex(lead) + " " + hex(trail);
        pos++;
    }

    return encoded.trim();
}

/**
 * Renders each UTF-16 code unit of a string as an upper-case hex number
 * (no zero padding), with a single space after every value.
 *
 * @param {string} str - the string to convert (intended for ASCII text)
 * @returns {string} e.g. "41 42 " for "AB"; the empty string for ""
 */
function convertToHex(str) {
    return str
        .split("")
        .map((unit) => unit.charCodeAt(0).toString(16).toUpperCase() + " ")
        .join("");
}

/**
 * Brings a partially percent-encoded string into a canonical form in which
 * every character is written as "%" followed by its upper-case, unpadded hex
 * value, so that two differently escaped strings can be compared.
 *
 * A "%" is treated as the start of an escape only when neither of the two
 * following characters is "%" and those characters parse as a hex number;
 * otherwise the "%" is kept as a literal character. A trailing
 * "%1B%28%42" is removed from the result.
 *
 * @param {string} str - the string to normalize
 * @returns {string} the fully percent-encoded form
 */
function normalizeStr(str) {
    // Pass 1: decode the escapes that are present.
    let out = "";
    for (let c = 0; c < str.length; c++) {
        const ch = str.charAt(c);
        const looksEscaped =
            ch == "%" &&
            str.charAt(c + 1) != "%" &&
            str.charAt(c + 2) != "%";
        const code = looksEscaped
            ? parseInt(str.charAt(c + 1) + str.charAt(c + 2), 16)
            : NaN;

        if (Number.isNaN(code)) {
            // Not an escape, or a "%" without hex digits after it (for
            // example at the end of the string): keep the character as-is.
            // Without this guard String.fromCodePoint(NaN) would throw.
            out += ch;
        } else {
            out += String.fromCodePoint(code);
            c += 2; // skip the two characters consumed by the escape
        }
    }

    // Pass 2: escape every code unit.
    let result = "";
    for (let o = 0; o < out.length; o++) {
        result += "%" + out.charCodeAt(o).toString(16).toUpperCase();
    }
    return result.replace(/%1B%28%42$/, "");
}