# cpython/Programs/_freeze_module.py

"""Python implementation of Programs/_freeze_module.c

The pure Python implementation uses the same functions and arguments as the
C implementation.

The generated byte code is slightly different because
compile() sets the PyCF_SOURCE_IS_UTF8 flag and objects have a
reference count > 1. Marshal adds the `FLAG_REF` flag and creates a
reference `hashtable`.
"""

import marshal
import sys

# C comment written as the first line of every generated file (see
# write_frozen) to mark it as produced by this script.
header = "/* Auto-generated by Programs/_freeze_module.py */"


def read_text(inpath: str) -> bytes:
    """Return the complete contents of the file at *inpath* as bytes.

    Despite the name, the file is opened in binary mode, so the data is
    returned undecoded and without newline translation.
    """
    with open(inpath, mode="rb") as source_file:
        raw_source = source_file.read()
    return raw_source


def compile_and_marshal(name: str, text: bytes) -> bytes:
    """Compile module source *text* and return the marshalled code object.

    The source is compiled in "exec" mode (the counterpart of
    Py_file_input) under the pseudo-filename ``<frozen NAME>``, with
    optimization level 0 and without inheriting any compiler flags from
    this script.
    """
    frozen_filename = f"<frozen {name}>"
    code_object = compile(
        source=text,
        filename=frozen_filename,
        mode="exec",
        dont_inherit=True,
        optimize=0,
    )
    return marshal.dumps(code_object)


def get_varname(name: str, prefix: str) -> str:
    """Return *prefix* followed by *name* with every "." turned into "_"."""
    flattened = "_".join(name.split("."))
    return "{}{}".format(prefix, flattened)


def write_code(outfile, marshalled: bytes, varname: str) -> None:
    """Write *marshalled* to *outfile* as a C ``const unsigned char`` array.

    The array is named *varname*. Byte values are emitted as decimal
    literals, at most 16 per row; every row is indented by four spaces and
    ends with a trailing comma.
    """
    outfile.write(f"const unsigned char {varname}[] = {{\n")

    # Slice the payload into consecutive rows of up to 16 bytes each.
    rows = (
        marshalled[start : start + 16]
        for start in range(0, len(marshalled), 16)
    )
    for row in rows:
        outfile.write("    " + ",".join(map(str, row)) + ",\n")

    outfile.write("};\n")


def write_frozen(outpath: str, inpath: str, name: str, marshalled: bytes) -> None:
    """Create the generated C file at *outpath* for the module *name*.

    The file starts with the auto-generated header line, followed by the
    marshalled bytes rendered as a C array whose name is *name* mangled
    with the ``_Py_M__`` prefix. *inpath* is accepted but not used here.
    """
    with open(outpath, "w") as c_file:
        c_file.write(f"{header}\n")
        write_code(c_file, marshalled, get_varname(name, "_Py_M__"))


def main():
    """Freeze one module: ``_freeze_module.py NAME INPATH OUTPATH``.

    Reads the source at INPATH, compiles and marshals it as module NAME,
    and writes the resulting C array to OUTPATH. Exits with an error
    message unless exactly three arguments are given.
    """
    argv = sys.argv
    if len(argv) != 4:
        sys.exit("need to specify the name, input and output paths\n")

    # argv[0] is the script itself; the rest are positional arguments.
    name, inpath, outpath = argv[1], argv[2], argv[3]

    source = read_text(inpath)
    frozen = compile_and_marshal(name, source)
    write_frozen(outpath, inpath, name, frozen)


# Run the freezer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()