llvm/lld/test/MachO/tools/validate-unwind-info.py

#!/usr/bin/env python

"""Validate compact unwind info by cross checking the llvm-objdump
reports of the input object file vs final linked output.
"""
from __future__ import print_function
import sys
import argparse
import re
from pprint import pprint


def main():
    """Cross-check compact unwind encodings of object files against the linked output.

    Input is the concatenated text of ``llvm-objdump --unwind-info --syms``
    for the object file(s) and the final linked program, read from the files
    named on the command line or from stdin when no file is given.

    The checks performed, in order:

    1. Parse per-symbol compact encodings from the object-file report.
    2. Parse the program's symbol table, common-encodings table, and
       per-function encodings from the linked-output report.
    3. Fold object-file entries whose encoding equals that of the previous
       symbol (in sorted symbol order) and require the result to equal the
       program's symbol -> encoding map.
    4. Rank the folded encodings by frequency and require the ranking to
       equal the program's common-encodings table.

    Returns None on success.  Any missing section or mismatch terminates the
    process through ``sys.exit`` with a short diagnostic message; pass
    ``--debug`` to also print the maps being compared.
    """
    # Named hex_digit (not "hex") so the builtin hex() is not shadowed.
    hex_digit = r"[a-f\d]"
    hex8 = hex_digit + "{8}"

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "files",
        metavar="FILES",
        nargs="*",
        help="output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output",
    )
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    if args.files:
        # Read each file inside a context manager so handles are closed
        # promptly instead of being left to the garbage collector.
        chunks = []
        for path in args.files:
            with open(path) as input_file:
                chunks.append(input_file.read())
        objdump_string = "".join(chunks)
    else:
        objdump_string = sys.stdin.read()

    # Each match is (symbol, encoding, personality, lsda); the last two are
    # empty strings when the optional personality/LSDA part is absent.
    object_encodings_list = re.findall(
        r"start:\s+0x%s+\s+(\w+)\s+" % hex_digit
        + r"length:\s+0x%s+\s+" % hex_digit
        + r"compact encoding:\s+0x(%s+)(?:\s+" % hex_digit
        + r"personality function:\s+0x(%s+)\s+\w+\s+" % hex_digit
        + r"LSDA:\s+0x(%s+)\s+\w+(?: \+ 0x%s+)?)?" % (hex_digit, hex_digit),
        objdump_string,
        re.DOTALL,
    )
    object_encodings_map = {
        symbol: encoding for symbol, encoding, _, _ in object_encodings_list
    }
    if not object_encodings_map:
        sys.exit("no object encodings found in input")

    # generate-cfi-funcs.py doesn't generate unwind info for _main.
    object_encodings_map["_main"] = "00000000"

    # Maps the low 8 hex digits of a symbol's address to its name.  Only
    # symbols named "x<those 8 digits>" or "_main" are accepted.
    program_symbols_map = {
        address: symbol
        for address, symbol in re.findall(
            r"^%s(%s) g\s+F __TEXT,__text (x\1|_main)$" % (hex8, hex8),
            objdump_string,
            re.MULTILINE,
        )
    }
    if not program_symbols_map:
        sys.exit("no program symbols found in input")

    # Common-encodings table entries; the pattern accepts indices 0..126 only.
    program_common_encodings = re.findall(
        r"^\s+encoding\[(?:\d|\d\d|1[01]\d|12[0-6])\]: 0x(%s+)$" % hex_digit,
        objdump_string,
        re.MULTILINE,
    )
    if not program_common_encodings:
        sys.exit("no common encodings found in input")

    program_encodings_map = {
        program_symbols_map[address]: encoding
        for address, encoding in re.findall(
            r"^\s+\[\d+\]: function offset=0x(%s+), " % hex_digit
            + r"encoding(?:\[\d+\])?=0x(%s+)$" % hex_digit,
            objdump_string,
            re.MULTILINE,
        )
    }
    # This must test the program map: the object map was already validated
    # above and always contains "_main" by now, so testing it here could
    # never report missing program encodings.
    if not program_encodings_map:
        sys.exit("no program encodings found in input")

    # Fold adjacent entries from the object file that have matching encodings
    # TODO(gkm) add check for personality+lsda
    previous_encoding = None
    # sorted() returns a new list, so deleting from the dict while looping
    # over it is safe.
    for symbol in sorted(object_encodings_map):
        encoding = object_encodings_map[symbol]
        fold = encoding == previous_encoding
        if fold:
            del object_encodings_map[symbol]
        if args.debug:
            print("%s %s with %s" % ("delete" if fold else "retain", symbol, encoding))
        previous_encoding = encoding

    if program_encodings_map != object_encodings_map:
        if args.debug:
            print("program encodings map:")
            pprint(program_encodings_map)
            print("object encodings map:")
            pprint(object_encodings_map)
        sys.exit("encoding maps differ")

    # Count frequency of object-file folded encodings
    # and compare with the program-file common encodings table
    encoding_frequency_map = {}
    for encoding in object_encodings_map.values():
        encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)
    # Most frequent first; ties are broken by the encoding string, descending.
    encoding_frequencies = sorted(
        encoding_frequency_map,
        key=lambda x: (encoding_frequency_map.get(x), x),
        reverse=True,
    )
    # Keep at most 127 entries, matching the index range (0..126) accepted
    # when parsing the program's common-encodings table above.
    del encoding_frequencies[127:]

    if program_common_encodings != encoding_frequencies:
        if args.debug:
            # Print the label and pretty-print the list separately;
            # pprint() on a pre-joined string would emit its repr with a
            # literal "\n" in it.
            print("program common encodings:")
            pprint(program_common_encodings)
            print("object encoding frequencies:")
            pprint(encoding_frequencies)
        sys.exit("encoding frequencies differ")


# Run the validation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()