#!/usr/bin/env python
"""Validate compact unwind info by cross checking the llvm-objdump
reports of the input object file vs final linked output.
"""
from __future__ import print_function
import sys
import argparse
import re
from pprint import pprint
def main():
    """Cross-check compact unwind encodings of object files against the linked output.

    Input is the concatenated text of ``llvm-objdump --unwind-info --syms``
    for the object file(s) and for the final linked program, read from the
    files named on the command line or from stdin when none are given.

    Two checks are performed:

    1. The per-symbol encodings from the object files (after folding runs of
       identical encodings) must equal the per-symbol encodings listed in the
       program's unwind-info entries.
    2. The program's common-encodings table must equal the object-file
       encodings ordered by descending frequency (ties broken by descending
       encoding string), truncated to 127 entries.

    Exits via ``sys.exit`` with a message when an expected section is missing
    from the input or when either check fails; returns ``None`` on success.
    Pass ``--debug`` to print the folding decisions and the mismatching data.
    """
    hex_digit = r"[a-f\d]"
    hex8 = hex_digit + "{8}"

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "files",
        metavar="FILES",
        nargs="*",
        help="output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output",
    )
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    if args.files:
        # Read every input and close each handle promptly.
        chunks = []
        for path in args.files:
            with open(path) as stream:
                chunks.append(stream.read())
        objdump_string = "".join(chunks)
    else:
        objdump_string = sys.stdin.read()

    # Object-file side: one (symbol, encoding, personality, lsda) tuple per
    # "start: / length: / compact encoding:" record. Personality and LSDA are
    # captured by the pattern but not compared yet.
    object_encodings_map = {
        symbol: encoding
        for symbol, encoding, _, _ in re.findall(
            r"start:\s+0x%s+\s+(\w+)\s+" % hex_digit
            + r"length:\s+0x%s+\s+" % hex_digit
            + r"compact encoding:\s+0x(%s+)(?:\s+" % hex_digit
            + r"personality function:\s+0x(%s+)\s+\w+\s+" % hex_digit
            + r"LSDA:\s+0x(%s+)\s+\w+(?: \+ 0x%s+)?)?" % (hex_digit, hex_digit),
            objdump_string,
            re.DOTALL,
        )
    }
    if not object_encodings_map:
        sys.exit("no object encodings found in input")

    # generate-cfi-funcs.py doesn't generate unwind info for _main.
    object_encodings_map["_main"] = "00000000"

    # Program side: map the low 8 hex digits of each global __TEXT,__text
    # function address to its symbol. Only symbols named "x<address>" or
    # "_main" are accepted by the pattern.
    program_symbols_map = {
        address: symbol
        for address, symbol in re.findall(
            r"^%s(%s) g\s+F __TEXT,__text (x\1|_main)$" % (hex8, hex8),
            objdump_string,
            re.MULTILINE,
        )
    }
    if not program_symbols_map:
        sys.exit("no program symbols found in input")

    # Common-encodings table of the program; the index alternation accepts
    # indices 0 through 126 only.
    program_common_encodings = re.findall(
        r"^\s+encoding\[(?:\d|\d\d|1[01]\d|12[0-6])\]: 0x(%s+)$" % hex_digit,
        objdump_string,
        re.MULTILINE,
    )
    if not program_common_encodings:
        sys.exit("no common encodings found in input")

    # Per-function entries of the program, keyed by symbol name. An offset
    # that has no matching symbol raises KeyError here.
    program_encodings_map = {
        program_symbols_map[address]: encoding
        for address, encoding in re.findall(
            r"^\s+\[\d+\]: function offset=0x(%s+), " % hex_digit
            + r"encoding(?:\[\d+\])?=0x(%s+)$" % hex_digit,
            objdump_string,
            re.MULTILINE,
        )
    }
    # Check the map that was just built; the object map always contains
    # "_main" at this point, so testing it would never fire.
    if not program_encodings_map:
        sys.exit("no program encodings found in input")

    # Fold adjacent entries from the object file that have matching encodings
    # TODO(gkm) add check for personality+lsda
    previous_encoding = None
    # sorted() returns a list snapshot, so deleting from the dict is safe.
    for symbol in sorted(object_encodings_map):
        encoding = object_encodings_map[symbol]
        fold = encoding == previous_encoding
        if fold:
            del object_encodings_map[symbol]
        if args.debug:
            print("%s %s with %s" % ("delete" if fold else "retain", symbol, encoding))
        previous_encoding = encoding

    if program_encodings_map != object_encodings_map:
        if args.debug:
            print("program encodings map:")
            pprint(program_encodings_map)
            print("object encodings map:")
            pprint(object_encodings_map)
        sys.exit("encoding maps differ")

    # Count frequency of object-file folded encodings
    # and compare with the program-file common encodings table
    encoding_frequency_map = {}
    for encoding in object_encodings_map.values():
        encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)

    # Most frequent first; ties are ordered by descending encoding string.
    # Only the first 127 entries are compared.
    encoding_frequencies = sorted(
        encoding_frequency_map,
        key=lambda x: (encoding_frequency_map.get(x), x),
        reverse=True,
    )[:127]

    if program_common_encodings != encoding_frequencies:
        if args.debug:
            print("program common encodings:")
            pprint(program_common_encodings)
            print("object encoding frequencies:")
            pprint(encoding_frequencies)
        sys.exit("encoding frequencies differ")
# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()