#!/usr/bin/env python3
# Generates a version script for an architecture so that it can be incorporated
# into gcc_s.ver.
from collections import defaultdict
from itertools import chain
import argparse, subprocess, sys, os
def split_suffix(symbol):
    """
    Splits a symbol such as `__gttf2@GCC_3.0` into a triple representing its
    function name (__gttf2), version name (GCC_3.0), and version number (300).

    The version number acts as a priority. Since earlier versions are more
    accessible and are likely to be used more, the lower the number is, the
    higher its priority. A symbol that has a '@@' instead of '@' has been
    designated by the linker as the default symbol, and is awarded a priority
    of -1.

    Returns None when `symbol` contains no '@'. Raises ValueError when the
    text after the last underscore of a non-default version name is not a
    dotted number.
    """
    if "@" not in symbol:
        return None
    # `name@@VERSION` splits into ["name", "", "VERSION"]; drop the empty piece.
    data = [part for part in symbol.split("@") if part]
    # Only the text after the *last* underscore is the dotted number, so
    # version names with several underscores (e.g. `GCC_LDBL_3.0`) parse too.
    _, _, version = data[-1].rpartition("_")
    if "@@" in symbol:
        priority = -1
    else:
        # Pad missing *components* (not missing digits) up to
        # major.minor.patch, so `11.0` becomes 1100 rather than 110 and can
        # no longer outrank `3.0` (300).
        # NOTE: components are still concatenated as digits, so the ordering
        # assumes minor and patch numbers stay below 10.
        components = [piece for piece in version.split(".") if piece]
        components += ["0"] * (3 - len(components))
        priority = int("".join(components))
    return data[0], data[1], priority
def invert_mapping(symbol_map):
    """
    Groups the keys of `symbol_map` by the version stored in their values.

    Each value is a pair whose first element is the version; the second
    element is ignored. The result is a list of `(version, symbols)` pairs
    ordered by version, with every `symbols` list sorted as well.
    """
    grouped = {}
    for name, (version, _) in symbol_map.items():
        grouped.setdefault(version, []).append(name)
    pairs = [(version, sorted(names)) for version, names in grouped.items()]
    return sorted(pairs, key=lambda pair: pair[0])
def intersection(llvm, gcc):
    """
    Finds the intersection between the symbols extracted from
    compiler-rt.a/libunwind.a and libgcc_s.so.1.

    `llvm` holds plain symbol names; `gcc` holds names that may carry an
    `@VERSION` / `@@VERSION` suffix. Entries of `gcc` for which
    `split_suffix` returns nothing are skipped. When a symbol appears in
    `gcc` under several versions, the one with the lowest priority number
    is kept.

    Returns the common symbols grouped by version name, as produced by
    `invert_mapping`.
    """
    # Membership is tested once per gcc symbol; a set keeps each test O(1)
    # instead of rescanning the whole llvm list every time.
    llvm_symbols = set(llvm)
    common_symbols = {}
    for entry in gcc:
        suffix_triple = split_suffix(entry)
        if not suffix_triple:
            continue
        symbol, version_name, version_number = suffix_triple
        if symbol not in llvm_symbols:
            continue
        current = common_symbols.get(symbol)
        # First sighting, or a version with a better (lower) priority.
        if current is None or version_number < current[1]:
            common_symbols[symbol] = (version_name, version_number)
    return invert_mapping(common_symbols)
def find_function_names(path):
    """
    Runs `llvm-readelf -su` on the binary at `path` and returns the names of
    its defined functions.

    Only output lines that contain 'FUNC' and do not contain 'UND' are kept,
    and the eighth space-separated field of each is returned, much like
    piping the output through `grep 'FUNC' | grep -v 'UND' | awk '{print $8}'`.

    If llvm-readelf exits with a non-zero status, its stderr is printed to
    this process's stderr and the script exits with status 1.
    """
    completed = subprocess.run(
        args=["llvm-readelf", "-su", path], capture_output=True
    )
    if completed.returncode != 0:
        print(completed.stderr.decode("utf-8"), file=sys.stderr)
        sys.exit(1)

    names = []
    for line in completed.stdout.decode("utf-8").split("\n"):
        if "FUNC" not in line or "UND" in line:
            continue
        # Columns are padded with runs of spaces; discard the empty pieces.
        fields = [field for field in line.split(" ") if field]
        names.append(fields[7])
    return names
def to_file(versioned_symbols):
    """
    Writes `versioned_symbols` to a file named `new-gcc_s-symbols` in the
    directory that contains this script.

    `versioned_symbols` is an iterable of `(version, symbols)` pairs. The
    file starts with a notice telling the reader not to check it in,
    followed by one `VERSION { ... };` stanza per pair that lists each
    symbol on its own line.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    path = f"{script_dir}/new-gcc_s-symbols"
    notice = (
        "Do not check this version script in: you should instead work "
        "out which symbols are missing in `lib/gcc_s.ver` and then "
        "integrate them into `lib/gcc_s.ver`. For more information, "
        "please see `doc/LLVMLibgcc.rst`.\n"
    )
    with open(path, "w") as out:
        out.write(notice)
        for version, symbols in versioned_symbols:
            stanza = [f"{version} {{\n"]
            stanza.extend(f" {name};\n" for name in symbols)
            stanza.append("};\n\n")
            out.write("".join(stanza))
def read_args():
    """
    Parses the command line and returns the resulting namespace.

    Three string options are required: `--compiler_rt`, `--libunwind` and
    `--libgcc_s`.
    """
    required_paths = (
        ("--compiler_rt", "Path to `libclang_rt.builtins-${ARCH}.a`."),
        ("--libunwind", "Path to `libunwind.a`."),
        (
            "--libgcc_s",
            "Path to `libgcc_s.so.1`. Note that unlike the other two arguments, this is a dynamic library.",
        ),
    )
    parser = argparse.ArgumentParser()
    for flag, description in required_paths:
        parser.add_argument(flag, type=str, help=description, required=True)
    return parser.parse_args()
def main():
    """
    Script entry point.

    Collects the function names from the compiler-rt and libunwind archives
    and from libgcc_s, intersects them, and writes the versioned result out
    with `to_file`.
    """
    options = read_args()
    builtins_symbols = find_function_names(options.compiler_rt)
    unwind_symbols = find_function_names(options.libunwind)
    libgcc_symbols = find_function_names(options.libgcc_s)
    # TODO(cjdb): work out a way to integrate new symbols in with the existing
    # ones
    to_file(intersection(builtins_symbols + unwind_symbols, libgcc_symbols))
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()