# ===- perf-helper.py - Clang perf and order-file helper ------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===------------------------------------------------------------------------===#
from __future__ import absolute_import, division, print_function
import sys
import os
import subprocess
import argparse
import time
import bisect
import shlex
import tempfile
import random
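# Minimal environment for child compiler invocations: only PATH is forwarded.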
test_env = {"PATH": os.environ["PATH"]}
def findFilesWithExtension(path, extension):
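    """Return paths of all files under 'path' whose names end in '.<extension>'."""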
filenames = []
for root, dirs, files in os.walk(path):
for filename in files:
if filename.endswith(f".{extension}"):
filenames.append(os.path.join(root, filename))
return filenames
def clean(args):
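    """Remove all files with the given extension under the given paths."""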
if len(args) < 2:
print(
"Usage: %s clean <paths> <extension>\n" % __file__
+ "\tRemoves all files with extension from <path>."
)
return 1
for path in args[1:-1]:
for filename in findFilesWithExtension(path, args[-1]):
os.remove(filename)
return 0
def merge(args):
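    """Merge all .profraw files under the given paths into one profile with llvm-profdata."""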
if len(args) < 3:
print(
"Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
+ "\tMerges all profraw files from path into output."
)
return 1
cmd = [args[0], "merge", "-o", args[1]]
for path in args[2:]:
cmd.extend(findFilesWithExtension(path, "profraw"))
subprocess.check_call(cmd)
return 0
def merge_fdata(args):
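    """Merge all .fdata files under a path into one output file with merge-fdata."""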
if len(args) != 3:
print(
"Usage: %s merge-fdata <merge-fdata> <output> <path>\n" % __file__
+ "\tMerges all fdata files from path into output."
)
return 1
cmd = [args[0], "-o", args[1]]
cmd.extend(findFilesWithExtension(args[2], "fdata"))
subprocess.check_call(cmd)
return 0
def perf(args):
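    """Run a command under 'perf record' (cycles:u, optionally with LBR), writing <pid>.perf.data."""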
parser = argparse.ArgumentParser(
prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
)
parser.add_argument(
"--lbr", action="store_true", help="Use perf with branch stacks"
)
parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")
opts = parser.parse_args(args)
cmd = opts.cmd[1:]
perf_args = [
"perf",
"record",
"--event=cycles:u",
"--freq=max",
"--output=%d.perf.data" % os.getpid(),
]
if opts.lbr:
perf_args += ["--branch-filter=any,u"]
perf_args.extend(cmd)
start_time = time.time()
subprocess.check_call(perf_args)
elapsed = time.time() - start_time
print("... data collection took %.4fs" % elapsed)
return 0
def perf2bolt(args):
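    """Convert each *.perf.data file under a path to a BOLT .fdata (YAML) profile via llvm-bolt --aggregate-only."""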
parser = argparse.ArgumentParser(
prog="perf-helper perf2bolt",
description="perf2bolt conversion wrapper for perf.data files",
)
parser.add_argument("bolt", help="Path to llvm-bolt")
parser.add_argument("path", help="Path containing perf.data files")
parser.add_argument("binary", help="Input binary")
parser.add_argument("--lbr", action="store_true", help="Use LBR perf2bolt mode")
opts = parser.parse_args(args)
p2b_args = [
opts.bolt,
opts.binary,
"--aggregate-only",
"--profile-format=yaml",
]
if not opts.lbr:
p2b_args += ["-nl"]
p2b_args += ["-p"]
for filename in findFilesWithExtension(opts.path, "perf.data"):
subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
return 0
def dtrace(args):
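    """Trace function entries of a command with dtrace, writing timestamps and symbols to <pid>.dtrace."""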
parser = argparse.ArgumentParser(
prog="perf-helper dtrace",
description="dtrace wrapper for order file generation",
)
parser.add_argument(
"--buffer-size",
metavar="size",
type=int,
required=False,
default=1,
help="dtrace buffer size in MB (default 1)",
)
parser.add_argument(
"--use-oneshot",
required=False,
action="store_true",
help="Use dtrace's oneshot probes",
)
parser.add_argument(
"--use-ustack",
required=False,
action="store_true",
help="Use dtrace's ustack to print function names",
)
parser.add_argument(
"--cc1",
required=False,
action="store_true",
help="Execute cc1 directly (don't profile the driver)",
)
parser.add_argument("cmd", nargs="*", help="")
# Use python's arg parser to handle all leading option arguments, but pass
# everything else through to dtrace
first_cmd = next(arg for arg in args if not arg.startswith("--"))
last_arg_idx = args.index(first_cmd)
opts = parser.parse_args(args[:last_arg_idx])
cmd = args[last_arg_idx:]
if opts.cc1:
cmd = get_cc1_command_for_args(cmd, test_env)
if opts.use_oneshot:
target = "oneshot$target:::entry"
else:
target = "pid$target:::entry"
predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
if opts.use_ustack:
action = "ustack(1);"
else:
action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)
dtrace_args = []
    if os.geteuid() != 0:
        print(
            "Script must be run as root, or you must add the following to your sudoers:\n"
            + "%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace"
        )
dtrace_args.append("sudo")
dtrace_args.extend(
(
"dtrace",
"-xevaltime=exec",
"-xbufsize=%dm" % (opts.buffer_size),
"-q",
"-n",
dtrace_script,
"-c",
" ".join(cmd),
)
)
if sys.platform == "darwin":
dtrace_args.append("-xmangled")
start_time = time.time()
with open("%d.dtrace" % os.getpid(), "w") as f:
f.write("### Command: %s" % dtrace_args)
subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.PIPE)
elapsed = time.time() - start_time
print("... data collection took %.4fs" % elapsed)
return 0
def get_cc1_command_for_args(cmd, env):
# Find the cc1 command used by the compiler. To do this we execute the
# compiler with '-###' to figure out what it wants to do.
cmd = cmd + ["-###"]
cc_output = subprocess.check_output(
cmd, stderr=subprocess.STDOUT, env=env, universal_newlines=True
).strip()
cc_commands = []
for ln in cc_output.split("\n"):
# Filter out known garbage.
if (
ln == "Using built-in specs."
or ln.startswith("Configured with:")
or ln.startswith("Target:")
or ln.startswith("Thread model:")
or ln.startswith("InstalledDir:")
or ln.startswith("LLVM Profile Note")
or ln.startswith(" (in-process)")
or " version " in ln
):
continue
cc_commands.append(ln)
if len(cc_commands) != 1:
print("Fatal error: unable to determine cc1 command: %r" % cc_output)
exit(1)
cc1_cmd = shlex.split(cc_commands[0])
if not cc1_cmd:
print("Fatal error: unable to determine cc1 command: %r" % cc_output)
exit(1)
return cc1_cmd
def cc1(args):
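    """Extract the -cc1 job from a compiler command line and run it directly."""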
parser = argparse.ArgumentParser(
prog="perf-helper cc1", description="cc1 wrapper for order file generation"
)
parser.add_argument("cmd", nargs="*", help="")
    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through as the compiler command to run.
first_cmd = next(arg for arg in args if not arg.startswith("--"))
last_arg_idx = args.index(first_cmd)
opts = parser.parse_args(args[:last_arg_idx])
cmd = args[last_arg_idx:]
# clear the profile file env, so that we don't generate profdata
# when capturing the cc1 command
    cc1_env = dict(test_env)
cc1_env["LLVM_PROFILE_FILE"] = os.devnull
cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)
subprocess.check_call(cc1_cmd)
return 0
def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set, missing_symbols, opts):
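    """Parse one dtrace log file and yield (timestamp, symbol-name) pairs."""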
def fix_mangling(symbol):
if sys.platform == "darwin":
if symbol[0] != "_" and symbol != "start":
symbol = "_" + symbol
return symbol
def get_symbols_with_prefix(symbol):
start_index = bisect.bisect_left(all_symbols, symbol)
for s in all_symbols[start_index:]:
if not s.startswith(symbol):
break
yield s
# Extract the list of symbols from the given file, which is assumed to be
# the output of a dtrace run logging either probefunc or ustack(1) and
# nothing else. The dtrace -xdemangle option needs to be used.
#
# This is particular to OS X at the moment, because of the '_' handling.
with open(path) as f:
current_timestamp = None
for ln in f:
# Drop leading and trailing whitespace.
ln = ln.strip()
if not ln.startswith("dtrace-"):
continue
# If this is a timestamp specifier, extract it.
if ln.startswith("dtrace-TS: "):
_, data = ln.split(": ", 1)
if not data.isdigit():
print(
"warning: unrecognized timestamp line %r, ignoring" % ln,
file=sys.stderr,
)
continue
current_timestamp = int(data)
continue
elif ln.startswith("dtrace-Symbol: "):
_, ln = ln.split(": ", 1)
if not ln:
continue
# If there is a '`' in the line, assume it is a ustack(1) entry in
# the form of <modulename>`<modulefunc>, where <modulefunc> is never
# truncated (but does need the mangling patched).
if "`" in ln:
yield (current_timestamp, fix_mangling(ln.split("`", 1)[1]))
continue
# Otherwise, assume this is a probefunc printout. DTrace on OS X
# seems to have a bug where it prints the mangled version of symbols
# which aren't C++ mangled. We just add a '_' to anything but start
# which doesn't already have a '_'.
symbol = fix_mangling(ln)
# If we don't know all the symbols, or the symbol is one of them,
# just return it.
if not all_symbols_set or symbol in all_symbols_set:
yield (current_timestamp, symbol)
continue
# Otherwise, we have a symbol name which isn't present in the
# binary. We assume it is truncated, and try to extend it.
# Get all the symbols with this prefix.
possible_symbols = list(get_symbols_with_prefix(symbol))
if not possible_symbols:
continue
# If we found too many possible symbols, ignore this as a prefix.
if len(possible_symbols) > 100:
print(
"warning: ignoring symbol %r " % symbol
+ "(no match and too many possible suffixes)",
file=sys.stderr,
)
continue
# Report that we resolved a missing symbol.
if opts.show_missing_symbols and symbol not in missing_symbols:
print(
"warning: resolved missing symbol %r" % symbol, file=sys.stderr
)
missing_symbols.add(symbol)
# Otherwise, treat all the possible matches as having occurred. This
# is an over-approximation, but it should be ok in practice.
for s in possible_symbols:
yield (current_timestamp, s)
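# Helper: yield the elements of an iterable in first-seen order, dropping duplicates.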
def uniq(items):
    seen = set()
    for item in items:
        if item not in seen:
            yield item
            seen.add(item)
def form_by_call_order(symbol_lists):
    # Simple strategy: just return symbols in order of occurrence, even across
    # multiple runs.
return uniq(s for symbols in symbol_lists for s in symbols)
def form_by_call_order_fair(symbol_lists):
# More complicated strategy that tries to respect the call order across all
# of the test cases, instead of giving a huge preference to the first test
# case.
# First, uniq all the lists.
uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists]
# Compute the successors for each list.
succs = {}
for symbols in uniq_lists:
for a, b in zip(symbols[:-1], symbols[1:]):
succs[a] = items = succs.get(a, [])
if b not in items:
items.append(b)
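    # For example, given the lists [a, b, c] and [a, c], succs ends up as
    # {a: [b, c], b: [c]}.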
# Emit all the symbols, but make sure to always emit all successors from any
# call list whenever we see a symbol.
#
# There isn't much science here, but this sometimes works better than the
# more naive strategy. Then again, sometimes it doesn't so more research is
# probably needed.
return uniq(
s
for symbols in symbol_lists
for node in symbols
for s in ([node] + succs.get(node, []))
)
def form_by_frequency(symbol_lists):
# Form the order file by just putting the most commonly occurring symbols
# first. This assumes the data files didn't use the oneshot dtrace method.
counts = {}
for symbols in symbol_lists:
for a in symbols:
counts[a] = counts.get(a, 0) + 1
by_count = list(counts.items())
    by_count.sort(key=lambda kv: -kv[1])
return [s for s, n in by_count]
def form_by_random(symbol_lists):
# Randomize the symbols.
    merged_symbols = list(uniq(s for symbols in symbol_lists for s in symbols))
random.shuffle(merged_symbols)
return merged_symbols
def form_by_alphabetical(symbol_lists):
# Alphabetize the symbols.
merged_symbols = list(set(s for symbols in symbol_lists for s in symbols))
merged_symbols.sort()
return merged_symbols
methods = dict(
(name[len("form_by_") :], value)
for name, value in locals().items()
if name.startswith("form_by_")
)
def genOrderFile(args):
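    """Generate a symbol order file from a set of dtrace data file directories."""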
parser = argparse.ArgumentParser("%prog [options] <dtrace data file directories>]")
parser.add_argument("input", nargs="+", help="")
parser.add_argument(
"--binary",
metavar="PATH",
type=str,
dest="binary_path",
help="Path to the binary being ordered (for getting all symbols)",
default=None,
)
parser.add_argument(
"--output",
dest="output_path",
help="path to output order file to write",
default=None,
required=True,
metavar="PATH",
)
parser.add_argument(
"--show-missing-symbols",
dest="show_missing_symbols",
help="show symbols which are 'fixed up' to a valid name (requires --binary)",
action="store_true",
default=None,
)
parser.add_argument(
"--output-unordered-symbols",
dest="output_unordered_symbols_path",
help="write a list of the unordered symbols to PATH (requires --binary)",
default=None,
metavar="PATH",
)
parser.add_argument(
"--method",
dest="method",
help="order file generation method to use",
choices=list(methods.keys()),
default="call_order",
)
opts = parser.parse_args(args)
# If the user gave us a binary, get all the symbols in the binary by
# snarfing 'nm' output.
if opts.binary_path is not None:
output = subprocess.check_output(
["nm", "-P", opts.binary_path], universal_newlines=True
)
lines = output.split("\n")
all_symbols = [ln.split(" ", 1)[0] for ln in lines if ln.strip()]
print("found %d symbols in binary" % len(all_symbols))
all_symbols.sort()
else:
all_symbols = []
all_symbols_set = set(all_symbols)
# Compute the list of input files.
input_files = []
for dirname in opts.input:
input_files.extend(findFilesWithExtension(dirname, "dtrace"))
# Load all of the input files.
print("loading from %d data files" % len(input_files))
missing_symbols = set()
timestamped_symbol_lists = [
list(
parse_dtrace_symbol_file(
path, all_symbols, all_symbols_set, missing_symbols, opts
)
)
for path in input_files
]
# Reorder each symbol list.
symbol_lists = []
for timestamped_symbols_list in timestamped_symbol_lists:
timestamped_symbols_list.sort()
symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])
    # Execute the desired order file generation method.
method = methods.get(opts.method)
result = list(method(symbol_lists))
# Report to the user on what percentage of symbols are present in the order
# file.
num_ordered_symbols = len(result)
if all_symbols:
print(
"note: order file contains %d/%d symbols (%.2f%%)"
% (
num_ordered_symbols,
len(all_symbols),
100.0 * num_ordered_symbols / len(all_symbols),
),
file=sys.stderr,
)
if opts.output_unordered_symbols_path:
ordered_symbols_set = set(result)
with open(opts.output_unordered_symbols_path, "w") as f:
f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))
# Write the order file.
with open(opts.output_path, "w") as f:
f.write("\n".join(result))
f.write("\n")
return 0
commands = {
"clean": clean,
"merge": merge,
"dtrace": dtrace,
"cc1": cc1,
"gen-order-file": genOrderFile,
"merge-fdata": merge_fdata,
"perf": perf,
"perf2bolt": perf2bolt,
}
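# Illustrative invocations (argument values are only examples; see the usage
# strings of the individual subcommands above):
#   perf-helper.py clean <path>... profraw
#   perf-helper.py merge <llvm-profdata> clang.profdata <path>...
#   perf-helper.py gen-order-file --output clang.order --binary clang <dtrace-dir>...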
def main():
    if len(sys.argv) < 2 or sys.argv[1] not in commands:
        print("Usage: %s <command> [args...]  (commands: %s)"
              % (__file__, ", ".join(sorted(commands))))
        sys.exit(1)
    sys.exit(commands[sys.argv[1]](sys.argv[2:]))
if __name__ == "__main__":
main()