#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>
namespace llvm {
namespace exegesis {
static cl::opt<int> OpcodeIndex(
"opcode-index",
cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
cl::cat(BenchmarkOptions), cl::init(0));
static cl::opt<std::string>
OpcodeNames("opcode-name",
cl::desc("comma-separated list of opcodes to measure, by name"),
cl::cat(BenchmarkOptions), cl::init(""));
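// A hypothetical invocation, assuming an X86 host where ADD64rr and SUB64rr
// are valid MC opcode names:
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr,SUB64rr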
static cl::opt<std::string> SnippetsFile("snippets-file",
cl::desc("code snippets to measure"),
cl::cat(BenchmarkOptions),
cl::init(""));
static cl::opt<std::string>
BenchmarkFile("benchmarks-file",
cl::desc("File to read (analysis mode) or write "
"(latency/uops/inverse_throughput modes) benchmark "
"results. “-” uses stdin/stdout."),
cl::cat(Options), cl::init(""));
static cl::opt<Benchmark::ModeE> BenchmarkMode(
"mode", cl::desc("the mode to run"), cl::cat(Options),
cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
"Instruction Inverse Throughput"),
clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));
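// For example (a sketch; per -opcode-index above, -1 selects every opcode the
// target defines):
//   llvm-exegesis -mode=uops -opcode-index=-1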
static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
"result-aggregation-mode", cl::desc("How to aggregate multi-values result"),
cl::cat(BenchmarkOptions),
cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
clEnumValN(Benchmark::Max, "max", "Keep max reading"),
clEnumValN(Benchmark::Mean, "mean",
"Compute mean of all readings"),
clEnumValN(Benchmark::MinVariance, "min-variance",
"Keep readings set with min-variance")),
cl::init(Benchmark::Min));
static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
"repetition-mode", cl::desc("how to repeat the instruction snippet"),
cl::cat(BenchmarkOptions),
cl::values(
clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
clEnumValN(Benchmark::AggregateMin, "min",
"All of the above and take the minimum of measurements"),
clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
"Middle half duplicate mode"),
clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
"Middle half loop mode")),
cl::init(Benchmark::Duplicate));
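// Duplicating avoids loop overhead at the cost of instruction cache pressure,
// while looping keeps the body cache-resident but adds a loop counter; "min"
// runs both and keeps the smaller measurement. A sketch:
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr -repetition-mode=min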
static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
"measurements-print-progress",
cl::desc("Produce progress indicator when performing measurements"),
cl::cat(BenchmarkOptions), cl::init(false));
static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
"benchmark-phase",
cl::desc("Stop the benchmarking process after the given phase"),
cl::cat(BenchmarkOptions),
cl::values(
clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
"Only generate the minimal instruction sequence"),
clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
"prepare-and-assemble-snippet",
"Same as prepare-snippet, but also dumps an excerpt of the "
"sequence (hex encoded)"),
clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
"assemble-measured-code",
"Same as prepare-and-assemble-snippet, but also creates the "
"full sequence "
"that can be dumped to a file using --dump-object-to-disk"),
clEnumValN(
BenchmarkPhaseSelectorE::Measure, "measure",
"Same as prepare-measured-code, but also runs the measurement "
"(default)")),
cl::init(BenchmarkPhaseSelectorE::Measure));
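// For example, to stop after generating the minimal instruction sequence,
// without assembling or running it (a sketch):
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr \
//       -benchmark-phase=prepare-snippet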
static cl::opt<bool>
UseDummyPerfCounters("use-dummy-perf-counters",
cl::desc("Do not read real performance counters, use "
"dummy values (for testing)"),
cl::cat(BenchmarkOptions), cl::init(false));
static cl::opt<unsigned>
MinInstructions("min-instructions",
cl::desc("The minimum number of instructions that should "
"be included in the snippet"),
cl::cat(BenchmarkOptions), cl::init(10000));
static cl::opt<unsigned>
LoopBodySize("loop-body-size",
cl::desc("when repeating the instruction snippet by looping "
"over it, duplicate the snippet until the loop body "
"contains at least this many instruction"),
cl::cat(BenchmarkOptions), cl::init(0));
static cl::opt<unsigned> MaxConfigsPerOpcode(
"max-configs-per-opcode",
cl::desc(
"allow the snippet generator to generate at most this many configs"),
cl::cat(BenchmarkOptions), cl::init(1));
static cl::opt<bool> IgnoreInvalidSchedClass(
"ignore-invalid-sched-class",
cl::desc("ignore instructions that do not define a sched class"),
cl::cat(BenchmarkOptions), cl::init(false));
static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
"analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
cl::cat(BenchmarkOptions),
cl::values(
clEnumValN(BenchmarkFilter::All, "all",
"Keep all benchmarks (default)"),
clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
"Keep only those benchmarks that do *NOT* involve memory"),
clEnumValN(BenchmarkFilter::WithMem, "mem-only",
"Keep only the benchmarks that *DO* involve memory")),
cl::init(BenchmarkFilter::All));
static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
"analysis-clustering", cl::desc("the clustering algorithm to use"),
cl::cat(AnalysisOptions),
cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
"use DBSCAN/OPTICS algorithm"),
clEnumValN(BenchmarkClustering::Naive, "naive",
"one cluster per opcode")),
cl::init(BenchmarkClustering::Dbscan));
static cl::opt<unsigned> AnalysisDbscanNumPoints(
"analysis-numpoints",
cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
cl::cat(AnalysisOptions), cl::init(3));
static cl::opt<float> AnalysisClusteringEpsilon(
"analysis-clustering-epsilon",
cl::desc("epsilon for benchmark point clustering"),
cl::cat(AnalysisOptions), cl::init(0.1));
static cl::opt<float> AnalysisInconsistencyEpsilon(
"analysis-inconsistency-epsilon",
cl::desc("epsilon for detection of when the cluster is different from the "
"LLVM schedule profile values"),
cl::cat(AnalysisOptions), cl::init(0.1));
static cl::opt<std::string> AnalysisClustersOutputFile(
"analysis-clusters-output-file",
cl::desc("file to write the analysis clusters to (CSV format)"),
cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string> AnalysisInconsistenciesOutputFile(
"analysis-inconsistencies-output-file",
cl::desc("file to write the sched class inconsistencies report to (HTML "
"format)"),
cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
"analysis-display-unstable-clusters",
cl::desc("if there is more than one benchmark for an opcode, said "
"benchmarks may end up not being clustered into the same cluster "
"if the measured performance characteristics are different. by "
"default all such opcodes are filtered out. this flag will "
"instead show only such unstable opcodes"),
cl::cat(AnalysisOptions), cl::init(false));
static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
"analysis-override-benchmark-triple-and-cpu",
cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
"were measured for, but if you want to analyze them for some "
"other combination (specified via -mtriple/-mcpu), you can "
"pass this flag."),
cl::cat(AnalysisOptions), cl::init(false));
static cl::opt<std::string>
TripleName("mtriple",
cl::desc("Target triple. See -version for available targets"),
cl::cat(Options));
static cl::opt<std::string>
MCPU("mcpu",
cl::desc("Target a specific cpu type (-mcpu=help for details)"),
cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));
static cl::opt<std::string>
DumpObjectToDisk("dump-object-to-disk",
cl::desc("dumps the generated benchmark object to disk "
"and prints a message to access it"),
cl::ValueOptional, cl::cat(BenchmarkOptions));
static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
"execution-mode",
cl::desc("Selects the execution mode to use for running snippets"),
cl::cat(BenchmarkOptions),
cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
"inprocess",
"Executes the snippets within the same process"),
clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
"subprocess",
"Spawns a subprocess for each snippet execution, "
"allows for the use of memory annotations")),
cl::init(BenchmarkRunner::ExecutionModeE::InProcess));
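// As noted in the enum description above, memory annotations require
// subprocess execution; a sketch:
//   llvm-exegesis -mode=latency -snippets-file=snippet.s \
//       -execution-mode=subprocess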
static cl::opt<unsigned> BenchmarkRepeatCount(
"benchmark-repeat-count",
cl::desc("The number of times to repeat measurements on the benchmark k "
"before aggregating the results"),
cl::cat(BenchmarkOptions), cl::init(30));
static cl::list<ValidationEvent> ValidationCounters(
"validation-counter",
cl::desc(
"The name of a validation counter to run concurrently with the main "
"counter to validate benchmarking assumptions"),
cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());
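// A sketch, assuming the target's ValidationEventOptions() exposes an
// "instructions-retired" event (the available names are defined in
// lib/ValidationEvent.h):
//   llvm-exegesis -mode=uops -opcode-name=ADD64rr \
//       -validation-counter=instructions-retired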
static cl::opt<int> BenchmarkProcessCPU(
"benchmark-process-cpu",
cl::desc("The CPU number that the benchmarking process should executon on"),
cl::cat(BenchmarkOptions), cl::init(-1));
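// For example, -benchmark-process-cpu=2 would pin the benchmarking process to
// CPU 2; the default of -1 presumably leaves placement to the OS scheduler.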
static ExitOnError ExitOnErr("llvm-exegesis error: ");
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) { … }
static void ExitOnFileError(const Twine &FileName, Error Err) { … }
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) { … }
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) { … }
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
const BitVector &ForbiddenRegs) { … }
static void runBenchmarkConfigurations(
const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
const BenchmarkRunner &Runner) { … }
static void benchmarkMain() { … }
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
const std::string &OutputFilename) { … }
static void filterPoints(MutableArrayRef<Benchmark> Points,
const MCInstrInfo &MCII) { … }
static void analysisMain() { … }
} // namespace exegesis
} // namespace llvm
int main(int Argc, char **Argv) { … }