#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>
namespace llvm {
namespace exegesis {
static cl::opt<int> OpcodeIndex(
"opcode-index",
cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
cl::cat(BenchmarkOptions), cl::init(0));
static cl::opt<std::string>
OpcodeNames("opcode-name",
cl::desc("comma-separated list of opcodes to measure, by name"),
cl::cat(BenchmarkOptions), cl::init(""));
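// A hypothetical invocation, assuming an X86 host where ADD64rr and SUB64rr
// are valid MC opcode names:
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr,SUB64rr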
static cl::opt<std::string> SnippetsFile("snippets-file",
cl::desc("code snippets to measure"),
cl::cat(BenchmarkOptions),
cl::init(""));
static cl::opt<std::string>
BenchmarkFile("benchmarks-file",
cl::desc("File to read (analysis mode) or write "
"(latency/uops/inverse_throughput modes) benchmark "
"results. “-” uses stdin/stdout."),
cl::cat(Options), cl::init(""));
static cl::opt<Benchmark::ModeE> BenchmarkMode(
"mode", cl::desc("the mode to run"), cl::cat(Options),
cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
"Instruction Inverse Throughput"),
clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));
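// For example (a sketch; per -opcode-index above, -1 selects every opcode the
// target defines):
//   llvm-exegesis -mode=uops -opcode-index=-1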
static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
"result-aggregation-mode", cl::desc("How to aggregate multi-values result"),
cl::cat(BenchmarkOptions),
cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
clEnumValN(Benchmark::Max, "max", "Keep max reading"),
clEnumValN(Benchmark::Mean, "mean",
"Compute mean of all readings"),
clEnumValN(Benchmark::MinVariance, "min-variance",
"Keep readings set with min-variance")),
cl::init(Benchmark::Min));
static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
"repetition-mode", cl::desc("how to repeat the instruction snippet"),
cl::cat(BenchmarkOptions),
cl::values(
clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
clEnumValN(Benchmark::AggregateMin, "min",
"All of the above and take the minimum of measurements"),
clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
"Middle half duplicate mode"),
clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
"Middle half loop mode")),
cl::init(Benchmark::Duplicate));
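// Duplicating avoids loop overhead at the cost of instruction cache pressure,
// while looping keeps the body cache-resident but adds a loop counter; "min"
// runs both and keeps the smaller measurement. A sketch:
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr -repetition-mode=min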
static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
"measurements-print-progress",
cl::desc("Produce progress indicator when performing measurements"),
cl::cat(BenchmarkOptions), cl::init(false));
static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
"benchmark-phase",
cl::desc("Stop the benchmarking process after the given phase"),
cl::cat(BenchmarkOptions),
cl::values(
clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
"Only generate the minimal instruction sequence"),
clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
"prepare-and-assemble-snippet",
"Same as prepare-snippet, but also dumps an excerpt of the "
"sequence (hex encoded)"),
clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
"assemble-measured-code",
"Same as prepare-and-assemble-snippet, but also creates the "
"full sequence "
"that can be dumped to a file using --dump-object-to-disk"),
clEnumValN(
BenchmarkPhaseSelectorE::Measure, "measure",
"Same as prepare-measured-code, but also runs the measurement "
"(default)")),
cl::init(BenchmarkPhaseSelectorE::Measure));
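// For example, to stop after generating the minimal instruction sequence,
// without assembling or running it (a sketch):
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr \
//       -benchmark-phase=prepare-snippet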
static cl::opt<bool>
UseDummyPerfCounters("use-dummy-perf-counters",
cl::desc("Do not read real performance counters, use "
"dummy values (for testing)"),
cl::cat(BenchmarkOptions), cl::init(false));
static cl::opt<unsigned>
MinInstructions("min-instructions",
cl::desc("The minimum number of instructions that should "
"be included in the snippet"),
cl::cat(BenchmarkOptions), cl::init(10000));
static cl::opt<unsigned>
LoopBodySize("loop-body-size",
cl::desc("when repeating the instruction snippet by looping "
"over it, duplicate the snippet until the loop body "
"contains at least this many instruction"),
cl::cat(BenchmarkOptions), cl::init(0));
static cl::opt<unsigned> MaxConfigsPerOpcode(
"max-configs-per-opcode",
cl::desc(
"allow the snippet generator to generate at most this many configs"),
cl::cat(BenchmarkOptions), cl::init(1));
static cl::opt<bool> IgnoreInvalidSchedClass(
"ignore-invalid-sched-class",
cl::desc("ignore instructions that do not define a sched class"),
cl::cat(BenchmarkOptions), cl::init(false));
static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
"analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
cl::cat(BenchmarkOptions),
cl::values(
clEnumValN(BenchmarkFilter::All, "all",
"Keep all benchmarks (default)"),
clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
"Keep only those benchmarks that do *NOT* involve memory"),
clEnumValN(BenchmarkFilter::WithMem, "mem-only",
"Keep only the benchmarks that *DO* involve memory")),
cl::init(BenchmarkFilter::All));
static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
"analysis-clustering", cl::desc("the clustering algorithm to use"),
cl::cat(AnalysisOptions),
cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
"use DBSCAN/OPTICS algorithm"),
clEnumValN(BenchmarkClustering::Naive, "naive",
"one cluster per opcode")),
cl::init(BenchmarkClustering::Dbscan));
static cl::opt<unsigned> AnalysisDbscanNumPoints(
"analysis-numpoints",
cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
cl::cat(AnalysisOptions), cl::init(3));
static cl::opt<float> AnalysisClusteringEpsilon(
"analysis-clustering-epsilon",
cl::desc("epsilon for benchmark point clustering"),
cl::cat(AnalysisOptions), cl::init(0.1));
static cl::opt<float> AnalysisInconsistencyEpsilon(
"analysis-inconsistency-epsilon",
cl::desc("epsilon for detection of when the cluster is different from the "
"LLVM schedule profile values"),
cl::cat(AnalysisOptions), cl::init(0.1));
static cl::opt<std::string> AnalysisClustersOutputFile(
"analysis-clusters-output-file",
cl::desc("file to write the analysis clusters to (CSV format)"),
cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string> AnalysisInconsistenciesOutputFile(
"analysis-inconsistencies-output-file",
cl::desc("file to write the sched class inconsistencies report to (HTML "
"format)"),
cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
"analysis-display-unstable-clusters",
cl::desc("if there is more than one benchmark for an opcode, said "
"benchmarks may end up not being clustered into the same cluster "
"if the measured performance characteristics are different. by "
"default all such opcodes are filtered out. this flag will "
"instead show only such unstable opcodes"),
cl::cat(AnalysisOptions), cl::init(false));
static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
"analysis-override-benchmark-triple-and-cpu",
cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
"were measured for, but if you want to analyze them for some "
"other combination (specified via -mtriple/-mcpu), you can "
"pass this flag."),
cl::cat(AnalysisOptions), cl::init(false));
static cl::opt<std::string>
TripleName("mtriple",
cl::desc("Target triple. See -version for available targets"),
cl::cat(Options));
static cl::opt<std::string>
MCPU("mcpu",
cl::desc("Target a specific cpu type (-mcpu=help for details)"),
cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));
static cl::opt<std::string>
DumpObjectToDisk("dump-object-to-disk",
cl::desc("dumps the generated benchmark object to disk "
"and prints a message to access it"),
cl::ValueOptional, cl::cat(BenchmarkOptions));
static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
"execution-mode",
cl::desc("Selects the execution mode to use for running snippets"),
cl::cat(BenchmarkOptions),
cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
"inprocess",
"Executes the snippets within the same process"),
clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
"subprocess",
"Spawns a subprocess for each snippet execution, "
"allows for the use of memory annotations")),
cl::init(BenchmarkRunner::ExecutionModeE::InProcess));
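// As noted in the enum description above, memory annotations require
// subprocess execution; a sketch:
//   llvm-exegesis -mode=latency -snippets-file=snippet.s \
//       -execution-mode=subprocess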
static cl::opt<unsigned> BenchmarkRepeatCount(
"benchmark-repeat-count",
cl::desc("The number of times to repeat measurements on the benchmark k "
"before aggregating the results"),
cl::cat(BenchmarkOptions), cl::init(30));
static cl::list<ValidationEvent> ValidationCounters(
"validation-counter",
cl::desc(
"The name of a validation counter to run concurrently with the main "
"counter to validate benchmarking assumptions"),
cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());
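// A sketch, assuming the target's ValidationEventOptions() exposes an
// "instructions-retired" event (the available names are defined in
// lib/ValidationEvent.h):
//   llvm-exegesis -mode=uops -opcode-name=ADD64rr \
//       -validation-counter=instructions-retired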
static cl::opt<int> BenchmarkProcessCPU(
"benchmark-process-cpu",
cl::desc("The CPU number that the benchmarking process should executon on"),
cl::cat(BenchmarkOptions), cl::init(-1));
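// For example, -benchmark-process-cpu=2 would pin the benchmarking process to
// CPU 2; the default of -1 presumably leaves placement to the OS scheduler.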
static ExitOnError ExitOnErr("llvm-exegesis error: ");
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) { … }
static void ExitOnFileError(const Twine &FileName, Error Err) { … }
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) { … }
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) { … }
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
const BitVector &ForbiddenRegs) { … }
static void runBenchmarkConfigurations(
const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
const BenchmarkRunner &Runner) { … }
static void benchmarkMain() { … }
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
const std::string &OutputFilename) { … }
static void filterPoints(MutableArrayRef<Benchmark> Points,
const MCInstrInfo &MCII) { … }
static void analysisMain() { … }
} // namespace exegesis
} // namespace llvm
int main(int Argc, char **Argv) { … }