llvm/llvm/lib/Transforms/IPO/SampleProfile.cpp

//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SampleProfileLoader transformation. This pass
// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
// profile information in the given profile.
//
// This pass generates branch weight annotations on the IR:
//
// - prof: Represents branch weights. This annotation is added to branches
//      to indicate the weights of each edge coming out of the branch.
//      The weight of each edge is the weight of the target block for
//      that edge. The weight of a block B is computed as the maximum
//      number of samples found in B.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/SampleProfile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <queue>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

usingnamespacellvm;
usingnamespacesampleprof;
usingnamespacellvm::sampleprofutil;
ProfileCount;
#define DEBUG_TYPE
#define CSINLINE_DEBUG

STATISTIC(NumCSInlined,
          "Number of functions inlined with context sensitive profile");
STATISTIC(NumCSNotInlined,
          "Number of functions not inlined with context sensitive profile");
STATISTIC(NumMismatchedProfile,
          "Number of functions with CFG mismatched profile");
STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
STATISTIC(NumDuplicatedInlinesite,
          "Number of inlined callsites with a partial distribution factor");

STATISTIC(NumCSInlinedHitMinLimit,
          "Number of functions with FDO inline stopped due to min size limit");
STATISTIC(NumCSInlinedHitMaxLimit,
          "Number of functions with FDO inline stopped due to max size limit");
STATISTIC(
    NumCSInlinedHitGrowthLimit,
    "Number of functions with FDO inline stopped due to growth size limit");

// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
    "sample-profile-file", cl::init(""), cl::value_desc("filename"),
    cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);

// The named file contains a set of transformations that may have been applied
// to the symbol names between the program from which the sample data was
// collected and the current program's symbols.
static cl::opt<std::string> SampleProfileRemappingFile(
    "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
    cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);

cl::opt<bool> SalvageStaleProfile(
    "salvage-stale-profile", cl::Hidden, cl::init(false),
    cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
             "location for sample profile query."));
cl::opt<bool>
    SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false),
                         cl::desc("Salvage unused profile by matching with new "
                                  "functions on call graph."));

cl::opt<bool> ReportProfileStaleness(
    "report-profile-staleness", cl::Hidden, cl::init(false),
    cl::desc("Compute and report stale profile statistical metrics."));

cl::opt<bool> PersistProfileStaleness(
    "persist-profile-staleness", cl::Hidden, cl::init(false),
    cl::desc("Compute stale profile statistical metrics and write it into the "
             "native object file(.llvm_stats section)."));

static cl::opt<bool> ProfileSampleAccurate(
    "profile-sample-accurate", cl::Hidden, cl::init(false),
    cl::desc("If the sample profile is accurate, we will mark all un-sampled "
             "callsite and function as having 0 samples. Otherwise, treat "
             "un-sampled callsites and functions conservatively as unknown. "));

static cl::opt<bool> ProfileSampleBlockAccurate(
    "profile-sample-block-accurate", cl::Hidden, cl::init(false),
    cl::desc("If the sample profile is accurate, we will mark all un-sampled "
             "branches and calls as having 0 samples. Otherwise, treat "
             "them conservatively as unknown. "));

static cl::opt<bool> ProfileAccurateForSymsInList(
    "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
    cl::desc("For symbols in profile symbol list, regard their profiles to "
             "be accurate. It may be overriden by profile-sample-accurate. "));

static cl::opt<bool> ProfileMergeInlinee(
    "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
    cl::desc("Merge past inlinee's profile to outline version if sample "
             "profile loader decided not to inline a call site. It will "
             "only be enabled when top-down order of profile loading is "
             "enabled. "));

static cl::opt<bool> ProfileTopDownLoad(
    "sample-profile-top-down-load", cl::Hidden, cl::init(true),
    cl::desc("Do profile annotation and inlining for functions in top-down "
             "order of call graph during sample profile loading. It only "
             "works for new pass manager. "));

static cl::opt<bool>
    UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
                         cl::desc("Process functions in a top-down order "
                                  "defined by the profiled call graph when "
                                  "-sample-profile-top-down-load is on."));

static cl::opt<bool> ProfileSizeInline(
    "sample-profile-inline-size", cl::Hidden, cl::init(false),
    cl::desc("Inline cold call sites in profile loader if it's beneficial "
             "for code size."));

// Since profiles are consumed by many passes, turning on this option has
// side effects. For instance, pre-link SCC inliner would see merged profiles
// and inline the hot functions (that are skipped in this pass).
static cl::opt<bool> DisableSampleLoaderInlining(
    "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
    cl::desc("If true, artifically skip inline transformation in sample-loader "
             "pass, and merge (or scale) profiles (as configured by "
             "--sample-profile-merge-inlinee)."));

namespace llvm {
cl::opt<bool>
    SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
                    cl::desc("Sort profiled recursion by edge weights."));

cl::opt<int> ProfileInlineGrowthLimit(
    "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
    cl::desc("The size growth ratio limit for proirity-based sample profile "
             "loader inlining."));

cl::opt<int> ProfileInlineLimitMin(
    "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
    cl::desc("The lower bound of size growth limit for "
             "proirity-based sample profile loader inlining."));

cl::opt<int> ProfileInlineLimitMax(
    "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
    cl::desc("The upper bound of size growth limit for "
             "proirity-based sample profile loader inlining."));

cl::opt<int> SampleHotCallSiteThreshold(
    "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
    cl::desc("Hot callsite threshold for proirity-based sample profile loader "
             "inlining."));

cl::opt<int> SampleColdCallSiteThreshold(
    "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining cold callsites"));
} // namespace llvm

static cl::opt<unsigned> ProfileICPRelativeHotness(
    "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
    cl::desc(
        "Relative hotness percentage threshold for indirect "
        "call promotion in proirity-based sample profile loader inlining."));

static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
    "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
    cl::desc(
        "Skip relative hotness check for ICP up to given number of targets."));

static cl::opt<unsigned> HotFuncCutoffForStalenessError(
    "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000),
    cl::desc("A function is considered hot for staleness error check if its "
             "total sample count is above the specified percentile"));

static cl::opt<unsigned> MinfuncsForStalenessError(
    "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
    cl::desc("Skip the check if the number of hot functions is smaller than "
             "the specified number."));

static cl::opt<unsigned> PrecentMismatchForStalenessError(
    "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
    cl::desc("Reject the profile if the mismatch percent is higher than the "
             "given number."));

static cl::opt<bool> CallsitePrioritizedInline(
    "sample-profile-prioritized-inline", cl::Hidden,
    cl::desc("Use call site prioritized inlining for sample profile loader."
             "Currently only CSSPGO is supported."));

static cl::opt<bool> UsePreInlinerDecision(
    "sample-profile-use-preinliner", cl::Hidden,
    cl::desc("Use the preinliner decisions stored in profile context."));

static cl::opt<bool> AllowRecursiveInline(
    "sample-profile-recursive-inline", cl::Hidden,
    cl::desc("Allow sample loader inliner to inline recursive calls."));

static cl::opt<bool> RemoveProbeAfterProfileAnnotation(
    "sample-profile-remove-probe", cl::Hidden, cl::init(false),
    cl::desc("Remove pseudo-probe after sample profile annotation."));

static cl::opt<std::string> ProfileInlineReplayFile(
    "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
    cl::desc(
        "Optimization remarks file containing inline remarks to be replayed "
        "by inlining from sample profile loader."),
    cl::Hidden);

static cl::opt<ReplayInlinerSettings::Scope> ProfileInlineReplayScope(
    "sample-profile-inline-replay-scope",
    cl::init(ReplayInlinerSettings::Scope::Function),
    cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
                          "Replay on functions that have remarks associated "
                          "with them (default)"),
               clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
                          "Replay on the entire module")),
    cl::desc("Whether inline replay should be applied to the entire "
             "Module or just the Functions (default) that are present as "
             "callers in remarks during sample profile inlining."),
    cl::Hidden);

static cl::opt<ReplayInlinerSettings::Fallback> ProfileInlineReplayFallback(
    "sample-profile-inline-replay-fallback",
    cl::init(ReplayInlinerSettings::Fallback::Original),
    cl::values(
        clEnumValN(
            ReplayInlinerSettings::Fallback::Original, "Original",
            "All decisions not in replay send to original advisor (default)"),
        clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
                   "AlwaysInline", "All decisions not in replay are inlined"),
        clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
                   "All decisions not in replay are not inlined")),
    cl::desc("How sample profile inline replay treats sites that don't come "
             "from the replay. Original: defers to original advisor, "
             "AlwaysInline: inline all sites not in replay, NeverInline: "
             "inline no sites not in replay"),
    cl::Hidden);

static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat(
    "sample-profile-inline-replay-format",
    cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
    cl::values(
        clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
        clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
                   "<Line Number>:<Column Number>"),
        clEnumValN(CallSiteFormat::Format::LineDiscriminator,
                   "LineDiscriminator", "<Line Number>.<Discriminator>"),
        clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
                   "LineColumnDiscriminator",
                   "<Line Number>:<Column Number>.<Discriminator> (default)")),
    cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);

static cl::opt<unsigned>
    MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
                     cl::desc("Max number of promotions for a single indirect "
                              "call callsite in sample profile loader"));

static cl::opt<bool> OverwriteExistingWeights(
    "overwrite-existing-weights", cl::Hidden, cl::init(false),
    cl::desc("Ignore existing branch weights on IR and always overwrite."));

static cl::opt<bool> AnnotateSampleProfileInlinePhase(
    "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
    cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
             "sample-profile inline pass name."));

namespace llvm {
extern cl::opt<bool> EnableExtTspBlockPlacement;
}

namespace {

BlockWeightMap;
EquivalenceClassMap;
Edge;
EdgeWeightMap;
BlockEdgeMap;

class GUIDToFuncNameMapper {};

// Inline candidate used by iterative callsite prioritized inliner
struct InlineCandidate {};

// Inline candidate comparer using call site weight
struct CandidateComparer {};

CandidateQueue;

/// Sample profile pass.
///
/// This pass reads profile data from the file specified by
/// -sample-profile-file and annotates every affected function with the
/// profile information found in that file.
class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {};
} // end anonymous namespace

namespace llvm {
template <>
inline bool SampleProfileInference<Function>::isExit(const BasicBlock *BB) {}

template <>
inline void SampleProfileInference<Function>::findUnlikelyJumps(
    const std::vector<const BasicBlockT *> &BasicBlocks,
    BlockEdgeMap &Successors, FlowFunction &Func) {}

template <>
void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
    Function &F) {}
} // namespace llvm

ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {}

/// Get the FunctionSamples for a call instruction.
///
/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
/// instance in which that call instruction is calling to. It contains
/// all samples that resides in the inlined instance. We first find the
/// inlined instance in which the call instruction is from, then we
/// traverse its children to find the callsite with the matching
/// location.
///
/// \param Inst Call/Invoke instruction to query.
///
/// \returns The FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {}

/// Returns a vector of FunctionSamples that are the indirect call targets
/// of \p Inst. The vector is sorted by the total number of samples. Stores
/// the total call count of the indirect call in \p Sum.
std::vector<const FunctionSamples *>
SampleProfileLoader::findIndirectCallFunctionSamples(
    const Instruction &Inst, uint64_t &Sum) const {}

const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {}

/// Check whether the indirect call promotion history of \p Inst allows
/// the promotion for \p Candidate.
/// If the profile count for the promotion candidate \p Candidate is
/// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
/// for \p Inst. If we already have at least MaxNumPromotions
/// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
/// cannot promote for \p Inst anymore.
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {}

/// Update indirect call target profile metadata for \p Inst.
/// Usually \p Sum is the sum of counts of all the targets for \p Inst.
/// If it is 0, it means updateIDTMetaData is used to mark a
/// certain target to be promoted already. If it is not zero,
/// we expect to use it to update the total count in the value profile.
static void
updateIDTMetaData(Instruction &Inst,
                  const SmallVectorImpl<InstrProfValueData> &CallTargets,
                  uint64_t Sum) {}

/// Attempt to promote indirect call and also inline the promoted call.
///
/// \param F  Caller function.
/// \param Candidate  ICP and inline candidate.
/// \param SumOrigin  Original sum of target counts for indirect call before
///                   promoting given candidate.
/// \param Sum        Prorated sum of remaining target counts for indirect call
///                   after promoting given candidate.
/// \param InlinedCallSite  Output vector for new call sites exposed after
/// inlining.
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
    Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
    SmallVector<CallBase *, 8> *InlinedCallSite) {}

bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {}

void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
    const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
    bool Hot) {}

void SampleProfileLoader::findExternalInlineCandidate(
    CallBase *CB, const FunctionSamples *Samples,
    DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {}

/// Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, so as to
/// find out callsites with corresponding inline instances.
///
/// For such callsites,
/// - If it is hot enough, inline the callsites and adds callsites of the callee
///   into the caller. If the call is an indirect call, first promote
///   it to direct call. Each indirect call is limited with a single target.
///
/// - If a callsite is not inlined, merge the its profile to the outline
///   version (if --sample-profile-merge-inlinee is true), or scale the
///   counters of standalone function based on the profile of inlined
///   instances (if --sample-profile-merge-inlinee is false).
///
///   Later passes may consume the updated profiles.
///
/// \param F function to perform iterative inlining.
/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
///     inlined in the profiled binary.
///
/// \returns True if there is any inline happened.
bool SampleProfileLoader::inlineHotFunctions(
    Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {}

bool SampleProfileLoader::tryInlineCandidate(
    InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {}

bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
                                             CallBase *CB) {}

std::optional<InlineCost>
SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {}

bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {}

InlineCost
SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {}

bool SampleProfileLoader::inlineHotFunctionsWithPriority(
    Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {}

void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
    MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
    const Function &F) {}

/// Returns the sorted CallTargetMap \p M by count in descending order.
static SmallVector<InstrProfValueData, 2>
GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) {}

// Generate MD_prof metadata for every branch instruction using the
// edge weights computed during propagation.
void SampleProfileLoader::generateMDProfMetadata(Function &F) {}

/// Once all the branch weights are computed, we emit the MD_prof
/// metadata on BB using the computed values for each of its branches.
///
/// \param F The function to query.
///
/// \returns true if \p F was modified. Returns false, otherwise.
bool SampleProfileLoader::emitAnnotations(Function &F) {}

std::unique_ptr<ProfiledCallGraph>
SampleProfileLoader::buildProfiledCallGraph(Module &M) {}

std::vector<Function *>
SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {}

bool SampleProfileLoader::doInitialization(Module &M,
                                           FunctionAnalysisManager *FAM) {}

// Note that this is a module-level check. Even if one module is errored out,
// the entire build will be errored out. However, the user could make big
// changes to functions in single module but those changes might not be
// performance significant to the whole binary. Therefore, to avoid those false
// positives, we select a reasonable big set of hot functions that are supposed
// to be globally performance significant, only compute and check the mismatch
// within those functions. The function selection is based on two criteria:
// 1) The function is hot enough, which is tuned by a hotness-based
// flag(HotFuncCutoffForStalenessError). 2) The num of function is large enough
// which is tuned by the MinfuncsForStalenessError flag.
bool SampleProfileLoader::rejectHighStalenessProfile(
    Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {}

void SampleProfileLoader::removePseudoProbeInsts(Module &M) {}

bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
                                      ProfileSummaryInfo *_PSI) {}

bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {}
SampleProfileLoaderPass::SampleProfileLoaderPass(
    std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
    IntrusiveRefCntPtr<vfs::FileSystem> FS)
    :{}

PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
                                               ModuleAnalysisManager &AM) {}