#include "llvm/Transforms/IPO/SampleProfile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <queue>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
usingnamespacellvm;
usingnamespacesampleprof;
usingnamespacellvm::sampleprofutil;
// NOTE(review): bare identifier with no declaration syntax around it; it reads
// like the name of a type alias whose right-hand side is missing — confirm
// against the complete file before relying on it.
ProfileCount;
// Debug/statistics tags for this file. The macro values are elided here.
// NOTE(review): DEBUG_TYPE is presumably the tag picked up by the STATISTIC
// counters and debug output declared in this file — confirm.
#define DEBUG_TYPE …
#define CSINLINE_DEBUG …
// Pass counters declared through the STATISTIC macro
// (llvm/ADT/Statistic.h). The second argument is the human-readable
// description attached to each counter.

// Inlining decisions taken with a context-sensitive profile.
STATISTIC(NumCSInlined,
          "Number of functions inlined with context sensitive profile");
STATISTIC(NumCSNotInlined,
          "Number of functions not inlined with context sensitive profile");

// How often the profile agreed / disagreed with the function's CFG.
STATISTIC(NumMismatchedProfile,
          "Number of functions with CFG mismatched profile");
STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");

// Inlined call sites that only received part of the distribution factor.
STATISTIC(NumDuplicatedInlinesite,
          "Number of inlined callsites with a partial distribution factor");

// Reasons FDO inlining stopped early for a function: one counter per size
// limit (min, max, growth).
STATISTIC(NumCSInlinedHitMinLimit,
          "Number of functions with FDO inline stopped due to min size limit");
STATISTIC(NumCSInlinedHitMaxLimit,
          "Number of functions with FDO inline stopped due to max size limit");
STATISTIC(
    NumCSInlinedHitGrowthLimit,
    "Number of functions with FDO inline stopped due to growth size limit");
// Input file options. Both default to the empty string and are marked
// cl::Hidden.

/// -sample-profile-file=<filename>: the profile to load.
static cl::opt<std::string> SampleProfileFile(
    "sample-profile-file", cl::init(""), cl::value_desc("filename"),
    cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);

/// -sample-profile-remapping-file=<filename>: the profile remapping file.
static cl::opt<std::string> SampleProfileRemappingFile(
    "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
    cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
// Stale-profile handling options. All four default to false and are hidden.
// They are not declared `static`, so they have external linkage.
// NOTE(review): presumably referenced through `extern` declarations in other
// translation units — confirm before changing their names or linkage.

/// -salvage-stale-profile: fuzzy-match a stale profile and query samples at
/// the remapped location.
cl::opt<bool> SalvageStaleProfile(
    "salvage-stale-profile", cl::Hidden, cl::init(false),
    cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
             "location for sample profile query."));

/// -salvage-unused-profile: match unused profiles with new functions on the
/// call graph.
cl::opt<bool>
    SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false),
                         cl::desc("Salvage unused profile by matching with new "
                                  "functions on call graph."));

/// -report-profile-staleness: compute and report staleness metrics.
cl::opt<bool> ReportProfileStaleness(
    "report-profile-staleness", cl::Hidden, cl::init(false),
    cl::desc("Compute and report stale profile statistical metrics."));

/// -persist-profile-staleness: compute staleness metrics and write them into
/// the object file's .llvm_stats section.
cl::opt<bool> PersistProfileStaleness(
    "persist-profile-staleness", cl::Hidden, cl::init(false),
    cl::desc("Compute stale profile statistical metrics and write it into the "
             "native object file(.llvm_stats section)."));
// Profile accuracy options. Both default to false: without them, un-sampled
// code is treated conservatively as "unknown" rather than as zero samples.

/// -profile-sample-accurate: applies to un-sampled call sites and functions.
static cl::opt<bool> ProfileSampleAccurate(
    "profile-sample-accurate", cl::Hidden, cl::init(false),
    cl::desc("If the sample profile is accurate, we will mark all un-sampled "
             "callsite and function as having 0 samples. Otherwise, treat "
             "un-sampled callsites and functions conservatively as unknown. "));

/// -profile-sample-block-accurate: applies to un-sampled branches and calls.
static cl::opt<bool> ProfileSampleBlockAccurate(
    "profile-sample-block-accurate", cl::Hidden, cl::init(false),
    cl::desc("If the sample profile is accurate, we will mark all un-sampled "
             "branches and calls as having 0 samples. Otherwise, treat "
             "them conservatively as unknown. "));
/// -profile-accurate-for-symsinlist (default: true, hidden): treat profiles of
/// symbols found in the profile symbol list as accurate. Per its own help
/// text, -profile-sample-accurate may override it.
///
/// The help text previously misspelled "overridden" as "overriden".
static cl::opt<bool> ProfileAccurateForSymsInList(
    "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
    cl::desc("For symbols in profile symbol list, regard their profiles to "
             "be accurate. It may be overridden by profile-sample-accurate. "));
// Options that steer loading order and inlinee profile handling.

/// -sample-profile-merge-inlinee (default: true): when the loader decides not
/// to inline a call site, merge the past inlinee's profile into the outlined
/// function. Per the help text it is only effective with top-down loading.
static cl::opt<bool> ProfileMergeInlinee(
    "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
    cl::desc("Merge past inlinee's profile to outline version if sample "
             "profile loader decided not to inline a call site. It will "
             "only be enabled when top-down order of profile loading is "
             "enabled. "));

/// -sample-profile-top-down-load (default: true): annotate and inline
/// functions in top-down call-graph order.
static cl::opt<bool> ProfileTopDownLoad(
    "sample-profile-top-down-load", cl::Hidden, cl::init(true),
    cl::desc("Do profile annotation and inlining for functions in top-down "
             "order of call graph during sample profile loading. It only "
             "works for new pass manager. "));

/// -use-profiled-call-graph (default: true): take the top-down order from the
/// profiled call graph when -sample-profile-top-down-load is on.
static cl::opt<bool>
    UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
                         cl::desc("Process functions in a top-down order "
                                  "defined by the profiled call graph when "
                                  "-sample-profile-top-down-load is on."));

/// -sample-profile-inline-size (default: false): allow inlining of cold call
/// sites when that helps code size.
static cl::opt<bool> ProfileSizeInline(
    "sample-profile-inline-size", cl::Hidden, cl::init(false),
    cl::desc("Inline cold call sites in profile loader if it's beneficial "
             "for code size."));
/// -disable-sample-loader-inlining (default: false, hidden): skip the inline
/// transformation in the sample loader while still merging (or scaling)
/// profiles as configured by --sample-profile-merge-inlinee.
///
/// The help text previously misspelled "artificially" as "artifically".
static cl::opt<bool> DisableSampleLoaderInlining(
    "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
    cl::desc("If true, artificially skip inline transformation in sample-loader "
             "pass, and merge (or scale) profiles (as configured by "
             "--sample-profile-merge-inlinee)."));
namespace llvm {
// Tuning knobs for the sample profile loader's inliner. They are defined in
// namespace llvm without `static`, so they have external linkage.
// NOTE(review): presumably shared with other translation units via `extern`
// declarations — confirm before renaming.
//
// The help strings below previously misspelled "priority-based" as
// "proirity-based"; only that user-visible text was corrected. Option names,
// defaults and flags are unchanged.

/// -sort-profiled-scc-member (default: true).
cl::opt<bool>
    SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
                    cl::desc("Sort profiled recursion by edge weights."));

/// -sample-profile-inline-growth-limit (default: 12).
cl::opt<int> ProfileInlineGrowthLimit(
    "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
    cl::desc("The size growth ratio limit for priority-based sample profile "
             "loader inlining."));

/// -sample-profile-inline-limit-min (default: 100).
cl::opt<int> ProfileInlineLimitMin(
    "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
    cl::desc("The lower bound of size growth limit for "
             "priority-based sample profile loader inlining."));

/// -sample-profile-inline-limit-max (default: 10000).
cl::opt<int> ProfileInlineLimitMax(
    "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
    cl::desc("The upper bound of size growth limit for "
             "priority-based sample profile loader inlining."));

/// -sample-profile-hot-inline-threshold (default: 3000).
cl::opt<int> SampleHotCallSiteThreshold(
    "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
    cl::desc("Hot callsite threshold for priority-based sample profile loader "
             "inlining."));

/// -sample-profile-cold-inline-threshold (default: 45).
cl::opt<int> SampleColdCallSiteThreshold(
    "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining cold callsites"));
} // end namespace llvm
/// -sample-profile-icp-relative-hotness (default: 25, hidden): relative
/// hotness threshold, expressed as a percentage, used for indirect call
/// promotion during priority-based inlining.
///
/// The help text previously misspelled "priority-based" as "proirity-based".
static cl::opt<unsigned> ProfileICPRelativeHotness(
    "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
    cl::desc(
        "Relative hotness percentage threshold for indirect "
        "call promotion in priority-based sample profile loader inlining."));
/// -sample-profile-icp-relative-hotness-skip (default: 1): number of targets
/// for which the relative hotness check is skipped.
static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
    "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
    cl::desc(
        "Skip relative hotness check for ICP up to given number of targets."));

// Staleness-error options.

/// -hot-func-cutoff-for-staleness-error (default: 800000).
/// NOTE(review): the unit of the percentile value is not visible here —
/// confirm how 800000 is interpreted before changing it.
static cl::opt<unsigned> HotFuncCutoffForStalenessError(
    "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000),
    cl::desc("A function is considered hot for staleness error check if its "
             "total sample count is above the specified percentile"));

/// -min-functions-for-staleness-error (default: 50): the check is skipped
/// when fewer hot functions than this are present.
static cl::opt<unsigned> MinfuncsForStalenessError(
    "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
    cl::desc("Skip the check if the number of hot functions is smaller than "
             "the specified number."));

/// -precent-mismatch-for-staleness-error (default: 80): mismatch percentage
/// above which the profile is rejected.
/// Note: "precent" is a misspelling of "percent", but it is part of the
/// option's command-line name and of the variable name, so it is kept as is.
static cl::opt<unsigned> PrecentMismatchForStalenessError(
    "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
    cl::desc("Reject the profile if the mismatch percent is higher than the "
             "given number."));
/// -sample-profile-prioritized-inline (hidden): use call-site prioritized
/// inlining in the sample profile loader. No cl::init is supplied for this
/// option.
///
/// The two adjacent string literals are concatenated by the compiler, so the
/// first one must end with a space; without it the help text read
/// "...sample profile loader.Currently only CSSPGO is supported.".
static cl::opt<bool> CallsitePrioritizedInline(
    "sample-profile-prioritized-inline", cl::Hidden,
    cl::desc("Use call site prioritized inlining for sample profile loader. "
             "Currently only CSSPGO is supported."));
/// -sample-profile-use-preinliner: honor preinliner decisions recorded in the
/// profile context. No cl::init is supplied.
static cl::opt<bool> UsePreInlinerDecision(
    "sample-profile-use-preinliner", cl::Hidden,
    cl::desc("Use the preinliner decisions stored in profile context."));

/// -sample-profile-recursive-inline: let the sample loader inliner inline
/// recursive calls. No cl::init is supplied.
static cl::opt<bool> AllowRecursiveInline(
    "sample-profile-recursive-inline", cl::Hidden,
    cl::desc("Allow sample loader inliner to inline recursive calls."));

/// -sample-profile-remove-probe (default: false): drop pseudo-probes once the
/// profile has been annotated.
static cl::opt<bool> RemoveProbeAfterProfileAnnotation(
    "sample-profile-remove-probe", cl::Hidden, cl::init(false),
    cl::desc("Remove pseudo-probe after sample profile annotation."));
// Inline replay options: replay inlining decisions recorded in an
// optimization remarks file. All four are hidden.

/// -sample-profile-inline-replay=<filename> (default: empty).
static cl::opt<std::string> ProfileInlineReplayFile(
    "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
    cl::desc(
        "Optimization remarks file containing inline remarks to be replayed "
        "by inlining from sample profile loader."),
    cl::Hidden);

/// -sample-profile-inline-replay-scope={Function|Module}
/// (default: Function).
static cl::opt<ReplayInlinerSettings::Scope> ProfileInlineReplayScope(
    "sample-profile-inline-replay-scope",
    cl::init(ReplayInlinerSettings::Scope::Function),
    cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
                          "Replay on functions that have remarks associated "
                          "with them (default)"),
               clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
                          "Replay on the entire module")),
    cl::desc("Whether inline replay should be applied to the entire "
             "Module or just the Functions (default) that are present as "
             "callers in remarks during sample profile inlining."),
    cl::Hidden);

/// -sample-profile-inline-replay-fallback={Original|AlwaysInline|NeverInline}
/// (default: Original): what to do with call sites the replay file does not
/// mention.
static cl::opt<ReplayInlinerSettings::Fallback> ProfileInlineReplayFallback(
    "sample-profile-inline-replay-fallback",
    cl::init(ReplayInlinerSettings::Fallback::Original),
    cl::values(
        clEnumValN(
            ReplayInlinerSettings::Fallback::Original, "Original",
            "All decisions not in replay send to original advisor (default)"),
        clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
                   "AlwaysInline", "All decisions not in replay are inlined"),
        clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
                   "All decisions not in replay are not inlined")),
    cl::desc("How sample profile inline replay treats sites that don't come "
             "from the replay. Original: defers to original advisor, "
             "AlwaysInline: inline all sites not in replay, NeverInline: "
             "inline no sites not in replay"),
    cl::Hidden);

/// -sample-profile-inline-replay-format=... (default:
/// LineColumnDiscriminator): call-site location format used by the replay
/// file.
static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat(
    "sample-profile-inline-replay-format",
    cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
    cl::values(
        clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
        clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
                   "<Line Number>:<Column Number>"),
        clEnumValN(CallSiteFormat::Format::LineDiscriminator,
                   "LineDiscriminator", "<Line Number>.<Discriminator>"),
        clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
                   "LineColumnDiscriminator",
                   "<Line Number>:<Column Number>.<Discriminator> (default)")),
    cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
/// -sample-profile-icp-max-prom (default: 3): cap on promotions for a single
/// indirect call site.
static cl::opt<unsigned>
    MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
                     cl::desc("Max number of promotions for a single indirect "
                              "call callsite in sample profile loader"));

/// -overwrite-existing-weights (default: false): ignore branch weights
/// already present on the IR and always overwrite them.
static cl::opt<bool> OverwriteExistingWeights(
    "overwrite-existing-weights", cl::Hidden, cl::init(false),
    cl::desc("Ignore existing branch weights on IR and always overwrite."));

/// -annotate-sample-profile-inline-phase (default: false): add the LTO phase
/// (prelink / postlink, or main when there is no LTO) to the sample-profile
/// inline pass name.
static cl::opt<bool> AnnotateSampleProfileInlinePhase(
    "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
    cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
             "sample-profile inline pass name."));
namespace llvm {
// Declaration only: this boolean option is defined in another translation
// unit and merely referenced from this file.
extern cl::opt<bool> EnableExtTspBlockPlacement;
}
// File-local types used by the sample profile loader. Everything in this
// unnamed namespace has internal linkage.
namespace {
// NOTE(review): the five bare identifiers below carry no declaration syntax;
// they read like the names of type aliases whose right-hand sides are
// missing — confirm against the complete file.
BlockWeightMap;
EquivalenceClassMap;
Edge;
EdgeWeightMap;
BlockEdgeMap;
// NOTE(review): name suggests a mapping from function GUIDs to function
// names; the class body is elided — confirm.
class GUIDToFuncNameMapper { … };
// NOTE(review): presumably describes one call site considered for inlining;
// the struct body is elided — confirm.
struct InlineCandidate { … };
// NOTE(review): presumably the ordering predicate for InlineCandidate; the
// struct body is elided — confirm.
struct CandidateComparer { … };
// NOTE(review): bare identifier, same situation as the aliases above;
// presumably a queue of InlineCandidate ordered by CandidateComparer —
// confirm.
CandidateQueue;
// The loader class itself: declared `final` and derived publicly from
// SampleProfileLoaderBaseImpl<Function>. Its members are elided here; the
// out-of-line member definitions follow later in this file.
class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> { … };
}
// Explicit specializations, for IR `Function`, of members of the generic
// sample-profile templates. They must live in namespace llvm, where the
// primary templates are declared.
namespace llvm {
/// Specialization of SampleProfileInference::isExit for IR functions.
/// \param BB basic block to test, passed as a pointer to const.
/// \returns a bool. NOTE(review): presumably true when \p BB is an exit
/// block of its function; the body is elided — confirm.
template <>
inline bool SampleProfileInference<Function>::isExit(const BasicBlock *BB) { … }
/// Specialization of SampleProfileInference::findUnlikelyJumps for IR
/// functions.
/// \param BasicBlocks blocks to inspect (read-only).
/// \param Successors successor map, passed by non-const reference.
/// \param Func flow function, passed by non-const reference, so it can be
/// updated by the call.
/// NOTE(review): body elided; which jumps count as "unlikely" is not visible
/// here — confirm.
template <>
inline void SampleProfileInference<Function>::findUnlikelyJumps(
const std::vector<const BasicBlockT *> &BasicBlocks,
BlockEdgeMap &Successors, FlowFunction &Func) { … }
/// Specialization of SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo
/// for IR functions.
/// \param F function to analyze, passed by non-const reference.
/// NOTE(review): presumably (re)computes dominator, post-dominator and loop
/// information for \p F; the body is elided — confirm.
template <>
void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
Function &F) { … }
}
/// \param Inst instruction to query, taken by const reference.
/// \returns an ErrorOr<uint64_t>; callers must check for an error before
/// reading the value.
/// NOTE(review): body elided; the name suggests the value is the profile
/// weight attributed to \p Inst — confirm.
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { … }
/// Const member: does not modify the loader through its declared type.
/// \param Inst call instruction to look up.
/// \returns a pointer to const FunctionSamples.
/// NOTE(review): body elided; presumably the callee's samples at this call
/// site, and presumably null when none exist — confirm.
const FunctionSamples *
SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { … }
/// Const member: does not modify the loader through its declared type.
/// \param Inst instruction to look up.
/// \param Sum passed by non-const reference, so it acts as an output (or
/// in/out) value.
/// \returns a vector of pointers to const FunctionSamples, by value.
/// NOTE(review): body elided; presumably the candidate targets of an indirect
/// call with \p Sum receiving their total sample count — confirm, including
/// the ordering of the returned vector.
std::vector<const FunctionSamples *>
SampleProfileLoader::findIndirectCallFunctionSamples(
const Instruction &Inst, uint64_t &Sum) const { … }
/// Const member: does not modify the loader through its declared type.
/// \param Inst instruction to look up.
/// \returns a pointer to const FunctionSamples.
/// NOTE(review): body elided; presumably the samples of the (possibly
/// inlined) function that \p Inst belongs to — confirm, including whether
/// null can be returned.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { … }
/// File-local helper (internal linkage).
/// \param Inst instruction to inspect (read-only).
/// \param Candidate name of the promotion candidate.
/// \returns a bool.
/// NOTE(review): body elided; presumably reports whether earlier promotion
/// history recorded on \p Inst still permits indirect call promotion to
/// \p Candidate — confirm.
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) { … }
/// File-local helper (internal linkage).
/// \param Inst instruction passed by non-const reference, so it can be
/// modified.
/// \param CallTargets value-profile records (read-only).
/// \param Sum a 64-bit count passed by value.
/// NOTE(review): body elided; presumably rewrites the indirect-call-target
/// value-profile metadata on \p Inst from \p CallTargets and \p Sum —
/// confirm.
static void
updateIDTMetaData(Instruction &Inst,
const SmallVectorImpl<InstrProfValueData> &CallTargets,
uint64_t Sum) { … }
/// \param F function being processed (mutable).
/// \param Candidate inline candidate (mutable).
/// \param SumOrigin a 64-bit count passed by value.
/// \param Sum passed by non-const reference, so the call can update it.
/// \param InlinedCallSite pointer to a vector of call sites.
/// \returns a bool.
/// NOTE(review): body elided; presumably promotes an indirect call to a
/// direct one and then tries to inline it, returning true on success.
/// Whether \p InlinedCallSite may be null is not visible here — confirm.
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
SmallVector<CallBase *, 8> *InlinedCallSite) { … }
/// \param CallInst call site to evaluate (mutable reference).
/// \returns a bool.
/// NOTE(review): body elided; presumably decides whether a cold callee is
/// still worth inlining — confirm which options it consults.
bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) { … }
/// \param Candidates call sites to report (read-only container).
/// \param F function, taken by const reference.
/// \param Hot boolean flag.
/// NOTE(review): body elided; presumably emits one optimization remark per
/// candidate, with \p Hot selecting hot versus cold wording — confirm.
void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
bool Hot) { … }
/// \param CB call site, passed by pointer.
/// \param Samples samples to inspect (pointer to const).
/// \param InlinedGUIDs set of GUIDs passed by non-const reference, so the
/// call can add to it.
/// \param Threshold a 64-bit threshold passed by value.
/// NOTE(review): body elided; presumably records in \p InlinedGUIDs the
/// callees defined outside this module that should be imported for inlining.
/// Whether \p CB or \p Samples may be null is not visible here — confirm.
void SampleProfileLoader::findExternalInlineCandidate(
CallBase *CB, const FunctionSamples *Samples,
DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) { … }
/// \param F function to process (mutable).
/// \param InlinedGUIDs set of GUIDs passed by non-const reference, so the
/// call can add to it.
/// \returns a bool.
/// NOTE(review): body elided; presumably inlines hot call sites in \p F and
/// returns true when \p F was changed — confirm.
bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { … }
/// \param Candidate inline candidate (mutable).
/// \param InlinedCallSites pointer to a vector of call sites.
/// \returns a bool.
/// NOTE(review): body elided; presumably attempts to inline \p Candidate and
/// returns true on success. Whether \p InlinedCallSites may be null is not
/// visible here — confirm.
bool SampleProfileLoader::tryInlineCandidate(
InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) { … }
/// \param NewCandidate pointer to the candidate record; being a pointer to
/// non-const, it can be written through.
/// \param CB call site, passed by pointer.
/// \returns a bool.
/// NOTE(review): body elided; presumably fills \p NewCandidate from \p CB and
/// returns false when the call site is not a viable candidate — confirm.
bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB) { … }
/// \param CB call site to evaluate (mutable reference).
/// \returns an optional InlineCost; callers must handle the empty case.
/// NOTE(review): body elided; presumably empty when no external inline
/// advisor (for example a replay advisor) has an opinion on \p CB — confirm.
std::optional<InlineCost>
SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) { … }
/// \param CB call site to evaluate (mutable reference).
/// \returns a bool.
/// NOTE(review): body elided; presumably a boolean view of the external
/// advisor's decision for \p CB — confirm what is returned when the advisor
/// has no decision.
bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) { … }
/// \param Candidate inline candidate (mutable reference).
/// \returns an InlineCost by value.
/// NOTE(review): body elided; presumably the cost/decision for inlining
/// \p Candidate — confirm which thresholds apply.
InlineCost
SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { … }
/// Same parameter list and return type as inlineHotFunctions.
/// \param F function to process (mutable).
/// \param InlinedGUIDs set of GUIDs passed by non-const reference, so the
/// call can add to it.
/// \returns a bool.
/// NOTE(review): body elided; presumably the priority-queue based variant of
/// hot-function inlining, returning true when \p F was changed — confirm.
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { … }
/// \param NonInlinedCallSites map from call site to samples. It is taken by
/// value, so every call copies the MapVector.
/// \param F function, taken by const reference.
/// NOTE(review): body elided; presumably merges or promotes the context
/// samples of call sites that were not inlined — confirm. Also confirm
/// whether the by-value parameter is intentional.
void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
const Function &F) { … }
/// File-local helper (internal linkage).
/// \param M call-target map to convert (read-only).
/// \returns a SmallVector of InstrProfValueData, by value.
/// NOTE(review): body elided; the name suggests the result is sorted, but
/// the sort key and direction are not visible here — confirm.
static SmallVector<InstrProfValueData, 2>
GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) { … }
/// \param F function to annotate (mutable).
/// NOTE(review): body elided; presumably attaches profile metadata (such as
/// branch weights) to instructions in \p F — confirm.
void SampleProfileLoader::generateMDProfMetadata(Function &F) { … }
/// \param F function to annotate (mutable).
/// \returns a bool.
/// NOTE(review): body elided; presumably drives annotation of \p F with
/// profile data and returns true when the IR was changed — confirm.
bool SampleProfileLoader::emitAnnotations(Function &F) { … }
/// \param M module to build the graph for (mutable reference).
/// \returns a unique_ptr, so ownership of the graph passes to the caller.
/// NOTE(review): body elided; presumably the call graph is derived from
/// profile data rather than from IR call edges alone — confirm.
std::unique_ptr<ProfiledCallGraph>
SampleProfileLoader::buildProfiledCallGraph(Module &M) { … }
/// \param M module whose functions are ordered.
/// \param CG lazy call graph, passed by non-const reference.
/// \returns a vector of Function pointers, by value.
/// NOTE(review): body elided; presumably the processing order (top-down when
/// the corresponding options are on) — confirm how the options select the
/// order.
std::vector<Function *>
SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) { … }
/// \param M module being compiled.
/// \param FAM function analysis manager, passed by pointer.
/// \returns a bool.
/// NOTE(review): body elided; presumably opens and reads the profile and
/// returns false on failure. Whether \p FAM may be null is not visible here
/// — confirm.
bool SampleProfileLoader::doInitialization(Module &M,
FunctionAnalysisManager *FAM) { … }
/// \param M module being compiled.
/// \param PSI profile summary info, passed by pointer.
/// \param Profiles loaded profiles (read-only).
/// \returns a bool.
/// NOTE(review): body elided; presumably returns true when the profile is too
/// stale to use, judged by the *-for-staleness-error options defined in this
/// file — confirm.
bool SampleProfileLoader::rejectHighStalenessProfile(
Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) { … }
/// \param M module to process (mutable).
/// NOTE(review): body elided; presumably erases pseudo-probe instructions
/// from every function in \p M — confirm.
void SampleProfileLoader::removePseudoProbeInsts(Module &M) { … }
/// \param M module to process (mutable).
/// \param AM module analysis manager, passed by pointer.
/// \param _PSI profile summary info, passed by pointer.
/// \returns a bool.
/// NOTE(review): body elided; presumably the module-level entry point of the
/// loader, returning true when the module was changed — confirm.
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI) { … }
/// \param F function to process (mutable).
/// \param AM module analysis manager, passed by pointer.
/// \returns a bool.
/// NOTE(review): body elided; presumably the per-function step of the loader,
/// returning true when \p F was changed — confirm.
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { … }
/// Constructs the new-pass-manager wrapper pass.
/// \param File profile file name, taken by value.
/// \param RemappingFile remapping file name, taken by value.
/// \param LTOPhase the ThinLTO / full LTO phase this pass instance runs in.
/// \param FS reference-counted virtual file system handle.
/// NOTE(review): the member initializer list and body are elided; how these
/// arguments interact with the -sample-profile-file and
/// -sample-profile-remapping-file options is not visible here — confirm.
SampleProfileLoaderPass::SampleProfileLoaderPass(
std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
IntrusiveRefCntPtr<vfs::FileSystem> FS)
: … { … }
/// New-pass-manager entry point for the pass.
/// \param M module to process (mutable).
/// \param AM module analysis manager.
/// \returns the set of analyses that remain valid after the pass has run.
/// NOTE(review): body elided; presumably builds a SampleProfileLoader, runs
/// it on \p M, and reports all analyses preserved when nothing changed —
/// confirm.
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) { … }