llvm/llvm/lib/Analysis/InlineCost.cpp

//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
#include <limits>
#include <optional>

usingnamespacellvm;

#define DEBUG_TYPE

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::desc("Default amount of inlining to perform"));

// We introduce this option since there is a minor compile-time win by avoiding
// addition of TTI attributes (target-features in particular) to inline
// candidates when they are guaranteed to be the same as top level methods in
// some use cases. If we avoid adding the attribute, we need an option to avoid
// checking these attributes.
static cl::opt<bool> IgnoreTTIInlineCompatible(
    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
             "during inline cost calculation"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225),
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

// InlineSavingsMultiplier overrides per TTI multipliers iff it is
// specified explicitly in command line options. This option is exposed
// for tuning and testing.
static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8),
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

// InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is
// specified explicitly in command line options. This option is exposed
// for tuning and testing.
static cl::opt<int> InlineSavingsProfitableMultiplier(
    "inline-savings-profitable-multiplier", cl::Hidden, cl::init(4),
    cl::desc("A multiplier on top of cycle savings to decide whether the "
             "savings won't justify the cost"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::desc("The maximum size of a callee that get's "
                                 "inlined without sufficient cycle savings"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::desc("Threshold for hot callsites "));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
    cl::desc("Threshold for locally hot callsites "));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<uint64_t> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int>
    InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
              cl::desc("Cost of a single instruction when inlining"));

static cl::opt<int>
    MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                  cl::desc("Cost of load/store instruction when inlining"));

static cl::opt<int> CallPenalty(
    "inline-call-penalty", cl::Hidden, cl::init(25),
    cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<size_t>
    StackSizeThreshold("inline-max-stacksize", cl::Hidden,
                       cl::init(std::numeric_limits<size_t>::max()),
                       cl::desc("Do not inline functions with a stack size "
                                "that exceeds the specified limit"));

static cl::opt<size_t> RecurStackSizeThreshold(
    "recursive-inline-max-stacksize", cl::Hidden,
    cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
    cl::desc("Do not inline recursive functions with a stack "
             "size that exceeds the specified limit"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace llvm {
std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {}

std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {}

std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) {}

namespace InlineConstants {
int getInstrCost() {}

} // namespace InlineConstants

} // namespace llvm

namespace {
class InlineCostCallAnalyzer;

// This struct is used to store information about inline cost of a
// particular instruction
struct InstructionCostDetail {};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as implementation detail of functions such
/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
/// expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {};

// Considering forming a binary search, we should find the number of nodes
// which is same as the number of comparisons when lowered. For a given
// number of clusters, n, we can define a recursive function, f(n), to find
// the number of nodes in the tree. The recursion is :
// f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
// and f(n) = n, when n <= 3.
// This will lead a binary tree where the leaf should be either f(2) or f(3)
// when n > 3.  So, the number of comparisons from leaves should be n, while
// the number of non-leaf should be :
//   2^(log2(n) - 1) - 1
//   = 2^log2(n) * 2^-1 - 1
//   = n / 2 - 1.
// Considering comparisons from leaf and non-leaf nodes, we can estimate the
// number of comparisons in a simple closed form :
//   n + n / 2 - 1 = n * 3 / 2 - 1
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {}

/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {};

// Return true if CB is the sole call to local function Callee.
static bool isSoleCallToLocalFunction(const CallBase &CB,
                                      const Function &Callee) {}

class InlineCostFeaturesAnalyzer final : public CallAnalyzer {};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {}

void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {}

void CallAnalyzer::disableLoadElimination() {}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {}

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {}

bool CallAnalyzer::visitPHI(PHINode &I) {}

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
bool CallAnalyzer::simplifyInstruction(Instruction &I) {}

/// Try to simplify a call to llvm.is.constant.
///
/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
/// we expect calls of this specific intrinsic to be infrequent.
///
/// FIXME: Given that we know CB's parent (F) caller
/// (CandidateCall->getParent()->getParent()), we might be able to determine
/// whether inlining F into F's caller would change how the call to
/// llvm.is.constant would evaluate.
bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {}

bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {}

bool CallAnalyzer::visitCastInst(CastInst &I) {}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {}

bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {}

bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
                                            BlockFrequencyInfo *CallerBFI) {}

std::optional<int>
InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                                BlockFrequencyInfo *CallerBFI) {}

void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {}

bool CallAnalyzer::visitCmpInst(CmpInst &I) {}

bool CallAnalyzer::visitSub(BinaryOperator &I) {}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {}

bool CallAnalyzer::visitFNeg(UnaryOperator &I) {}

bool CallAnalyzer::visitLoad(LoadInst &I) {}

bool CallAnalyzer::visitStore(StoreInst &I) {}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {}

/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {}

bool CallAnalyzer::visitCallBase(CallBase &Call) {}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {}

bool CallAnalyzer::visitSelectInst(SelectInst &SI) {}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {}

bool CallAnalyzer::visitInstruction(Instruction &I) {}

/// Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
InlineResult
CallAnalyzer::analyzeBlock(BasicBlock *BB,
                           SmallPtrSetImpl<const Value *> &EphValues) {}

/// Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {}

/// Find dead blocks due to deleted CFG edges during inlining.
///
/// If we know the successor of the current block, \p CurrBB, has to be \p
/// NextBB, the other successors of \p CurrBB are dead if these successors have
/// no live incoming CFG edges.  If one block is found to be dead, we can
/// continue growing the dead block list by checking the successors of the dead
/// blocks to see if all their incoming edges are dead or not.
void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {}

/// Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
InlineResult CallAnalyzer::analyze() {}

void InlineCostCallAnalyzer::print(raw_ostream &OS) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
#endif

/// Test that there are no attribute conflicts between Caller and Callee
///        that prevent inlining.
static bool functionsHaveCompatibleAttributes(
    Function *Caller, Function *Callee, TargetTransformInfo &TTI,
    function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {}

int llvm::getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
                          const DataLayout &DL) {}

InlineCost llvm::getInlineCost(
    CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {}

std::optional<int> llvm::getInliningCostEstimate(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {}

std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {}

std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
    CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {}

InlineCost llvm::getInlineCost(
    CallBase &Call, Function *Callee, const InlineParams &Params,
    TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {}

InlineResult llvm::isInlineViable(Function &F) {}

// APIs to create InlineParams based on command line flags and/or other
// parameters.

InlineParams llvm::getInlineParams(int Threshold) {}

InlineParams llvm::getInlineParams() {}

// Compute the default threshold for inlining based on the opt level and the
// size opt level.
static int computeThresholdFromOptLevels(unsigned OptLevel,
                                         unsigned SizeOptLevel) {}

InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {}

PreservedAnalyses
InlineCostAnnotationPrinterPass::run(Function &F,
                                     FunctionAnalysisManager &FAM) {}