//===- InlineFunction.cpp - Code to perform function inlining ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CtxProfAnalysis.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <deque>
#include <iterator>
#include <limits>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE …

using namespace llvm;
using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool> EnableNoAliasConversion(
    "enable-noalias-to-md-conversion", cl::init(true), cl::Hidden,
    cl::desc("Convert noalias attributes to metadata during inlining."));

static cl::opt<bool>
    UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
                        cl::init(true),
                        cl::desc("Use the llvm.experimental.noalias.scope.decl "
                                 "intrinsic during inlining."));

// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations.
// This option is not suitable for use with frontends that emit comprehensive
// parameter alignment annotations.
static cl::opt<bool> PreserveAlignmentAssumptions(
    "preserve-alignment-assumptions-during-inlining", cl::init(false),
    cl::Hidden,
    cl::desc("Convert align attributes to assumptions during inlining."));

static cl::opt<unsigned> InlinerAttributeWindow(
    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
    cl::desc("the maximum number of instructions analyzed for may-throw during "
             "attribute inference in the inlined body"),
    cl::init(4));

namespace {

/// A class for recording information about inlining a landing pad.
class LandingPadInliningInfo { … };

} // end anonymous namespace

/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { … }

/// Forward the 'resume' instruction to the caller's landing pad block.
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump there.
void LandingPadInliningInfo::forwardResume(
    ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { … }

/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
static Value *getParentPad(Value *EHPad) { … }

using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;

/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
                                       UnwindDestMemoTy &MemoMap) { … }

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't
/// provide an answer. Since most funclets will have their unwind dest
/// immediately available as the unwind dest of a catchswitch or cleanupret,
/// this routine searches top-down from the given pad and then up. To avoid
/// worst-case quadratic run-time given that approach, it uses a memo map to
/// avoid re-processing funclet trees. The callers that rewrite the IR as they
/// go take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) { … }

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) { … }
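// Illustrative sketch (not part of this file's logic): the core rewrite that
// HandleCallsInBlockInlinedThroughInvoke performs on a throwing call can be
// expressed with the existing llvm::changeToInvokeAndSplitBasicBlock utility
// from Transforms/Utils/Local.h. The skip conditions below are a simplified
// assumption, not the exact set of checks used here:
//
//   for (Instruction &I : llvm::make_early_inc_range(*BB)) {
//     auto *CI = dyn_cast<CallInst>(&I);
//     // Leave alone calls that cannot throw.
//     if (!CI || CI->doesNotThrow())
//       continue;
//     // Split the block after the call and replace the call with an invoke
//     // whose unwind edge points at the caller's unwind destination.
//     changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
//     break; // The remainder of BB now lives in the split block.
//   }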
/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) { … }

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                               ClonedCodeInfo &InlinedCodeInfo) { … }

static bool haveCommonPrefix(MDNode *MIBStackContext,
                             MDNode *CallsiteStackContext) { … }

static void removeMemProfMetadata(CallBase *Call) { … }

static void removeCallsiteMetadata(CallBase *Call) { … }

static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList) { … }

// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
// the call that was inlined.
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD) { … }

// Update memprof related metadata (!memprof and !callsite) based on the
// inlining of Callee into the callsite at CB. The updates include merging the
// inlined callee's callsite metadata with that of the inlined call,
// and moving the subset of any memprof contexts to the inlined callee
// allocations if they match the new inlined call stack.
static void
propagateMemProfMetadata(Function *Callee, CallBase &CB,
                         bool ContainsMemProfMetadata,
                         const ValueMap<const Value *, WeakTrackingVH> &VMap) { … }

/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
                                      Function::iterator FEnd) { … }

/// Bundle operands of the inlined function must be added to inlined call
/// sites.
static void PropagateOperandBundles(Function::iterator InlinedBB,
                                    Instruction *CallSiteEHPad) { … }

namespace {

/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of
/// the metadata, putting the two versions in separate scope domains.
class ScopedAliasMetadataDeepCloner { … };

} // namespace

ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
    const Function *F) { … }

void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { … }

void ScopedAliasMetadataDeepCloner::clone() { … }

void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
                                          Function::iterator FEnd) { … }
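// Illustrative sketch (assumptions flagged): the scoped-alias machinery above
// and AddAliasScopeMetadata below both build !alias.scope / !noalias metadata
// out of anonymous scope nodes grouped under a domain. Creating a fresh domain
// and scope and attaching them to memory accesses looks roughly like this; the
// variable names (Ctx, I, Other) are hypothetical:
//
//   MDBuilder MDB(Ctx);
//   MDNode *Domain = MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
//   MDNode *Scope = MDB.createAnonymousAliasScope(Domain, "noalias arg");
//   // Accesses derived from the noalias argument are tagged with the scope...
//   I->setMetadata(LLVMContext::MD_alias_scope,
//                  MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
//                                      MDNode::get(Ctx, Scope)));
//   // ...while provably unrelated accesses get it added to their !noalias list.
//   Other->setMetadata(LLVMContext::MD_noalias,
//                      MDNode::concatenate(Other->getMetadata(LLVMContext::MD_noalias),
//                                          MDNode::get(Ctx, Scope)));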
/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR,
                                  ClonedCodeInfo &InlinedFunctionInfo) { … }

static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
                                                   ReturnInst *End) { … }

// Add attributes from CB params and Fn attributes that can always be
// propagated to the corresponding argument / inner callbases.
static void AddParamAndFnBasicAttributes(const CallBase &CB,
                                         ValueToValueMapTy &VMap,
                                         ClonedCodeInfo &InlinedFunctionInfo) { … }

// Only allow these whitelisted attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, i.e. attributes such as signext and zeroext.

// Attributes that are always okay to propagate, because violating them is
// immediate UB.
static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) { … }

// Attributes that need additional checks as propagating them may change
// behavior or cause new UB.
static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { … }

static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
                                ClonedCodeInfo &InlinedFunctionInfo) { … }

/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { … }

static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
                                    Module *M, BasicBlock *InsertBlock,
                                    InlineFunctionInfo &IFI,
                                    Function *CalledFunc) { … }

/// When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Type *ByValType, Value *Arg,
                                  Instruction *TheCall,
                                  const Function *CalledFunc,
                                  InlineFunctionInfo &IFI,
                                  MaybeAlign ByValAlignment) { … }

// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) { … }

// Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) { … }

/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
/// block. Allocas used in inalloca calls and allocas of dynamic array size
/// cannot be static.
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) { … }

/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
                               LLVMContext &Ctx,
                               DenseMap<const MDNode *, MDNode *> &IANodes) { … }

/// Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                             Instruction *TheCall,
                             bool CalleeHasDebugInfo) { … }

#undef DEBUG_TYPE
#define DEBUG_TYPE …

/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
                                                 const CallBase &CB) { … }

static void trackInlinedStores(Function::iterator Start,
                               Function::iterator End, const CallBase &CB) { … }
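// Illustrative sketch (assumptions flagged): the debug-location fixup above
// (inlineDebugLoc / fixupLineNumbers) hinges on rewriting each cloned
// instruction's DILocation so that its inlined-at chain ends at the call site.
// With DebugLoc::appendInlinedAt and DILocation::get, rebuilding one location
// looks roughly like this; OrigDL, InlinedAt, Ctx and IANodes mirror the
// parameters of inlineDebugLoc, and ClonedInst is hypothetical:
//
//   DILocation *IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
//   DebugLoc NewDL = DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
//                                    OrigDL.getScope(), IA);
//   ClonedInst->setDebugLoc(NewDL);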
/// Update inlined instructions' DIAssignID metadata. We need to do this
/// otherwise a function inlined more than once into the same function
/// will cause DIAssignID to be shared by many instructions.
static void fixupAssignments(Function::iterator Start,
                             Function::iterator End) { … }

#undef DEBUG_TYPE
#define DEBUG_TYPE …

/// Update the block frequencies of the caller after a callee has been inlined.
///
/// Each block cloned into the caller has its block frequency scaled by the
/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
/// callee's entry block gets the same frequency as the callsite block and the
/// relative frequencies of all cloned blocks remain the same after cloning.
static void updateCallerBFI(BasicBlock *CallSiteBlock,
                            const ValueToValueMapTy &VMap,
                            BlockFrequencyInfo *CallerBFI,
                            BlockFrequencyInfo *CalleeBFI,
                            const BasicBlock &CalleeEntryBlock) { … }

/// Update the branch metadata for cloned call instructions.
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
                              const ProfileCount &CalleeEntryCount,
                              const CallBase &TheCall, ProfileSummaryInfo *PSI,
                              BlockFrequencyInfo *CallerBFI) { … }

void llvm::updateProfileCallee(
    Function *Callee, int64_t EntryDelta,
    const ValueMap<const Value *, WeakTrackingVH> *VMap) { … }

/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
/// result is implicitly consumed by a call to retainRV or claimRV immediately
/// after the call. This function inlines the retainRV/claimRV calls.
///
/// There are three cases to consider:
///
/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
///    object in the callee return block, the autoreleaseRV call and the
///    retainRV/claimRV call in the caller cancel out. If the call in the
///    caller is a claimRV call, a call to objc_release is emitted.
///
/// 2. If there is a call in the callee return block that doesn't have operand
///    bundle "clang.arc.attachedcall", the operand bundle on the original call
///    is transferred to the call in the callee.
///
/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller
///    is a retainRV call.
static void
inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
                           const SmallVectorImpl<ReturnInst *> &Returns) { … }
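// Illustrative sketch (assumptions flagged): the frequency scaling described
// above in updateCallerBFI boils down to multiplying each cloned block's
// callee frequency by CallSiteFreq/CalleeEntryFreq. Expressed directly with
// the BlockFrequencyInfo API (OrigBB, ClonedBB and the surrounding variables
// are hypothetical):
//
//   uint64_t CallSiteFreq =
//       CallerBFI->getBlockFreq(CallSiteBlock).getFrequency();
//   uint64_t EntryFreq =
//       CalleeBFI->getBlockFreq(&CalleeEntryBlock).getFrequency();
//   uint64_t OrigFreq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
//   // Guard against a zero entry frequency; a real implementation would also
//   // need to worry about overflow in the multiplication.
//   uint64_t Scaled =
//       EntryFreq ? OrigFreq * CallSiteFreq / EntryFreq : CallSiteFreq;
//   CallerBFI->setBlockFreq(ClonedBB, BlockFrequency(Scaled));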
// In contextual profiling, when an inline succeeds, we want to remap the
// indices of the callee into the index space of the caller. We can't just
// leave them as-is because the same callee may appear in other places in this
// caller (other callsites), and its (the callee's) counters and sub-contextual
// profile tree would potentially differ.
// Not all BBs of the callee may survive the opportunistic DCE InlineFunction
// does (same goes for callsites in the callee).
// We will return a pair of vectors, one for basic block IDs and one for
// callsites. For such a vector V, V[Idx] will be -1 if the callee
// instrumentation with index Idx did not survive inlining, and a new value
// otherwise.
// This function will update the caller's instrumentation intrinsics
// accordingly, mapping indices as described above. We also replace the "name"
// operand because we use it to distinguish between "own" instrumentation and
// "from callee" instrumentation when performing the traversal of the CFG of
// the caller. We traverse depth-first from the callsite's BB and up to the
// point we hit BBs owned by the caller.
// The return values will then be used to update the contextual profile.
// Note: we only update the "name" and "index" operands in the instrumentation
// intrinsics; we leave the hash and total number of indices as-is, it's not
// worth updating those.
static const std::pair<std::vector<int64_t>, std::vector<int64_t>>
remapIndices(Function &Caller, BasicBlock *StartBB,
             PGOContextualProfile &CtxProf, uint32_t CalleeCounters,
             uint32_t CalleeCallsites) { … }

// Inline. If successful, update the contextual profile (if a valid one is
// given).
// The contextual profile data is organized in trees, as follows:
//  - each node corresponds to a function
//  - the root of each tree corresponds to an "entrypoint" - e.g. an RPC
//    handler on the server side
//  - the path from the root to a node is a particular call path
//  - the counters stored in a node are counter values observed in that
//    particular call path ("context")
//  - the edges between nodes are annotated with callsite IDs.
//
// Updating the contextual profile after an inlining means, at a high level,
// copying over the data of the callee, **intentionally without any value
// scaling**, and copying over the callees of the inlined callee.
llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
                                        PGOContextualProfile &CtxProf,
                                        bool MergeAttributes,
                                        AAResults *CalleeAAR,
                                        bool InsertLifetime,
                                        Function *ForwardVarArgsTo) { … }

/// This function inlines the called function into the basic block of the
/// caller. This returns a failed InlineResult if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
/// though.
///
/// Note that this only does one level of inlining. For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C'
/// now exists in the instruction stream. Similarly this will inline a
/// recursive function by one level.
llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
                                        bool MergeAttributes,
                                        AAResults *CalleeAAR,
                                        bool InsertLifetime,
                                        Function *ForwardVarArgsTo) { … }
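// Illustrative sketch (assumptions flagged): a minimal way a transform could
// drive the non-contextual-profile overload declared above. The surrounding
// set-up (Callee, NumInlined) is hypothetical, and a real pass would consult
// the inline cost model and the InlineResult diagnostics rather than inlining
// every direct call unconditionally:
//
//   InlineFunctionInfo IFI;
//   for (User *U : llvm::make_early_inc_range(Callee->users()))
//     if (auto *CB = dyn_cast<CallBase>(U))
//       if (CB->getCalledFunction() == Callee &&
//           InlineFunction(*CB, IFI, /*MergeAttributes=*/true).isSuccess())
//         ++NumInlined;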