//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/CodeGenPrepare.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE …

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumMemoryInstsPhiCreated,
          "Number of phis created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumMemoryInstsSelectCreated,
          "Number of selects created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
          "Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");

static cl::opt<bool> DisableBranchOpts(
    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool>
    DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
                  cl::desc("Disable GC optimizations in CodeGenPrepare"));

static cl::opt<bool>
    DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
                          cl::init(false),
                          cl::desc("Disable select to branch conversion."));

static cl::opt<bool>
    AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
                      cl::desc("Address sinking in CGP using GEPs."));

static cl::opt<bool>
    EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
                        cl::desc("Enable sinking and/cmp into branches."));

static cl::opt<bool> DisableStoreExtract(
    "disable-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> StressStoreExtract(
    "stress-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableExtLdPromotion(
    "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

static cl::opt<bool> StressExtLdPromotion(
    "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

static cl::opt<bool> DisablePreheaderProtect(
    "disable-preheader-prot", cl::Hidden, cl::init(false),
    cl::desc("Disable protection against removing loop preheaders"));

static cl::opt<bool> ProfileGuidedSectionPrefix(
    "profile-guided-section-prefix", cl::Hidden, cl::init(true),
    cl::desc("Use profile info to add section prefix for hot/cold "
             "functions"));

static cl::opt<bool> ProfileUnknownInSpecialSection(
    "profile-unknown-in-special-section", cl::Hidden,
    cl::desc("In a profiling mode like sampleFDO, if a function doesn't have "
             "a profile, we cannot tell for sure that the function is cold, "
             "because it may be a newly added function that has never been "
             "sampled. With the flag enabled, the compiler can put such "
             "profile-unknown functions into a special section, so the "
             "runtime system can choose to handle them differently from the "
             ".text section, to save RAM for example."));

static cl::opt<bool> BBSectionsGuidedSectionPrefix(
    "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
    cl::desc("Use the basic-block-sections profile to determine the text "
             "section prefix for hot functions. Functions with "
             "basic-block-sections profile will be placed in `.text.hot` "
             "regardless of their FDO profile info. Other functions won't be "
             "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
             "profiles."));

static cl::opt<uint64_t> FreqRatioToSkipMerge(
    "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
    cl::desc("Skip merging empty blocks if (frequency of empty block) / "
             "(frequency of destination block) is greater than this ratio"));

static cl::opt<bool> ForceSplitStore(
    "force-split-store", cl::Hidden, cl::init(false),
    cl::desc("Force store splitting no matter what the target query says."));

static cl::opt<bool> EnableTypePromotionMerge(
    "cgp-type-promotion-merge", cl::Hidden,
    cl::desc("Enable merging of redundant sexts when one is dominating"
             " the other."),
    cl::init(true));

static cl::opt<bool> DisableComplexAddrModes(
    "disable-complex-addr-modes", cl::Hidden, cl::init(false),
    cl::desc("Disables combining addressing modes with different parts "
             "in optimizeMemoryInst."));

static cl::opt<bool>
    AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
                    cl::desc("Allow creation of Phis in Address sinking."));

static cl::opt<bool> AddrSinkNewSelects(
    "addr-sink-new-select", cl::Hidden, cl::init(true),
    cl::desc("Allow creation of selects in Address sinking."));

static cl::opt<bool> AddrSinkCombineBaseReg(
    "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of BaseReg field in Address sinking."));

static cl::opt<bool> AddrSinkCombineBaseGV(
    "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of BaseGV field in Address sinking."));

static cl::opt<bool> AddrSinkCombineBaseOffs(
    "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of BaseOffs field in Address sinking."));

static cl::opt<bool> AddrSinkCombineScaledReg(
    "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of ScaledReg field in Address sinking."));

static cl::opt<bool>
    EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
                         cl::init(true),
                         cl::desc("Enable splitting large offset of GEP."));

static cl::opt<bool> EnableICMP_EQToICMP_ST(
    "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
    cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));

static cl::opt<bool>
    VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
                     cl::desc("Enable BFI update verification for "
                              "CodeGenPrepare."));

static cl::opt<bool>
    OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
                     cl::desc("Enable converting phi types in CodeGenPrepare"));

static cl::opt<unsigned>
    HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
                            cl::desc("Least BB number of huge function."));

static cl::opt<unsigned>
    MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
                          cl::Hidden,
                          cl::desc("Max number of address users to look at"));

static cl::opt<bool>
    DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
                      cl::desc("Disable elimination of dead PHI nodes."));

namespace {

enum ExtType { … };

enum ModifyDT { … };

using SetOfInstrs = …;
using TypeIsSExt = …;
using InstrToOrigTy = …;
using SExts = …;
using ValueToSExts = …;

class TypePromotionTransaction;

class CodeGenPrepare { … };

class CodeGenPrepareLegacyPass : public FunctionPass { … };

} // end anonymous namespace

char CodeGenPrepareLegacyPass::ID = …;

bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) { … }

INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
                      "Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
                    "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPrepareLegacyPass() { … }

PreservedAnalyses CodeGenPreparePass::run(Function &F,
                                          FunctionAnalysisManager &AM) { … }

bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) { … }

bool CodeGenPrepare::_run(Function &F) { … }

bool CodeGenPrepare::eliminateAssumptions(Function &F) { … }

/// An instruction is about to be deleted, so remove all references to it in
/// our GEP-tracking data structures.
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { … }

// Verify BFI has been updated correctly by recomputing BFI and comparing them.
void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { … }

/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) { … }

/// Find a destination block from BB if BB is a mergeable empty block.
BasicBlock *
CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { … }

/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { … }

bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
                                                   BasicBlock *DestBB,
                                                   bool isPreheader) { … }

/// Return true if we can merge BB into DestBB if there is a single
/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const { … }

/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static void replaceAllUsesWith(Value *Old, Value *New,
                               SmallSet<BasicBlock *, 32> &FreshBBs,
                               bool IsHuge) { … }

/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { … }

// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate
// calls.
static void computeBaseDerivedRelocateMap(
    const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
    MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
        &RelocateInstMap) { … }

// Accepts a GEP and extracts the operands into a vector provided they're all
// small integer constants.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
                                          SmallVectorImpl<Value *> &OffsetV) { … }

// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and applies it.
static bool
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
                          const SmallVectorImpl<GCRelocateInst *> &Targets) { … }

// Turns this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = relocate(%tok, i32 4, i32 4)
// %ptr' = relocate(%tok, i32 4, i32 5)
// %val = load %ptr'
//
// into this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = gc.relocate(%tok, i32 4, i32 4)
// %ptr' = gep %base' + 15
// %val = load %ptr'
bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) { … }

/// Sink the specified cast instruction into its user blocks.
static bool SinkCast(CastInst *CI) { … }

/// If the specified cast instruction is a noop copy (e.g. it's casting from
/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
/// reduce the number of virtual registers that must be created and coalesced.
///
/// Return true if any changes are made.
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
                                       const DataLayout &DL) { … }

// Match a simple increment by constant operation. Note that if a sub is
// matched, the step is negated (as if the step had been canonicalized to
// an add, even though we leave the instruction alone.)
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
                           Constant *&Step) { … }

/// If given \p PN is an induction variable with value IVInc coming from the
/// backedge, and on each iteration it gets increased by Step, return pair
/// <IVInc, Step>. Otherwise, return std::nullopt.
static std::optional<std::pair<Instruction *, Constant *>>
getIVIncrement(const PHINode *PN, const LoopInfo *LI) { … }

static bool isIVIncrement(const Value *V, const LoopInfo *LI) { … }

bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
                                                 Value *Arg0, Value *Arg1,
                                                 CmpInst *Cmp,
                                                 Intrinsic::ID IID) { … }

/// Match special-case patterns that check for unsigned add overflow.
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
                                                   BinaryOperator *&Add) { … }

/// Try to combine the compare into a call to the llvm.uadd.with.overflow
/// intrinsic. Return true if any changes were made.
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
                                               ModifyDT &ModifiedDT) { … }

bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
                                               ModifyDT &ModifiedDT) { … }

/// Sink the given CmpInst into user blocks to reduce the number of virtual
/// registers that must be created and coalesced. This is a clear win except on
/// targets with multiple condition code registers (PowerPC), where it might
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { … }

/// For a pattern like:
///
///   DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
///   ...
/// DomBB:
///   ...
///   br DomCond, TrueBB, CmpBB
/// CmpBB: (with DomBB being the single predecessor)
///   ...
///   Cmp = icmp eq CmpOp0, CmpOp1
///   ...
///
/// this would use two comparisons on targets where the lowering of icmp
/// sgt/slt is different from the lowering of icmp eq (PowerPC). This function
/// tries to convert 'Cmp = icmp eq CmpOp0, CmpOp1' to
/// 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'. After that, DomCond and Cmp can use
/// the same comparison, saving one comparison.
///
/// Return true if any changes are made.
static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
                                       const TargetLowering &TLI) { … }

/// Many architectures use the same instruction for both subtract and cmp. Try
/// to swap cmp operands to match subtract operations to allow for CSE.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) { … }

static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
                                  const DataLayout &DL) { … }

static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
                                                  const LoopInfo *LI,
                                                  Value *&RemAmtOut,
                                                  PHINode *&LoopIncrPNOut) { … }

// Try to transform:
//
// for(i = Start; i < End; ++i)
//   Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
//
// ->
//
// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
// for(i = Start; i < End; ++i, ++rem)
//   Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
//
// Currently only implemented for `IncrLoopInvariant` being zero.
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
                                    const LoopInfo *LI,
                                    SmallSet<BasicBlock *, 32> &FreshBBs,
                                    bool IsHuge) { … }

bool CodeGenPrepare::optimizeURem(Instruction *Rem) { … }

bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { … }

/// Duplicate and sink the given 'and' instruction into user blocks where it is
/// used in a compare to allow isel to generate better code for targets where
/// this operation can be combined.
///
/// Return true if any changes are made.
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
                                  SetOfInstrs &InsertedInsts) { … }

/// Check if the candidates could be combined with a shift instruction, which
/// includes:
/// 1. Truncate instruction
/// 2. And instruction where the imm is a mask of the low bits:
///    imm & (imm+1) == 0
static bool isExtractBitsCandidateUse(Instruction *User) { … }

/// Sink both the shift and truncate instructions to the block of the
/// truncate's user.
static bool
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
                     DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
                     const TargetLowering &TLI, const DataLayout &DL) { … }

/// Sink the shift *right* instruction into user blocks if the uses could
/// potentially be combined with this shift instruction to generate a
/// BitExtract instruction. It will only be applied if the architecture
/// supports a BitExtract instruction. Here is an example:
/// BB1:
///   %x.extract.shift = lshr i64 %arg1, 32
/// BB2:
///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
/// ==>
///
/// BB2:
///   %x.extract.shift.1 = lshr i64 %arg1, 32
///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
///
/// CodeGen will recognize the pattern in BB2 and generate a BitExtract
/// instruction.
/// Return true if any changes are made.
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
                                const TargetLowering &TLI,
                                const DataLayout &DL) { … }

/// If counting leading or trailing zeros is an expensive operation and a zero
/// input is defined, add a check for zero to avoid calling the intrinsic.
///
/// We want to transform:
///   %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
///
/// into:
/// entry:
///   %cmpz = icmp eq i64 %A, 0
///   br i1 %cmpz, label %cond.end, label %cond.false
/// cond.false:
///   %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
///   br label %cond.end
/// cond.end:
///   %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
///
/// If the transform is performed, return true and set ModifiedDT to true.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
                                  const TargetLowering *TLI,
                                  const DataLayout *DL, ModifyDT &ModifiedDT,
                                  SmallSet<BasicBlock *, 32> &FreshBBs,
                                  bool IsHugeFunc) { … }

bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { … }

static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
                                          const CallInst *CI) { … }

/// Look for opportunities to duplicate return instructions to the predecessor
/// to enable tail call optimizations. The case it is currently looking for is
/// the following one. Known intrinsics or library functions that may be tail
/// called are taken into account as well.
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
/// @endcode
///
/// =>
///
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
/// @endcode
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
                                                ModifyDT &ModifiedDT) { … }

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

namespace {

/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode { … };

#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}
#endif

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ExtAddrMode::print(raw_ostream &OS) const {
  bool NeedPlus = false;
  OS << "[";
  if (InBounds)
    OS << "inbounds ";
  if (BaseGV) {
    OS << "GV:";
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
    NeedPlus = true;
  }
  if (BaseOffs) {
    OS << (NeedPlus ? " + " : "") << BaseOffs;
    NeedPlus = true;
  }
  if (BaseReg) {
    OS << (NeedPlus ? " + " : "") << "Base:";
    BaseReg->printAsOperand(OS, /*PrintType=*/false);
    NeedPlus = true;
  }
  if (Scale) {
    OS << (NeedPlus ? " + " : "") << Scale << "*";
    ScaledReg->printAsOperand(OS, /*PrintType=*/false);
  }
  OS << ']';
}

LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
  print(dbgs());
  dbgs() << '\n';
}
#endif

} // end anonymous namespace

namespace {

/// This class provides transaction based operation on the IR.
/// Every change made through this class is recorded in the internal state and
/// can be undone (rollback) until commit is called.
/// CGP does not check if instructions could be speculatively executed when
/// moved.
/// Preserving the original location would pessimize the debugging
/// experience, as well as negatively impact the quality of sample PGO.
class TypePromotionTransaction { … };

} // end anonymous namespace

void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
                                          Value *NewVal) { … }

void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                Value *NewVal) { … }

void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
                                                  Value *New) { … }

void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { … }

Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) { … }

Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
                                            Type *Ty) { … }

Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
                                            Type *Ty) { … }

TypePromotionTransaction::ConstRestorationPt
TypePromotionTransaction::getRestorationPoint() const { … }

bool TypePromotionTransaction::commit() { … }

void TypePromotionTransaction::rollback(
    TypePromotionTransaction::ConstRestorationPt Point) { … }

namespace {

/// A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher { … };

class PhiNodeSet;

/// An iterator for PhiNodeSet.
class PhiNodeSetIterator { … };

/// Keeps a set of PHINodes.
///
/// This is a minimal set implementation for a specific use case:
/// It is very fast when there are very few elements, but also provides good
/// performance when there are many. It is similar to SmallPtrSet, but also
/// provides iteration by insertion order, which is deterministic and stable
/// across runs. It is also similar to SmallSetVector, but supports removing
/// elements in O(1) time. This is achieved by not actually removing the
/// element from the underlying vector, which comes at the cost of using more
/// memory, but that is fine, since PhiNodeSets are used as short-lived
/// objects.
class PhiNodeSet { … };

PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
    : … { … }

PHINode *PhiNodeSetIterator::operator*() const { … }

PhiNodeSetIterator &PhiNodeSetIterator::operator++() { … }

bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const { … }

bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { … }

/// Keep track of simplification of Phi nodes.
/// Accept the set of all phi nodes and erase a phi node from this set
/// if it is simplified.
class SimplificationTracker { … };

/// A helper class for combining addressing modes.
class AddressingModeCombiner { … };

} // end anonymous namespace

/// Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) { … }

/// This is a little filter, which returns true if an addressing computation
/// involving I might be folded into a load/store accessing it.
/// This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) { … }

/// Check whether or not \p Val is a legal instruction for \p TLI.
/// \note \p Val is assumed to be the product of some type promotion.
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
/// to be legal, as the non-promoted value would have had the same state.
static bool isPromotedInstructionLegal(const TargetLowering &TLI,
                                       const DataLayout &DL, Value *Val) { … }

namespace {

/// Helper class to perform type promotion.
class TypePromotionHelper { … };

} // end anonymous namespace

bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
                                        Type *ConsideredExtType,
                                        const InstrToOrigTy &PromotedInsts,
                                        bool IsSExt) { … }

TypePromotionHelper::Action TypePromotionHelper::getAction(
    Instruction *Ext, const SetOfInstrs &InsertedInsts,
    const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { … }

Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
    Instruction *SExt, TypePromotionTransaction &TPT,
    InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
    SmallVectorImpl<Instruction *> *Exts,
    SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { … }

Value *TypePromotionHelper::promoteOperandForOther(
    Instruction *Ext, TypePromotionTransaction &TPT,
    InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
    SmallVectorImpl<Instruction *> *Exts,
    SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
    bool IsSExt) { … }

/// Check whether or not promoting an instruction to a wider type is
/// profitable.
/// \p NewCost gives the cost of extension instructions created by the
/// promotion.
/// \p OldCost gives the cost of extension instructions before the promotion
/// plus the number of instructions that have been matched in the addressing
/// mode because of the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
bool AddressingModeMatcher::isPromotionProfitable(
    unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { … }

/// Given an instruction or constant expr, see if we can fold the operation
/// into the addressing mode. If so, update the addressing mode and return
/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information about whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
/// mode because it has been moved away.
/// Thus AddrInst must not be added to the matched instructions.
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth,
                                               bool *MovedAway) { … }

/// If we can, try to add the value of 'Addr' into the current addressing mode.
/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
/// unmodified. This assumes that Addr is either a pointer type or intptr_t
/// for the target.
///
bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { … }

/// Check to see if all uses of OpVal by the specified inline asm call are due
/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                    const TargetLowering &TLI,
                                    const TargetRegisterInfo &TRI) { … }

/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
/// Add accessed addresses and types to MemoryUses.
static bool FindAllMemoryUses(
    Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
    SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
    const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
    BlockFrequencyInfo *BFI, unsigned &SeenInsts) { … }

static bool FindAllMemoryUses(
    Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
    const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
    ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { … }

/// Return true if Val is already known to be live at the use site that we're
/// folding it into. If so, there is no cost to include it in the addressing
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
/// instruction already.
bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
                                                   Value *KnownLive1,
                                                   Value *KnownLive2) { … }

/// It is possible for the addressing mode of the machine to fold the specified
/// instruction into a load or store that ultimately uses it.
/// However, the specified instruction has multiple uses.
/// Given this, it may actually increase register pressure to fold it
/// into the load. For example, consider this code:
///
/// X = ...
/// Y = X+1
/// use(Y)   -> nonload/store
/// Z = Y+1
/// load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
    Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { … }

/// Return true if the specified value is defined in a different basic block
/// than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) { … }

/// Sink the addressing mode computation immediately before MemoryInst if doing
/// so can be done without increasing register pressure. The need for the
/// register pressure constraint means this can end up being an all or nothing
/// decision for all uses of the same addressing computation.
///
/// Load and Store Instructions often have addressing modes that can do
/// significant amounts of computation. As such, instruction selection will try
/// to get the load or store to do as much computation as possible for the
/// program. The problem is that isel can only see within a single block. As
/// such, we sink as much legal addressing mode work into the block as
/// possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands. It's also used to sink addressing computations feeding into cold
/// call sites into their (cold) basic block.
///
/// The motivation for handling sinking into cold blocks is that doing so can
/// both enable other address mode sinking (by satisfying the register pressure
/// constraint above), and reduce register pressure globally (by removing the
/// addressing mode computation from the fast path entirely).
bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        Type *AccessTy, unsigned AddrSpace) { … }

/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
/// only handle a 2 operand GEP in the same basic block or a splat constant
/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
/// index.
///
/// If the existing GEP has a vector base pointer that is splat, we can look
/// through the splat to find the scalar pointer. If we can't find a scalar
/// pointer there's nothing we can do.
///
/// If we have a GEP with more than 2 indices where the middle indices are all
/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
///
/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
/// followed by a GEP with an all zeroes vector index. This will enable
/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
/// zero index.
bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
                                               Value *Ptr) { … }

/// If there are any memory operands, use OptimizeMemoryInst to sink their
/// address computing into the block when possible / profitable.
bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { … }

/// Check if all the uses of \p Val are equivalent (or free) zero or
/// sign extensions.
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { … }

/// Try to speculatively promote extensions in \p Exts and continue
/// promoting through newly promoted operands recursively as far as doing so is
/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
/// When some promotion happened, \p TPT contains the proper state to revert
/// them.
///
/// \return true if some promotion happened, false otherwise.
bool CodeGenPrepare::tryToPromoteExts(
    TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
    SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
    unsigned CreatedInstsCost) { … }

/// Merge redundant sexts when one dominates the other.
bool CodeGenPrepare::mergeSExts(Function &F) { … }

// Split large data structures so that the GEPs accessing them can have
// smaller offsets and can therefore be sunk to the same blocks as their users.
// For example, a large struct starting from %base is split into two parts
// where the second part starts from %new_base.
//
// Before:
// BB0:
//   %base =
//
// BB1:
//   %gep0 = gep %base, off0
//   %gep1 = gep %base, off1
//   %gep2 = gep %base, off2
//
// BB2:
//   %load1 = load %gep0
//   %load2 = load %gep1
//   %load3 = load %gep2
//
// After:
// BB0:
//   %base =
//   %new_base = gep %base, off0
//
// BB1:
//   %new_gep0 = %new_base
//   %new_gep1 = gep %new_base, off1 - off0
//   %new_gep2 = gep %new_base, off2 - off0
//
// BB2:
//   %load1 = load i32, i32* %new_gep0
//   %load2 = load i32, i32* %new_gep1
//   %load3 = load i32, i32* %new_gep2
//
// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
// their offsets are small enough to fit into the addressing mode.
bool CodeGenPrepare::splitLargeGEPOffsets() { … }

bool CodeGenPrepare::optimizePhiType(
    PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
    SmallPtrSetImpl<Instruction *> &DeletedInstrs) { … }

bool CodeGenPrepare::optimizePhiTypes(Function &F) { … }

/// Return true, if an ext(load) can be formed from an extension in
/// \p MovedExts.
bool CodeGenPrepare::canFormExtLd(
    const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
    Instruction *&Inst, bool HasPromoted) { … }

/// Move a zext or sext fed by a load into the same basic block as the load,
/// unless conditions are unfavorable. This allows SelectionDAG to fold the
/// extend into the load.
///
/// E.g.,
/// \code
/// %ld = load i32* %addr
/// %add = add nuw i32 %ld, 4
/// %zext = zext i32 %add to i64
/// \endcode
/// =>
/// \code
/// %ld = load i32* %addr
/// %zext = zext i32 %ld to i64
/// %add = add nuw i64 %zext, 4
/// \endcode
/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
/// allows us to match zext(load i32*) to i64.
///
/// Also, try to promote the computations used to obtain a sign-extended
/// value used in memory accesses.
/// E.g.,
/// \code
/// a = add nsw i32 b, 3
/// d = sext i32 a to i64
/// e = getelementptr ..., i64 d
/// \endcode
/// =>
/// \code
/// f = sext i32 b to i64
/// a = add nsw i64 f, 3
/// e = getelementptr ..., i64 a
/// \endcode
///
/// \p Inst[in/out] the extension may be modified during the process if some
/// promotions apply.
bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { … }

// Perform address type promotion if doing so is profitable.
// If AllowPromotionWithoutCommonHeader == false, we should find other sext
// instructions that sign extended the same initial value. However, if
// AllowPromotionWithoutCommonHeader == true, we expect promoting the
// extension is just profitable.
bool CodeGenPrepare::performAddressTypePromotion(
    Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
    bool HasPromoted, TypePromotionTransaction &TPT,
    SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) { … }

bool CodeGenPrepare::optimizeExtUses(Instruction *I) { … }

// Find loads whose uses only use some of the loaded value's bits. Add an "and"
// just after the load if the target can fold this into one extload
// instruction, with the hope of eliminating some of the other later "and"
// instructions using the loaded value. "and"s that are made trivially
// redundant by the insertion of the new "and" are removed by this function,
// while others (e.g. those whose path from the load goes through a phi) are
// left for isel to potentially remove.
//
// For example:
//
// b0:
//   x = load i32
//   ...
// b1:
//   y = and x, 0xff
//   z = use y
//
// becomes:
//
// b0:
//   x = load i32
//   x' = and x, 0xff
//   ...
// b1:
//   z = use x'
//
// whereas:
//
// b0:
//   x1 = load i32
//   ...
// b1:
//   x2 = load i32
//   ...
// b2:
//   x = phi x1, x2
//   y = and x, 0xff
//
// becomes (after a call to optimizeLoadExt for each load):
//
// b0:
//   x1 = load i32
//   x1' = and x1, 0xff
//   ...
// b1:
//   x2 = load i32
//   x2' = and x2, 0xff
//   ...
// b2:
//   x = phi x1', x2'
//   y = and x, 0xff
bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { … }

/// Check if V (an operand of a select instruction) is an expensive instruction
/// that is only used once.
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { … }

/// Returns true if a SelectInst should be turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
                                                const TargetLowering *TLI,
                                                SelectInst *SI) { … }

/// If \p isTrue is true, return the true value of \p SI, otherwise return the
/// false value of \p SI. If the true/false value of \p SI is defined by any
/// select instructions in \p Selects, look through the defining select
/// instruction until the true/false value is not defined in \p Selects.
static Value *
getTrueOrFalseValue(SelectInst *SI, bool isTrue,
                    const SmallPtrSet<const Instruction *, 2> &Selects) { … }

bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { … }

bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) { … }

/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { … }

/// Some targets only accept certain types for splat inputs. For example a VDUP
/// in MVE takes a GPR (integer) register, and the instructions that
/// incorporate a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { … }

bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { … }

bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) { … }

bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) { … }

bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { … }

namespace {

/// Helper class to promote a scalar operation to a vector one.
/// This class is used to move an extractelement transition downward.
/// E.g.,
/// a = vector_op <2 x i32>
/// b = extractelement <2 x i32> a, i32 0
/// c = scalar_op b
/// store c
///
/// =>
/// a = vector_op <2 x i32>
/// c = vector_op a (equivalent to scalar_op on the related lane)
/// * d = extractelement <2 x i32> c, i32 0
/// * store d
/// Assuming both extractelement and store can be combined, we get rid of the
/// transition.
class VectorPromoteHelper { … };

} // end anonymous namespace

void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { … }

/// Some targets can do store(extractelement) with one instruction.
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { … }

/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
/// (store (or (zext (bitcast F to i32) to i64),
///            (shl (zext I to i64), 32)), addr)  -->
/// (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged stores can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8}, i32 store --> two i16 stores.
/// For pair of {i8, i8}, i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into
/// hoo.
/// void goo(const std::pair<int, float> &);
/// hoo() {
///   ...
///   goo(std::make_pair(tmp, ftmp));
///   ...
/// }
///
/// Although we already have similar splitting in DAG Combine, we duplicate
/// it in CodeGenPrepare to catch the case in which the pattern is spread
/// across multiple BBs. The logic in DAG Combine is kept to catch the case
/// generated during code expansion.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
                                const TargetLowering &TLI) { … }

// Return true if the GEP has two operands, the first operand is of a
// sequential type, and the second operand is a constant.
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) { … }

// Try unmerging GEPs to reduce liveness interference (register pressure)
// across IndirectBr edges. Since IndirectBr edges tend to touch on many
// blocks, reducing liveness interference across those edges benefits global
// register allocation. Currently handles only certain cases.
//
// For example, unmerge %GEPI and %UGEPI as below.
//
// ---------- BEFORE ----------
// SrcBlock:
//   ...
//   %GEPIOp = ...
//   ...
//   %GEPI = gep %GEPIOp, Idx
//   ...
//   indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
//   (* %GEPI is alive on the indirectbr edges due to other uses ahead)
//   (* %GEPIOp is alive on the indirectbr edges only because it's used by
//      %UGEPI)
//
// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
// ...
//
// DstBi:
//   ...
//   %UGEPI = gep %GEPIOp, UIdx
//   ...
// ---------------------------
//
// ---------- AFTER ----------
// SrcBlock:
//   ... (same as above)
//   (* %GEPI is still alive on the indirectbr edges)
//   (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
//      unmerging)
//   ...
//
// DstBi:
//   ...
//   %UGEPI = gep %GEPI, (UIdx-Idx)
//   ...
// ---------------------------
//
// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
// no longer alive on them.
//
// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting
// merging of GEPs in the first place in InstCombiner::visitGetElementPtrInst(),
// so as not to disable further simplifications and optimizations as a result
// of GEP merging.
//
// Note this unmerging may increase the length of the data flow critical path
// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a
// tradeoff between the register pressure and the length of the data-flow
// critical path. Restricting this to the uncommon IndirectBr case would
// minimize the impact of a potentially longer critical path, if any, and the
// impact on compile time.
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
                                             const TargetTransformInfo *TTI) { … }

static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
                           SmallSet<BasicBlock *, 32> &FreshBBs,
                           bool IsHugeFunc) { … }

bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) { … }

/// Given an OR instruction, check to see if this is a bitreverse
/// idiom. If so, insert the new intrinsic and return true.
bool CodeGenPrepare::makeBitReverse(Instruction &I) { … }

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) { … }

// Some CGP optimizations may move or alter what's computed in a block. Check
// whether a dbg.value intrinsic could be pointed at a more appropriate
// operand.
bool CodeGenPrepare::fixupDbgValue(Instruction *I) { … }

bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) { … }

// FIXME: should updating debug-info really cause the "changed" flag to fire,
// which can cause a function to be reprocessed?
bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) { … }

static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) { … }

static void DbgInserterHelper(DbgVariableRecord *DVR, Instruction *VI) { … }

// An llvm.dbg.value may be using a value before its definition, due to
// optimizations in this pass and others.
// Scan for such dbg.values, and rescue them by moving the dbg.value to
// immediately after the value definition.
// FIXME: Ideally this should never be necessary, and this has the potential
// to re-order dbg.value intrinsics.
bool CodeGenPrepare::placeDbgValues(Function &F) { … }

// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
// probes can be chained dependencies of other regular DAG nodes and block DAG
// combine optimizations.
bool CodeGenPrepare::placePseudoProbes(Function &F) { … }

/// Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { … }

/// Some targets prefer to split a conditional branch like:
/// \code
///   %0 = icmp ne i32 %a, 0
///   %1 = icmp ne i32 %b, 0
///   %or.cond = or i1 %0, %1
///   br i1 %or.cond, label %TrueBB, label %FalseBB
/// \endcode
/// into multiple branch instructions like:
/// \code
///   bb1:
///     %0 = icmp ne i32 %a, 0
///     br i1 %0, label %TrueBB, label %bb2
///   bb2:
///     %1 = icmp ne i32 %b, 0
///     br i1 %1, label %TrueBB, label %FalseBB
/// \endcode
/// This usually allows instruction selection to do even further optimizations
/// and combine the compare with the branch instruction. Currently this is
/// applied for targets which have "cheap" jump instructions.
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) { … }
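
// Illustrative sketch (an assumption for exposition, not taken from this
// file): scaleWeights() above is documented as scaling both branch weights
// down to fit into uint32_t. The standalone helper below shows one way such a
// scaling can be done while preserving the true/false ratio; the name
// scaleWeightsSketch and the exact scaling strategy are hypothetical, and the
// elided implementation of scaleWeights() may differ. <limits> and
// <algorithm> are already included above.
static inline void scaleWeightsSketch(uint64_t &NewTrue, uint64_t &NewFalse) {
  uint64_t Max = std::max(NewTrue, NewFalse);
  if (Max <= std::numeric_limits<uint32_t>::max())
    return; // Both weights already fit into uint32_t.
  // Divide both weights by the same factor so the larger one fits; integer
  // division keeps their ratio approximately intact.
  uint64_t Scale = Max / std::numeric_limits<uint32_t>::max() + 1;
  NewTrue /= Scale;
  NewFalse /= Scale;
}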