//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Peephole optimize the CFG. // //===----------------------------------------------------------------------===// #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> #include <climits> #include <cstddef> #include <cstdint> #include <iterator> #include <map> #include <optional> #include <set> #include <tuple> #include <utility> #include <vector> usingnamespacellvm; usingnamespacePatternMatch; #define DEBUG_TYPE … cl::opt<bool> llvm::RequireAndPreserveDomTree( "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " "into preserving DomTree,")); // Chosen as 2 so as to be cheap, but still to have enough power to fold // a select, so the "clamp" idiom (of a min followed by a max) will be caught. // To catch this, we need to fold a compare and a select, hence '2' being the // minimum reasonable default. static cl::opt<unsigned> PHINodeFoldingThreshold( "phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc( "Control the amount of phi node folding to perform (default = 2)")); static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold( "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)")); static cl::opt<bool> HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block")); static cl::opt<bool> HoistLoadsStoresWithCondFaulting( "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads/stores if the target supports " "conditional faulting")); static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold( "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditonal load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)")); static cl::opt<unsigned> HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting")); static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); static cl::opt<bool> HoistCondStores( "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes")); static cl::opt<bool> MergeCondStores( "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store")); static cl::opt<bool> MergeCondStoresAggressively( "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result")); static cl::opt<bool> SpeculateOneExpensiveInst( "speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed")); static cl::opt<unsigned> MaxSpeculationDepth( "max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); static cl::opt<int> MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through")); // Two is chosen to allow one negation and a logical combine. static cl::opt<unsigned> BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches")); static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier( "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present")); static cl::opt<bool> EnableMergeCompatibleInvokes( "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate")); static cl::opt<unsigned> MaxSwitchCasesPerResult( "max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select")); STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC( NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); STATISTIC(NumFoldValueComparisonIntoPredecessors, "Number of value comparisons folded into predecessor basic blocks"); STATISTIC(NumFoldBranchToCommonDest, "Number of branches folded into predecessor basic block"); STATISTIC( NumHoistCommonCode, "Number of common instruction 'blocks' hoisted up to the begin block"); STATISTIC(NumHoistCommonInstrs, "Number of common instructions hoisted up to the begin block"); STATISTIC(NumSinkCommonCode, "Number of common instruction 'blocks' sunk down to the end block"); STATISTIC(NumSinkCommonInstrs, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); STATISTIC(NumInvokes, "Number of invokes with empty resume blocks simplified into calls"); STATISTIC(NumInvokesMerged, "Number of invokes that were merged together"); STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed"); namespace { // The first field contains the value that the switch produces when a certain // case group is selected, and the second field is a vector containing the // cases composing the case group. SwitchCaseResultVectorTy; // The first field contains the phi node that generates a result of the switch // and the second field contains the value generated for a certain case in the // switch for that PHI. SwitchCaseResultsTy; /// ValueEqualityComparisonCase - Represents a case of a switch. struct ValueEqualityComparisonCase { … }; class SimplifyCFGOpt { … }; } // end anonymous namespace /// Return true if all the PHI nodes in the basic block \p BB /// receive compatible (identical) incoming values when coming from /// all of the predecessor blocks that are specified in \p IncomingBlocks. /// /// Note that if the values aren't exactly identical, but \p EquivalenceSet /// is provided, and *both* of the values are present in the set, /// then they are considered equal. static bool incomingValuesAreCompatible( BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks, SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) { … } /// Return true if it is safe to merge these two /// terminator instructions together. static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) { … } /// Update PHI nodes in Succ to indicate that there will now be entries in it /// from the 'NewPred' block. The values that will be flowing into the PHI nodes /// will be the same as those coming in from ExistPred, an existing predecessor /// of Succ. static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU = nullptr) { … } /// Compute an abstract "cost" of speculating the given instruction, /// which is assumed to be safe to speculate. TCC_Free means cheap, /// TCC_Basic means less cheap, and TCC_Expensive means prohibitively /// expensive. static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI) { … } /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. We don't handle /// the true generality of domination here, just a special case which works /// well enough for us. /// /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to /// see if V (which must be an instruction) and its recursive operands /// that do not dominate BB have a combined cost lower than Budget and /// are non-trapping. If both are true, the instruction is inserted into the /// set and true is returned. /// /// The cost for most non-trapping instructions is defined as 1 except for /// Select whose cost is 2. /// /// After this function returns, Cost is increased by the cost of /// V plus its non-dominating operands. If that cost is greater than /// Budget, false is returned and Cost is undefined. static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned Depth = 0) { … } /// Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. static ConstantInt *getConstantInt(Value *V, const DataLayout &DL) { … } namespace { /// Given a chain of or (||) or and (&&) comparison of a value against a /// constant, this will try to recover the information required for a switch /// structure. /// It will depth-first traverse the chain of comparison, seeking for patterns /// like %a == 12 or %a < 4 and combine them to produce a set of integer /// representing the different cases for the switch. /// Note that if the chain is composed of '||' it will build the set of elements /// that matches the comparisons (i.e. any of this value validate the chain) /// while for a chain of '&&' it will build the set elements that make the test /// fail. struct ConstantComparesGatherer { … }; } // end anonymous namespace static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU = nullptr) { … } /// Return true if the specified terminator checks /// to see if a value is equal to constant integer value. Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) { … } /// Given a value comparison instruction, /// decode all of the 'cases' that it represents and return the 'default' block. BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases( Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) { … } /// Given a vector of bb/value pairs, remove any entries /// in the list that match the specified block. static void eliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { … } /// Return true if there are any keys in C1 that exist in C2 as well. static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, std::vector<ValueEqualityComparisonCase> &C2) { … } // Set branch weights on SwitchInst. This sets the metadata if there is at // least one non-zero weight. static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights, bool IsExpected) { … } // Similar to the above, but for branch and select instructions that take // exactly 2 weights. static void setBranchWeights(Instruction *I, uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected) { … } /// If TI is known to be a terminator instruction and its block is known to /// only have a single predecessor block, check to see if that predecessor is /// also a value comparison with the same value, and if that comparison /// determines the outcome of this comparison. If so, simplify TI. This does a /// very limited form of jump threading. bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor( Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) { … } namespace { /// This class implements a stable ordering of constant /// integers that does not depend on their address. This is important for /// applications that sort ConstantInt's to ensure uniqueness. struct ConstantIntOrdering { … }; } // end anonymous namespace static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2) { … } /// Get Weights of a given terminator, the default weight is at the front /// of the vector. If TI is a conditional eq, we need to swap the branch-weight /// metadata. static void getBranchWeights(Instruction *TI, SmallVectorImpl<uint64_t> &Weights) { … } /// Keep halving the weights until all can fit in uint32_t. static void fitWeights(MutableArrayRef<uint64_t> Weights) { … } static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { … } bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding( Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) { … } /// The specified terminator is a value equality comparison instruction /// (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons /// on the same value. If so, and if safe to do so, fold them together. bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder) { … } // If we would need to insert a select that uses the value of this invoke // (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would // need to do this), we can't hoist the invoke, as there is nowhere to put the // select in this case. static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2) { … } // Get interesting characteristics of instructions that // `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of // instructions can be reordered across. enum SkipFlags { … }; static unsigned skippedInstrFlags(Instruction *I) { … } // Returns true if it is safe to reorder an instruction across preceding // instructions in a basic block. static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { … } static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); /// Helper function for hoistCommonCodeFromSuccessors. Return true if identical /// instructions \p I1 and \p I2 can and should be hoisted. static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI) { … } /// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical /// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in /// hoistCommonCodeFromSuccessors. e.g. The input: /// I1 DVRs: { x, z }, /// OtherInsts: { I2 DVRs: { x, y, z } } /// would result in hoisting only DbgVariableRecord x. static void hoistLockstepIdenticalDbgVariableRecords( Instruction *TI, Instruction *I1, SmallVectorImpl<Instruction *> &OtherInsts) { … } static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2) { … } /// If the target supports conditional faulting, /// we look for the following pattern: /// \code /// BB: /// ... /// %cond = icmp ult %x, %y /// br i1 %cond, label %TrueBB, label %FalseBB /// FalseBB: /// store i32 1, ptr %q, align 4 /// ... /// TrueBB: /// %maskedloadstore = load i32, ptr %b, align 4 /// store i32 %maskedloadstore, ptr %p, align 4 /// ... /// \endcode /// /// and transform it into: /// /// \code /// BB: /// ... /// %cond = icmp ult %x, %y /// %maskedloadstore = cload i32, ptr %b, %cond /// cstore i32 %maskedloadstore, ptr %p, %cond /// cstore i32 1, ptr %q, ~%cond /// br i1 %cond, label %TrueBB, label %FalseBB /// FalseBB: /// ... /// TrueBB: /// ... /// \endcode /// /// where cload/cstore are represented by llvm.masked.load/store intrinsics, /// e.g. /// /// \code /// %vcond = bitcast i1 %cond to <1 x i1> /// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0 /// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison) /// %maskedloadstore = bitcast <1 x i32> %v0 to i32 /// call void @llvm.masked.store.v1i32.p0 /// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond) /// %cond.not = xor i1 %cond, true /// %vcond.not = bitcast i1 %cond.not to <1 x i> /// call void @llvm.masked.store.v1i32.p0 /// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not) /// \endcode /// /// So we need to turn hoisted load/store into cload/cstore. static void hoistConditionalLoadsStores( BranchInst *BI, SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores, bool Invert) { … } static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI) { … } /// Hoist any common code in the successor blocks up into the block. This /// function guarantees that BB dominates all successors. If EqTermsOnly is /// given, only perform hoisting in case both blocks only contain a terminator. /// In that case, only the original BI will be replaced and selects for PHIs are /// added. bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly) { … } bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( Instruction *TI, Instruction *I1, SmallVectorImpl<Instruction *> &OtherSuccTIs) { … } // Check lifetime markers. static bool isLifeTimeMarker(const Instruction *I) { … } // TODO: Refine this. This should avoid cases like turning constant memcpy sizes // into variables. static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx) { … } // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common // instruction instead. For every value that would be required to be provided by // PHI node (because an operand varies in each input block), add to PHIOperands. static bool canSinkInstructions( ArrayRef<Instruction *> Insts, DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) { … } // Assuming canSinkInstructions(Blocks) has returned true, sink the last // instruction of every block in Blocks to their common successor, commoning // into one instruction. static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { … } namespace { // LockstepReverseIterator - Iterates through instructions // in a set of blocks in reverse order from the first non-terminator. // For example (assume all blocks have size n): // LockstepReverseIterator I([B1, B2, B3]); // *I-- = [B1[n], B2[n], B3[n]]; // *I-- = [B1[n-1], B2[n-1], B3[n-1]]; // *I-- = [B1[n-2], B2[n-2], B3[n-2]]; // ... class LockstepReverseIterator { … }; } // end anonymous namespace /// Check whether BB's predecessors end with unconditional branches. If it is /// true, sink any common code from the predecessors to BB. static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU) { … } namespace { struct CompatibleSets { … }; CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) { … } void CompatibleSets::insert(InvokeInst *II) { … } bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) { … } } // namespace // Merge all invokes in the provided set, all of which are compatible // as per the `CompatibleSets::shouldBelongToSameSet()`. static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes, DomTreeUpdater *DTU) { … } /// If this block is a `landingpad` exception handling block, categorize all /// the predecessor `invoke`s into sets, with all `invoke`s in each set /// being "mergeable" together, and then merge invokes in each set together. /// /// This is a weird mix of hoisting and sinking. Visually, it goes from: /// [...] [...] /// | | /// [invoke0] [invoke1] /// / \ / \ /// [cont0] [landingpad] [cont1] /// to: /// [...] [...] /// \ / /// [invoke] /// / \ /// [cont] [landingpad] /// /// But of course we can only do that if the invokes share the `landingpad`, /// edges invoke0->cont0 and invoke1->cont1 are "compatible", /// and the invoked functions are "compatible". static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { … } namespace { /// Track ephemeral values, which should be ignored for cost-modelling /// purposes. Requires walking instructions in reverse order. class EphemeralValueTracker { … }; } // namespace /// Determine if we can hoist sink a sole store instruction out of a /// conditional block. /// /// We are looking for code like the following: /// BrBB: /// store i32 %add, i32* %arrayidx2 /// ... // No other stores or function calls (we could be calling a memory /// ... // function). /// %cmp = icmp ult %x, %y /// br i1 %cmp, label %EndBB, label %ThenBB /// ThenBB: /// store i32 %add5, i32* %arrayidx2 /// br label EndBB /// EndBB: /// ... /// We are going to transform this into: /// BrBB: /// store i32 %add, i32* %arrayidx2 /// ... // /// %cmp = icmp ult %x, %y /// %add.add5 = select i1 %cmp, i32 %add, %add5 /// store i32 %add.add5, i32* %arrayidx2 /// ... /// /// \return The pointer to the value of the previous store if the store can be /// hoisted into the predecessor block. 0 otherwise. static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB) { … } /// Estimate the cost of the insertion(s) and check that the PHI nodes can be /// converted to selects. static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI) { … } static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, const TargetTransformInfo &TTI) { … } /// Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. Speculating /// instructions like this is most often not desirable. Instead, there is an MI /// pass which can do it with full awareness of the resource constraints. /// However, some cases are "obvious" and we should do directly. An example of /// this is speculating a single, reasonably cheap instruction. /// /// There is only one distinct advantage to flattening the CFG at the IR level: /// it makes very common but simplistic optimizations such as are common in /// instcombine and the DAG combiner more powerful by removing CFG edges and /// modeling their effects with easier to reason about SSA value graphs. /// /// /// An illustration of this transform is turning this IR: /// \code /// BB: /// %cmp = icmp ult %x, %y /// br i1 %cmp, label %EndBB, label %ThenBB /// ThenBB: /// %sub = sub %x, %y /// br label BB2 /// EndBB: /// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ] /// ... /// \endcode /// /// Into this IR: /// \code /// BB: /// %cmp = icmp ult %x, %y /// %sub = sub %x, %y /// %cond = select i1 %cmp, 0, %sub /// ... /// \endcode /// /// \returns true if the conditional block is removed. bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { … } /// Return true if we can thread a branch across this block. static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { … } static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To) { … } /// If we have a conditional branch on something for which we know the constant /// value in predecessors (e.g. a phi node in the current block), thread edges /// from the predecessor to their ultimate destination. static std::optional<bool> foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC) { … } static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC) { … } /// Given a BB that starts with the specified two-entry PHI node, /// see if we can eliminate it. static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables) { … } static Value *createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name = "") { … } /// Return true if either PBI or BI has branch weight available, and store /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does /// not have branch weight, use 1:1 as its weight. static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight) { … } /// Determine if the two branches share a common destination and deduce a glue /// that joins the branches' conditions to arrive at the common destination if /// that would be profitable. static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>> shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI) { … } static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI) { … } /// Return if an instruction's type or any of its operands' types are a vector /// type. static bool isVectorOp(Instruction &I) { … } /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { … } // If there is only one store in BB1 and BB2, return it, otherwise return // nullptr. static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { … } static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV = nullptr) { … } static bool mergeConditionalStoreToAddress( BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { … } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { … } /// If the previous block ended with a widenable branch, determine if reusing /// the target block is profitable and legal. This will have the effect of /// "widening" PBI, but doesn't require us to reason about hosting safety. static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU) { … } /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { … } // Simplifies a terminator by replacing it with a branch to TrueBB if Cond is // true or to FalseBB if Cond is false. // Takes care of updating the successors and removing the old terminator. // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, BasicBlock *TrueBB, BasicBlock *FalseBB, uint32_t TrueWeight, uint32_t FalseWeight) { … } // Replaces // (switch (select cond, X, Y)) on constant X, Y // with a branch - conditional if X and Y lead to distinct BBs, // unconditional otherwise. bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { … } // Replaces // (indirectbr (select cond, blockaddress(@fn, BlockA), // blockaddress(@fn, BlockB))) // with // (br cond, BlockA, BlockB). bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { … } /// This is called when we find an icmp instruction /// (a seteq/setne with a constant) as the only instruction in a /// block that ends with an uncond branch. We are looking for a very specific /// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In /// this case, we merge the first two "or's of icmp" into a switch, but then the /// default value goes to an uncond block with a seteq in it, we get something /// like: /// /// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] /// DEFAULT: /// %tmp = icmp eq i8 %A, 92 /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] /// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder) { … } /// The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, const DataLayout &DL) { … } bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { … } // Check if cleanup block is empty static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { … } // Simplify resume that is shared by several landing pads (phi of landing pad). bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { … } // Simplify resume that is only used by a single (non-phi) landing pad. bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { … } static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { … } // Try to merge two cleanuppads together. static bool mergeCleanupPad(CleanupReturnInst *RI) { … } bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { … } // WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()! bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { … } static bool casesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { … } static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock = true) { … } /// Turn a switch into an integer range comparison and branch. /// Switches with more than 2 destinations are ignored. /// Switches with 1 destination are also ignored. bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { … } /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL) { … } /// If BB would be eligible for simplification by /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated /// by an unconditional branch), look at the phi node for BB in the successor /// block and see if the incoming value is equal to CaseValue. If so, return /// the phi node, and set PhiIndex to BB's index in the phi node. static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex) { … } /// Try to forward the condition of a switch instruction to a phi node /// dominated by the switch, if that would mean that some of the destination /// blocks of the switch can be folded away. Return true if a change is made. static bool forwardSwitchConditionToPHI(SwitchInst *SI) { … } /// Return true if the backend will be able to handle /// initializing an array of constants like C. static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) { … } /// If V is a Constant, return it. Otherwise, try to look up /// its constant value in ConstantPool, returning 0 if it's not there. static Constant * lookupConstant(Value *V, const SmallDenseMap<Value *, Constant *> &ConstantPool) { … } /// Try to fold instruction I into a constant. This works for /// simple instructions such as binary operations where both operands are /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap<Value *, Constant *> &ConstantPool) { … } /// Try to determine the resulting constant values in phi nodes /// at the common destination basic block, *CommonDest, for one of the case /// destionations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res, const DataLayout &DL, const TargetTransformInfo &TTI) { … } // Helper function used to add CaseVal to the list of cases that generate // Result. Returns the updated number of cases that generate this result. static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result) { … } // Helper function that initializes a map containing // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults) { … } // Helper function that checks if it is possible to transform a switch with only // two cases (or two cases + default) that produces a result into a select. // TODO: Handle switches with more than 2 cases that map to the same result. static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder) { … } // Helper function to cleanup a switch instruction that has been converted into // a select, fixing up PHI nodes and basic blocks. static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU) { … } /// If a switch is only used to initialize one or more phi nodes in a common /// successor block with only two different constant values, try to replace the /// switch with a select. Returns true if the fold was made. static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { … } namespace { /// This class represents a lookup table that can be used to replace a switch. class SwitchLookupTable { … }; } // end anonymous namespace SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { … } Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder) { … } bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL, uint64_t TableSize, Type *ElementType) { … } static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL) { … } static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) { … } static bool isSwitchDense(ArrayRef<int64_t> Values) { … } /// Determine whether a lookup table should be built for this switch, based on /// the number of cases, size of the table, and the types of the results. // TODO: We could support larger than legal types by limiting based on the // number of loads required and/or table size. If the constants are small we // could use smaller table entries and extend after the load. static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap<PHINode *, Type *> &ResultTypes) { … } static bool shouldUseSwitchConditionAsTableIndex( ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI) { … } /// Try to reuse the switch table index compare. Following pattern: /// \code /// if (idx < tablesize) /// r = table[idx]; // table does not contain default_value /// else /// r = default_value; /// if (r != default_value) /// ... /// \endcode /// Is optimized to: /// \code /// cond = idx < tablesize; /// if (cond) /// r = table[idx]; /// else /// r = default_value; /// if (cond) /// ... /// \endcode /// Jump threading will then eliminate the second if(cond). static void reuseTableCompare( User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) { … } /// If the switch is only used to initialize one or more phi nodes in a common /// successor block with different constant values, replace the switch with /// lookup tables. static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { … } /// Try to transform a switch that has "holes" in it to a contiguous sequence /// of cases. /// /// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be /// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. /// /// This converts a sparse switch into a dense switch which allows better /// lowering and could also allow transforming into a lookup table. static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { … } /// Tries to transform switch of powers of two to reduce switch range. /// For example, switch like: /// switch (C) { case 1: case 2: case 64: case 128: } /// will be transformed to: /// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: } /// /// This transformation allows better lowering and could allow transforming into /// a lookup table. static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { … } /// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have /// the same destination. static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU) { … } bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { … } bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { … } /// Given an block with only a single landing pad and a unconditional branch /// try to find another basic block which this one can be merged with. This /// handles cases where we have multiple invokes with unique landing pads, but /// a shared handler. /// /// We specifically choose to not worry about merging non-empty blocks /// here. That is a PRE/scheduling problem and is best solved elsewhere. In /// practice, the optimizer produces empty landing pad blocks quite frequently /// when dealing with exception dense code. (see: instcombine, gvn, if-else /// sinking in this file) /// /// This is primarily a code size optimization. We need to avoid performing /// any transform which might inhibit optimization (such as our ability to /// specialize a particular handler via tail commoning). We do this by not /// merging any blocks which require us to introduce a phi. Since the same /// values are flowing through both blocks, we don't lose any ability to /// specialize. If anything, we make such specialization more likely. /// /// TODO - This transformation could remove entries from a phi in the target /// block when the inputs in the phi are the same for the two blocks being /// merged. In some cases, this could result in removal of the PHI entirely. static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU) { … } bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { … } bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder) { … } static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { … } /// Fold the following pattern: /// bb0: /// br i1 %cond1, label %bb1, label %bb2 /// bb1: /// br i1 %cond2, label %bb3, label %bb4 /// bb2: /// br i1 %cond2, label %bb4, label %bb3 /// bb3: /// ... /// bb4: /// ... /// into /// bb0: /// %cond = xor i1 %cond1, %cond2 /// br i1 %cond, label %bb4, label %bb3 /// bb3: /// ... /// bb4: /// ... /// NOTE: %cond2 always dominates the terminator of bb0. static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) { … } bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { … } /// Check if passing a value to an instruction will cause undefined behavior. static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { … } /// If BB has an incoming value that will always trigger undefined behavior /// (eg. null pointer dereference), remove the branch leading here. static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC) { … } bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { … } bool SimplifyCFGOpt::run(BasicBlock *BB) { … } bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, ArrayRef<WeakVH> LoopHeaders) { … }