//===- JumpThreading.cpp - Thread control through conditional blocks ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the Jump Threading pass. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> #include <cstdint> #include <iterator> #include <memory> #include <utility> usingnamespacellvm; usingnamespacejumpthreading; #define DEBUG_TYPE … STATISTIC(NumThreads, "Number of jumps threaded"); STATISTIC(NumFolds, "Number of terminators folded"); STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); static cl::opt<unsigned> BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden); static cl::opt<unsigned> ImplicationSearchThreshold( "jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden); static cl::opt<unsigned> PhiDuplicateThreshold( "jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden); static cl::opt<bool> ThreadAcrossLoopHeaders( "jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden); JumpThreadingPass::JumpThreadingPass(int T) { … } // Update branch probability information according to conditional // branch probability. This is usually made possible for cloned branches // in inline instances by the context specific profile in the caller. // For instance, // // [Block PredBB] // [Branch PredBr] // if (t) { // Block A; // } else { // Block B; // } // // [Block BB] // cond = PN([true, %A], [..., %B]); // PHI node // [Branch CondBr] // if (cond) { // ... // P(cond == true) = 1% // } // // Here we know that when block A is taken, cond must be true, which means // P(cond == true | A) = 1 // // Given that P(cond == true) = P(cond == true | A) * P(A) + // P(cond == true | B) * P(B) // we get: // P(cond == true ) = P(A) + P(cond == true | B) * P(B) // // which gives us: // P(A) is less than P(cond == true), i.e. // P(t == true) <= P(cond == true) // // In other words, if we know P(cond == true) is unlikely, we know // that P(t == true) is also unlikely. // static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { … } PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { … } bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, std::unique_ptr<DomTreeUpdater> DTU_, std::optional<BlockFrequencyInfo *> BFI_, std::optional<BranchProbabilityInfo *> BPI_) { … } // Replace uses of Cond with ToVal when safe to do so. If all uses are // replaced, we can remove Cond. We cannot blindly replace all uses of Cond // because we may incorrectly replace uses when guards/assumes are uses of // of `Cond` and we used the guards/assume to reason about the `Cond` value // at the end of block. RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB) { … } /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold) { … } /// findLoopHeaders - We do not want jump threading to turn proper loop /// structures into irreducible loops. Doing this breaks up the loop nesting /// hierarchy and pessimizes later transformations. To prevent this from /// happening, we first have to find the loop headers. Here we approximate this /// by finding targets of backedges in the CFG. /// /// Note that there definitely are cases when we want to allow threading of /// edges across a loop header. For example, threading a jump from outside the /// loop (the preheader) to an exit block of the loop is definitely profitable. /// It is also almost always profitable to thread backedges from within the loop /// to exit blocks, and is often profitable to thread backedges to other blocks /// within the loop (forming a nested loop). This simple analysis is not rich /// enough to track all of these properties and keep it up-to-date as the CFG /// mutates, so we don't allow any of these transformations. void JumpThreadingPass::findLoopHeaders(Function &F) { … } /// getKnownConstant - Helper method to determine if we can thread over a /// terminator with the given value as its condition, and if so what value to /// use for that. What kind of value this is depends on whether we want an /// integer or a block address, but an undef is always accepted. /// Returns null if Val is null or not an appropriate constant. static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { … } /// computeValueKnownInPredecessors - Given a basic block BB and a value V, see /// if we can infer that the value is a known ConstantInt/BlockAddress or undef /// in any of our predecessors. If so, return the known list of value and pred /// BB in the result vector. /// /// This returns true if there were any known values. bool JumpThreadingPass::computeValueKnownInPredecessorsImpl( Value *V, BasicBlock *BB, PredValueInfo &Result, ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet, Instruction *CxtI) { … } /// GetBestDestForBranchOnUndef - If we determine that the specified block ends /// in an undefined jump, decide which block is best to revector to. /// /// Since we can pick an arbitrary destination, we pick the successor with the /// fewest predecessors. This should reduce the in-degree of the others. static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) { … } static bool hasAddressTakenAndUsed(BasicBlock *BB) { … } /// processBlock - If there are any predecessors whose control can be threaded /// through to a successor, transform them now. bool JumpThreadingPass::processBlock(BasicBlock *BB) { … } bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) { … } /// Return true if Op is an instruction defined in the given block. static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) { … } /// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially /// redundant load instruction, eliminate it by replacing it with a PHI node. /// This is an important optimization that encourages jump threading, and needs /// to be run interlaced with other jump threading tasks. bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { … } /// findMostPopularDest - The specified list contains multiple possible /// threadable destinations. Pick the one that occurs the most frequently in /// the list. static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &PredToDestList) { … } // Try to evaluate the value of V when the control flows from PredPredBB to // BB->getSinglePredecessor() and then on to BB. Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *V, const DataLayout &DL) { … } bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, ConstantPreference Preference, Instruction *CxtI) { … } /// processBranchOnPHI - We have an otherwise unthreadable conditional branch on /// a PHI node (or freeze PHI) in the current block. See if there are any /// simplifications we can do based on inputs to the phi node. bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) { … } /// processBranchOnXOR - We have an otherwise unthreadable conditional branch on /// a xor instruction in the current block. See if there are any /// simplifications we can do based on inputs to the xor. bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) { … } /// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// predecessor to the PHIBB block. If it has PHI nodes, add entries for /// NewPred using the entries from OldPred (suitably mapped). static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap) { … } /// Merge basic block BB into its sole predecessor if possible. bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) { … } /// Update the SSA form. NewBB contains instructions that are copied from BB. /// ValueMapping maps old values in BB to new ones in NewBB. void JumpThreadingPass::updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping) { … } /// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone /// arguments that come from PredBB. Return the map from the variables in the /// source basic block to the variables in the newly created basic block. void JumpThreadingPass::cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB) { … } /// Attempt to thread through two successive basic blocks. bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond) { … } void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB) { … } /// tryThreadEdge - Thread an edge if it's safe and profitable to do so. bool JumpThreadingPass::tryThreadEdge( BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs, BasicBlock *SuccBB) { … } /// threadEdge - We have decided that it is safe and profitable to factor the /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB /// across BB. Transform the IR to reflect this change. void JumpThreadingPass::threadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs, BasicBlock *SuccBB) { … } /// Create a new basic block that will be the predecessor of BB and successor of /// all blocks in Preds. When profile data is available, update the frequency of /// this new block. BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix) { … } bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { … } /// Update the block frequency of BB and branch weight and the metadata on the /// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 - /// Freq(PredBB->BB) / Freq(BB->SuccBB). void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB, BasicBlock *NewBB, BasicBlock *SuccBB, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, bool HasProfile) { … } /// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch /// to BB which contains an i1 PHI node and a conditional branch on that PHI. /// If we can duplicate the contents of BB up into PredBB do so now, this /// improves the odds that the branch will be on an analyzable instruction like /// a compare. bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) { … } // Pred is a predecessor of BB with an unconditional branch to BB. SI is // a Select instruction in Pred. BB has other predecessors and SI is used in // a PHI node in BB. SI has no other use. // A new basic block, NewBB, is created and SI is converted to compare and // conditional branch. SI is erased from parent. void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx) { … } bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) { … } /// tryToUnfoldSelect - Look for blocks of the form /// bb1: /// %a = select /// br bb2 /// /// bb2: /// %p = phi [%a, %bb1] ... /// %c = icmp %p /// br i1 %c /// /// And expand the select into a branch structure if one of its arms allows %c /// to be folded. This later enables threading from bb1 over bb2. bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { … } /// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the /// same BB in the form /// bb: /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... /// %s = select %p, trueval, falseval /// /// or /// /// bb: /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ... /// %c = cmp %p, 0 /// %s = select %c, trueval, falseval /// /// And expand the select into a branch structure. This later enables /// jump-threading over bb in this pass. /// /// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold /// select if the associated PHI has at least one constant. If the unfolded /// select is not jump-threaded, it will be folded again in the later /// optimizations. bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { … } /// Try to propagate a guard from the current BB into one of its predecessors /// in case if another branch of execution implies that the condition of this /// guard is always true. Currently we only process the simplest case that /// looks like: /// /// Start: /// %cond = ... /// br i1 %cond, label %T1, label %F1 /// T1: /// br label %Merge /// F1: /// br label %Merge /// Merge: /// %condGuard = ... /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ] /// /// And cond either implies condGuard or !condGuard. In this case all the /// instructions before the guard can be duplicated in both branches, and the /// guard is then threaded to one of them. bool JumpThreadingPass::processGuards(BasicBlock *BB) { … } /// Try to propagate the guard from BB which is the lower block of a diamond /// to one of its branches, in case if diamond's condition implies guard's /// condition. bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI) { … } PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const { … } template <typename AnalysisT> typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() { … } BranchProbabilityInfo *JumpThreadingPass::getBPI() { … } BlockFrequencyInfo *JumpThreadingPass::getBFI() { … } // Important note on validity of BPI/BFI. JumpThreading tries to preserve // BPI/BFI as it goes. Thus if cached instance exists it will be updated. // Otherwise, new instance of BPI/BFI is created (up to date by definition). BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) { … } BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) { … }