//===-- LoopPredication.cpp - Guard based loop predication pass -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // The LoopPredication pass tries to convert loop variant range checks to loop // invariant by widening checks across loop iterations. For example, it will // convert // // for (i = 0; i < n; i++) { // guard(i < len); // ... // } // // to // // for (i = 0; i < n; i++) { // guard(n - 1 < len); // ... // } // // After this transformation the condition of the guard is loop invariant, so // loop-unswitch can later unswitch the loop by this condition which basically // predicates the loop by the widened condition: // // if (n - 1 < len) // for (i = 0; i < n; i++) { // ... // } // else // deoptimize // // It's tempting to rely on SCEV here, but it has proven to be problematic. // Generally the facts SCEV provides about the increment step of add // recurrences are true if the backedge of the loop is taken, which implicitly // assumes that the guard doesn't fail. Using these facts to optimize the // guard results in a circular logic where the guard is optimized under the // assumption that it never fails. // // For example, in the loop below the induction variable will be marked as nuw // basing on the guard. Basing on nuw the guard predicate will be considered // monotonic. Given a monotonic condition it's tempting to replace the induction // variable in the condition with its value on the last iteration. But this // transformation is not correct, e.g. e = 4, b = 5 breaks the loop. // // for (int i = b; i != e; i++) // guard(i u< len) // // One of the ways to reason about this problem is to use an inductive proof // approach. Given the loop: // // if (B(0)) { // do { // I = PHI(0, I.INC) // I.INC = I + Step // guard(G(I)); // } while (B(I)); // } // // where B(x) and G(x) are predicates that map integers to booleans, we want a // loop invariant expression M such the following program has the same semantics // as the above: // // if (B(0)) { // do { // I = PHI(0, I.INC) // I.INC = I + Step // guard(G(0) && M); // } while (B(I)); // } // // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step) // // Informal proof that the transformation above is correct: // // By the definition of guards we can rewrite the guard condition to: // G(I) && G(0) && M // // Let's prove that for each iteration of the loop: // G(0) && M => G(I) // And the condition above can be simplified to G(Start) && M. // // Induction base. // G(0) && M => G(0) // // Induction step. Assuming G(0) && M => G(I) on the subsequent // iteration: // // B(I) is true because it's the backedge condition. // G(I) is true because the backedge is guarded by this condition. // // So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step). // // Note that we can use anything stronger than M, i.e. any condition which // implies M. // // When S = 1 (i.e. forward iterating loop), the transformation is supported // when: // * The loop has a single latch with the condition of the form: // B(X) = latchStart + X <pred> latchLimit, // where <pred> is u<, u<=, s<, or s<=. // * The guard condition is of the form // G(X) = guardStart + X u< guardLimit // // For the ult latch comparison case M is: // forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit => // guardStart + X + 1 u< guardLimit // // The only way the antecedent can be true and the consequent can be false is // if // X == guardLimit - 1 - guardStart // (and guardLimit is non-zero, but we won't use this latter fact). // If X == guardLimit - 1 - guardStart then the second half of the antecedent is // latchStart + guardLimit - 1 - guardStart u< latchLimit // and its negation is // latchStart + guardLimit - 1 - guardStart u>= latchLimit // // In other words, if // latchLimit u<= latchStart + guardLimit - 1 - guardStart // then: // (the ranges below are written in ConstantRange notation, where [A, B) is the // set for (I = A; I != B; I++ /*maywrap*/) yield(I);) // // forall X . guardStart + X u< guardLimit && // latchStart + X u< latchLimit => // guardStart + X + 1 u< guardLimit // == forall X . guardStart + X u< guardLimit && // latchStart + X u< latchStart + guardLimit - 1 - guardStart => // guardStart + X + 1 u< guardLimit // == forall X . (guardStart + X) in [0, guardLimit) && // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) => // (guardStart + X + 1) in [0, guardLimit) // == forall X . X in [-guardStart, guardLimit - guardStart) && // X in [-latchStart, guardLimit - 1 - guardStart) => // X in [-guardStart - 1, guardLimit - guardStart - 1) // == true // // So the widened condition is: // guardStart u< guardLimit && // latchStart + guardLimit - 1 - guardStart u>= latchLimit // Similarly for ule condition the widened condition is: // guardStart u< guardLimit && // latchStart + guardLimit - 1 - guardStart u> latchLimit // For slt condition the widened condition is: // guardStart u< guardLimit && // latchStart + guardLimit - 1 - guardStart s>= latchLimit // For sle condition the widened condition is: // guardStart u< guardLimit && // latchStart + guardLimit - 1 - guardStart s> latchLimit // // When S = -1 (i.e. reverse iterating loop), the transformation is supported // when: // * The loop has a single latch with the condition of the form: // B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=. // * The guard condition is of the form // G(X) = X - 1 u< guardLimit // // For the ugt latch comparison case M is: // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit // // The only way the antecedent can be true and the consequent can be false is if // X == 1. // If X == 1 then the second half of the antecedent is // 1 u> latchLimit, and its negation is latchLimit u>= 1. // // So the widened condition is: // guardStart u< guardLimit && latchLimit u>= 1. // Similarly for sgt condition the widened condition is: // guardStart u< guardLimit && latchLimit s>= 1. // For uge condition the widened condition is: // guardStart u< guardLimit && latchLimit u> 1. // For sge condition the widened condition is: // guardStart u< guardLimit && latchLimit s> 1. //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/GuardUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include <optional> #define DEBUG_TYPE … STATISTIC(TotalConsidered, "Number of guards considered"); STATISTIC(TotalWidened, "Number of checks widened"); usingnamespacellvm; static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation", cl::Hidden, cl::init(true)); static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop", cl::Hidden, cl::init(true)); static cl::opt<bool> SkipProfitabilityChecks("loop-predication-skip-profitability-checks", cl::Hidden, cl::init(false)); // This is the scale factor for the latch probability. We use this during // profitability analysis to find other exiting blocks that have a much higher // probability of exiting the loop instead of loop exiting via latch. // This value should be greater than 1 for a sane profitability check. static cl::opt<float> LatchExitProbabilityScale( "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0), cl::desc("scale factor for the latch probability. Value should be greater " "than 1. Lower values are ignored")); static cl::opt<bool> PredicateWidenableBranchGuards( "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden, cl::desc("Whether or not we should predicate guards " "expressed as widenable branches to deoptimize blocks"), cl::init(true)); static cl::opt<bool> InsertAssumesOfPredicatedGuardsConditions( "loop-predication-insert-assumes-of-predicated-guards-conditions", cl::Hidden, cl::desc("Whether or not we should insert assumes of conditions of " "predicated guards"), cl::init(true)); namespace { /// Represents an induction variable check: /// icmp Pred, <induction variable>, <loop invariant limit> struct LoopICmp { … }; class LoopPredication { … }; } // end namespace PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { … } std::optional<LoopICmp> LoopPredication::parseLoopICmp(ICmpInst *ICI) { … } Value *LoopPredication::expandCheck(SCEVExpander &Expander, Instruction *Guard, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { … } // Returns true if its safe to truncate the IV to RangeCheckType. // When the IV type is wider than the range operand type, we can still do loop // predication, by generating SCEVs for the range and latch that are of the // same type. We achieve this by generating a SCEV truncate expression for the // latch IV. This is done iff truncation of the IV is a safe operation, // without loss of information. // Another way to achieve this is by generating a wider type SCEV for the // range check operand, however, this needs a more involved check that // operands do not overflow. This can lead to loss of information when the // range operand is of the form: add i32 %offset, %iv. We need to prove that // sext(x + y) is same as sext(x) + sext(y). // This function returns true if we can safely represent the IV type in // the RangeCheckType without loss of information. static bool isSafeToTruncateWideIVType(const DataLayout &DL, ScalarEvolution &SE, const LoopICmp LatchCheck, Type *RangeCheckType) { … } // Return an LoopICmp describing a latch check equivlent to LatchCheck but with // the requested type if safe to do so. May involve the use of a new IV. static std::optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL, ScalarEvolution &SE, const LoopICmp LatchCheck, Type *RangeCheckType) { … } bool LoopPredication::isSupportedStep(const SCEV* Step) { … } Instruction *LoopPredication::findInsertPt(Instruction *Use, ArrayRef<Value*> Ops) { … } Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander, Instruction *Use, ArrayRef<const SCEV *> Ops) { … } bool LoopPredication::isLoopInvariantValue(const SCEV* S) { … } std::optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander, Instruction *Guard) { … } std::optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop( LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander, Instruction *Guard) { … } static void normalizePredicate(ScalarEvolution *SE, Loop *L, LoopICmp& RC) { … } /// If ICI can be widened to a loop invariant condition emits the loop /// invariant condition in the loop preheader and return it, otherwise /// returns std::nullopt. std::optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, Instruction *Guard) { … } void LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks, SmallVectorImpl<Value *> &WidenedChecks, SCEVExpander &Expander, Instruction *Guard) { … } bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, SCEVExpander &Expander) { … } bool LoopPredication::widenWidenableBranchGuardConditions( BranchInst *BI, SCEVExpander &Expander) { … } std::optional<LoopICmp> LoopPredication::parseLoopLatchICmp() { … } bool LoopPredication::isLoopProfitableToPredicate() { … } /// If we can (cheaply) find a widenable branch which controls entry into the /// loop, return it. static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) { … } /// Return the minimum of all analyzeable exit counts. This is an upper bound /// on the actual exit count. If there are not at least two analyzeable exits, /// returns SCEVCouldNotCompute. static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE, DominatorTree &DT, Loop *L) { … } /// This implements an analogous, but entirely distinct transform from the main /// loop predication transform. This one is phrased in terms of using a /// widenable branch *outside* the loop to allow us to simplify loop exits in a /// following loop. This is close in spirit to the IndVarSimplify transform /// of the same name, but is materially different widening loosens legality /// sharply. bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { … } bool LoopPredication::runOnLoop(Loop *Loop) { … }