//===- LoopPeel.cpp -------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Loop Peeling Utilities. //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> #include <cstdint> #include <optional> usingnamespacellvm; usingnamespacellvm::PatternMatch; #define DEBUG_TYPE … STATISTIC(NumPeeled, "Number of loops peeled"); static cl::opt<unsigned> UnrollPeelCount( "unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes")); static cl::opt<bool> UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low.")); static cl::opt<bool> UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled.")); static cl::opt<unsigned> UnrollPeelMaxCount( "unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling.")); static cl::opt<unsigned> UnrollForcePeelCount( "unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information.")); static cl::opt<bool> DisableAdvancedPeeling( "disable-advanced-peeling", cl::init(false), cl::Hidden, cl::desc( "Disable advance peeling. Issues for convergent targets (D134803).")); static const char *PeeledCountMetaData = …; // Check whether we are capable of peeling this loop. bool llvm::canPeel(const Loop *L) { … } namespace { // As a loop is peeled, it may be the case that Phi nodes become // loop-invariant (ie, known because there is only one choice). // For example, consider the following function: // void g(int); // void binary() { // int x = 0; // int y = 0; // int a = 0; // for(int i = 0; i <100000; ++i) { // g(x); // x = y; // g(a); // y = a + 1; // a = 5; // } // } // Peeling 3 iterations is beneficial because the values for x, y and a // become known. The IR for this loop looks something like the following: // // %i = phi i32 [ 0, %entry ], [ %inc, %if.end ] // %a = phi i32 [ 0, %entry ], [ 5, %if.end ] // %y = phi i32 [ 0, %entry ], [ %add, %if.end ] // %x = phi i32 [ 0, %entry ], [ %y, %if.end ] // ... // tail call void @_Z1gi(i32 signext %x) // tail call void @_Z1gi(i32 signext %a) // %add = add nuw nsw i32 %a, 1 // %inc = add nuw nsw i32 %i, 1 // %exitcond = icmp eq i32 %inc, 100000 // br i1 %exitcond, label %for.cond.cleanup, label %for.body // // The arguments for the calls to g will become known after 3 iterations // of the loop, because the phi nodes values become known after 3 iterations // of the loop (ie, they are known on the 4th iteration, so peel 3 iterations). // The first iteration has g(0), g(0); the second has g(0), g(5); the // third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5). // Now consider the phi nodes: // %a is a phi with constants so it is determined after iteration 1. // %y is a phi based on a constant and %a so it is determined on // the iteration after %a is determined, so iteration 2. // %x is a phi based on a constant and %y so it is determined on // the iteration after %y, so iteration 3. // %i is based on itself (and is an induction variable) so it is // never determined. // This means that peeling off 3 iterations will result in being able to // remove the phi nodes for %a, %y, and %x. The arguments for the // corresponding calls to g are determined and the code for computing // x, y, and a can be removed. // // The PhiAnalyzer class calculates how many times a loop should be // peeled based on the above analysis of the phi nodes in the loop while // respecting the maximum specified. class PhiAnalyzer { … }; PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations) : … { … } // This function calculates the number of iterations after which the value // becomes an invariant. The pre-calculated values are memorized in a map. // N.B. This number will be Unknown or <= MaxIterations. // The function is calculated according to the following definition: // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. // F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown) // G(%y) = 0 if %y is a loop invariant // G(%y) = G(%BackEdgeValue) if %y is a phi in the header block // G(%y) = TODO: if %y is an expression based on phis and loop invariants // The example looks like: // %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration. // %y = phi(0, 5) // %a = %y + 1 // G(%y) = Unknown otherwise (including phi not in header block) PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { … } std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() { … } } // unnamed namespace // Try to find any invariant memory reads that will become dereferenceable in // the remainder loop after peeling. The load must also be used (transitively) // by an exit condition. Returns the number of iterations to peel off (at the // moment either 0 or 1). static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, DominatorTree &DT, AssumptionCache *AC) { … } // Return the number of iterations to peel off that make conditions in the // body true/false. For example, if we peel 2 iterations off the loop below, // the condition i < 2 can be evaluated at compile time. // for (i = 0; i < n; i++) // if (i < 2) // .. // else // .. // } static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { … } /// This "heuristic" exactly matches implicit behavior which used to exist /// inside getLoopEstimatedTripCount. It was added here to keep an /// improvement inside that API from causing peeling to become more aggressive. /// This should probably be removed. static bool violatesLegacyMultiExitLoopCheck(Loop *L) { … } // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, AssumptionCache *AC, unsigned Threshold) { … } struct WeightInfo { … }; /// Update the branch weights of an exiting block of a peeled-off loop /// iteration. /// Let F is a weight of the edge to continue (fallthrough) into the loop. /// Let E is a weight of the edge to an exit. /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to /// go to exit. /// Then, Estimated ExitCount = F / E. /// For I-th (counting from 0) peeled off iteration we set the weights for /// the peeled exit as (EC - I, 1). It gives us reasonable distribution, /// The probability to go to exit 1/(EC-I) increases. At the same time /// the estimated exit count in the remainder loop reduces by I. /// To avoid dealing with division rounding we can just multiple both part /// of weights to E and use weight as (F - I * E, E). static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { … } /// Initialize the weights for all exiting blocks. static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, Loop *L) { … } /// Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being /// peeled off. /// \param ExitEdges The exit edges of the original loop. /// \param[out] NewBlocks A list of the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. /// \param LoopBlocks A helper for DFS-traversal of the loop. /// \param LVMap A value-map that maps instructions from the original loop to /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges, SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes, ScalarEvolution &SE) { … } TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional<bool> UserAllowPeeling, std::optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) { … } /// Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. /// Rather, each iteration is peeled off separately, and needs to check the /// exit condition. /// For loops that dynamically execute \p PeelCount iterations or less /// this provides a benefit, since the peeled off iterations, which account /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &LVMap) { … }