//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops
// that have as one or more of their components the loop induction variable.
// It rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>;
// however, it's useful to think about these as the same register, with some
// uses using the value of the register before the add and some using it after.
// In this example, the icmp is a post-increment user, since it uses %i.next,
// which is the value of the induction variable after the increment. The other
// common case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
//       of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
//       smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
//       multiple base registers, or as the increment expression in an addrec),
//       we may not actually need both reg and (-1 * reg) in registers; the
//       negation can be implemented by using a sub instead of an add. The
//       lack of support for taking this into consideration when making
//       register pressure decisions is partly worked around by the "Special"
//       use kind.
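//
// As a purely illustrative example (not taken from any particular test), a
// loop that addresses a[i] through a separate multiply:
//
//   %idx  = mul i64 %i, 4
//   %addr = getelementptr i8, ptr %a, i64 %idx
//   %v    = load i32, ptr %addr
//
// may be rewritten so that the address is itself an induction variable that
// is bumped by 4 each iteration, or so that the access uses a scaled-index
// addressing mode such as [%a + %i*4] on targets that support it, removing
// the separate multiply from the loop body.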
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE …

/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = …;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = …;

// Cleanup congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim( "enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination")); // The flag adds instruction count to solutions cost comparison. static cl::opt<bool> InsnsCost( "lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model")); // Flag to choose how to narrow complex lsr solution static cl::opt<bool> LSRExpNarrow( "lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number")); // Flag to narrow search space by filtering non-optimal formulae with // the same ScaledReg and Scale. static cl::opt<bool> FilterSameScaledReg( "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale")); static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode( "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"))); static cl::opt<unsigned> ComplexityLimit( "lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits<uint16_t>::max()), cl::desc("LSR search space complexity limit")); static cl::opt<unsigned> SetupCostDepthLimit( "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost")); static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable( "lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable")); static cl::opt<bool> EnableVScaleImmediates( "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR")); static cl::opt<bool> DropScaledForVScale( "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing")); #ifndef NDEBUG // Stress test IV chain generation. static cl::opt<bool> StressIVChain( "stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains")); #else static bool StressIVChain = …; #endif namespace { struct MemAccessTy { … }; /// This class holds data which is used to order reuse candidates. class RegSortData { … }; // An offset from an address that is either scalable or fixed. Used for // per-target optimizations of addressing modes. class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> { … }; // This is needed for the Compare type of std::map when Immediate is used // as a key. We don't need it to be fully correct against any value of vscale, // just to make sure that vscale-related terms in the map are considered against // each other rather than being mixed up and potentially missing opportunities. struct KeyOrderTargetImmediate { … }; // This would be nicer if we could be generic instead of directly using size_t, // but there doesn't seem to be a type trait for is_orderable or // is_lessthan_comparable or similar. 
struct KeyOrderSizeTAndImmediate { … }; } // end anonymous namespace #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } LLVM_DUMP_METHOD void RegSortData::dump() const { print(errs()); errs() << '\n'; } #endif namespace { /// Map register candidates to information about how they are used. class RegUseTracker { … }; } // end anonymous namespace void RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) { … } void RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) { … } void RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) { … } bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { … } const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { … } void RegUseTracker::clear() { … } namespace { /// This class holds information that describes a formula for computing /// satisfying a use. It may include broken-out immediates and scaled registers. struct Formula { … }; } // end anonymous namespace /// Recursion helper for initialMatch. static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl<const SCEV *> &Good, SmallVectorImpl<const SCEV *> &Bad, ScalarEvolution &SE) { … } /// Incorporate loop-variant parts of S into this Formula, attempting to keep /// all loop-invariant and loop-computable values in a single base register. void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { … } static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) { … } /// Check whether or not this formula satisfies the canonical /// representation. /// \see Formula::BaseRegs. bool Formula::isCanonical(const Loop &L) const { … } /// Helper method to morph a formula into its canonical representation. /// \see Formula::BaseRegs. /// Every formula having more than one base register, must use the ScaledReg /// field. Otherwise, we would have to do special cases everywhere in LSR /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... /// On the other hand, 1*reg should be canonicalized into reg. void Formula::canonicalize(const Loop &L) { … } /// Get rid of the scale in the formula. /// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. /// \return true if it was possible to get rid of the scale, false otherwise. /// \note After this operation the formula may not be in the canonical form. bool Formula::unscale() { … } bool Formula::hasZeroEnd() const { … } /// Return the total number of register operands used by this formula. This does /// not include register uses implied by non-constant addrec strides. size_t Formula::getNumRegs() const { … } /// Return the type of this formula, if it has one, or null otherwise. This type /// is meaningless except for the bit size. Type *Formula::getType() const { … } /// Delete the given base reg from the BaseRegs list. void Formula::deleteBaseReg(const SCEV *&S) { … } /// Test if this formula references the given register. bool Formula::referencesReg(const SCEV *S) const { … } /// Test whether this formula uses registers which are used by uses other than /// the use with the given index. 
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, const RegUseTracker &RegUses) const { … } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void Formula::print(raw_ostream &OS) const { bool First = true; if (BaseGV) { if (!First) OS << " + "; else First = false; BaseGV->printAsOperand(OS, /*PrintType=*/false); } if (BaseOffset.isNonZero()) { if (!First) OS << " + "; else First = false; OS << BaseOffset; } for (const SCEV *BaseReg : BaseRegs) { if (!First) OS << " + "; else First = false; OS << "reg(" << *BaseReg << ')'; } if (HasBaseReg && BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: HasBaseReg**"; } else if (!HasBaseReg && !BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: !HasBaseReg**"; } if (Scale != 0) { if (!First) OS << " + "; else First = false; OS << Scale << "*reg("; if (ScaledReg) OS << *ScaledReg; else OS << "<unknown>"; OS << ')'; } if (UnfoldedOffset.isNonZero()) { if (!First) OS << " + "; OS << "imm(" << UnfoldedOffset << ')'; } } LLVM_DUMP_METHOD void Formula::dump() const { print(errs()); errs() << '\n'; } #endif /// Return true if the given addrec can be sign-extended without changing its /// value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { … } /// Return true if the given add can be sign-extended without changing its /// value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { … } /// Return true if the given mul can be sign-extended without changing its /// value. static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { … } /// Return an expression for LHS /s RHS, if it can be determined and if the /// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits /// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that /// the multiplication may overflow, which is useful when the result will be /// used in a context where the most significant bits are ignored. static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits = false) { … } /// If S involves the addition of a constant integer value, return that integer /// value, and mutate S to point to a new SCEV with that value excluded. static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { … } /// If S involves the addition of a GlobalValue address, return that symbol, and /// mutate S to point to a new SCEV with that value excluded. static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { … } /// Returns true if the specified instruction is using the specified value as an /// address. static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal) { … } /// Return the type of the memory being accessed. static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal) { … } /// Return true if this AddRec is already a phi in its loop. static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { … } /// Check if expanding this expression is likely to incur significant cost. This /// is tricky because SCEV doesn't track which expressions are actually computed /// by the current IR. /// /// We currently allow expansion of IV increments that involve adds, /// multiplication by constants, and AddRecs from existing phis. /// /// TODO: Allow UDivExpr if we can find an existing IV increment that is an /// obvious multiple of the UDivExpr. 
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl<const SCEV*> &Processed, ScalarEvolution &SE) { … } namespace { class LSRUse; } // end anonymous namespace /// Check if the addressing mode defined by \p F is completely /// folded in \p LU at isel time. /// This includes address-mode folding and special icmp tricks. /// This function returns true if \p LU can accommodate what \p F /// defines and up to 1 base + 1 scaled + offset. /// In other words, if \p F has several base registers, this function may /// still return true. Therefore, users still need to account for /// additional base registers and/or unfolded offsets to derive an /// accurate cost model. static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F); // Get the cost of the scaling factor used in F for LU. static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L); namespace { /// This class is used to measure and compare candidate formulae. class Cost { … }; /// An operand value in an instruction which is to be replaced with some /// equivalent, possibly strength-reduced, replacement. struct LSRFixup { … }; /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted /// SmallVectors of const SCEV*. struct UniquifierDenseMapInfo { … }; /// This class holds the state that LSR keeps for each use in IVUsers, as well /// as uses invented by LSR itself. It includes information about what kinds of /// things can be folded into the user, information about the user itself, and /// information about how the use may be satisfied. TODO: Represent multiple /// users of the same expression in common? class LSRUse { … }; } // end anonymous namespace static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale, Instruction *Fixup = nullptr); static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { … } /// Tally up interesting quantities from the given register. void Cost::RateRegister(const Formula &F, const SCEV *Reg, SmallPtrSetImpl<const SCEV *> &Regs) { … } /// Record this register in the set. If we haven't seen it before, rate /// it. Optional LoserRegs provides a way to declare any formula that refers to /// one of those regs an instant loser. void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg, SmallPtrSetImpl<const SCEV *> &Regs, SmallPtrSetImpl<const SCEV *> *LoserRegs) { … } void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs, const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU, SmallPtrSetImpl<const SCEV *> *LoserRegs) { … } /// Set this cost to a losing value. void Cost::Lose() { … } /// Choose the lower cost. bool Cost::isLess(const Cost &Other) const { … } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void Cost::print(raw_ostream &OS) const { if (InsnsCost) OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); if (C.AddRecCost != 0) OS << ", with addrec cost " << C.AddRecCost; if (C.NumIVMuls != 0) OS << ", plus " << C.NumIVMuls << " IV mul" << (C.NumIVMuls == 1 ? "" : "s"); if (C.NumBaseAdds != 0) OS << ", plus " << C.NumBaseAdds << " base add" << (C.NumBaseAdds == 1 ? 
"" : "s"); if (C.ScaleCost != 0) OS << ", plus " << C.ScaleCost << " scale cost"; if (C.ImmCost != 0) OS << ", plus " << C.ImmCost << " imm cost"; if (C.SetupCost != 0) OS << ", plus " << C.SetupCost << " setup cost"; } LLVM_DUMP_METHOD void Cost::dump() const { print(errs()); errs() << '\n'; } #endif /// Test whether this fixup always uses its value outside of the given loop. bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { … } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LSRFixup::print(raw_ostream &OS) const { OS << "UserInst="; // Store is common and interesting enough to be worth special-casing. if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) { OS << "store "; Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); } else if (UserInst->getType()->isVoidTy()) OS << UserInst->getOpcodeName(); else UserInst->printAsOperand(OS, /*PrintType=*/false); OS << ", OperandValToReplace="; OperandValToReplace->printAsOperand(OS, /*PrintType=*/false); for (const Loop *PIL : PostIncLoops) { OS << ", PostIncLoop="; PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false); } if (Offset.isNonZero()) OS << ", Offset=" << Offset; } LLVM_DUMP_METHOD void LSRFixup::dump() const { print(errs()); errs() << '\n'; } #endif /// Test whether this use as a formula which has the same registers as the given /// formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { … } /// The function returns a probability of selecting formula without Reg. float LSRUse::getNotSelectedProbability(const SCEV *Reg) const { … } /// If the given formula has not yet been inserted, add it to the list, and /// return true. Return false otherwise. The formula must be in canonical form. bool LSRUse::InsertFormula(const Formula &F, const Loop &L) { … } /// Remove the given formula from this use's list. void LSRUse::DeleteFormula(Formula &F) { … } /// Recompute the Regs field, and update RegUses. 
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { … } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LSRUse::print(raw_ostream &OS) const { OS << "LSR Use: Kind="; switch (Kind) { case Basic: OS << "Basic"; break; case Special: OS << "Special"; break; case ICmpZero: OS << "ICmpZero"; break; case Address: OS << "Address of "; if (AccessTy.MemTy->isPointerTy()) OS << "pointer"; // the full pointer type could be really verbose else { OS << *AccessTy.MemTy; } OS << " in addrspace(" << AccessTy.AddrSpace << ')'; } OS << ", Offsets={"; bool NeedComma = false; for (const LSRFixup &Fixup : Fixups) { if (NeedComma) OS << ','; OS << Fixup.Offset; NeedComma = true; } OS << '}'; if (AllFixupsOutsideLoop) OS << ", all-fixups-outside-loop"; if (WidestFixupType) OS << ", widest fixup type: " << *WidestFixupType; } LLVM_DUMP_METHOD void LSRUse::dump() const { print(errs()); errs() << '\n'; } #endif static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale, Instruction *Fixup /* = nullptr */) { … } static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale) { … } static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, const Formula &F, const Loop &L) { … } /// Test whether we know how to expand the current formula. static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale) { … } static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, const Formula &F) { … } static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset) { … } static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F) { … } static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L) { … } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg) { … } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, ScalarEvolution &SE, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, const SCEV *S, bool HasBaseReg) { … } namespace { /// An individual increment in a Chain of IV increments. Relate an IV user to /// an expression that computes the IV it uses from the IV used by the previous /// link in the Chain. /// /// For the head of a chain, IncExpr holds the absolute SCEV expression for the /// original IVOperand. The head of the chain's IVOperand is only valid during /// chain collection, before LSR replaces IV users. During chain generation, /// IncExpr can be used to find the new IVOperand that computes the same /// expression. struct IVInc { … }; // The list of IV increments in program order. We typically add the head of a // chain without finding subsequent links. struct IVChain { … }; /// Helper for CollectChains to track multiple IV increment uses. 
/// Distinguish
/// between FarUsers that definitely cross IV increments and NearUsers that may
/// be used between IV increments.
struct ChainUsers { … };

/// This class holds state for the main loop strength reduction logic.
class LSRInstance { … };

} // end anonymous namespace

/// If IV is used in an int-to-float cast inside the loop then try to eliminate
/// the cast operation.
void LSRInstance::OptimizeShadowIV() { … }

/// If Cond has an operand that is an expression of an IV, set the IV user and
/// stride information and return true, otherwise return false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { … }

/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       p[i] = 0.0;
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     p[i] = 0.0;
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting its condition from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { … }

/// Change the loop's terminating condition to use the post-inc IV when
/// possible.
void LSRInstance::OptimizeLoopTermCond() { … }

/// Determine if the given use can accommodate a fixup at the given offset and
/// other details. If so, update the use and return true.
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg, LSRUse::KindType Kind, MemAccessTy AccessTy) { … }

/// Return an LSRUse index and an offset value for a fixup which needs the given
/// expression, with the given kind and optional access type. Either reuse an
/// existing use or create a new one, as needed.
std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr, LSRUse::KindType Kind, MemAccessTy AccessTy) { … }

/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { … }
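// Illustrative sketch (hypothetical IR, not from a specific test): getUse()
// and reconcileNewOffset() above let several fixups share one LSRUse when
// their addresses differ only by a constant. For example, with
//
//   %p0 = getelementptr i8, ptr %a, i64 %i
//   %p1 = getelementptr i8, ptr %a, i64 %i1   ; %i1 = %i + 4
//   %p2 = getelementptr i8, ptr %a, i64 %i2   ; %i2 = %i + 8
//
// the three address fixups can be folded into a single Address use with
// Offsets={0,4,8}, so the solver picks one formula (one register plus a small
// immediate per access) instead of three independent ones, provided the
// target's addressing modes accept the whole offset range.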
/// Look for a use distinct from OrigLU which has a formula that has the same
/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { … }

void LSRInstance::CollectInterestingTypesAndFactors() { … }

/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
/// IVStrideUses, we could partially skip this.
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE) { … }

/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
/// a convenient helper.
static Value *getWideOperand(Value *Oper) { … }

/// Return an approximation of this SCEV expression's "base", or NULL for any
/// constant. Returning the expression itself is conservative. Returning a
/// deeper subexpression is more precise and valid as long as it isn't less
/// complex than another subexpression. For expressions involving multiple
/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
static const SCEV *getExprBase(const SCEV *S) { … }

/// Return true if the chain increment is profitable to expand into a loop
/// invariant value, which may require its own register. A profitable chain
/// increment will be an offset relative to the same base. We allow such offsets
/// to potentially be used as chain increment as long as it's not obviously
/// expensive to expand using real instructions.
bool IVChain::isProfitableIncrement(const SCEV *OperExpr, const SCEV *IncExpr, ScalarEvolution &SE) { … }

/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First prohibit
/// any IV users that keep the IV live across increments (the Users set should
/// be empty). Next count the number and type of increments in the chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if the
/// increments can be computed in fewer registers when chained.
///
/// TODO: Consider IVInc free if it's already used in other chains.
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction *> &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI) { … }

/// Add this IV user to an existing chain or make it the head of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, SmallVectorImpl<ChainUsers> &ChainUsersVec) { … }
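// Illustrative sketch of an IV chain (names are hypothetical): three loads
//
//   %a0 = load i32, ptr %p
//   %p1 = getelementptr i8, ptr %p, i64 16
//   %a1 = load i32, ptr %p1
//   %p2 = getelementptr i8, ptr %p, i64 32
//   %a2 = load i32, ptr %p2
//
// can be chained so that each address is computed from the previous one by
// adding a constant 16, letting targets with post-increment (or cheap
// reg+imm) addressing keep a single pointer live instead of materializing
// every offset from the original base.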
/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
/// this. However, such targets should probably disable LSR altogether.
///
/// The job of LSR is to make a reasonable choice of induction variables across
/// the loop. Subsequent passes can easily "unchain" computation exposing more
/// ILP *within the loop* if the target wants it.
///
/// Finding the best IV chain is potentially a scheduling problem. Since LSR
/// will not reorder memory operations, it will recognize this as a chain, but
/// will generate redundant IV increments. Ideally this would be corrected later
/// by a smart scheduler:
///        = A[i]
///        = A[i+x]
/// A[i]   =
/// A[i+x] =
///
/// TODO: Walk the entire domtree within this loop, not just the path to the
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() { … }

void LSRInstance::FinalizeChain(IVChain &Chain) { … }

/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI) { … }

/// Generate an add or subtract for each IVInc in a chain to materialize the IV
/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { … }

void LSRInstance::CollectFixupsAndInitialFormulae() { … }

/// Insert a formula for the given expression into the given use, separating out
/// loop-variant portions from loop-invariant and loop-computable portions.
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { … }

/// Insert a simple single-register formula for the given expression into the
/// given use.
void LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { … }

/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { … }

/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { … }

/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void LSRInstance::CollectLoopInvariantFixupsAndFormulae() { … }

/// Split S into subexpressions which can be pulled out into separate
/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth = 0) { … }

/// Return true if the SCEV represents a value that may end up as a
/// post-increment operation.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE) { … }

/// Helper function for LSRInstance::GenerateReassociations.
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, const Formula &Base, unsigned Depth, size_t Idx, bool IsScaledReg) { … }

/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth) { … }

/// Generate a formula consisting of all of the loop-dominating registers added
/// into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { … }

/// Helper function for LSRInstance::GenerateSymbolicOffsets.
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, const Formula &Base, size_t Idx, bool IsScaledReg) { … }

/// Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { … }

/// Helper function for LSRInstance::GenerateConstantOffsets.
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) { … }

/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { … }

/// For ICmpZero, check to see if we can scale up the comparison. For example, x
/// == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base) { … }

/// Generate stride factor reuse formulae by making use of scaled-offset address
/// modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { … }

/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
/// perform the extension/truncate and normalize again, as the normalized form
/// can result in folds that are not valid in the post-inc use contexts. The
/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
static const SCEV *
getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE) { … }

/// Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { … }

namespace {

/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
/// modifications so that the search phase doesn't have to worry about the data
/// structures moving underneath it.
struct WorkItem { … };

} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
}

LLVM_DUMP_METHOD void WorkItem::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Look for registers which are a constant distance apart and try to form reuse
/// opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() { … }

/// Generate formulae for each use.
void LSRInstance::GenerateAllReuseFormulae() { … }

/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() { … }

/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prunes
/// the search space, but the pruning isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const { … }

/// When one formula uses a superset of the registers of another formula, it
/// won't help reduce register pressure (though it may not necessarily hurt
/// register pressure); remove it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { … }

/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { … }
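// Illustrative sketch (hypothetical IR) of the pattern the "collapsing
// unrolled code" narrowing above targets: after unrolling, a loop may address
//
//   %q0 = getelementptr i32, ptr %a, i64 %i
//   %q1 = getelementptr i32, ptr %a, i64 %i1   ; %i1 = %i + 1
//   %q2 = getelementptr i32, ptr %a, i64 %i2   ; %i2 = %i + 2
//   %q3 = getelementptr i32, ptr %a, i64 %i3   ; %i3 = %i + 3
//
// Rather than keeping distinct registers for %i, %i+1, %i+2, and %i+3, the
// search space is narrowed toward formulae that reuse a single register plus
// small constant offsets folded into the addressing mode.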
/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ … }

/// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
/// pick the best one and delete the others.
/// This narrowing heuristic is to keep as many formulae with different
/// Scale and ScaledReg pairs as possible while narrowing the search space.
/// The benefit is that it is more likely to find a better solution
/// from a formulae set with more Scale and ScaledReg variations than
/// a formulae set with the same Scale and ScaledReg. The picking winner
/// reg heuristic will often keep the formulae with the same Scale and
/// ScaledReg and filter others, and we want to avoid that if possible.
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { … }

/// If we are over the complexity limit, filter out any post-inc preferring
/// variables to only post-inc values.
void LSRInstance::NarrowSearchSpaceByFilterPostInc() { … }

/// This function deletes formulas with a high expected number of registers.
/// Assuming we don't know which formula will be selected (all clearly
/// inefficient ones have already been deleted), compute for each register
/// the probability of it not being selected.
/// For example,
/// Use1:
///  reg(a) + reg({0,+,1})
///  reg(a) + reg({-1,+,1}) + 1
///  reg({a,+,1})
/// Use2:
///  reg(b) + reg({0,+,1})
///  reg(b) + reg({-1,+,1}) + 1
///  reg({b,+,1})
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1})
///  reg(c) + reg({b,+,1})
///
/// Probability of not selecting
///                 Use1   Use2    Use3
/// reg(a)         (1/3) *   1   *   1
/// reg(b)           1   * (1/3) * (1/2)
/// reg({0,+,1})   (2/3) * (2/3) * (1/2)
/// reg({-1,+,1})  (2/3) * (2/3) *   1
/// reg({a,+,1})   (2/3) *   1   *   1
/// reg({b,+,1})     1   * (2/3) * (2/3)
/// reg(c)           1   *   1   *   0
///
/// Now compute the expected number of registers for each formula:
/// Note that for each use we exclude the probability of not selecting for that
/// use. For example, for Use1 the probability for reg(a) would be just 1 * 1
/// (excluding the probability 1/3 of not selecting for Use1).
/// Use1:
///  reg(a) + reg({0,+,1})           1 + 1/3       -- to be deleted
///  reg(a) + reg({-1,+,1}) + 1      1 + 4/9       -- to be deleted
///  reg({a,+,1})                    1
/// Use2:
///  reg(b) + reg({0,+,1})           1/2 + 1/3     -- to be deleted
///  reg(b) + reg({-1,+,1}) + 1      1/2 + 2/3     -- to be deleted
///  reg({b,+,1})                    2/3
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1})  1 + 1/3 + 4/9 -- to be deleted
///  reg(c) + reg({b,+,1})           1 + 2/3
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() { … }

// Check if Best and Reg are SCEVs separated by a constant amount C, and if so,
// whether the addressing offset +C would be legal where the negative offset -C
// is not.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType) { … }

/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { … }

/// If there are an extraordinary number of formulae to choose from, use some
/// rough heuristics to prune down the number of formulae. This keeps the main
/// solver from taking an extraordinary amount of time in some worst-case
/// scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() { … }
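// Rough worked example of the search-space growth the heuristics above guard
// against: the solver picks one formula per use, so the worst case is the
// product of the per-use formula counts. With, say, 10 uses of 8 formulae
// each, that is 8^10 (over a billion) combinations, while the default
// -lsr-complexity-limit is 65535. EstimateSearchSpaceComplexity() estimates
// this product, and the NarrowSearchSpace* routines above prune formulae
// until it falls back under the limit. The 10x8 figure is illustrative only,
// not a measurement.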
/// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, Cost &SolutionCost, SmallVectorImpl<const Formula *> &Workspace, const Cost &CurCost, const SmallPtrSet<const SCEV *, 16> &CurRegs, DenseSet<const SCEV *> &VisitedRegs) const { … }

/// Choose one formula from each use. Return the results in the given Solution
/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { … }

/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far
/// as we can go while still being dominated by the input positions. This helps
/// canonicalize the insert position, which encourages sharing.
BasicBlock::iterator LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl<Instruction *> &Inputs) const { … }

/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const { … }

/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding").
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { … }

/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { … }

/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding"), and update the UserInst to reference the newly
/// expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { … }

// Try to hoist the IVInc to the loop header if all IVInc users are in
// the loop header. This helps the backend generate post-indexed loads/stores
// when the latch block is different from the loop header block.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L) { … }
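// Illustrative sketch (hypothetical block and value names) of why
// RewriteForPHI above may expand in several places: for a loop-closed value
// such as
//
//   exit:
//     %lcssa = phi i64 [ %v, %latch ], [ %v2, %other.exit ]
//
// the PHI's operands are effectively used at the ends of %latch and
// %other.exit, so a strength-reduced replacement for %v and %v2 has to be
// materialized separately in each predecessor block.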
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) { … }

LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) : … { … }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty())
    return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      dbgs() << " ";
      LF.print(OS);
      OS << '\n';
    }
}

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    dbgs() << " ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << " ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}

LLVM_DUMP_METHOD void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

class LoopStrengthReduce : public LoopPass { … };

} // end anonymous namespace

LoopStrengthReduce::LoopStrengthReduce() : … { … }

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { … }

namespace {

/// Enables more convenient iteration over a DWARF expression vector.
static iterator_range<llvm::DIExpression::expr_op_iterator>
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) { … }

struct SCEVDbgValueBuilder { … };

/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
/// and DIExpression.
struct DVIRecoveryRec { … };

} // namespace

/// Returns the total number of DW_OP_llvm_arg operands in the expression.
/// This helps in determining if a DIArglist is necessary or can be omitted from
/// the dbg.value.
static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) { … }

/// Overwrites DVI with the location and Ops as the DIExpression. This will
/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
/// because a DIArglist is not created for the first argument of the dbg.value.
template <typename T>
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl<uint64_t> &Ops) { … }

/// Overwrite DVI with locations placed into a DIArglist.
template <typename T>
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl<Value *> &Locations, SmallVectorImpl<uint64_t> &Ops) { … }

/// Write the new expression and new location ops for the dbg.value. If
/// possible, reduce the size of the dbg.value intrinsic by omitting the
/// DIArglist. This can be omitted if:
/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg.
/// 2. The DW_OP_llvm_arg is the first operand in the expression.
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl<Value *> &NewLocationOps, SmallVectorImpl<uint64_t> &NewExpr) { … }
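// Illustrative sketch (schematic, not exact IR or metadata) of the rewrite
// the salvaging code below performs: a dbg.value that referred to a now-dead
// IV %i can be recovered from the surviving IV %iv when SCEV knows, for
// example, that %i == %iv * 4 + 8. The location is changed to %iv and the
// variable's DIExpression becomes roughly
//
//   DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_mul,
//                DW_OP_consts, 8, DW_OP_plus, DW_OP_stack_value)
//
// The SCEVDbgValueBuilder translates the pre-LSR SCEV into such a DWARF
// expression, bounded by the MaxSCEVSalvageExpressionSize limit above.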
/// Cached location ops may be erased during LSR, in which case a poison is
/// required when restoring from the cache. The type of that location is no
/// longer available, so just use int8. The poison will be replaced by one or
/// more locations later when a SCEVDbgValueBuilder selects alternative
/// locations to use for the salvage.
static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) { … }

/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) { … }

static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr) { … }

/// Obtain an expression for the iteration count, then attempt to salvage the
/// dbg.value intrinsics.
static void DbgRewriteSalvageableDVIs(
    llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) { … }

/// Identify and cache salvageable DVI locations and expressions along with the
/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and salvaging.
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) { … }

/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
/// any PHI from the loop header is usable, but may have less chance of
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR) { … }

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA) { … }

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { … }

PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { … }

char LoopStrengthReduce::ID = …;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { … }