//===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass does the inverse transformation of what LICM does. // It traverses all of the instructions in the loop's preheader and sinks // them to the loop body where frequency is lower than the loop's preheader. // This pass is a reverse-transformation of LICM. It differs from the Sink // pass in the following ways: // // * It only handles sinking of instructions from the loop's preheader to the // loop's body // * It uses alias set tracker to get more accurate alias info // * It uses block frequency info to find the optimal sinking locations // // Overall algorithm: // // For I in Preheader: // InsertBBs = BBs that uses I // For BB in sorted(LoopBBs): // DomBBs = BBs in InsertBBs that are dominated by BB // if freq(DomBBs) > freq(BB) // InsertBBs = UseBBs - DomBBs + BB // For BB in InsertBBs: // Insert I at BB's beginning // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopSink.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" usingnamespacellvm; #define DEBUG_TYPE … STATISTIC(NumLoopSunk, "Number of instructions sunk into loop"); STATISTIC(NumLoopSunkCloned, "Number of cloned instructions sunk into loop"); static cl::opt<unsigned> SinkFrequencyPercentThreshold( "sink-freq-percent-threshold", cl::Hidden, cl::init(90), cl::desc("Do not sink instructions that require cloning unless they " "execute less than this percent of the time.")); static cl::opt<unsigned> MaxNumberOfUseBBsForSinking( "max-uses-for-sinking", cl::Hidden, cl::init(30), cl::desc("Do not sink instructions that have too many uses.")); /// Return adjusted total frequency of \p BBs. /// /// * If there is only one BB, sinking instruction will not introduce code /// size increase. Thus there is no need to adjust the frequency. /// * If there are more than one BB, sinking would lead to code size increase. /// In this case, we add some "tax" to the total frequency to make it harder /// to sink. E.g. /// Freq(Preheader) = 100 /// Freq(BBs) = sum(50, 49) = 99 /// Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheade to /// BBs as the difference is too small to justify the code size increase. /// To model this, The adjusted Freq(BBs) will be: /// AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold% static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs, BlockFrequencyInfo &BFI) { … } /// Return a set of basic blocks to insert sinked instructions. /// /// The returned set of basic blocks (BBsToSinkInto) should satisfy: /// /// * Inside the loop \p L /// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto /// that domintates the UseBB /// * Has minimum total frequency that is no greater than preheader frequency /// /// The purpose of the function is to find the optimal sinking points to /// minimize execution cost, which is defined as "sum of frequency of /// BBsToSinkInto". /// As a result, the returned BBsToSinkInto needs to have minimum total /// frequency. /// Additionally, if the total frequency of BBsToSinkInto exceeds preheader /// frequency, the optimal solution is not sinking (return empty set). /// /// \p ColdLoopBBs is used to help find the optimal sinking locations. /// It stores a list of BBs that is: /// /// * Inside the loop \p L /// * Has a frequency no larger than the loop's preheader /// * Sorted by BB frequency /// /// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()). /// To avoid expensive computation, we cap the maximum UseBBs.size() in its /// caller. static SmallPtrSet<BasicBlock *, 2> findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, DominatorTree &DT, BlockFrequencyInfo &BFI) { … } // Sinks \p I from the loop \p L's preheader to its uses. Returns true if // sinking is successful. // \p LoopBlockNumber is used to sort the insertion blocks to ensure // determinism. static bool sinkInstruction( Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) { … } /// Sinks instructions from loop's preheader to the loop body if the /// sum frequency of inserted copy is smaller than preheader's frequency. static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSA &MSSA, ScalarEvolution *SE) { … } PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { … }