//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements some loop unrolling utilities for loops with run-time // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time // trip counts. // // The functions in this file are used to generate extra code when the // run-time trip count modulo the unroll factor is not 0. When this is the // case, we need to generate code to execute these 'left over' iterations. // // The current strategy generates an if-then-else sequence prior to the // unrolled loop to execute the 'left over' iterations before or after the // unrolled loop. // //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include <algorithm> usingnamespacellvm; #define DEBUG_TYPE … STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); static cl::opt<bool> UnrollRuntimeMultiExit( "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated")); static cl::opt<bool> UnrollRuntimeOtherExitPredictable( "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable")); // Probability that the loop trip count is so small that after the prolog // we do not enter the unrolled loop at all. // It is unlikely that the loop trip count is smaller than the unroll factor; // other than that, the choice of constant is not tuned yet. static const uint32_t UnrolledLoopHeaderWeights[] = …; // Probability that the loop trip count is so small that we skip the unrolled // loop completely and immediately enter the epilogue loop. // It is unlikely that the loop trip count is smaller than the unroll factor; // other than that, the choice of constant is not tuned yet. static const uint32_t EpilogHeaderWeights[] = …; /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE) { … } /// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Update PHI nodes at the unrolling loop exit and epilog loop exit /// - Create PHI nodes at the unrolling loop exit to combine /// values that exit the unrolling loop code and jump around it. /// - Update PHI operands in the epilog loop by the new PHI nodes /// - Branch around the epilog loop if extra iters (ModVal) is zero. /// static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, unsigned Count) { … } /// Create a clone of the blocks in a loop and connect them together. A new /// loop will be created including all cloned blocks, and the iterator of the /// new loop switched to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// InsertTop should be new preheader, InsertBot new loop exit. /// Returns the new cloned loop that is created. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, unsigned Count) { … } /// Returns true if we can profitably unroll the multi-exit loop L. Currently, /// we return true only if UnrollRuntimeMultiExit is set to true. static bool canProfitablyUnrollMultiExitLoop( Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder) { … } /// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain /// accounting for the possibility of unsigned overflow in the 2s complement /// domain. Preconditions: /// 1) TripCount = BECount + 1 (allowing overflow) /// 2) Log2(Count) <= BitWidth(BECount) static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount, Value *TripCount, unsigned Count) { … } /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// /// This method assumes that the loop unroll factor is total number /// of loop bodies in the loop after unrolling. (Some folks refer /// to the unroll factor as the number of *extra* copies added). /// We assume also that the loop unroll factor is a power-of-two. So, after /// unrolling the loop, the number of loop bodies executed is 2, /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for /// the switch instruction is generated. /// /// ***Prolog case*** /// extraiters = tripcount % loopfactor /// if (extraiters == 0) jump Loop: /// else jump Prol: /// Prol: LoopBody; /// extraiters -= 1 // Omitted if unroll factor is 2. /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. /// if (tripcount < loopfactor) jump End: /// Loop: /// ... /// End: /// /// ***Epilog case*** /// extraiters = tripcount % loopfactor /// if (tripcount < loopfactor) jump LoopExit: /// unroll_iters = tripcount - extraiters /// Loop: LoopBody; (executes unroll_iter times); /// unroll_iter -= 1 /// if (unroll_iter != 0) jump Loop: /// LoopExit: /// if (extraiters == 0) jump EpilExit: /// Epil: LoopBody; (executes extraiters times) /// extraiters -= 1 // Omitted if unroll factor is 2. /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. /// EpilExit: bool llvm::UnrollRuntimeLoopRemainder( Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) { … }