//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Transform each threading path to effectively jump thread the DFA. For // example, the CFG below could be transformed as follows, where the cloned // blocks unconditionally branch to the next correct case based on what is // identified in the analysis. // // sw.bb sw.bb // / | \ / | \ // case1 case2 case3 case1 case2 case3 // \ | / | | | // determinator det.2 det.3 det.1 // br sw.bb / | \ // sw.bb.2 sw.bb.3 sw.bb.1 // br case2 br case3 br case1§ // // Definitions and Terminology: // // * Threading path: // a list of basic blocks, the exit state, and the block that determines // the next state, for which the following notation will be used: // < path of BBs that form a cycle > [ state, determinator ] // // * Predictable switch: // The switch variable is always a known constant so that all conditional // jumps based on switch variable can be converted to unconditional jump. // // * Determinator: // The basic block that determines the next state of the DFA. // // Representing the optimization in C-like pseudocode: the code pattern on the // left could functionally be transformed to the right pattern if the switch // condition is predictable. // // X = A goto A // for (...) A: // switch (X) ... // case A goto B // X = B B: // case B ... // X = C goto C // // The pass first checks that switch variable X is decided by the control flow // path taken in the loop; for example, in case B, the next value of X is // decided to be C. It then enumerates through all paths in the loop and labels // the basic blocks where the next state is decided. // // Using this information it creates new paths that unconditionally branch to // the next case. This involves cloning code, so it only gets triggered if the // amount of code duplicated is below a threshold. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/DFAJumpThreading.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <deque> #ifdef EXPENSIVE_CHECKS #include "llvm/IR/Verifier.h" #endif usingnamespacellvm; #define DEBUG_TYPE … STATISTIC(NumTransforms, "Number of transformations done"); STATISTIC(NumCloned, "Number of blocks cloned"); STATISTIC(NumPaths, "Number of individual paths threaded"); static cl::opt<bool> ClViewCfgBefore("dfa-jump-view-cfg-before", cl::desc("View the CFG before DFA Jump Threading"), cl::Hidden, cl::init(false)); static cl::opt<bool> EarlyExitHeuristic( "dfa-early-exit-heuristic", cl::desc("Exit early if an unpredictable value come from the same loop"), cl::Hidden, cl::init(true)); static cl::opt<unsigned> MaxPathLength( "dfa-max-path-length", cl::desc("Max number of blocks searched to find a threading path"), cl::Hidden, cl::init(20)); static cl::opt<unsigned> MaxNumVisitiedPaths( "dfa-max-num-visited-paths", cl::desc( "Max number of blocks visited while enumerating paths around a switch"), cl::Hidden, cl::init(2000)); static cl::opt<unsigned> MaxNumPaths("dfa-max-num-paths", cl::desc("Max number of paths enumerated around a switch"), cl::Hidden, cl::init(200)); static cl::opt<unsigned> CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50)); namespace { class SelectInstToUnfold { … }; void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, std::vector<SelectInstToUnfold> *NewSIsToUnfold, std::vector<BasicBlock *> *NewBBs); class DFAJumpThreading { … }; } // end anonymous namespace namespace { /// Unfold the select instruction held in \p SIToUnfold by replacing it with /// control flow. /// /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly /// created basic blocks into \p NewBBs. /// /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, std::vector<SelectInstToUnfold> *NewSIsToUnfold, std::vector<BasicBlock *> *NewBBs) { … } struct ClonedBlock { … }; PathType; PathsType; VisitedBlocks; CloneList; // This data structure keeps track of all blocks that have been cloned. If two // different ThreadingPaths clone the same block for a certain state it should // be reused, and it can be looked up in this map. DuplicateBlockMap; // This map keeps track of all the new definitions for an instruction. This // information is needed when restoring SSA form after cloning blocks. DefMap; inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { … } /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with /// unconditional ones. A threading path includes a list of basic blocks, the /// exit state, and the block that determines the next state. struct ThreadingPath { … }; #ifndef NDEBUG inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { TPath.print(OS); return OS; } #endif struct MainSwitch { … }; struct AllSwitchPaths { … }; struct TransformDFA { … }; bool DFAJumpThreading::run(Function &F) { … } } // end anonymous namespace /// Integrate with the new Pass Manager PreservedAnalyses DFAJumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { … }