//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// This file contains the declarations of the Vectorization Plan base classes: /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual /// VPBlockBase, together implementing a Hierarchical CFG; /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained /// within VPBasicBlocks; /// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that /// also inherit from VPValue. /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned /// instruction; /// 5. The VPlan class holding a candidate for vectorization; /// 6. The VPlanPrinter class providing a way to print a plan in dot format; /// These are documented in docs/VectorizationPlan.rst. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #include "VPlanAnalysis.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/FMF.h" #include "llvm/IR/Operator.h" #include "llvm/Support/InstructionCost.h" #include <algorithm> #include <cassert> #include <cstddef> #include <string> namespace llvm { class BasicBlock; class DominatorTree; class InnerLoopVectorizer; class IRBuilderBase; class LoopInfo; class raw_ostream; class RecurrenceDescriptor; class SCEV; class Type; class VPBasicBlock; class VPRegionBlock; class VPlan; class VPReplicateRecipe; class VPlanSlp; class Value; class LoopVectorizationCostModel; class LoopVersioning; struct VPCostContext; namespace Intrinsic { ID; } /// Returns a calculation for the total number of elements for a given \p VF. /// For fixed width vectors this value is a constant, whereas for scalable /// vectors it is an expression determined at runtime. Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF); /// Return a value for Step multiplied by VF. Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step); /// A helper function that returns the reciprocal of the block probability of /// predicated blocks. If we return X, we are assuming the predicated block /// will execute once for every X iterations of the loop header. /// /// TODO: We should use actual block probability here, if available. Currently, /// we always assume predicated blocks have a 50% chance of executing. inline unsigned getReciprocalPredBlockProb() { … } /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 16) = {1, 2, 4, 8} struct VFRange { … }; VPlanPtr; /// In what follows, the term "input IR" refers to code that is fed into the /// vectorizer whereas the term "output IR" refers to code that is generated by /// the vectorizer. /// VPLane provides a way to access lanes in both fixed width and scalable /// vectors, where for the latter the lane index sometimes needs calculating /// as a runtime expression. class VPLane { … }; /// VPTransformState holds information passed down when "executing" a VPlan, /// needed for generating the output IR. struct VPTransformState { … }; /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. class VPBlockBase { … }; /// A value that is used outside the VPlan. The operand of the user needs to be /// added to the associated phi node. The incoming block from VPlan is /// determined by where the VPValue is defined: if it is defined by a recipe /// outside a region, its parent block is used, otherwise the middle block is /// used. class VPLiveOut : public VPUser { … }; /// Struct to hold various analysis needed for cost computations. struct VPCostContext { … }; /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the VPValues it defines through VPDef /// and is responsible for deleting its defined values. Single-value /// recipes must inherit from VPSingleDef instead of inheriting from both /// VPRecipeBase and VPValue separately. class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, public VPDef, public VPUser { … }; // Helper macro to define common classof implementations for recipes. #define VP_CLASSOF_IMPL(VPDefID) … /// VPSingleDef is a base class for recipes for modeling a sequence of one or /// more output IR that define a single result VPValue. /// Note that VPRecipeBase must be inherited from before VPValue. class VPSingleDefRecipe : public VPRecipeBase, public VPValue { … }; /// Class to record LLVM IR flag for a recipe along with it. class VPRecipeWithIRFlags : public VPSingleDefRecipe { … }; /// Helper to access the operand that contains the unroll part for this recipe /// after unrolling. template <unsigned PartOpIdx> class VPUnrollPartAccessor { … }; /// This is a concrete Recipe that models a single VPlan-level instruction. /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as /// the VPInstruction is also a single def-use vertex. class VPInstruction : public VPRecipeWithIRFlags, public VPUnrollPartAccessor<1> { … }; /// A recipe to wrap on original IR instruction not to be modified during /// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed, /// and it is used to add a new incoming value for the single predecessor VPBB. /// Expect PHIs, VPIRInstructions cannot have any operands. class VPIRInstruction : public VPRecipeBase { … }; /// VPWidenRecipe is a recipe for producing a widened instruction using the /// opcode and operands of the recipe. This recipe covers most of the /// traditional vectorization cases where each recipe transforms into a /// vectorized version of itself. class VPWidenRecipe : public VPRecipeWithIRFlags { … }; /// A recipe for widening operations with vector-predication intrinsics with /// explicit vector length (EVL). class VPWidenEVLRecipe : public VPWidenRecipe { … }; /// VPWidenCastRecipe is a recipe to create vector cast instructions. class VPWidenCastRecipe : public VPRecipeWithIRFlags { … }; /// VPScalarCastRecipe is a recipe to create scalar cast instructions. class VPScalarCastRecipe : public VPSingleDefRecipe { … }; /// A recipe for widening vector intrinsics. class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { … }; /// A recipe for widening Call instructions using library calls. class VPWidenCallRecipe : public VPRecipeWithIRFlags { … }; /// A recipe representing a sequence of load -> update -> store as part of /// a histogram operation. This means there may be aliasing between vector /// lanes, which is handled by the llvm.experimental.vector.histogram family /// of intrinsics. The only update operations currently supported are /// 'add' and 'sub' where the other term is loop-invariant. class VPHistogramRecipe : public VPRecipeBase { … }; /// A recipe for widening select instructions. struct VPWidenSelectRecipe : public VPSingleDefRecipe { … }; /// A recipe for handling GEP instructions. class VPWidenGEPRecipe : public VPRecipeWithIRFlags { … }; /// A recipe to compute the pointers for widened memory accesses of IndexTy for /// all parts. If IsReverse is true, compute pointers for accessing the input in /// reverse order per part. class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPUnrollPartAccessor<1> { … }; /// A pure virtual base class for all recipes modeling header phis, including /// phis for first order recurrences, pointer inductions and reductions. The /// start value is the first operand of the recipe and the incoming value from /// the backedge is the second operand. /// /// Inductions are modeled using the following sub-classes: /// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop, /// starting at a specified value (zero for the main vector loop, the resume /// value for the epilogue vector loop) and stepping by 1. The induction /// controls exiting of the vector loop by comparing against the vector trip /// count. Produces a single scalar PHI for the induction value per /// iteration. /// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and /// floating point inductions with arbitrary start and step values. Produces /// a vector PHI per-part. /// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding /// value of an IV with different start and step values. Produces a single /// scalar value per iteration /// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a /// canonical or derived induction. /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a /// pointer induction. Produces either a vector PHI per-part or scalar values /// per-lane based on the canonical induction. class VPHeaderPHIRecipe : public VPSingleDefRecipe { … }; /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their vector values. class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { … }; class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe, public VPUnrollPartAccessor<3> { … }; /// A recipe for handling phis that are widened in the vector loop. /// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are /// managed in the recipe directly. class VPWidenPHIRecipe : public VPSingleDefRecipe { … }; /// A recipe for handling first-order recurrence phis. The start value is the /// first operand of the recipe and the incoming value from the backedge is the /// second operand. struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { … }; /// A recipe for handling reduction phis. The start value is the first operand /// of the recipe and the incoming value from the backedge is the second /// operand. class VPReductionPHIRecipe : public VPHeaderPHIRecipe, public VPUnrollPartAccessor<2> { … }; /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. class VPBlendRecipe : public VPSingleDefRecipe { … }; /// VPInterleaveRecipe is a recipe for transforming an interleave group of load /// or stores into one wide load/store and shuffles. The first operand of a /// VPInterleave recipe is the address, followed by the stored values, followed /// by an optional mask. class VPInterleaveRecipe : public VPRecipeBase { … }; /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. /// The Operands are {ChainOp, VecOp, [Condition]}. class VPReductionRecipe : public VPSingleDefRecipe { … }; /// A recipe to represent inloop reduction operations with vector-predication /// intrinsics, performing a reduction on a vector operand with the explicit /// vector length (EVL) into a scalar value, and adding the result to a chain. /// The Operands are {ChainOp, VecOp, EVL, [Condition]}. class VPReductionEVLRecipe : public VPReductionRecipe { … }; /// VPReplicateRecipe replicates a given instruction producing multiple scalar /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. class VPReplicateRecipe : public VPRecipeWithIRFlags { … }; /// A recipe for generating conditional branches on the bits of a mask. class VPBranchOnMaskRecipe : public VPRecipeBase { … }; /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when /// control converges back from a Branch-on-Mask. The phi nodes are needed in /// order to merge values that are set under such a branch and feed their uses. /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. class VPPredInstPHIRecipe : public VPSingleDefRecipe { … }; /// A common base class for widening memory operations. An optional mask can be /// provided as the last operand. class VPWidenMemoryRecipe : public VPRecipeBase { … }; /// A recipe for widening load operations, using the address to load from and an /// optional mask. struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue { … }; /// A recipe for widening load operations with vector-predication intrinsics, /// using the address to load from, the explicit vector length and an optional /// mask. struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { … }; /// A recipe for widening store operations, using the stored value, the address /// to store to and an optional mask. struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe { … }; /// A recipe for widening store operations with vector-predication intrinsics, /// using the value to store, the address to store to, the explicit vector /// length and an optional mask. struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { … }; /// Recipe to expand a SCEV expression. class VPExpandSCEVRecipe : public VPSingleDefRecipe { … }; /// Canonical scalar induction phi of the vector loop. Starting at the specified /// start value (either 0 or the resume value when vectorizing the epilogue /// loop). VPWidenCanonicalIVRecipe represents the vector version of the /// canonical induction variable. class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { … }; /// A recipe for generating the active lane mask for the vector loop that is /// used to predicate the vector operations. /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and /// remove VPActiveLaneMaskPHIRecipe. class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { … }; /// A recipe for generating the phi node for the current index of elements, /// adjusted in accordance with EVL value. It starts at the start value of the /// canonical induction and gets incremented by EVL in each iteration of the /// vector loop. class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { … }; /// A Recipe for widening the canonical induction variable of the vector loop. class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe, public VPUnrollPartAccessor<1> { … }; /// A recipe for converting the input value \p IV value to the corresponding /// value of an IV with different start and step values, using Start + IV * /// Step. class VPDerivedIVRecipe : public VPSingleDefRecipe { … }; /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their scalar values. class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, public VPUnrollPartAccessor<2> { … }; /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It /// holds a sequence of zero or more VPRecipe's each representing a sequence of /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes. class VPBasicBlock : public VPBlockBase { … }; /// A special type of VPBasicBlock that wraps an existing IR basic block. /// Recipes of the block get added before the first non-phi instruction in the /// wrapped block. /// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's /// preheader block. class VPIRBasicBlock : public VPBasicBlock { … }; /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. /// A VPRegionBlock may indicate that its contents are to be replicated several /// times. This is designed to support predicated scalarization, in which a /// scalar if-then code structure needs to be generated VF * UF times. Having /// this replication indicator helps to keep a single model for multiple /// candidate VF's. The actual replication takes place only once the desired VF /// and UF have been determined. class VPRegionBlock : public VPBlockBase { … }; /// VPlan models a candidate for vectorization, encoding various decisions take /// to produce efficient output IR, including which branches, basic-blocks and /// output IR instructions to generate, and their cost. VPlan holds a /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry /// VPBasicBlock. class VPlan { … }; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// VPlanPrinter prints a given VPlan to a given output stream. The printing is /// indented and follows the dot format. class VPlanPrinter { raw_ostream &OS; const VPlan &Plan; unsigned Depth = 0; unsigned TabWidth = 2; std::string Indent; unsigned BID = 0; SmallDenseMap<const VPBlockBase *, unsigned> BlockID; VPSlotTracker SlotTracker; /// Handle indentation. void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } /// Print a given \p Block of the Plan. void dumpBlock(const VPBlockBase *Block); /// Print the information related to the CFG edges going out of a given /// \p Block, followed by printing the successor blocks themselves. void dumpEdges(const VPBlockBase *Block); /// Print a given \p BasicBlock, including its VPRecipes, followed by printing /// its successor blocks. void dumpBasicBlock(const VPBasicBlock *BasicBlock); /// Print a given \p Region of the Plan. void dumpRegion(const VPRegionBlock *Region); unsigned getOrCreateBID(const VPBlockBase *Block) { return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++; } Twine getOrCreateName(const VPBlockBase *Block); Twine getUID(const VPBlockBase *Block); /// Print the information related to a CFG edge between two VPBlockBases. void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, const Twine &Label); public: VPlanPrinter(raw_ostream &O, const VPlan &P) : OS(O), Plan(P), SlotTracker(&P) {} LLVM_DUMP_METHOD void dump(); }; struct VPlanIngredient { const Value *V; VPlanIngredient(const Value *V) : V(V) {} void print(raw_ostream &O) const; }; inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { I.print(OS); return OS; } inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) { Plan.print(OS); return OS; } #endif //===----------------------------------------------------------------------===// // VPlan Utilities //===----------------------------------------------------------------------===// /// Class that provides utilities for VPBlockBases in VPlan. class VPBlockUtils { … }; class VPInterleavedAccessInfo { … }; /// Class that maps (parts of) an existing VPlan to trees of combined /// VPInstructions. class VPlanSlp { … }; } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H