llvm/llvm/lib/Transforms/Vectorize/VPlan.h

//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue.
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/InstructionCost.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;

struct VPCostContext;

namespace Intrinsic {
ID;
}

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);

/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
///       we always assume predicated blocks have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() {}

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {};

VPlanPtr;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {};

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {};

/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated phi node. The incoming block from VPlan is
/// determined by where the VPValue is defined: if it is defined by a recipe
/// outside a region, its parent block is used, otherwise the middle block is
/// used.
class VPLiveOut : public VPUser {};

/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDef instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)

/// VPSingleDef is a base class for recipes for modeling a sequence of one or
/// more output IR that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {};

/// Class to record LLVM IR flag for a recipe along with it.
class VPRecipeWithIRFlags : public VPSingleDefRecipe {};

/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
template <unsigned PartOpIdx> class VPUnrollPartAccessor {};

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags,
                      public VPUnrollPartAccessor<1> {};

/// A recipe to wrap on original IR instruction not to be modified during
/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed,
/// and it is used to add a new incoming value for the single predecessor VPBB.
/// Expect PHIs, VPIRInstructions cannot have any operands.
class VPIRInstruction : public VPRecipeBase {};

/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
/// vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {};

/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {};

/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {};

/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {};

/// A recipe for widening Call instructions using library calls.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {};

/// A recipe representing a sequence of load -> update -> store as part of
/// a histogram operation. This means there may be aliasing between vector
/// lanes, which is handled by the llvm.experimental.vector.histogram family
/// of intrinsics. The only update operations currently supported are
/// 'add' and 'sub' where the other term is loop-invariant.
class VPHistogramRecipe : public VPRecipeBase {};

/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPSingleDefRecipe {};

/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {};

/// A recipe to compute the pointers for widened memory accesses of IndexTy for
/// all parts. If IsReverse is true, compute pointers for accessing the input in
/// reverse order per part.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
                              public VPUnrollPartAccessor<1> {};

/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///    value of an IV with different start and step values. Produces a single
///    scalar value per iteration
///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///    canonical or derived induction.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar values
///    per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPSingleDefRecipe {};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {};

class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
                                      public VPUnrollPartAccessor<3> {};

/// A recipe for handling phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPSingleDefRecipe {};

/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {};

/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
                             public VPUnrollPartAccessor<2> {};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPSingleDefRecipe {};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase {};

/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPSingleDefRecipe {};

/// A recipe to represent inloop reduction operations with vector-predication
/// intrinsics, performing a reduction on a vector operand with the explicit
/// vector length (EVL) into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
class VPReductionEVLRecipe : public VPReductionRecipe {};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeWithIRFlags {};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPSingleDefRecipe {};

/// A common base class for widening memory operations. An optional mask can be
/// provided as the last operand.
class VPWidenMemoryRecipe : public VPRecipeBase {};

/// A recipe for widening load operations, using the address to load from and an
/// optional mask.
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {};

/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {};

/// A recipe for widening store operations, using the stored value, the address
/// to store to and an optional mask.
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {};

/// A recipe for widening store operations with vector-predication intrinsics,
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask.
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {};

/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {};

/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {};

/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {};

/// A recipe for generating the phi node for the current index of elements,
/// adjusted in accordance with EVL value. It starts at the start value of the
/// canonical induction and gets incremented by EVL in each iteration of the
/// vector loop.
class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {};

/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
                                 public VPUnrollPartAccessor<1> {};

/// A recipe for converting the input value \p IV value to the corresponding
/// value of an IV with different start and step values, using Start + IV *
/// Step.
class VPDerivedIVRecipe : public VPSingleDefRecipe {};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
                              public VPUnrollPartAccessor<2> {};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
class VPBasicBlock : public VPBlockBase {};

/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
/// preheader block.
class VPIRBasicBlock : public VPBasicBlock {};

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {};

/// VPlan models a candidate for vectorization, encoding various decisions take
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
/// VPBasicBlock.
class VPlan {};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
  /// its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};

struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  void print(raw_ostream &O) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  I.print(OS);
  return OS;
}

inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif

//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//

/// Class that provides utilities for VPBlockBases in VPlan.
class VPBlockUtils {};

class VPInterleavedAccessInfo {};

/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {};
} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H