LoopVectorizationLegality.cpp | Explore in Territory

//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

usingnamespacellvm;
usingnamespacePatternMatch;

#define LV_NAME …
#define DEBUG_TYPE …

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool>
AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
                       cl::desc("Enable recognition of non-constant strided "
                                "pointer induction variables."));

namespace llvm {
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
} // namespace llvm

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "preferred",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "on",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive.")));

static cl::opt<bool> EnableHistogramVectorization(
    "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
    cl::desc("Enables autovectorization of some loops containing histograms"));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = …;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) { … }

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : … { … }

void LoopVectorizeHints::setAlreadyVectorized() { … }

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { … }

void LoopVectorizeHints::emitRemarkWithHints() const { … }

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { … }

bool LoopVectorizeHints::allowReordering() const { … }

void LoopVectorizeHints::getHintsFromMetadata() { … }

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { … }

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//   1) it has a canonical IV (starting from 0 and with stride 1),
//   2) its latch terminator is a conditional branch and,
//   3) its latch condition is a compare instruction whose operands are the
//      canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and its associated documentation
// together so that we can easily have a picture of the current supported loop
// nests. However, some of the current checks don't depend on \p OuterLp and
// would be redundantly executed for each \p Lp if we invoked this function for
// different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support explicit
// outer loop vectorization. It's also very likely that these checks go away
// before introducing the aforementioned infrastructure. However, if this is not
// the case, we should move the \p OuterLp independent checks to a separate
// function that is only executed once for each \p Lp.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { … }

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { … }

static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { … }

static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { … }

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) { … }

/// Returns true if A and B have same pointer operands or same SCEVs addresses
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) { … }

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const { … }

bool LoopVectorizationLegality::isInvariant(Value *V) const { … }

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity. This
/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
/// uniformity.
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> { … };

} // namespace

bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const { … }

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const { … }

bool LoopVectorizationLegality::canVectorizeOuterLoop() { … }

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) { … }

bool LoopVectorizationLegality::setupOuterLoopInductions() { … }

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { … }

bool LoopVectorizationLegality::canVectorizeInstrs() { … }

/// Find histogram operations that match high-level code in loops:
/// \code
/// buckets[indices[i]]+=step;
/// \endcode
///
/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
/// array the computed histogram. It uses a BinOp to sum all counts, storing
/// them using a loop-variant index Load from the 'indices' input array.
///
/// On successful matches it updates the STATISTIC 'HistogramsDetected',
/// regardless of hardware support. When there is support, it additionally
/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
/// used to update histogram in \p HistogramPtrs.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
                          const PredicatedScalarEvolution &PSE,
                          SmallVectorImpl<HistogramInfo> &Histograms) { … }

bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() { … }

bool LoopVectorizationLegality::canVectorizeMemory() { … }

bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) { … }

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) { … }

bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) { … }

bool LoopVectorizationLegality::isInductionPhi(const Value *V) const { … }

const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const { … }

const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const { … }

bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const { … }

bool LoopVectorizationLegality::isInductionVariable(const Value *V) const { … }

bool LoopVectorizationLegality::isFixedOrderRecurrence(
    const PHINode *Phi) const { … }

bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { … }

bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const { … }

bool LoopVectorizationLegality::canVectorizeWithIfConvert() { … }

// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                    bool UseVPlanNativePath) { … }

bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
    Loop *Lp, bool UseVPlanNativePath) { … }

bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { … }

bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { … }

bool LoopVectorizationLegality::canFoldTailByMasking() const { … }

void LoopVectorizationLegality::prepareToFoldTailByMasking() { … }

} // namespace llvm
llvm/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp