//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <variant>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum number of iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// This will be roughly translated to:
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///       A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

static cl::opt<unsigned> MaxForkedSCEVDepth(
    "max-forked-scev-depth", cl::Hidden,
    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
    cl::init(5));

static cl::opt<bool> SpeculateUnitStride(
    "laa-speculate-unit-stride", cl::Hidden,
    cl::desc("Speculate that non-constant strides are unit in LAA"),
    cl::init(true));

static cl::opt<bool, true> HoistRuntimeChecks(
    "hoist-runtime-checks", cl::Hidden,
    cl::desc(
        "Hoist inner loop runtime memory checks to outer loop if possible"),
    cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true));
bool VectorizerParams::HoistRuntimeChecks;

bool VectorizerParams::isInterleaveForced() {}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const DenseMap<Value *, const SCEV *> &PtrToStride,
                                            Value *Ptr) {}

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, const RuntimePointerChecking &RtCheck)
    :{}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on the N-th
/// loop iteration. Then B is calculated as:
///   B = A + Step * N,
/// where the Step value may be positive or negative.
/// N is the calculated back-edge taken count:
///   N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
///   Start = UMIN(A, B); End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
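///
/// As a worked example (illustrative values, not taken from this file): let
/// A = 0x100, Step = 8, N = 7 and SizeOfElt = 8. Then B = 0x100 + 8 * 7 =
/// 0x138, so Start = 0x100 and End = 0x138 + 8 = 0x140. A second pointer
/// whose interval starts at 0x140 is disjoint from this one, since
/// P2.Start >= P1.End.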
static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
    PredicatedScalarEvolution &PSE,
    DenseMap<std::pair<const SCEV *, Type *>,
             std::pair<const SCEV *, const SCEV *>> &PointerBounds) {}

/// Calculate Start and End points of memory access using
/// getStartAndEndForAccess.
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                    Type *AccessTy, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    PredicatedScalarEvolution &PSE,
                                    bool NeedsFreeze) {}

bool RuntimePointerChecking::tryToCreateDiffCheck(
    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {}

SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {}

bool RuntimeCheckingPtrGroup::addPointer(
    unsigned Index, const RuntimePointerChecking &RtCheck) {}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         bool NeedsFreeze,
                                         ScalarEvolution &SE) {}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
                                const SCEV *PtrScev, Loop *L, bool Assume) {}

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr, Type *AccessTy,
                     Loop *L) {}

static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {}

// Walk back through the IR for a pointer, looking for a select like the
// following:
//
//  %offset = select i1 %cmp, i64 %a, i64 %b
//  %addr = getelementptr double, double* %base, i64 %offset
//  %ld = load double, double* %addr, align 8
//
// We won't be able to form a single SCEVAddRecExpr from this since the
// address for each loop iteration depends on %cmp. We could potentially
// produce multiple valid SCEVAddRecExprs, though, and check all of them for
// memory safety/aliasing if needed.
//
// If we encounter some IR we don't yet handle, or something obviously fine
// like a constant, then we just add the SCEV for that term to the list passed
// in by the caller. If we have a node that may potentially yield a valid
// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
// ourselves before adding to the list.
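//
// In the example above, assuming an 8-byte double, the select conceptually
// forks the address into two candidate SCEVs, (%base + 8 * %a) and
// (%base + 8 * %b); if %a and %b are both affine in the loop, each candidate
// may still form a valid SCEVAddRecExpr that can be checked separately for
// safety.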
static void findForkedSCEVs(
    ScalarEvolution *SE, const Loop *L, Value *Ptr,
    SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,
    unsigned Depth) {}

static SmallVector<PointerIntPair<const SCEV *, 1, bool>>
findForkedPointer(PredicatedScalarEvolution &PSE,
                  const DenseMap<Value *, const SCEV *> &StridesMap, Value *Ptr,
                  const Loop *L) {}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access, Type *AccessTy,
                                          const DenseMap<Value *, const SCEV *> &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const DenseMap<Value *, const SCEV *> &StridesMap,
                                     Value *&UncomputablePtr, bool ShouldCheckWrap) {}

void AccessAnalysis::processMemAccesses() {}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {}

/// Check whether the access through \p Ptr has a constant stride.
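/// For example (an illustrative access pattern, not taken from this file):
/// A[3 * i] over i32 elements has a constant stride of 3 in units of the
/// access type, i.e. the address advances by 12 bytes per iteration.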
std::optional<int64_t>
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
                   const Loop *Lp,
                   const DenseMap<Value *, const SCEV *> &StridesMap,
                   bool Assume, bool ShouldCheckWrap) {}

std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
                                         Type *ElemTyB, Value *PtrB,
                                         const DataLayout &DL,
                                         ScalarEvolution &SE, bool StrictCheck,
                                         bool CheckType) {}

bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
                           const DataLayout &DL, ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {}

/// Returns true if the memory operations \p A and \p B are consecutive.
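/// For example (a hedged illustration): an i32 load from %p and another from
/// getelementptr i32, ptr %p, i64 1 are consecutive, since the second address
/// is exactly one 4-byte element past the first.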
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {}

void MemoryDepChecker::addAccess(StoreInst *SI) {}

void MemoryDepChecker::addAccess(LoadInst *LI) {}

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {}

bool MemoryDepChecker::Dependence::isBackward() const {}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {}

bool MemoryDepChecker::Dependence::isForward() const {}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {}

void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {}

/// Given a dependence distance \p Dist between two memory accesses that have
/// strides in the same direction, where \p MaxStride is the absolute value of
/// the larger stride and \p TypeByteSize is their common type size, in a loop
/// whose maximum backedge taken count is \p MaxBTC, check whether it can be
/// proven statically that the dependence distance is larger than the range
/// the accesses travel through the execution of the loop. Return true if so,
/// false otherwise. This is useful, for example, in loops such as the
/// following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
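///
/// As an illustrative instance (values assumed, not from this file): with
/// D = 100 and 4-byte elements, the loads read bytes [0, 400) while the
/// stores write bytes [400, 800). Each access only travels
/// 99 * 1 * 4 = 396 bytes across all iterations, which is smaller than the
/// 400-byte dependence distance, so the two ranges can never overlap.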
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &MaxBTC, const SCEV &Dist,
                                     uint64_t MaxStride,
                                     uint64_t TypeByteSize) {}

/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance, and \p TypeByteSize is the type size
/// in bytes.
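///
/// For example (an illustrative case, following the usual scaled-distance
/// reasoning): with Stride = 4, TypeByteSize = 4 and Distance = 8, the
/// distance in elements is 8 / 4 = 2, which is not a multiple of the stride,
/// so the two access streams interleave without ever touching the same
/// element:
///   for (i = 0; i < 1024; i += 4)
///     A[i + 2] = A[i] + 1;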
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {}

std::variant<MemoryDepChecker::Dependence::DepType,
             MemoryDepChecker::DepDistanceStrideAndSizeInfo>
MemoryDepChecker::getDependenceDistanceStrideAndSize(
    const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
    const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx) {}

bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
                                   const MemAccessInfoList &CheckDeps) {}

SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool IsWrite) const {}

const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep",
    "Unknown",
    "IndirectUnsafe",
    "Forward",
    "ForwardButPreventsForwarding",
    "Backward",
    "BackwardVectorizable",
    "BackwardVectorizableButPreventsForwarding"};

void MemoryDepChecker::Dependence::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<Instruction *> &Instrs) const {}

bool LoopAccessInfo::canAnalyzeLoop() {}

bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
                                 const TargetLibraryInfo *TLI,
                                 DominatorTree *DT) {}

void LoopAccessInfo::emitUnsafeDependenceRemark() {}

bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                           DominatorTree *DT) {}

OptimizationRemarkAnalysis &
LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {}

bool LoopAccessInfo::isInvariant(Value *V) const {}

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
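/// For example (a hedged illustration): in
///   getelementptr [4 x i32], ptr %p, i64 %i, i64 0
/// the trailing zero index adds nothing to the final address, so the operand
/// to check is %i (operand index 1).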
static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {}

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {}

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
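/// For example (an illustrative case): for an access a[i * stride] where
/// stride is loop-invariant but unknown at compile time, the SCEV for stride
/// is returned; versioning the loop on stride == 1 then makes the access
/// provably unit-strided in the fast version of the loop.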
static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {}

void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {}

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetTransformInfo *TTI,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI)
    :{}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {}

const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {}
void LoopAccessInfoManager::clear() {}

bool LoopAccessInfoManager::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {}

LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
                                              FunctionAnalysisManager &FAM) {}

AnalysisKey LoopAccessAnalysis::Key;