LoopFlatten.cpp | Explore in Territory

//===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass flattens pairs nested loops into a single loop.
//
// The intention is to optimise loop nests like this, which together access an
// array linearly:
//
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j)
//       f(A[i*M+j]);
//
// into one loop:
//
//   for (int i = 0; i < (N*M); ++i)
//     f(A[i]);
//
// It can also flatten loops where the induction variables are not used in the
// loop. This is only worth doing if the induction variables are only used in an
// expression like i*M+j. If they had any other uses, we would have to insert a
// div/mod to reconstruct the original values, so this wouldn't be profitable.
//
// We also need to prove that N*M will not overflow. The preferred solution is
// to widen the IV, which avoids overflow checks, so that is tried first. If
// the IV cannot be widened, then we try to determine that this new tripcount
// expression won't overflow.
//
// Q: Does LoopFlatten use SCEV?
// Short answer: Yes and no.
//
// Long answer:
// For this transformation to be valid, we require all uses of the induction
// variables to be linear expressions of the form i*M+j. The different Loop
// APIs are used to get some loop components like the induction variable,
// compare statement, etc. In addition, we do some pattern matching to find the
// linear expressions and other loop components like the loop increment. The
// latter are examples of expressions that do use the induction variable, but
// are safe to ignore when we check all uses to be of the form i*M+j. We keep
// track of all of this in bookkeeping struct FlattenInfo.
// We assume the loops to be canonical, i.e. starting at 0 and increment with
// 1. This makes RHS of the compare the loop tripcount (with the right
// predicate). We use SCEV to then sanity check that this tripcount matches
// with the tripcount as computed by SCEV.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopFlatten.h"

#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <optional>

usingnamespacellvm;
usingnamespacellvm::PatternMatch;

#define DEBUG_TYPE …

STATISTIC(NumFlattened, "Number of loops flattened");

static cl::opt<unsigned> RepeatedInstructionThreshold(
    "loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
    cl::desc("Limit on the cost of instructions that can be repeated due to "
             "loop flattening"));

static cl::opt<bool>
    AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden,
                     cl::init(false),
                     cl::desc("Assume that the product of the two iteration "
                              "trip counts will never overflow"));

static cl::opt<bool>
    WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true),
            cl::desc("Widen the loop induction variables, if possible, so "
                     "overflow checks won't reject flattening"));

static cl::opt<bool>
    VersionLoops("loop-flatten-version-loops", cl::Hidden, cl::init(true),
                 cl::desc("Version loops if flattened loop could overflow"));

namespace {
// We require all uses of both induction variables to match this pattern:
//
//   (OuterPHI * InnerTripCount) + InnerPHI
//
// I.e., it needs to be a linear expression of the induction variables and the
// inner loop trip count. We keep track of all different expressions on which
// checks will be performed in this bookkeeping struct.
//
struct FlattenInfo { … };
} // namespace

static bool
setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
                  SmallPtrSetImpl<Instruction *> &IterationInstructions) { … }

// Given the RHS of the loop latch compare instruction, verify with SCEV
// that this is indeed the loop tripcount.
// TODO: This used to be a straightforward check but has grown to be quite
// complicated now. It is therefore worth revisiting what the additional
// benefits are of this (compared to relying on canonical loops and pattern
// matching).
static bool verifyTripCount(Value *RHS, Loop *L,
     SmallPtrSetImpl<Instruction *> &IterationInstructions,
    PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
    BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) { … }

// Finds the induction variable, increment and trip count for a simple loop that
// we can flatten.
static bool findLoopComponents(
    Loop *L, SmallPtrSetImpl<Instruction *> &IterationInstructions,
    PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
    BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) { … }

static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) { … }

static bool
checkOuterLoopInsts(FlattenInfo &FI,
                    SmallPtrSetImpl<Instruction *> &IterationInstructions,
                    const TargetTransformInfo *TTI) { … }



// We require all uses of both induction variables to match this pattern:
//
//   (OuterPHI * InnerTripCount) + InnerPHI
//
// Any uses of the induction variables not matching that pattern would
// require a div/mod to reconstruct in the flattened loop, so the
// transformation wouldn't be profitable.
static bool checkIVUsers(FlattenInfo &FI) { … }

// Return an OverflowResult dependant on if overflow of the multiplication of
// InnerTripCount and OuterTripCount can be assumed not to happen.
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
                                    AssumptionCache *AC) { … }

static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                               ScalarEvolution *SE, AssumptionCache *AC,
                               const TargetTransformInfo *TTI) { … }

static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                              ScalarEvolution *SE, AssumptionCache *AC,
                              const TargetTransformInfo *TTI, LPMUpdater *U,
                              MemorySSAUpdater *MSSAU) { … }

static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                       ScalarEvolution *SE, AssumptionCache *AC,
                       const TargetTransformInfo *TTI) { … }

static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
                            ScalarEvolution *SE, AssumptionCache *AC,
                            const TargetTransformInfo *TTI, LPMUpdater *U,
                            MemorySSAUpdater *MSSAU,
                            const LoopAccessInfo &LAI) { … }

PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
                                       LoopStandardAnalysisResults &AR,
                                       LPMUpdater &U) { … }
llvm/llvm/lib/Transforms/Scalar/LoopFlatten.cpp