LoopPeel.cpp | Explore in Territory

//===- LoopPeel.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loop Peeling Utilities.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

usingnamespacellvm;
usingnamespacellvm::PatternMatch;

#define DEBUG_TYPE …

STATISTIC(NumPeeled, "Number of loops peeled");

static cl::opt<unsigned> UnrollPeelCount(
    "unroll-peel-count", cl::Hidden,
    cl::desc("Set the unroll peeling count, for testing purposes"));

static cl::opt<bool>
    UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
                       cl::desc("Allows loops to be peeled when the dynamic "
                                "trip count is known to be low."));

static cl::opt<bool>
    UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
                                cl::init(false), cl::Hidden,
                                cl::desc("Allows loop nests to be peeled."));

static cl::opt<unsigned> UnrollPeelMaxCount(
    "unroll-peel-max-count", cl::init(7), cl::Hidden,
    cl::desc("Max average trip count which will cause loop peeling."));

static cl::opt<unsigned> UnrollForcePeelCount(
    "unroll-force-peel-count", cl::init(0), cl::Hidden,
    cl::desc("Force a peel count regardless of profiling information."));

static cl::opt<bool> DisableAdvancedPeeling(
    "disable-advanced-peeling", cl::init(false), cl::Hidden,
    cl::desc(
        "Disable advance peeling. Issues for convergent targets (D134803)."));

static const char *PeeledCountMetaData = …;

// Check whether we are capable of peeling this loop.
bool llvm::canPeel(const Loop *L) { … }

namespace {

// As a loop is peeled, it may be the case that Phi nodes become
// loop-invariant (ie, known because there is only one choice).
// For example, consider the following function:
//   void g(int);
//   void binary() {
//     int x = 0;
//     int y = 0;
//     int a = 0;
//     for(int i = 0; i <100000; ++i) {
//       g(x);
//       x = y;
//       g(a);
//       y = a + 1;
//       a = 5;
//     }
//   }
// Peeling 3 iterations is beneficial because the values for x, y and a
// become known.  The IR for this loop looks something like the following:
//
//   %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
//   %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
//   %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
//   %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
//   ...
//   tail call void @_Z1gi(i32 signext %x)
//   tail call void @_Z1gi(i32 signext %a)
//   %add = add nuw nsw i32 %a, 1
//   %inc = add nuw nsw i32 %i, 1
//   %exitcond = icmp eq i32 %inc, 100000
//   br i1 %exitcond, label %for.cond.cleanup, label %for.body
//
// The arguments for the calls to g will become known after 3 iterations
// of the loop, because the phi nodes values become known after 3 iterations
// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
// The first iteration has g(0), g(0); the second has g(0), g(5); the
// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
// Now consider the phi nodes:
//   %a is a phi with constants so it is determined after iteration 1.
//   %y is a phi based on a constant and %a so it is determined on
//     the iteration after %a is determined, so iteration 2.
//   %x is a phi based on a constant and %y so it is determined on
//     the iteration after %y, so iteration 3.
//   %i is based on itself (and is an induction variable) so it is
//     never determined.
// This means that peeling off 3 iterations will result in being able to
// remove the phi nodes for %a, %y, and %x.  The arguments for the
// corresponding calls to g are determined and the code for computing
// x, y, and a can be removed.
//
// The PhiAnalyzer class calculates how many times a loop should be
// peeled based on the above analysis of the phi nodes in the loop while
// respecting the maximum specified.
class PhiAnalyzer { … };

PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
    : … { … }

// This function calculates the number of iterations after which the value
// becomes an invariant. The pre-calculated values are memorized in a map.
// N.B. This number will be Unknown or <= MaxIterations.
// The function is calculated according to the following definition:
// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
//   F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
//   G(%y) = 0 if %y is a loop invariant
//   G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
//   G(%y) = TODO: if %y is an expression based on phis and loop invariants
//           The example looks like:
//           %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
//           %y = phi(0, 5)
//           %a = %y + 1
//   G(%y) = Unknown otherwise (including phi not in header block)
PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { … }

std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() { … }

} // unnamed namespace

// Try to find any invariant memory reads that will become dereferenceable in
// the remainder loop after peeling. The load must also be used (transitively)
// by an exit condition. Returns the number of iterations to peel off (at the
// moment either 0 or 1).
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
                                                      DominatorTree &DT,
                                                      AssumptionCache *AC) { … }

// Return the number of iterations to peel off that make conditions in the
// body true/false. For example, if we peel 2 iterations off the loop below,
// the condition i < 2 can be evaluated at compile time.
//  for (i = 0; i < n; i++)
//    if (i < 2)
//      ..
//    else
//      ..
//   }
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
                                         ScalarEvolution &SE) { … }

/// This "heuristic" exactly matches implicit behavior which used to exist
/// inside getLoopEstimatedTripCount.  It was added here to keep an
/// improvement inside that API from causing peeling to become more aggressive.
/// This should probably be removed.
static bool violatesLegacyMultiExitLoopCheck(Loop *L) { … }


// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                            TargetTransformInfo::PeelingPreferences &PP,
                            unsigned TripCount, DominatorTree &DT,
                            ScalarEvolution &SE, AssumptionCache *AC,
                            unsigned Threshold) { … }

struct WeightInfo { … };

/// Update the branch weights of an exiting block of a peeled-off loop
/// iteration.
/// Let F is a weight of the edge to continue (fallthrough) into the loop.
/// Let E is a weight of the edge to an exit.
/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
/// go to exit.
/// Then, Estimated ExitCount = F / E.
/// For I-th (counting from 0) peeled off iteration we set the weights for
/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
/// The probability to go to exit 1/(EC-I) increases. At the same time
/// the estimated exit count in the remainder loop reduces by I.
/// To avoid dealing with division rounding we can just multiple both part
/// of weights to E and use weight as (F - I * E, E).
static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { … }

/// Initialize the weights for all exiting blocks.
static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
                              Loop *L) { … }

/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
/// \param ExitEdges The exit edges of the original loop.
/// \param[out] NewBlocks A list of the blocks in the newly created clone
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
    Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
    ScalarEvolution &SE) { … }

TargetTransformInfo::PeelingPreferences
llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
                               const TargetTransformInfo &TTI,
                               std::optional<bool> UserAllowPeeling,
                               std::optional<bool> UserAllowProfileBasedPeeling,
                               bool UnrollingSpecficValues) { … }

/// Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
/// exit condition.
/// For loops that dynamically execute \p PeelCount iterations or less
/// this provides a benefit, since the peeled off iterations, which account
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
                    ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
                    bool PreserveLCSSA, ValueToValueMapTy &LVMap) { … }
llvm/llvm/lib/Transforms/Utils/LoopPeel.cpp