llvm/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements a simple loop unroller.  It works best when loops have
// been canonicalized by the -indvars pass, allowing it to determine the trip
// counts of loops easily.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <optional>
#include <string>
#include <tuple>
#include <utility>

usingnamespacellvm;

#define DEBUG_TYPE

cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
    "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
    cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
             " the current top-most loop. This is sometimes preferred to reduce"
             " compile time."));

static cl::opt<unsigned>
    UnrollThreshold("unroll-threshold", cl::Hidden,
                    cl::desc("The cost threshold for loop unrolling"));

static cl::opt<unsigned>
    UnrollOptSizeThreshold(
      "unroll-optsize-threshold", cl::init(0), cl::Hidden,
      cl::desc("The cost threshold for loop unrolling when optimizing for "
               "size"));

static cl::opt<unsigned> UnrollPartialThreshold(
    "unroll-partial-threshold", cl::Hidden,
    cl::desc("The cost threshold for partial loop unrolling"));

static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
    "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
    cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
             "to the threshold when aggressively unrolling a loop due to the "
             "dynamic cost savings. If completely unrolling a loop will reduce "
             "the total runtime from X to Y, we boost the loop unroll "
             "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
             "X/Y). This limit avoids excessive code bloat."));

static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
    "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
    cl::desc("Don't allow loop unrolling to simulate more than this number of"
             "iterations when checking full unroll profitability"));

static cl::opt<unsigned> UnrollCount(
    "unroll-count", cl::Hidden,
    cl::desc("Use this unroll count for all loops including those with "
             "unroll_count pragma values, for testing purposes"));

static cl::opt<unsigned> UnrollMaxCount(
    "unroll-max-count", cl::Hidden,
    cl::desc("Set the max unroll count for partial and runtime unrolling, for"
             "testing purposes"));

static cl::opt<unsigned> UnrollFullMaxCount(
    "unroll-full-max-count", cl::Hidden,
    cl::desc(
        "Set the max unroll count for full unrolling, for testing purposes"));

static cl::opt<bool>
    UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
                       cl::desc("Allows loops to be partially unrolled until "
                                "-unroll-threshold loop size is reached."));

static cl::opt<bool> UnrollAllowRemainder(
    "unroll-allow-remainder", cl::Hidden,
    cl::desc("Allow generation of a loop remainder (extra iterations) "
             "when unrolling a loop."));

static cl::opt<bool>
    UnrollRuntime("unroll-runtime", cl::Hidden,
                  cl::desc("Unroll loops with run-time trip counts"));

static cl::opt<unsigned> UnrollMaxUpperBound(
    "unroll-max-upperbound", cl::init(8), cl::Hidden,
    cl::desc(
        "The max of trip count upper bound that is considered in unrolling"));

static cl::opt<unsigned> PragmaUnrollThreshold(
    "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
    cl::desc("Unrolled size limit for loops with an unroll(full) or "
             "unroll_count pragma."));

static cl::opt<unsigned> FlatLoopTripCountThreshold(
    "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
    cl::desc("If the runtime tripcount for the loop is lower than the "
             "threshold, the loop is considered as flat and will be less "
             "aggressively unrolled."));

static cl::opt<bool> UnrollUnrollRemainder(
  "unroll-remainder", cl::Hidden,
  cl::desc("Allow the loop remainder to be unrolled."));

// This option isn't ever intended to be enabled, it serves to allow
// experiments to check the assumptions about when this kind of revisit is
// necessary.
static cl::opt<bool> UnrollRevisitChildLoops(
    "unroll-revisit-child-loops", cl::Hidden,
    cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
             "This shouldn't typically be needed as child loops (or their "
             "clones) were already visited."));

static cl::opt<unsigned> UnrollThresholdAggressive(
    "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
    cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
             "optimizations"));
static cl::opt<unsigned>
    UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
                           cl::Hidden,
                           cl::desc("Default threshold (max size of unrolled "
                                    "loop), used in all but O3 optimizations"));

static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
    "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
    cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));

/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold =;

/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
    OptimizationRemarkEmitter &ORE, int OptLevel,
    std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
    std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
    std::optional<bool> UserUpperBound,
    std::optional<unsigned> UserFullUnrollMaxCount) {}

namespace {

/// A struct to densely store the state of an instruction after unrolling at
/// each iteration.
///
/// This is designed to work like a tuple of <Instruction *, int> for the
/// purposes of hashing and lookup, but to be able to associate two boolean
/// states with each key.
struct UnrolledInstState {};

/// Hashing and equality testing for a set of the instruction states.
struct UnrolledInstStateKeyInfo {};

struct EstimatedUnrollCost {};

struct PragmaInfo {};

} // end anonymous namespace

/// Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is std::nullopt.
static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
    const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
    const SmallPtrSetImpl<const Value *> &EphValues,
    const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
    unsigned MaxIterationsCountToAnalyze) {}

UnrollCostEstimator::UnrollCostEstimator(
    const Loop *L, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {}

bool UnrollCostEstimator::canUnroll() const {}

uint64_t UnrollCostEstimator::getUnrolledLoopSize(
    const TargetTransformInfo::UnrollingPreferences &UP,
    unsigned CountOverwrite) const {}

// Returns the loop hint metadata node with the given name (for example,
// "llvm.loop.unroll.count").  If no such metadata node exists, then nullptr is
// returned.
static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {}

// Returns true if the loop has an unroll(full) pragma.
static bool hasUnrollFullPragma(const Loop *L) {}

// Returns true if the loop has an unroll(enable) pragma. This metadata is used
// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
static bool hasUnrollEnablePragma(const Loop *L) {}

// Returns true if the loop has an runtime unroll(disable) pragma.
static bool hasRuntimeUnrollDisablePragma(const Loop *L) {}

// If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma.  Otherwise return 0.
static unsigned unrollCountPragmaValue(const Loop *L) {}

// Computes the boosting factor for complete unrolling.
// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
// be beneficial to fully unroll the loop even if unrolledcost is large. We
// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
// the unroll threshold.
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
                                            unsigned MaxPercentThresholdBoost) {}

static std::optional<unsigned>
shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
                   const unsigned TripMultiple, const unsigned TripCount,
                   unsigned MaxTripCount, const UnrollCostEstimator UCE,
                   const TargetTransformInfo::UnrollingPreferences &UP) {}

static std::optional<unsigned> shouldFullUnroll(
    Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
    ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
    const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
    const TargetTransformInfo::UnrollingPreferences &UP) {}

static std::optional<unsigned>
shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
                    const UnrollCostEstimator UCE,
                    const TargetTransformInfo::UnrollingPreferences &UP) {}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
// that are specific to to the LoopUnroll pass (which, for instance, are
// irrelevant for the LoopUnrollAndJam pass).
// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
// many LoopUnroll-specific options. The shared functionality should be
// refactored into it own function.
bool llvm::computeUnrollCount(
    Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
    AssumptionCache *AC, ScalarEvolution &SE,
    const SmallPtrSetImpl<const Value *> &EphValues,
    OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
    bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
    TargetTransformInfo::UnrollingPreferences &UP,
    TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {}

static LoopUnrollResult
tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                const TargetTransformInfo &TTI, AssumptionCache &AC,
                OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
                ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
                bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
                std::optional<unsigned> ProvidedCount,
                std::optional<unsigned> ProvidedThreshold,
                std::optional<bool> ProvidedAllowPartial,
                std::optional<bool> ProvidedRuntime,
                std::optional<bool> ProvidedUpperBound,
                std::optional<bool> ProvidedAllowPeeling,
                std::optional<bool> ProvidedAllowProfileBasedPeeling,
                std::optional<unsigned> ProvidedFullUnrollMaxCount,
                AAResults *AA = nullptr) {}

namespace {

class LoopUnroll : public LoopPass {};

} // end anonymous namespace

char LoopUnroll::ID =;

INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)

Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
                                 bool ForgetAllSCEV, int Threshold, int Count,
                                 int AllowPartial, int Runtime, int UpperBound,
                                 int AllowPeeling) {}

PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &Updater) {}

PreservedAnalyses LoopUnrollPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {}

void LoopUnrollPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {}