llvm/llvm/lib/Transforms/Scalar/LoopSink.cpp

//===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does the inverse transformation of what LICM does.
// It traverses all of the instructions in the loop's preheader and sinks
// them to the loop body where frequency is lower than the loop's preheader.
// This pass is a reverse-transformation of LICM. It differs from the Sink
// pass in the following ways:
//
// * It only handles sinking of instructions from the loop's preheader to the
//   loop's body
// * It uses alias set tracker to get more accurate alias info
// * It uses block frequency info to find the optimal sinking locations
//
// Overall algorithm:
//
// For I in Preheader:
//   InsertBBs = BBs that uses I
//   For BB in sorted(LoopBBs):
//     DomBBs = BBs in InsertBBs that are dominated by BB
//     if freq(DomBBs) > freq(BB)
//       InsertBBs = UseBBs - DomBBs + BB
//   For BB in InsertBBs:
//     Insert I at BB's beginning
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
usingnamespacellvm;

#define DEBUG_TYPE

STATISTIC(NumLoopSunk, "Number of instructions sunk into loop");
STATISTIC(NumLoopSunkCloned, "Number of cloned instructions sunk into loop");

static cl::opt<unsigned> SinkFrequencyPercentThreshold(
    "sink-freq-percent-threshold", cl::Hidden, cl::init(90),
    cl::desc("Do not sink instructions that require cloning unless they "
             "execute less than this percent of the time."));

static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
    "max-uses-for-sinking", cl::Hidden, cl::init(30),
    cl::desc("Do not sink instructions that have too many uses."));

/// Return adjusted total frequency of \p BBs.
///
/// * If there is only one BB, sinking instruction will not introduce code
///   size increase. Thus there is no need to adjust the frequency.
/// * If there are more than one BB, sinking would lead to code size increase.
///   In this case, we add some "tax" to the total frequency to make it harder
///   to sink. E.g.
///     Freq(Preheader) = 100
///     Freq(BBs) = sum(50, 49) = 99
///   Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheade to
///   BBs as the difference is too small to justify the code size increase.
///   To model this, The adjusted Freq(BBs) will be:
///     AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold%
static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs,
                                      BlockFrequencyInfo &BFI) {}

/// Return a set of basic blocks to insert sinked instructions.
///
/// The returned set of basic blocks (BBsToSinkInto) should satisfy:
///
/// * Inside the loop \p L
/// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
///   that domintates the UseBB
/// * Has minimum total frequency that is no greater than preheader frequency
///
/// The purpose of the function is to find the optimal sinking points to
/// minimize execution cost, which is defined as "sum of frequency of
/// BBsToSinkInto".
/// As a result, the returned BBsToSinkInto needs to have minimum total
/// frequency.
/// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
/// frequency, the optimal solution is not sinking (return empty set).
///
/// \p ColdLoopBBs is used to help find the optimal sinking locations.
/// It stores a list of BBs that is:
///
/// * Inside the loop \p L
/// * Has a frequency no larger than the loop's preheader
/// * Sorted by BB frequency
///
/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
/// To avoid expensive computation, we cap the maximum UseBBs.size() in its
/// caller.
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {}

// Sinks \p I from the loop \p L's preheader to its uses. Returns true if
// sinking is successful.
// \p LoopBlockNumber is used to sort the insertion blocks to ensure
// determinism.
static bool sinkInstruction(
    Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
    const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
    DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {}

/// Sinks instructions from loop's preheader to the loop body if the
/// sum frequency of inserted copy is smaller than preheader's frequency.
static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
                                          DominatorTree &DT,
                                          BlockFrequencyInfo &BFI,
                                          MemorySSA &MSSA,
                                          ScalarEvolution *SE) {}

PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {}