llvm/llvm/lib/CodeGen/MachineBlockPlacement.cpp

//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements basic block placement transformations using the CFG
// structure and branch probability estimates.
//
// The pass strives to preserve the structure of the CFG (that is, retain
// a topological ordering of basic blocks) in the absence of a *strong* signal
// to the contrary from probabilities. However, within the CFG structure, it
// attempts to choose an ordering which favors placing more likely sequences of
// blocks adjacent to each other.
//
// The algorithm works from the inner-most loop within a function outward, and
// at each stage walks through the basic blocks, trying to coalesce them into
// sequential chains where allowed by the CFG (or demanded by heavy
// probabilities). Finally, it walks the blocks in topological order, and the
// first time it reaches a chain of basic blocks, it schedules them in the
// function in-order.
//
//===----------------------------------------------------------------------===//

#include "BranchFolding.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/CodeLayout.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

usingnamespacellvm;

#define DEBUG_TYPE

STATISTIC(NumCondBranches, "Number of conditional branches");
STATISTIC(NumUncondBranches, "Number of unconditional branches");
STATISTIC(CondBranchTakenFreq,
          "Potential frequency of taking conditional branches");
STATISTIC(UncondBranchTakenFreq,
          "Potential frequency of taking unconditional branches");

static cl::opt<unsigned> AlignAllBlock(
    "align-all-blocks",
    cl::desc("Force the alignment of all blocks in the function in log2 format "
             "(e.g 4 means align on 16B boundaries)."),
    cl::init(0), cl::Hidden);

static cl::opt<unsigned> AlignAllNonFallThruBlocks(
    "align-all-nofallthru-blocks",
    cl::desc("Force the alignment of all blocks that have no fall-through "
             "predecessors (i.e. don't add nops that are executed). In log2 "
             "format (e.g 4 means align on 16B boundaries)."),
    cl::init(0), cl::Hidden);

static cl::opt<unsigned> MaxBytesForAlignmentOverride(
    "max-bytes-for-alignment",
    cl::desc("Forces the maximum bytes allowed to be emitted when padding for "
             "alignment"),
    cl::init(0), cl::Hidden);

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned> ExitBlockBias(
    "block-placement-exit-block-bias",
    cl::desc("Block frequency percentage a loop exit block needs "
             "over the original exit to be considered the new exit."),
    cl::init(0), cl::Hidden);

// Definition:
// - Outlining: placement of a basic block outside the chain or hot path.

static cl::opt<unsigned> LoopToColdBlockRatio(
    "loop-to-cold-block-ratio",
    cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
             "(frequency of block) is greater than this ratio"),
    cl::init(5), cl::Hidden);

static cl::opt<bool>
    ForceLoopColdBlock("force-loop-cold-block",
                       cl::desc("Force outlining cold blocks from loops."),
                       cl::init(false), cl::Hidden);

static cl::opt<bool>
    PreciseRotationCost("precise-rotation-cost",
                        cl::desc("Model the cost of loop rotation more "
                                 "precisely by using profile data."),
                        cl::init(false), cl::Hidden);

static cl::opt<bool>
    ForcePreciseRotationCost("force-precise-rotation-cost",
                             cl::desc("Force the use of precise cost "
                                      "loop rotation strategy."),
                             cl::init(false), cl::Hidden);

static cl::opt<unsigned> MisfetchCost(
    "misfetch-cost",
    cl::desc("Cost that models the probabilistic risk of an instruction "
             "misfetch due to a jump comparing to falling through, whose cost "
             "is zero."),
    cl::init(1), cl::Hidden);

static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
                                      cl::desc("Cost of jump instructions."),
                                      cl::init(1), cl::Hidden);
static cl::opt<bool>
    TailDupPlacement("tail-dup-placement",
                     cl::desc("Perform tail duplication during placement. "
                              "Creates more fallthrough opportunites in "
                              "outline branches."),
                     cl::init(true), cl::Hidden);

static cl::opt<bool>
    BranchFoldPlacement("branch-fold-placement",
                        cl::desc("Perform branch folding during placement. "
                                 "Reduces code size."),
                        cl::init(true), cl::Hidden);

// Heuristic for tail duplication.
static cl::opt<unsigned> TailDupPlacementThreshold(
    "tail-dup-placement-threshold",
    cl::desc("Instruction cutoff for tail duplication during layout. "
             "Tail merging during layout is forced to have a threshold "
             "that won't conflict."),
    cl::init(2), cl::Hidden);

// Heuristic for aggressive tail duplication.
static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
    "tail-dup-placement-aggressive-threshold",
    cl::desc("Instruction cutoff for aggressive tail duplication during "
             "layout. Used at -O3. Tail merging during layout is forced to "
             "have a threshold that won't conflict."),
    cl::init(4), cl::Hidden);

// Heuristic for tail duplication.
static cl::opt<unsigned> TailDupPlacementPenalty(
    "tail-dup-placement-penalty",
    cl::desc(
        "Cost penalty for blocks that can avoid breaking CFG by copying. "
        "Copying can increase fallthrough, but it also increases icache "
        "pressure. This parameter controls the penalty to account for that. "
        "Percent as integer."),
    cl::init(2), cl::Hidden);

// Heuristic for tail duplication if profile count is used in cost model.
static cl::opt<unsigned> TailDupProfilePercentThreshold(
    "tail-dup-profile-percent-threshold",
    cl::desc("If profile count information is used in tail duplication cost "
             "model, the gained fall through number from tail duplication "
             "should be at least this percent of hot count."),
    cl::init(50), cl::Hidden);

// Heuristic for triangle chains.
static cl::opt<unsigned> TriangleChainCount(
    "triangle-chain-count",
    cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
             "triangle tail duplication heuristic to kick in. 0 to disable."),
    cl::init(2), cl::Hidden);

// Use case: When block layout is visualized after MBP pass, the basic blocks
// are labeled in layout order; meanwhile blocks could be numbered in a
// different order. It's hard to map between the graph and pass output.
// With this option on, the basic blocks are renumbered in function layout
// order. For debugging only.
static cl::opt<bool> RenumberBlocksBeforeView(
    "renumber-blocks-before-view",
    cl::desc(
        "If true, basic blocks are re-numbered before MBP layout is printed "
        "into a dot graph. Only used when a function is being printed."),
    cl::init(false), cl::Hidden);

static cl::opt<unsigned> ExtTspBlockPlacementMaxBlocks(
    "ext-tsp-block-placement-max-blocks",
    cl::desc("Maximum number of basic blocks in a function to run ext-TSP "
             "block placement."),
    cl::init(UINT_MAX), cl::Hidden);

// Apply the ext-tsp algorithm minimizing the size of a binary.
static cl::opt<bool>
    ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden,
                       cl::desc("Use ext-tsp for size-aware block placement."));

namespace llvm {
extern cl::opt<bool> EnableExtTspBlockPlacement;
extern cl::opt<bool> ApplyExtTspWithoutProfile;
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;

// Internal option used to control BFI display only after MBP pass.
// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
// -view-block-layout-with-bfi=
extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;

// Command line option to specify the name of the function for CFG dump
// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
} // namespace llvm

namespace {

class BlockChain;

/// Type for our function-wide basic block -> block chain mapping.
BlockToChainMapType;

/// A chain of blocks which will be laid out contiguously.
///
/// This is the datastructure representing a chain of consecutive blocks that
/// are profitable to layout together in order to maximize fallthrough
/// probabilities and code locality. We also can use a block chain to represent
/// a sequence of basic blocks which have some external (correctness)
/// requirement for sequential layout.
///
/// Chains can be built around a single basic block and can be merged to grow
/// them. They participate in a block-to-chain mapping, which is updated
/// automatically as chains are merged together.
class BlockChain {};

class MachineBlockPlacement : public MachineFunctionPass {};

} // end anonymous namespace

char MachineBlockPlacement::ID =;

char &llvm::MachineBlockPlacementID =;

INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
                      "Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
                    "Branch Probability Basic Block Placement", false, false)

#ifndef NDEBUG
/// Helper to print the name of a MBB.
///
/// Only used by debug logging.
static std::string getBlockName(const MachineBasicBlock *BB) {
  std::string Result;
  raw_string_ostream OS(Result);
  OS << printMBBReference(*BB);
  OS << " ('" << BB->getName() << "')";
  OS.flush();
  return Result;
}
#endif

/// Mark a chain's successors as having one fewer preds.
///
/// When a chain is being merged into the "placed" chain, this routine will
/// quickly walk the successors of each block in the chain and mark them as
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the appropriate worklist.
void MachineBlockPlacement::markChainSuccessors(
    const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
    const BlockFilterSet *BlockFilter) {}

/// Mark a single block's successors as having one fewer preds.
///
/// Under normal circumstances, this is only called by markChainSuccessors,
/// but if a block that was to be placed is completely tail-duplicated away,
/// and was duplicated into the chain end, we need to redo markBlockSuccessors
/// for just that block.
void MachineBlockPlacement::markBlockSuccessors(
    const BlockChain &Chain, const MachineBasicBlock *MBB,
    const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {}

/// This helper function collects the set of successors of block
/// \p BB that are allowed to be its layout successors, and return
/// the total branch probability of edges from \p BB to those
/// blocks.
BranchProbability MachineBlockPlacement::collectViableSuccessors(
    const MachineBasicBlock *BB, const BlockChain &Chain,
    const BlockFilterSet *BlockFilter,
    SmallVector<MachineBasicBlock *, 4> &Successors) {}

/// The helper function returns the branch probability that is adjusted
/// or normalized over the new total \p AdjustedSumProb.
static BranchProbability
getAdjustedProbability(BranchProbability OrigProb,
                       BranchProbability AdjustedSumProb) {}

/// Check if \p BB has exactly the successors in \p Successors.
static bool
hasSameSuccessors(MachineBasicBlock &BB,
                  SmallPtrSetImpl<const MachineBasicBlock *> &Successors) {}

/// Check if a block should be tail duplicated to increase fallthrough
/// opportunities.
/// \p BB Block to check.
bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {}

/// Compare 2 BlockFrequency's with a small penalty for \p A.
/// In order to be conservative, we apply a X% penalty to account for
/// increased icache pressure and static heuristics. For small frequencies
/// we use only the numerators to improve accuracy. For simplicity, we assume
/// the penalty is less than 100%
/// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
                            BlockFrequency EntryFreq) {}

/// Check the edge frequencies to see if tail duplication will increase
/// fallthroughs. It only makes sense to call this function when
/// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
/// always locally profitable if we would have picked \p Succ without
/// considering duplication.
bool MachineBlockPlacement::isProfitableToTailDup(
    const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
    BranchProbability QProb, const BlockChain &Chain,
    const BlockFilterSet *BlockFilter) {}

/// Check for a trellis layout. \p BB is the upper part of a trellis if its
/// successors form the lower part of a trellis. A successor set S forms the
/// lower part of a trellis if all of the predecessors of S are either in S or
/// have all of S as successors. We ignore trellises where BB doesn't have 2
/// successors because for fewer than 2, it's trivial, and for 3 or greater they
/// are very uncommon and complex to compute optimally. Allowing edges within S
/// is not strictly a trellis, but the same algorithm works, so we allow it.
bool MachineBlockPlacement::isTrellis(
    const MachineBasicBlock *BB,
    const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
    const BlockChain &Chain, const BlockFilterSet *BlockFilter) {}

/// Pick the highest total weight pair of edges that can both be laid out.
/// The edges in \p Edges[0] are assumed to have a different destination than
/// the edges in \p Edges[1]. Simple counting shows that the best pair is either
/// the individual highest weight edges to the 2 different destinations, or in
/// case of a conflict, one of them should be replaced with a 2nd best edge.
std::pair<MachineBlockPlacement::WeightedEdge,
          MachineBlockPlacement::WeightedEdge>
MachineBlockPlacement::getBestNonConflictingEdges(
    const MachineBasicBlock *BB,
    MutableArrayRef<SmallVector<MachineBlockPlacement::WeightedEdge, 8>>
        Edges) {}

/// Get the best successor from \p BB based on \p BB being part of a trellis.
/// We only handle trellises with 2 successors, so the algorithm is
/// straightforward: Find the best pair of edges that don't conflict. We find
/// the best incoming edge for each successor in the trellis. If those conflict,
/// we consider which of them should be replaced with the second best.
/// Upon return the two best edges will be in \p BestEdges. If one of the edges
/// comes from \p BB, it will be in \p BestEdges[0]
MachineBlockPlacement::BlockAndTailDupResult
MachineBlockPlacement::getBestTrellisSuccessor(
    const MachineBasicBlock *BB,
    const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
    BranchProbability AdjustedSumProb, const BlockChain &Chain,
    const BlockFilterSet *BlockFilter) {}

/// When the option allowTailDupPlacement() is on, this method checks if the
/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
/// into all of its unplaced, unfiltered predecessors, that are not BB.
bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
    const MachineBasicBlock *BB, MachineBasicBlock *Succ,
    const BlockChain &Chain, const BlockFilterSet *BlockFilter) {}

/// Find chains of triangles where we believe it would be profitable to
/// tail-duplicate them all, but a local analysis would not find them.
/// There are 3 ways this can be profitable:
/// 1) The post-dominators marked 50% are actually taken 55% (This shrinks with
///    longer chains)
/// 2) The chains are statically correlated. Branch probabilities have a very
///    U-shaped distribution.
///    [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805]
///    If the branches in a chain are likely to be from the same side of the
///    distribution as their predecessor, but are independent at runtime, this
///    transformation is profitable. (Because the cost of being wrong is a small
///    fixed cost, unlike the standard triangle layout where the cost of being
///    wrong scales with the # of triangles.)
/// 3) The chains are dynamically correlated. If the probability that a previous
///    branch was taken positively influences whether the next branch will be
///    taken
/// We believe that 2 and 3 are common enough to justify the small margin in 1.
void MachineBlockPlacement::precomputeTriangleChains() {}

// When profile is not present, return the StaticLikelyProb.
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability
getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB) {}

/// Checks to see if the layout candidate block \p Succ has a better layout
/// predecessor than \c BB. If yes, returns true.
/// \p SuccProb: The probability adjusted for only remaining blocks.
///   Only used for logging
/// \p RealSuccProb: The un-adjusted probability.
/// \p Chain: The chain that BB belongs to and Succ is being considered for.
/// \p BlockFilter: if non-null, the set of blocks that make up the loop being
///    considered
bool MachineBlockPlacement::hasBetterLayoutPredecessor(
    const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
    const BlockChain &SuccChain, BranchProbability SuccProb,
    BranchProbability RealSuccProb, const BlockChain &Chain,
    const BlockFilterSet *BlockFilter) {}

/// Select the best successor for a block.
///
/// This looks across all successors of a particular block and attempts to
/// select the "best" one to be the layout successor. It only considers direct
/// successors which also pass the block filter. It will attempt to avoid
/// breaking CFG structure, but cave and break such structures in the case of
/// very hot successor edges.
///
/// \returns The best successor block found, or null if none are viable, along
/// with a boolean indicating if tail duplication is necessary.
MachineBlockPlacement::BlockAndTailDupResult
MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,
                                           const BlockChain &Chain,
                                           const BlockFilterSet *BlockFilter) {}

/// Select the best block from a worklist.
///
/// This looks through the provided worklist as a list of candidate basic
/// blocks and select the most profitable one to place. The definition of
/// profitable only really makes sense in the context of a loop. This returns
/// the most frequently visited block in the worklist, which in the case of
/// a loop, is the one most desirable to be physically close to the rest of the
/// loop body in order to improve i-cache behavior.
///
/// \returns The best block found, or null if none are viable.
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
    const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {}

/// Retrieve the first unplaced basic block in the entire function.
///
/// This routine is called when we are unable to use the CFG to walk through
/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
/// We walk through the function's blocks in order, starting from the
/// LastUnplacedBlockIt. We update this iterator on each call to avoid
/// re-scanning the entire sequence on repeated calls to this routine.
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
    const BlockChain &PlacedChain,
    MachineFunction::iterator &PrevUnplacedBlockIt) {}

/// Retrieve the first unplaced basic block among the blocks in BlockFilter.
///
/// This is similar to getFirstUnplacedBlock for the entire function, but since
/// the size of BlockFilter is typically far less than the number of blocks in
/// the entire function, iterating through the BlockFilter is more efficient.
/// When processing the entire funciton, using the version without BlockFilter
/// has a complexity of #(loops in function) * #(blocks in function), while this
/// version has a complexity of sum(#(loops in block) foreach block in function)
/// which is always smaller. For long function mostly sequential in structure,
/// the complexity is amortized to 1 * #(blocks in function).
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
    const BlockChain &PlacedChain,
    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
    const BlockFilterSet *BlockFilter) {}

void MachineBlockPlacement::fillWorkLists(
    const MachineBasicBlock *MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
    const BlockFilterSet *BlockFilter = nullptr) {}

void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
                                       BlockChain &Chain,
                                       BlockFilterSet *BlockFilter) {}

// If bottom of block BB has only one successor OldTop, in most cases it is
// profitable to move it before OldTop, except the following case:
//
//     -->OldTop<-
//     |    .    |
//     |    .    |
//     |    .    |
//     ---Pred   |
//          |    |
//         BB-----
//
// If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't
// layout the other successor below it, so it can't reduce taken branch.
// In this case we keep its original layout.
bool MachineBlockPlacement::canMoveBottomBlockToTop(
    const MachineBasicBlock *BottomBlock, const MachineBasicBlock *OldTop) {}

// Find out the possible fall through frequence to the top of a loop.
BlockFrequency
MachineBlockPlacement::TopFallThroughFreq(const MachineBasicBlock *Top,
                                          const BlockFilterSet &LoopBlockSet) {}

// Compute the fall through gains when move NewTop before OldTop.
//
// In following diagram, edges marked as "-" are reduced fallthrough, edges
// marked as "+" are increased fallthrough, this function computes
//
//      SUM(increased fallthrough) - SUM(decreased fallthrough)
//
//              |
//              | -
//              V
//        --->OldTop
//        |     .
//        |     .
//       +|     .    +
//        |   Pred --->
//        |     |-
//        |     V
//        --- NewTop <---
//              |-
//              V
//
BlockFrequency MachineBlockPlacement::FallThroughGains(
    const MachineBasicBlock *NewTop, const MachineBasicBlock *OldTop,
    const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) {}

/// Helper function of findBestLoopTop. Find the best loop top block
/// from predecessors of old top.
///
/// Look for a block which is strictly better than the old top for laying
/// out before the old top of the loop. This looks for only two patterns:
///
///     1. a block has only one successor, the old loop top
///
///        Because such a block will always result in an unconditional jump,
///        rotating it in front of the old top is always profitable.
///
///     2. a block has two successors, one is old top, another is exit
///        and it has more than one predecessors
///
///        If it is below one of its predecessors P, only P can fall through to
///        it, all other predecessors need a jump to it, and another conditional
///        jump to loop header. If it is moved before loop header, all its
///        predecessors jump to it, then fall through to loop header. So all its
///        predecessors except P can reduce one taken branch.
///        At the same time, move it before old top increases the taken branch
///        to loop exit block, so the reduced taken branch will be compared with
///        the increased taken branch to the loop exit block.
MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(
    MachineBasicBlock *OldTop, const MachineLoop &L,
    const BlockFilterSet &LoopBlockSet) {}

/// Find the best loop top block for layout.
///
/// This function iteratively calls findBestLoopTopHelper, until no new better
/// BB can be found.
MachineBasicBlock *
MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
                                       const BlockFilterSet &LoopBlockSet) {}

/// Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
                                        const BlockFilterSet &LoopBlockSet,
                                        BlockFrequency &ExitFreq) {}

/// Check if there is a fallthrough to loop header Top.
///
///   1. Look for a Pred that can be layout before Top.
///   2. Check if Top is the most possible successor of Pred.
bool MachineBlockPlacement::hasViableTopFallthrough(
    const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) {}

/// Attempt to rotate an exiting block to the bottom of the loop.
///
/// Once we have built a chain, try to rotate it to line up the hot exit block
/// with fallthrough out of the loop if doing so doesn't introduce unnecessary
/// branches. For example, if the loop has fallthrough into its header and out
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
                                       const MachineBasicBlock *ExitingBB,
                                       BlockFrequency ExitFreq,
                                       const BlockFilterSet &LoopBlockSet) {}

/// Attempt to rotate a loop based on profile data to reduce branch cost.
///
/// With profile data, we can determine the cost in terms of missed fall through
/// opportunities when rotating a loop chain and select the best rotation.
/// Basically, there are three kinds of cost to consider for each rotation:
///    1. The possibly missed fall through edge (if it exists) from BB out of
///    the loop to the loop header.
///    2. The possibly missed fall through edges (if they exist) from the loop
///    exits to BB out of the loop.
///    3. The missed fall through edge (if it exists) from the last BB to the
///    first BB in the loop chain.
///  Therefore, the cost for a given rotation is the sum of costs listed above.
///  We select the best rotation with the smallest cost.
void MachineBlockPlacement::rotateLoopWithProfile(
    BlockChain &LoopChain, const MachineLoop &L,
    const BlockFilterSet &LoopBlockSet) {}

/// Collect blocks in the given loop that are to be placed.
///
/// When profile data is available, exclude cold blocks from the returned set;
/// otherwise, collect all blocks in the loop.
MachineBlockPlacement::BlockFilterSet
MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {}

/// Forms basic block chains from the natural loop structures.
///
/// These chains are designed to preserve the existing *structure* of the code
/// as much as possible. We can then stitch the chains together in a way which
/// both preserves the topological structure and minimizes taken conditional
/// branches.
void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {}

void MachineBlockPlacement::buildCFGChains() {}

void MachineBlockPlacement::optimizeBranches() {}

void MachineBlockPlacement::alignBlocks() {}

/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
/// it was duplicated into its chain predecessor and removed.
/// \p BB    - Basic block that may be duplicated.
///
/// \p LPred - Chosen layout predecessor of \p BB.
///            Updated to be the chain end if LPred is removed.
/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
///                  Used to identify which blocks to update predecessor
///                  counts.
/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
///                          chosen in the given order due to unnatural CFG
///                          only needed if \p BB is removed and
///                          \p PrevUnplacedBlockIt pointed to \p BB.
/// @return true if \p BB was removed.
bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
    MachineBasicBlock *BB, MachineBasicBlock *&LPred,
    const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
    BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt,
    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {}

/// Tail duplicate \p BB into (some) predecessors if profitable.
/// \p BB    - Basic block that may be duplicated
/// \p LPred - Chosen layout predecessor of \p BB
/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
///                  Used to identify which blocks to update predecessor
///                  counts.
/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
///                          chosen in the given order due to unnatural CFG
///                          only needed if \p BB is removed and
///                          \p PrevUnplacedBlockIt pointed to \p BB.
/// \p DuplicatedToLPred - True if the block was duplicated into LPred.
/// \return  - True if the block was duplicated into all preds and removed.
bool MachineBlockPlacement::maybeTailDuplicateBlock(
    MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
    BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt,
    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
    bool &DuplicatedToLPred) {}

// Count the number of actual machine instructions.
static uint64_t countMBBInstruction(MachineBasicBlock *MBB) {}

// The size cost of duplication is the instruction size of the duplicated block.
// So we should scale the threshold accordingly. But the instruction size is not
// available on all targets, so we use the number of instructions instead.
BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {}

// Returns true if BB is Pred's best successor.
bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
                                            MachineBasicBlock *Pred,
                                            BlockFilterSet *BlockFilter) {}

// Find out the predecessors of BB and BB can be beneficially duplicated into
// them.
void MachineBlockPlacement::findDuplicateCandidates(
    SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,
    BlockFilterSet *BlockFilter) {}

void MachineBlockPlacement::initTailDupThreshold() {}

bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {}

void MachineBlockPlacement::applyExtTsp(bool OptForSize) {}

void MachineBlockPlacement::assignBlockOrder(
    const std::vector<const MachineBasicBlock *> &NewBlockOrder) {}

void MachineBlockPlacement::createCFGChainExtTsp() {}

namespace {

/// A pass to compute block placement statistics.
///
/// A separate pass to compute interesting statistics for evaluating block
/// placement. This is separate from the actual placement pass so that they can
/// be computed in the absence of any placement transformations or when using
/// alternative placement strategies.
class MachineBlockPlacementStats : public MachineFunctionPass {};

} // end anonymous namespace

char MachineBlockPlacementStats::ID =;

char &llvm::MachineBlockPlacementStatsID =;

INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
                      "Basic Block Placement Stats", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
                    "Basic Block Placement Stats", false, false)

bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {}