#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>
#include <deque>
#include <functional>
#include <iomanip>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <tuple>
#include <utility>
#include <vector>
usingnamespacellvm;
#define DEBUG_TYPE …
STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
cl::desc("Enable Software Pipelining"));
static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
cl::desc("Enable SWP at Os."), cl::Hidden,
cl::init(false));
static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
cl::desc("Size limit for the MII."),
cl::Hidden, cl::init(27));
static cl::opt<int> SwpForceII("pipeliner-force-ii",
cl::desc("Force pipeliner to use specified II."),
cl::Hidden, cl::init(-1));
static cl::opt<int>
SwpMaxStages("pipeliner-max-stages",
cl::desc("Maximum stages allowed in the generated scheduled."),
cl::Hidden, cl::init(3));
static cl::opt<bool>
SwpPruneDeps("pipeliner-prune-deps",
cl::desc("Prune dependences between unrelated Phi nodes."),
cl::Hidden, cl::init(true));
static cl::opt<bool>
SwpPruneLoopCarried("pipeliner-prune-loop-carried",
cl::desc("Prune loop carried order dependences."),
cl::Hidden, cl::init(true));
#ifndef NDEBUG
static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
#endif
static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
cl::ReallyHidden,
cl::desc("Ignore RecMII"));
static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
cl::init(false));
static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
cl::init(false));
static cl::opt<bool> EmitTestAnnotations(
"pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
cl::desc("Instead of emitting the pipelined code, annotate instructions "
"with the generated schedule for feeding into the "
"-modulo-schedule-test pass"));
static cl::opt<bool> ExperimentalCodeGen(
"pipeliner-experimental-cg", cl::Hidden, cl::init(false),
cl::desc(
"Use the experimental peeling code generator for software pipelining"));
static cl::opt<int> SwpIISearchRange("pipeliner-ii-search-range",
cl::desc("Range to search for II"),
cl::Hidden, cl::init(10));
static cl::opt<bool>
LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false),
cl::desc("Limit register pressure of scheduled loop"));
static cl::opt<int>
RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden,
cl::init(5),
cl::desc("Margin representing the unused percentage of "
"the register pressure limit"));
static cl::opt<bool>
MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
cl::desc("Use the MVE code generator for software pipelining"));
namespace llvm {
cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
cl::init(true),
cl::desc("Enable CopyToPhi DAG Mutation"));
cl::opt<int> SwpForceIssueWidth(
"pipeliner-force-issue-width",
cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
cl::init(-1));
cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
"window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On),
cl::desc("Set how to use window scheduling algorithm."),
cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off",
"Turn off window algorithm."),
clEnumValN(WindowSchedulingFlag::WS_On, "on",
"Use window algorithm after SMS algorithm fails."),
clEnumValN(WindowSchedulingFlag::WS_Force, "force",
"Use window algorithm instead of SMS algorithm.")));
}
unsigned SwingSchedulerDAG::Circuits::MaxPaths = …;
char MachinePipeliner::ID = …;
#ifndef NDEBUG
int MachinePipeliner::NumTries = 0;
#endif
char &llvm::MachinePipelinerID = …;
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { … }
bool MachinePipeliner::scheduleLoop(MachineLoop &L) { … }
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { … }
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { … }
void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { … }
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { … }
void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const { … }
bool MachinePipeliner::runWindowScheduler(MachineLoop &L) { … }
bool MachinePipeliner::useSwingModuloScheduler() { … }
bool MachinePipeliner::useWindowScheduler(bool Changed) { … }
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) { … }
void SwingSchedulerDAG::setMAX_II() { … }
void SwingSchedulerDAG::schedule() { … }
void SwingSchedulerDAG::finishBlock() { … }
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
unsigned &InitVal, unsigned &LoopVal) { … }
static unsigned getLoopPhiReg(const MachineInstr &Phi,
const MachineBasicBlock *LoopBB) { … }
static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { … }
static bool isDependenceBarrier(MachineInstr &MI) { … }
static void getUnderlyingObjects(const MachineInstr *MI,
SmallVectorImpl<const Value *> &Objs) { … }
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { … }
void SwingSchedulerDAG::updatePhiDependences() { … }
void SwingSchedulerDAG::changeDependences() { … }
static void computeScheduledInsts(const SwingSchedulerDAG *SSD,
SMSchedule &Schedule,
std::vector<MachineInstr *> &OrderedInsts,
DenseMap<MachineInstr *, unsigned> &Stages) { … }
namespace {
struct FuncUnitSorter { … };
class HighRegisterPressureDetector { … };
}
unsigned SwingSchedulerDAG::calculateResMII() { … }
unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { … }
static void swapAntiDependences(std::vector<SUnit> &SUnits) { … }
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
SwingSchedulerDAG *DAG) { … }
bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
const SwingSchedulerDAG *DAG,
bool HasBackedge) { … }
void SwingSchedulerDAG::Circuits::unblock(int U) { … }
void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { … }
void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { … }
static bool ignoreDependence(const SDep &D, bool isPred) { … }
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { … }
static bool pred_L(SetVector<SUnit *> &NodeOrder,
SmallSetVector<SUnit *, 8> &Preds,
const NodeSet *S = nullptr) { … }
static bool succ_L(SetVector<SUnit *> &NodeOrder,
SmallSetVector<SUnit *, 8> &Succs,
const NodeSet *S = nullptr) { … }
static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
SetVector<SUnit *> &DestNodes,
SetVector<SUnit *> &Exclude,
SmallPtrSet<SUnit *, 8> &Visited) { … }
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
NodeSet &NS) { … }
void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { … }
void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) { … }
void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) { … }
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { … }
void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
SetVector<SUnit *> &NodesAdded) { … }
static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
SmallSetVector<SUnit *, 8> &Result) { … }
void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) { … }
void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) { … }
void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { … }
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { … }
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const { … }
bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
unsigned &BasePos,
unsigned &OffsetPos,
unsigned &NewBase,
int64_t &Offset) { … }
void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
SMSchedule &Schedule) { … }
MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { … }
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
bool isSucc) const { … }
void SwingSchedulerDAG::postProcessDAG() { … }
bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { … }
int SMSchedule::earliestCycleInChain(const SDep &Dep) { … }
int SMSchedule::latestCycleInChain(const SDep &Dep) { … }
static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { … }
void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
int II, SwingSchedulerDAG *DAG) { … }
void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
std::deque<SUnit *> &Insts) const { … }
bool SMSchedule::isLoopCarried(const SwingSchedulerDAG *SSD,
MachineInstr &Phi) const { … }
bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD,
MachineInstr *Def,
MachineOperand &MO) const { … }
bool SMSchedule::onlyHasLoopCarriedOutputOrOrderPreds(
SUnit *SU, SwingSchedulerDAG *DAG) const { … }
SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { … }
bool SMSchedule::normalizeNonPipelinedInstructions(
SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { … }
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { … }
void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { … }
void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) { … }
std::deque<SUnit *>
SMSchedule::reorderInstructions(const SwingSchedulerDAG *SSD,
const std::deque<SUnit *> &Instrs) const { … }
void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { … }
void NodeSet::print(raw_ostream &os) const { … }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SMSchedule::print(raw_ostream &os) const {
for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
for (SUnit *CI : cycleInstrs->second) {
os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
os << "(" << CI->NodeNum << ") ";
CI->getInstr()->print(os);
os << "\n";
}
}
}
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
void ResourceManager::dumpMRT() const {
LLVM_DEBUG({
if (UseDFA)
return;
std::stringstream SS;
SS << "MRT:\n";
SS << std::setw(4) << "Slot";
for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
SS << std::setw(3) << I;
SS << std::setw(7) << "#Mops"
<< "\n";
for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
SS << std::setw(4) << Slot;
for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
SS << std::setw(3) << MRT[Slot][I];
SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
}
dbgs() << SS.str();
});
}
#endif
void ResourceManager::initProcResourceVectors(
const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) { … }
bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) { … }
void ResourceManager::reserveResources(SUnit &SU, int Cycle) { … }
void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
int Cycle) { … }
void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
int Cycle) { … }
bool ResourceManager::isOverbooked() const { … }
int ResourceManager::calculateResMIIDFA() const { … }
int ResourceManager::calculateResMII() const { … }
void ResourceManager::init(int II) { … }