//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This implements bottom-up and top-down register pressure reduction list // schedulers, using standard algorithms. The basic approach uses a priority // queue of available nodes to schedule. One at a time, nodes are taken from // the priority queue (thus in priority order), checked for legality to // schedule, and emitted if legal. // //===----------------------------------------------------------------------===// #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstdint> #include <cstdlib> #include <iterator> #include <limits> #include <memory> #include <utility> #include <vector> usingnamespacellvm; #define DEBUG_TYPE … STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); STATISTIC(NumPRCopies, "Number of physical register copies"); static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", createSourceListDAGScheduler); static RegisterScheduler hybridListDAGScheduler("list-hybrid", "Bottom-up register pressure aware list scheduling " "which tries to balance latency and register pressure", createHybridListDAGScheduler); static RegisterScheduler ILPListDAGScheduler("list-ilp", "Bottom-up register pressure aware list scheduling " "which tries to balance ILP and register pressure", createILPListDAGScheduler); static cl::opt<bool> DisableSchedCycles( "disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling")); // Temporary sched=list-ilp flags until the heuristics are robust. // Some options are also available under sched=list-hybrid. static cl::opt<bool> DisableSchedRegPressure( "disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp")); static cl::opt<bool> DisableSchedLiveUses( "disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp")); static cl::opt<bool> DisableSchedVRegCycle( "disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks")); static cl::opt<bool> DisableSchedPhysRegJoin( "disable-sched-physreg-join", cl::Hidden, cl::init(false), cl::desc("Disable physreg def-use affinity")); static cl::opt<bool> DisableSchedStalls( "disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp")); static cl::opt<bool> DisableSchedCriticalPath( "disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp")); static cl::opt<bool> DisableSchedHeight( "disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp")); static cl::opt<bool> Disable2AddrHack( "disable-2addr-hack", cl::Hidden, cl::init(true), cl::desc("Disable scheduler's two-address hack")); static cl::opt<int> MaxReorderWindow( "max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp")); static cl::opt<unsigned> AvgIPC( "sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists.")); namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler /// implementation. This supports both top-down and bottom-up scheduling. /// class ScheduleDAGRRList : public ScheduleDAGSDNodes { … }; } // end anonymous namespace static constexpr unsigned RegSequenceCost = …; /// GetCostForDef - Looks up the register class and cost for a given definition. /// Typically this just means looking up the representative register class, /// but for untyped values (MVT::Untyped) it means inspecting the node's /// opcode to determine what register class is being generated. static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, unsigned &RegClass, unsigned &Cost, const MachineFunction &MF) { … } /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { … } //===----------------------------------------------------------------------===// // Bottom-Up Scheduling //===----------------------------------------------------------------------===// /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { … } /// IsChainDependent - Test if Outer is reachable from Inner through /// chain dependencies. static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII) { … } /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate /// the corresponding (lowered) CALLSEQ_BEGIN node. /// /// NestLevel and MaxNested are used in recursion to indcate the current level /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum /// level seen so far. /// /// TODO: It would be better to give CALLSEQ_END an explicit operand to point /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII) { … } /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range /// across a sequence of two-address node. This is important because the /// entire chain must be scheduled together. Example: /// /// flags = (3) add /// flags = (2) addc flags /// flags = (1) addc flags /// /// results in /// /// LiveRegDefs[flags] = 3 /// LiveRegGens[flags] = 1 /// /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid /// interference on flags. void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { … } /// Check to see if any of the pending instructions are ready to issue. If /// so, add them to the available queue. void ScheduleDAGRRList::ReleasePending() { … } /// Move the scheduler state forward by the specified number of Cycles. void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { … } /// Move the scheduler state forward until the specified node's dependents are /// ready and can be scheduled with no resource conflicts. void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { … } /// Record this SUnit in the HazardRecognizer. /// Does not update CurCycle. void ScheduleDAGRRList::EmitNode(SUnit *SU) { … } static void resetVRegCycle(SUnit *SU); /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { … } /// CapturePred - This does the opposite of ReleasePred. Since SU is being /// unscheduled, increase the succ left count of its predecessors. Remove /// them from AvailableQueue if necessary. void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { … } /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { … } /// After backtracking, the hazard checker needs to be restored to a state /// corresponding the current cycle. void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { … } /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in /// BTCycle in order to schedule a specific node. void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { … } static bool isOperandOf(const SUnit *SU, SDNode *N) { … } /// TryUnfold - Attempt to unfold SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { … } /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { … } /// InsertCopiesAndMoveSuccs - Insert register copies and move all /// scheduled successors of the given SUnit to the last copy. void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl<SUnit*> &Copies) { … } /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { … } /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI, const SDNode *Node = nullptr) { … } /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered /// by RegMask, and add them to LRegs. static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef<SUnit*> LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs) { … } /// getNodeRegMask - Returns the register mask attached to an SDNode, if any. static const uint32_t *getNodeRegMask(const SDNode *N) { … } /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. bool ScheduleDAGRRList:: DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { … } void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { … } /// Return a node that can be scheduled in this cycle. Requirements: /// (1) Ready: latency has been satisfied /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { … } /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up /// schedulers. void ScheduleDAGRRList::ListScheduleBottomUp() { … } namespace { class RegReductionPQBase; struct queue_sort { … }; #ifndef NDEBUG template<class SF> struct reverse_sort : public queue_sort { SF &SortFunc; reverse_sort(SF &sf) : SortFunc(sf) {} bool operator()(SUnit* left, SUnit* right) const { // reverse left/right rather than simply !SortFunc(left, right) // to expose different paths in the comparison logic. return SortFunc(right, left); } }; #endif // NDEBUG /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. struct bu_ls_rr_sort : public queue_sort { … }; // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public queue_sort { … }; // hybrid_ls_rr_sort - Priority function for hybrid scheduler. struct hybrid_ls_rr_sort : public queue_sort { … }; // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) // scheduler. struct ilp_ls_rr_sort : public queue_sort { … }; class RegReductionPQBase : public SchedulingPriorityQueue { … }; template<class SF> static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { … } template<class SF> SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) { … } //===----------------------------------------------------------------------===// // RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// // // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers // to reduce register pressure. // template<class SF> class RegReductionPriorityQueue : public RegReductionPQBase { … }; BURegReductionPriorityQueue; SrcRegReductionPriorityQueue; HybridBURRPriorityQueue; ILPBURRPriorityQueue; } // end anonymous namespace //===----------------------------------------------------------------------===// // Static Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// // Check for special nodes that bypass scheduling heuristics. // Currently this pushes TokenFactor nodes down, but may be used for other // pseudo-ops as well. // // Return -1 to schedule right above left, 1 for left above right. // Return 0 if no bias exists. static int checkSpecialNodes(const SUnit *left, const SUnit *right) { … } /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { … } /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all /// scheduling units. void RegReductionPQBase::CalculateSethiUllmanNumbers() { … } void RegReductionPQBase::addNode(const SUnit *SU) { … } void RegReductionPQBase::updateNode(const SUnit *SU) { … } // Lower priority means schedule further down. For bottom-up scheduling, lower // priority SUs are scheduled before higher priority SUs. unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { … } //===----------------------------------------------------------------------===// // Register Pressure Tracking //===----------------------------------------------------------------------===// #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { for (const TargetRegisterClass *RC : TRI->regclasses()) { unsigned Id = RC->getID(); unsigned RP = RegPressure[Id]; if (!RP) continue; LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " << RegLimit[Id] << '\n'); } } #endif bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { … } bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { … } // Compute the register pressure contribution by this instruction by count up // for uses that are not live and down for defs. Only count register classes // that are already under high pressure. As a side effect, compute the number of // uses of registers that are already live. // // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure // so could probably be factored. int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { … } void RegReductionPQBase::scheduledNode(SUnit *SU) { … } void RegReductionPQBase::unscheduledNode(SUnit *SU) { … } //===----------------------------------------------------------------------===// // Dynamic Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// /// closestSucc - Returns the scheduled cycle of the successor which is /// closest to the current cycle. static unsigned closestSucc(const SUnit *SU) { … } /// calcMaxScratches - Returns an cost estimate of the worse case requirement /// for scratch registers, i.e. number of data dependencies. static unsigned calcMaxScratches(const SUnit *SU) { … } /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are /// CopyFromReg from a virtual register. static bool hasOnlyLiveInOpers(const SUnit *SU) { … } /// hasOnlyLiveOutUses - Return true if SU has only value successors that are /// CopyToReg to a virtual register. This SU def is probably a liveout and /// it has no other use. It should be scheduled closer to the terminator. static bool hasOnlyLiveOutUses(const SUnit *SU) { … } // Set isVRegCycle for a node with only live in opers and live out uses. Also // set isVRegCycle for its CopyFromReg operands. // // This is only relevant for single-block loops, in which case the VRegCycle // node is likely an induction variable in which the operand and target virtual // registers should be coalesced (e.g. pre/post increment values). Setting the // isVRegCycle flag helps the scheduler prioritize other uses of the same // CopyFromReg so that this node becomes the virtual register "kill". This // avoids interference between the values live in and out of the block and // eliminates a copy inside the loop. static void initVRegCycle(SUnit *SU) { … } // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of // CopyFromReg operands. We should no longer penalize other uses of this VReg. static void resetVRegCycle(SUnit *SU) { … } // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This // means a node that defines the VRegCycle has not been scheduled yet. static bool hasVRegCycleUse(const SUnit *SU) { … } // Check for either a dependence (latency) or resource (hazard) stall. // // Note: The ScheduleHazardRecognizer interface requires a non-const SU. static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { … } // Return -1 if left has higher priority, 1 if right has higher priority. // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ) { … } static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { … } // Bottom up bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { … } // Source order, otherwise bottom up. bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { … } // If the time between now and when the instruction will be ready can cover // the spill code, then avoid adding it to the ready queue. This gives long // stalls highest priority and allows hoisting across calls. It should also // speed up processing the available queue. bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { … } // Return true if right should be scheduled with higher priority than left. bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { … } // Schedule as many instructions in each cycle as possible. So don't make an // instruction available unless it is ready in the current cycle. bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { … } static bool canEnableCoalescing(SUnit *SU) { … } // list-ilp is currently an experimental scheduler that allows various // heuristics to be enabled prior to the normal register reduction logic. bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { … } void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { … } //===----------------------------------------------------------------------===// // Preschedule for Register Pressure //===----------------------------------------------------------------------===// bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { … } /// canClobberReachingPhysRegUse - True if SU would clobber one of it's /// successor's explicit physregs whose definition can reach DepSU. /// i.e. DepSU should not be scheduled above SU. static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { … } /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's /// physical register defs. static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { … } /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses /// are not handled well by the general register pressure reduction /// heuristics. When presented with code like this: /// /// N /// / | /// / | /// U store /// | /// ... /// /// the heuristics tend to push the store up, but since the /// operand of the store has another use (U), this would increase /// the length of that other use (the U->N edge). /// /// This function transforms code like the above to route U's /// dependence through the store when possible, like this: /// /// N /// || /// || /// store /// | /// U /// | /// ... /// /// This results in the store being scheduled immediately /// after N, which shortens the U->N live range, reducing /// register pressure. void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { … } /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses /// it as a def&use operand. Add a pseudo control edge from it to the other /// node (if it won't create a cycle) so the two-address one will be scheduled /// first (lower in the schedule). If both nodes are two-address, favor the /// one that has a CopyToReg use (more likely to be a loop induction update). /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. void RegReductionPQBase::AddPseudoTwoAddrDeps() { … } //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// ScheduleDAGSDNodes *llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel) { … } ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel) { … } ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel) { … } ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel) { … }