#include "AMDGPUIGroupLP.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetOpcodes.h"
usingnamespacellvm;
#define DEBUG_TYPE …
namespace {
/// Hidden boolean command-line option `amdgpu-igrouplp-exact-solver`,
/// initialized to false. Per its help text, it chooses whether the
/// exponential-time solver is used to fit instructions to the pipeline.
static cl::opt<bool> EnableExactSolver(
    "amdgpu-igrouplp-exact-solver", cl::Hidden,
    cl::desc("Whether to use the exponential time solver to fit the "
             "instructions to the pipeline as closely as possible."),
    cl::init(false));
/// Hidden unsigned command-line option
/// `amdgpu-igrouplp-exact-solver-cutoff`, initialized to 0. Per its help
/// text, it is the largest number of scheduling group conflicts handed to
/// the exact solver; larger problems go to the greedy algorithm, and an
/// explicit solver selection takes precedence over this size-based choice.
///
/// Each literal fragment below ends with a space where a word break is
/// intended: adjacent string literals are concatenated verbatim, so a missing
/// trailing space fuses two words in the `--help` output.
static cl::opt<unsigned> CutoffForExact(
    "amdgpu-igrouplp-exact-solver-cutoff", cl::init(0), cl::Hidden,
    cl::desc("The maximum number of scheduling group conflicts "
             "which we attempt to solve with the exponential time "
             "exact solver. Problem sizes greater than this will "
             "be solved by the less accurate greedy algorithm. Selecting "
             "solver by size is superseded by manually selecting "
             "the solver (e.g. by amdgpu-igrouplp-exact-solver)."));
/// Hidden 64-bit unsigned command-line option
/// `amdgpu-igrouplp-exact-solver-max-branches`, initialized to 0. Per its
/// help text, it bounds how many branches the exact algorithm explores before
/// giving up.
///
/// The first literal fragment ends with a space so that the concatenated help
/// text reads "explore with the exact algorithm" rather than "withthe".
static cl::opt<uint64_t> MaxBranchesExplored(
    "amdgpu-igrouplp-exact-solver-max-branches", cl::init(0), cl::Hidden,
    cl::desc("The amount of branches that we are willing to explore with "
             "the exact algorithm before giving up."));
/// Hidden boolean command-line option
/// `amdgpu-igrouplp-exact-solver-cost-heur`, initialized to true. Per its
/// help text, it toggles the cost heuristic used while the exact solver
/// walks the search space; when off, node order is used instead.
static cl::opt<bool> UseCostHeur(
    "amdgpu-igrouplp-exact-solver-cost-heur", cl::init(true), cl::Hidden,
    cl::desc(
        "Whether to use the cost heuristic to make choices as we traverse the "
        "search space using the exact solver. Defaulted to on, and if turned "
        "off, we will use the node order -- attempting to put the later nodes "
        "in the later sched groups. Experimentally, results are mixed, so this "
        "should be set on a case-by-case basis."));
enum class SchedGroupMask { … };
class SchedGroup;
class InstructionRule { … };
SUnitsToCandidateSGsMap;
class SchedGroup { … };
static void resetEdges(SUnit &SU, ScheduleDAGInstrs *DAG) { … }
SUToCandSGsPair;
SUsToCandSGsVec;
class PipelineSolver { … };
void PipelineSolver::reset() { … }
void PipelineSolver::convertSyncMapsToArrays() { … }
template <typename T> void PipelineSolver::linkSchedGroups(T I, T E) { … }
void PipelineSolver::makePipeline() { … }
template <typename T>
int PipelineSolver::linkSUnit(
SUnit *SU, int SGID, std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges,
T I, T E) { … }
int PipelineSolver::addEdges(
SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) { … }
void PipelineSolver::removeEdges(
const std::vector<std::pair<SUnit *, SUnit *>> &EdgesToRemove) { … }
void PipelineSolver::advancePosition() { … }
void PipelineSolver::retreatPosition() { … }
bool PipelineSolver::checkOptimal() { … }
template <typename T>
void PipelineSolver::populateReadyList(
SmallVectorImpl<std::pair<int, int>> &ReadyList, T I, T E) { … }
bool PipelineSolver::solveExact() { … }
template <typename T>
void PipelineSolver::greedyFind(
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E) { … }
bool PipelineSolver::solveGreedy() { … }
unsigned PipelineSolver::computeProblemSize() { … }
void PipelineSolver::solve() { … }
enum IGLPStrategyID : int { … };
class IGLPStrategy { … };
class MFMASmallGemmOpt final : public IGLPStrategy { … };
bool MFMASmallGemmOpt::applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
AMDGPU::SchedulingPhase Phase) { … }
class MFMAExpInterleaveOpt final : public IGLPStrategy { … };
unsigned MFMAExpInterleaveOpt::TransPipeCount = …;
unsigned MFMAExpInterleaveOpt::MFMAPipeCount = …;
unsigned MFMAExpInterleaveOpt::AddPipeCount = …;
unsigned MFMAExpInterleaveOpt::MFMAEnablement = …;
unsigned MFMAExpInterleaveOpt::ExpRequirement = …;
unsigned MFMAExpInterleaveOpt::MFMAChains = …;
unsigned MFMAExpInterleaveOpt::MFMAChainLength = …;
bool MFMAExpInterleaveOpt::HasCvt = …;
bool MFMAExpInterleaveOpt::HasChainBetweenCvt = …;
std::optional<unsigned> MFMAExpInterleaveOpt::FirstPipeDSR = …;
bool MFMAExpInterleaveOpt::analyzeDAG(const SIInstrInfo *TII) { … }
bool MFMAExpInterleaveOpt::shouldApplyStrategy(ScheduleDAGInstrs *DAG,
AMDGPU::SchedulingPhase Phase) { … }
bool MFMAExpInterleaveOpt::applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
AMDGPU::SchedulingPhase Phase) { … }
class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy { … };
static unsigned DSWCount = …;
static unsigned DSWWithPermCount = …;
static unsigned DSWWithSharedVMEMCount = …;
bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
AMDGPU::SchedulingPhase Phase) { … }
static std::unique_ptr<IGLPStrategy>
createIGLPStrategy(IGLPStrategyID ID, ScheduleDAGInstrs *DAG,
const SIInstrInfo *TII) { … }
class IGroupLPDAGMutation : public ScheduleDAGMutation { … };
unsigned SchedGroup::NumSchedGroups = …;
bool SchedGroup::tryAddEdge(SUnit *A, SUnit *B) { … }
bool SchedGroup::canAddMI(const MachineInstr &MI) const { … }
int SchedGroup::link(SUnit &SU, bool MakePred,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) { … }
void SchedGroup::link(SUnit &SU, bool MakePred) { … }
void SchedGroup::link(SUnit &SU,
function_ref<bool(const SUnit *A, const SUnit *B)> P) { … }
void SchedGroup::link(SchedGroup &OtherGroup) { … }
bool SchedGroup::canAddSU(SUnit &SU) const { … }
void SchedGroup::initSchedGroup() { … }
void SchedGroup::initSchedGroup(std::vector<SUnit>::reverse_iterator RIter,
SUnitsToCandidateSGsMap &SyncedInstrs) { … }
void SchedGroup::initSchedGroup(SUnitsToCandidateSGsMap &SyncedInstrs) { … }
void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) { … }
void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) { … }
SchedGroupMask
IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const { … }
void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
std::vector<SUnit>::reverse_iterator RIter) { … }
bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) { … }
}
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) { … }
}