#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIFoldOperands.h"
#include "SILoadStoreOptimizer.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIPeepholeSDWA.h"
#include "SIShrinkInstructions.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
usingnamespacellvm;
usingnamespacellvm::PatternMatch;
// File-local plumbing for choosing SGPR and VGPR register allocators
// independently: two registry node types, the command-line options that pick
// an entry from each registry, and the "basic" / "greedy" / "fast" entries.
namespace {
// Registry node type for SGPR allocators; derives from RegisterRegAllocBase
// parameterized on itself.
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> { … };
// Registry node type for VGPR allocators; same shape as the SGPR one.
class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> { … };
// Predicate over (TRI, MRI, Reg).
// NOTE(review): presumably true only when Reg has an SGPR register class —
// confirm against the body.
static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
const MachineRegisterInfo &MRI,
const Register Reg) { … }
// Predicate over (TRI, MRI, Reg).
// NOTE(review): presumably the VGPR counterpart of onlyAllocateSGPRs —
// confirm against the body.
static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
const MachineRegisterInfo &MRI,
const Register Reg) { … }
// Constructor function used for the "default" SGPR registry entry and as the
// cl::init value of both -sgpr-regalloc and -vgpr-regalloc below.
static FunctionPass *useDefaultRegisterAllocator() { … }
// One once_flag per register kind.
// NOTE(review): presumably guard the initializeDefault*RegisterAllocatorOnce
// helpers below — confirm at the call sites.
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
// "default" entry of the SGPR allocator registry.
static SGPRRegisterRegAlloc
defaultSGPRRegAlloc("default",
"pick SGPR register allocator based on -O option",
useDefaultRegisterAllocator);
// -sgpr-regalloc: hidden option whose value is an SGPR allocator constructor,
// parsed through the SGPR registry; defaults to useDefaultRegisterAllocator.
static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<SGPRRegisterRegAlloc>>
SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use for SGPRs"));
// -vgpr-regalloc: hidden option whose value is a VGPR allocator constructor,
// parsed through the VGPR registry; defaults to useDefaultRegisterAllocator.
static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<VGPRRegisterRegAlloc>>
VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use for VGPRs"));
// One-time initialization helpers for the default SGPR / VGPR allocator.
// NOTE(review): exact effect is in the bodies — confirm.
static void initializeDefaultSGPRRegisterAllocatorOnce() { … }
static void initializeDefaultVGPRRegisterAllocatorOnce() { … }
// Allocator factories; each is registered below under the name that matches
// its Basic / Greedy / Fast infix.
static FunctionPass *createBasicSGPRRegisterAllocator() { … }
static FunctionPass *createGreedySGPRRegisterAllocator() { … }
static FunctionPass *createFastSGPRRegisterAllocator() { … }
static FunctionPass *createBasicVGPRRegisterAllocator() { … }
static FunctionPass *createGreedyVGPRRegisterAllocator() { … }
static FunctionPass *createFastVGPRRegisterAllocator() { … }
// SGPR registry entries: "basic", "greedy", "fast".
static SGPRRegisterRegAlloc basicRegAllocSGPR(
"basic", "basic register allocator", createBasicSGPRRegisterAllocator);
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
"greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);
static SGPRRegisterRegAlloc fastRegAllocSGPR(
"fast", "fast register allocator", createFastSGPRRegisterAllocator);
// VGPR registry entries: "basic", "greedy", "fast".
static VGPRRegisterRegAlloc basicRegAllocVGPR(
"basic", "basic register allocator", createBasicVGPRRegisterAllocator);
static VGPRRegisterRegAlloc greedyRegAllocVGPR(
"greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);
static VGPRRegisterRegAlloc fastRegAllocVGPR(
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
}
// -amdgpu-early-ifcvt: early if-conversion. Hidden; off by default.
static cl::opt<bool> EnableEarlyIfConversion(
    "amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"),
    cl::init(false));

// -amdgpu-opt-exec-mask-pre-ra: pre-RA exec mask optimizations. Hidden; on by
// default.
static cl::opt<bool> OptExecMaskPreRA(
    "amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
    cl::desc("Run pre-RA exec mask optimizations"), cl::init(true));

// -amdgpu-lower-global-ctor-dtor: lowering of GPU ctors / dtors. Hidden; on
// by default.
static cl::opt<bool> LowerCtorDtor(
    "amdgpu-lower-global-ctor-dtor",
    cl::desc("Lower GPU ctor / dtors to globals on the device."),
    cl::init(true), cl::Hidden);

// -amdgpu-load-store-vectorizer: load/store vectorizer. Hidden; on by default.
static cl::opt<bool> EnableLoadStoreVectorizer(
    "amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"),
    cl::init(true), cl::Hidden);

// -amdgpu-scalarize-global-loads: global load scalarization. Hidden; on by
// default.
static cl::opt<bool> ScalarizeGlobal(
    "amdgpu-scalarize-global-loads",
    cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden);

// -amdgpu-internalize-symbols: elimination of non-kernel functions and unused
// globals. Hidden; off by default.
static cl::opt<bool> InternalizeSymbols(
    "amdgpu-internalize-symbols",
    cl::desc("Enable elimination of non-kernel functions and unused globals"),
    cl::init(false), cl::Hidden);

// -amdgpu-early-inline-all: inline every function early. Hidden; off by
// default.
static cl::opt<bool> EarlyInlineAll(
    "amdgpu-early-inline-all", cl::desc("Inline all functions early"),
    cl::init(false), cl::Hidden);
// -amdgpu-enable-remove-incompatible-functions: removal of functions that use
// features the target GPU does not support. Hidden; on by default.
static cl::opt<bool> RemoveIncompatibleFunctions(
    "amdgpu-enable-remove-incompatible-functions", cl::Hidden,
    // Adjacent string literals are concatenated verbatim, so the first piece
    // must end in a space; otherwise the help text reads "theyuse".
    cl::desc("Enable removal of functions when they "
             "use features not supported by the target GPU"),
    cl::init(true));
// -amdgpu-sdwa-peephole: SDWA peepholer. On by default. Unlike most options
// in this file it is not marked cl::Hidden.
static cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole",
                                        cl::desc("Enable SDWA peepholer"),
                                        cl::init(true));

// -amdgpu-dpp-combine: DPP combiner. On by default; also not cl::Hidden.
static cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine",
                                      cl::desc("Enable DPP combiner"),
                                      cl::init(true));

// -enable-amdgpu-aa: AMDGPU alias analysis. Hidden; on by default.
static cl::opt<bool> EnableAMDGPUAliasAnalysis(
    "enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"),
    cl::init(true));

// -amdgpu-simplify-libcall: library call simplifications. Hidden; on by
// default.
static cl::opt<bool> EnableLibCallSimplify(
    "amdgpu-simplify-libcall",
    cl::desc("Enable amdgpu library simplifications"), cl::init(true),
    cl::Hidden);

// -amdgpu-ir-lower-kernel-arguments: lower kernel argument loads in an IR
// pass. Hidden; on by default.
static cl::opt<bool> EnableLowerKernelArguments(
    "amdgpu-ir-lower-kernel-arguments",
    cl::desc("Lower kernel argument loads in IR pass"), cl::init(true),
    cl::Hidden);

// -amdgpu-reassign-regs: register reassign optimizations (gfx10+ per the
// description). Hidden; on by default.
static cl::opt<bool> EnableRegReassign(
    "amdgpu-reassign-regs",
    cl::desc("Enable register reassign optimizations on gfx10+"),
    cl::init(true), cl::Hidden);

// -amdgpu-opt-vgpr-liverange: VGPR live-range optimizations for if-else
// structures. Hidden; on by default.
static cl::opt<bool> OptVGPRLiveRange(
    "amdgpu-opt-vgpr-liverange",
    cl::desc("Enable VGPR liverange optimizations for if-else structure"),
    cl::init(true), cl::Hidden);
// -amdgpu-atomic-optimizer-strategy: scan strategy for the atomic optimizer.
// Accepts "DPP", "Iterative" or "None" (described as disabling the
// optimizer); defaults to Iterative.
static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
    "amdgpu-atomic-optimizer-strategy",
    cl::desc("Select DPP or Iterative strategy for scan"),
    cl::init(ScanOptions::Iterative),
    cl::values(clEnumValN(ScanOptions::DPP, "DPP",
                          "Use DPP operations for scan"),
               clEnumValN(ScanOptions::Iterative, "Iterative",
                          "Use Iterative approach for scan"),
               clEnumValN(ScanOptions::None, "None",
                          "Disable atomic optimizer")));
// -amdgpu-mode-register: mode register pass. Hidden; on by default.
static cl::opt<bool> EnableSIModeRegisterPass(
    "amdgpu-mode-register", cl::desc("Enable mode register pass"),
    cl::init(true), cl::Hidden);

// -amdgpu-enable-single-use-vdst: s_singleuse_vdst insertion. Hidden; off by
// default.
static cl::opt<bool> EnableInsertSingleUseVDST(
    "amdgpu-enable-single-use-vdst",
    cl::desc("Enable s_singleuse_vdst insertion"), cl::init(false),
    cl::Hidden);

// -amdgpu-enable-delay-alu: s_delay_alu insertion. Hidden; on by default.
static cl::opt<bool> EnableInsertDelayAlu(
    "amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"),
    cl::init(true), cl::Hidden);

// -amdgpu-enable-vopd: VOPD (dual issue of VALU in wave32). Hidden; on by
// default.
static cl::opt<bool> EnableVOPD(
    "amdgpu-enable-vopd",
    cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true),
    cl::Hidden);

// -amdgpu-dce-in-ra: machine DCE inside register allocation. Hidden; on by
// default.
static cl::opt<bool> EnableDCEInRA(
    "amdgpu-dce-in-ra", cl::init(true), cl::Hidden,
    cl::desc("Enable machine DCE inside regalloc"));

// -amdgpu-set-wave-priority: wave priority adjustment. Hidden; off by default.
static cl::opt<bool> EnableSetWavePriority(
    "amdgpu-set-wave-priority", cl::desc("Adjust wave priority"),
    cl::init(false), cl::Hidden);

// -amdgpu-scalar-ir-passes: scalar IR passes. Hidden; on by default.
static cl::opt<bool> EnableScalarIRPasses(
    "amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"),
    cl::init(true), cl::Hidden);

// -amdgpu-enable-sw-lower-lds: software lowering of LDS to global memory plus
// asan instrumentation of the result. Hidden; on by default.
static cl::opt<bool> EnableSwLowerLDS(
    "amdgpu-enable-sw-lower-lds",
    cl::desc("Enable lowering of lds to global memory pass "
             "and asan instrument resulting IR."),
    cl::init(true), cl::Hidden);
// -amdgpu-enable-lower-module-lds: lower module LDS pass. Hidden; on by
// default. The value is stored externally in the static member
// AMDGPUTargetMachine::EnableLowerModuleLDS rather than in the option object.
// Keep cl::location ahead of cl::init: the initial value is written through
// to the external storage, which has to be bound first.
static cl::opt<bool, true> EnableLowerModuleLDS(
    "amdgpu-enable-lower-module-lds",
    cl::desc("Enable lower module lds pass"),
    cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS),
    cl::init(true),
    cl::Hidden);
// -amdgpu-enable-pre-ra-optimizations: Pre-RA optimizations pass. Hidden; on
// by default.
static cl::opt<bool> EnablePreRAOptimizations(
    "amdgpu-enable-pre-ra-optimizations",
    cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden);

// -amdgpu-enable-promote-kernel-arguments: promotion of flat kernel pointer
// arguments to global. Hidden; on by default.
static cl::opt<bool> EnablePromoteKernelArguments(
    "amdgpu-enable-promote-kernel-arguments",
    cl::desc("Enable promotion of flat kernel pointer arguments to global"),
    cl::Hidden, cl::init(true));

// -amdgpu-enable-image-intrinsic-optimizer: image intrinsic optimizer pass.
// Hidden; on by default.
static cl::opt<bool> EnableImageIntrinsicOptimizer(
    "amdgpu-enable-image-intrinsic-optimizer",
    cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
    cl::Hidden);

// -amdgpu-loop-prefetch: loop data prefetch. Hidden; off by default.
static cl::opt<bool> EnableLoopPrefetch(
    "amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"),
    cl::Hidden, cl::init(false));

// -amdgpu-enable-max-ilp-scheduling-strategy: scheduling strategy that
// maximizes ILP for a single wave. Hidden; off by default.
static cl::opt<bool> EnableMaxIlpSchedStrategy(
    "amdgpu-enable-max-ilp-scheduling-strategy",
    cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
    cl::Hidden, cl::init(false));

// -amdgpu-enable-rewrite-partial-reg-uses: rewrite partial reg uses pass.
// Hidden; on by default.
static cl::opt<bool> EnableRewritePartialRegUses(
    "amdgpu-enable-rewrite-partial-reg-uses",
    cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
    cl::Hidden);

// -amdgpu-enable-hipstdpar: HIP Standard Parallelism Offload support. Hidden;
// off by default.
static cl::opt<bool> EnableHipStdPar(
    "amdgpu-enable-hipstdpar",
    cl::desc("Enable HIP Standard Parallelism Offload support"),
    cl::init(false), cl::Hidden);

// -amdgpu-attributor-enable: AMDGPUAttributorPass. Hidden; on by default.
static cl::opt<bool> EnableAMDGPUAttributor(
    "amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"),
    cl::init(true), cl::Hidden);
// C-linkage entry point for the AMDGPU target, exported via
// LLVM_EXTERNAL_VISIBILITY.
// NOTE(review): presumably registers the target machines and initializes the
// backend's passes — confirm against the body.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { … }
// File-local factory returning an owned TargetLoweringObjectFile for triple
// TT.
// NOTE(review): which concrete subclass is created depends on the body —
// confirm.
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { … }
// File-local scheduler factories. Each takes the scheduling context C and
// returns a ScheduleDAGInstrs; the MachineSchedRegistry entries in this file
// bind each one to a registry name, noted per function below.

// Registered as "si".
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { … }
// Registered as "gcn-max-occupancy".
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { … }
// Registered as "gcn-max-ilp".
static ScheduleDAGInstrs *
createGCNMaxILPMachineScheduler(MachineSchedContext *C) { … }
// Registered as "gcn-iterative-max-occupancy-experimental".
static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { … }
// Registered as "gcn-iterative-minreg".
static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) { … }
// Registered as "gcn-iterative-ilp".
static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext *C) { … }
// Machine scheduler registry entries. Each one binds a registry name and a
// description to one of the file-local scheduler factory functions.

static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler",
                                            createSIMachineScheduler);

static MachineSchedRegistry GCNMaxOccupancySchedRegistry(
    "gcn-max-occupancy", "Run GCN scheduler to maximize occupancy",
    createGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry GCNMaxILPSchedRegistry(
    "gcn-max-ilp", "Run GCN scheduler to maximize ilp",
    createGCNMaxILPMachineScheduler);

// The remaining three entries are labelled experimental in their descriptions.
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry(
    "gcn-iterative-max-occupancy-experimental",
    "Run GCN scheduler to maximize occupancy (experimental)",
    createIterativeGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry GCNMinRegSchedRegistry(
    "gcn-iterative-minreg",
    "Run GCN iterative scheduler for minimal register usage (experimental)",
    createMinRegScheduler);

static MachineSchedRegistry GCNILPSchedRegistry(
    "gcn-iterative-ilp",
    "Run GCN iterative scheduler for ILP scheduling (experimental)",
    createIterativeILPMachineScheduler);
// File-local helper mapping a triple to a StringRef.
// NOTE(review): presumably the data layout string for TT — confirm against
// the body.
static StringRef computeDataLayout(const Triple &TT) { … }
// File-local helper, annotated LLVM_READNONE, that takes a triple and a GPU
// name and returns a GPU name.
// NOTE(review): presumably returns GPU when it is non-empty and a
// triple-dependent default otherwise — confirm.
LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) { … }
// Turns an optional relocation model into a concrete Reloc::Model.
// NOTE(review): the model used when RM is empty is decided in the body —
// confirm.
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { … }
// Constructs the common AMDGPU target machine from the target description,
// triple, CPU and feature strings, target options, optional relocation and
// code models, and the codegen optimization level.
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OptLevel)
: … { … }
// Out-of-class definitions of the static flags. EnableLowerModuleLDS is the
// external storage that the -amdgpu-enable-lower-module-lds option binds to
// through cl::location.
bool AMDGPUTargetMachine::EnableFunctionCalls = …;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = …;
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
// Returns a GPU name for function F.
// NOTE(review): presumably prefers a per-function attribute over the target
// machine's CPU — confirm against the body.
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const { … }
// Returns a feature string for function F.
// NOTE(review): presumably prefers a per-function attribute over the target
// machine's feature string — confirm against the body.
StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const { … }
// File-local predicate over a global value.
// NOTE(review): presumably the callback that decides which globals survive
// internalization — confirm at the call site.
static bool mustPreserveGV(const GlobalValue &GV) { … }
// Hook taking the alias-analysis manager AAM.
// NOTE(review): presumably registers the AMDGPU alias analysis — confirm.
void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { … }
// File-local parser: converts the parameter string Params into a ScanOptions
// value, or yields an error through Expected<>.
static Expected<ScanOptions>
parseAMDGPUAtomicOptimizerStrategy(StringRef Params) { … }
// Converts the parameter string Params into AMDGPUAttributorOptions, or
// yields an error through Expected<>. Unlike the parser above, this one is
// not declared static here.
Expected<AMDGPUAttributorOptions>
parseAMDGPUAttributorPassOptions(StringRef Params) { … }
// Hook taking the new-pass-manager PassBuilder PB.
// NOTE(review): presumably installs the backend's pipeline-parsing and
// extension-point callbacks — confirm against the body.
void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { … }
// Returns a 64-bit signed value for address space AddrSpace.
// NOTE(review): presumably the bit pattern of a null pointer in that address
// space — confirm against the body.
int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) { … }
// Boolean query over a (SrcAS, DestAS) address-space pair.
// NOTE(review): presumably true when a cast between the two needs no code —
// confirm.
bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const { … }
// Returns an address-space number for value V.
// NOTE(review): the value returned when nothing can be assumed is decided in
// the body — confirm.
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const { … }
// Returns a (value, address space) pair for V.
// NOTE(review): presumably a predicate value together with the address space
// it implies — confirm.
std::pair<const Value *, unsigned>
AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const { … }
// Maps a pseudo-source-value kind to an address-space number.
unsigned
AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const { … }
// Takes module M, a requested part count NumParts, and ModuleCallback, which
// receives ownership of a std::unique_ptr<Module>.
// NOTE(review): the meaning of the bool result is decided in the body —
// confirm.
bool AMDGPUTargetMachine::splitModule(
Module &M, unsigned NumParts,
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) { … }
// Constructs the GCN target machine. Takes the same inputs as the
// AMDGPUTargetMachine constructor plus a JIT flag.
// NOTE(review): JIT has no visible use in the signature — confirm whether the
// body reads it.
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool JIT)
: … { … }
// Returns the subtarget to use for function F.
// NOTE(review): presumably cached per GPU / feature-string combination —
// confirm against the body.
const TargetSubtargetInfo *
GCNTargetMachine::getSubtargetImpl(const Function &F) const { … }
// Returns, by value, the TargetTransformInfo for function F.
TargetTransformInfo
GCNTargetMachine::getTargetTransformInfo(const Function &F) const { … }
// New-pass-manager codegen entry point. Receives the module pass manager MPM,
// the output stream Out, the DwoOut stream pointer, the requested file type,
// the codegen options and the instrumentation callbacks; reports failure via
// Error.
Error GCNTargetMachine::buildCodeGenPipeline(
ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
CodeGenFileType FileType, const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC) { … }
// Returns an owned CSE configuration object.
std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const { … }
// GCN-specific pass configuration. File-local, final, and derived from
// AMDGPUPassConfig; its member functions are defined out of line in this
// file.
namespace {
class GCNPassConfig final : public AMDGPUPassConfig { … };
}
// Constructs the shared AMDGPU pass configuration for target machine TM and
// legacy pass manager PM.
AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
: … { … }
// NOTE(review): presumably adds either EarlyCSE or GVN depending on the
// optimization level — confirm against the body.
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() { … }
// NOTE(review): presumably adds the straight-line scalar optimization passes
// — confirm the pass list against the body.
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() { … }
// Pipeline-stage hooks; each is named after the stage whose passes it adds.
// NOTE(review): the meaning of the bool results follows the base-class
// contract, which is not shown in this file — confirm.
void AMDGPUPassConfig::addIRPasses() { … }
void AMDGPUPassConfig::addCodeGenPrepare() { … }
bool AMDGPUPassConfig::addPreISel() { … }
bool AMDGPUPassConfig::addInstSelector() { … }
bool AMDGPUPassConfig::addGCPasses() { … }
// Creates the machine scheduler instance for scheduling context C.
llvm::ScheduleDAGInstrs *
AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const { … }
// GCN version of createMachineScheduler; same signature as the
// AMDGPUPassConfig one.
ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
MachineSchedContext *C) const { … }
// Instruction-selection stage hooks.
bool GCNPassConfig::addPreISel() { … }
void GCNPassConfig::addMachineSSAOptimization() { … }
bool GCNPassConfig::addILPOpts() { … }
bool GCNPassConfig::addInstSelector() { … }
// GlobalISel stage hooks: translation, legalization, register-bank selection
// and instruction selection, each with its "pre" counterpart.
bool GCNPassConfig::addIRTranslator() { … }
void GCNPassConfig::addPreLegalizeMachineIR() { … }
bool GCNPassConfig::addLegalizeMachineIR() { … }
void GCNPassConfig::addPreRegBankSelect() { … }
bool GCNPassConfig::addRegBankSelect() { … }
void GCNPassConfig::addPreGlobalInstructionSelect() { … }
bool GCNPassConfig::addGlobalInstructionSelect() { … }
// Register-allocation stage hooks.
void GCNPassConfig::addFastRegAlloc() { … }
void GCNPassConfig::addOptimizedRegAlloc() { … }
bool GCNPassConfig::addPreRewrite() { … }
// Allocator factories; Optimized selects between the optimized and
// non-optimized variant.
// NOTE(review): presumably consult the -sgpr-regalloc / -vgpr-regalloc
// options before falling back to a default — confirm against the bodies.
FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) { … }
FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) { … }
// NOTE(review): given the message constant defined next, this generic
// entry point is presumably unsupported on this target — confirm.
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) { … }
// Diagnostic text for an unsupported register-allocator option.
// NOTE(review): where it is emitted is not visible in the signatures —
// confirm.
static const char RegAllocOptNotSupportedMessage[] = …;
bool GCNPassConfig::addRegAssignAndRewriteFast() { … }
bool GCNPassConfig::addRegAssignAndRewriteOptimized() { … }
// Post-register-allocation stage hooks.
void GCNPassConfig::addPostRegAlloc() { … }
void GCNPassConfig::addPreSched2() { … }
void GCNPassConfig::addPreEmitPass() { … }
// Creates the legacy pass configuration object for pass manager PM.
// NOTE(review): presumably returns a new GCNPassConfig — confirm against the
// body.
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { … }
// Hook invoked with machine function MF.
// NOTE(review): presumably attaches a delegate or callback to MF's
// MachineRegisterInfo — confirm.
void GCNTargetMachine::registerMachineRegisterInfoCallback(
MachineFunction &MF) const { … }
// Creates the target-specific MachineFunctionInfo for function F and
// subtarget STI; Allocator is supplied for the allocation.
MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const { … }
// MIR (YAML) serialization hooks for the machine function info.
// Returns a default-constructed YAML representation.
yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const { … }
// Returns the YAML representation of MF's function info.
yaml::MachineFunctionInfo *
GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { … }
// Reads function info back from the YAML object MFI_ using the parser state
// PFS; Error and SourceRange are output parameters for diagnostics.
// NOTE(review): which bool value signals failure follows the base-class
// contract, which is not shown in this file — confirm.
bool GCNTargetMachine::parseMachineFunctionInfo(
const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
SMDiagnostic &Error, SMRange &SourceRange) const { … }
// Constructs the new-pass-manager codegen pipeline builder for target machine
// TM with codegen options Opts and instrumentation callbacks PIC.
AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC)
: … { … }
// IR-stage hooks; each receives the addPass callable used to append IR
// passes. Their names match the AMDGPUPassConfig hooks of the same stage.
void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { … }
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const { … }
void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { … }
// Asm-printer hook; the CreateMCStreamer parameter is unnamed in this
// definition.
void AMDGPUCodeGenPassBuilder::addAsmPrinter(AddMachinePass &addPass,
CreateMCStreamer) const { … }
// Instruction-selection hook; reports failure via Error.
Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const { … }
// Decides whether the pass guarded by the boolean option Opt is enabled,
// taking the optimization level Level into account.
// NOTE(review): presumably an explicit command-line setting overrides the
// level check — confirm against the body.
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const { … }
// New-pass-manager counterparts of the AMDGPUPassConfig helpers with the same
// names.
void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const { … }
void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
AddIRPass &addPass) const { … }