#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE …
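
// Tuning knobs for the unrolling and inlining heuristics below. All of these
// are hidden cl::opt flags, so they can be overridden from a clang invocation
// with -mllvm, e.g.:
//   clang ... -mllvm -amdgpu-unroll-threshold-private=3000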
static cl::opt<unsigned> UnrollThresholdPrivate(
    "amdgpu-unroll-threshold-private",
    cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
    cl::init(2700), cl::Hidden);

static cl::opt<unsigned> UnrollThresholdLocal(
    "amdgpu-unroll-threshold-local",
    cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
    cl::init(1000), cl::Hidden);

static cl::opt<unsigned> UnrollThresholdIf(
    "amdgpu-unroll-threshold-if",
    cl::desc("Unroll threshold increment for AMDGPU for each if statement "
             "inside loop"),
    cl::init(200), cl::Hidden);

static cl::opt<bool> UnrollRuntimeLocal(
    "amdgpu-unroll-runtime-local",
    cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> UnrollMaxBlockToAnalyze(
    "amdgpu-unroll-max-block-to-analyze",
    cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"),
    cl::init(32), cl::Hidden);

static cl::opt<unsigned> ArgAllocaCost("amdgpu-inline-arg-alloca-cost",
                                       cl::Hidden, cl::init(4000),
                                       cl::desc("Cost of alloca argument"));

static cl::opt<unsigned>
    ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden,
                    cl::init(256),
                    cl::desc("Maximum alloca size to use for inline cost"));

static cl::opt<size_t> InlineMaxBB(
    "amdgpu-inline-max-bb", cl::Hidden, cl::init(1100),
    cl::desc("Maximum number of BBs allowed in a function after inlining"
             " (compile time constraint)"));
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
                              unsigned Depth = 0) { … }

AMDGPUTTIImpl::AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : … { … }
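
// Raise the unroll threshold well above the generic default when the loop
// addresses private (scratch) or local (LDS) memory with loop-dependent
// indices: fully unrolling such loops makes the indices constant, letting
// SROA or LDS lowering remove the accesses, which on a GPU is worth far more
// than the code-size cost.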
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                            TTI::UnrollingPreferences &UP,
                                            OptimizationRemarkEmitter *ORE) { … }

void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::PeelingPreferences &PP) { … }
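
// Largest memcpy/memmove/memset, in bytes, that may still be expanded inline
// rather than lowered to a loop or a libcall.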
int64_t AMDGPUTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const { … }
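
// Subtarget features permitted to differ between caller and callee when
// deciding inline compatibility; areInlineCompatible below masks these bits
// out before comparing feature sets.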
const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = …;

GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : … { … }
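
// Control flow is divergent whenever more than one lane may execute the
// function, since lanes of a wave can evaluate a branch condition
// differently.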
bool GCNTTIImpl::hasBranchDivergence(const Function *F) const { … }
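
// Register queries. GCN hardware registers (SGPRs and VGPRs) are 32 bits
// wide; wider IR "vector registers" are legalized onto multiples of them.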
unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const { … }
TypeSize
GCNTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { … }
unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const { … }
unsigned GCNTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { … }
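
// Clamp the vectorization factor of a load/store chain so that the combined
// access still fits in a single legal memory operation.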
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                         unsigned ChainSizeInBytes,
                                         VectorType *VecTy) const { … }

unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                          unsigned ChainSizeInBytes,
                                          VectorType *VecTy) const { … }

unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { … }
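
// Legality hooks for the LoadStoreVectorizer: a chain may only be merged when
// the resulting access width and alignment are supported in the given address
// space (see getLoadStoreVecRegBitWidth above).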
bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                            Align Alignment,
                                            unsigned AddrSpace) const { … }

bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                             Align Alignment,
                                             unsigned AddrSpace) const { … }

bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                              Align Alignment,
                                              unsigned AddrSpace) const { … }

int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const { … }
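
// When a memcpy/memmove is expanded into an explicit loop, choose the widest
// profitable element type for the main loop, then enumerate the operation
// types needed to copy any residual tail bytes.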
Type *GCNTTIImpl::getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicElementSize) const { … }

void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    Align SrcAlign, Align DestAlign,
    std::optional<uint32_t> AtomicCpySize) const { … }

unsigned GCNTTIImpl::getMaxInterleaveFactor(ElementCount VF) { … }
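
// Describe target memory intrinsics (accessed pointer, read/write behavior)
// so that generic analyses can treat them like ordinary loads and stores.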
bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                    MemIntrinsicInfo &Info) const { … }
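
// Arithmetic cost model. 64-bit integer and FP operations are costed through
// get64BitInstrCost below, since their issue rate varies by subtarget.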
InstructionCost GCNTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) { … }
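
// True for intrinsics that can map onto packed 16-bit instructions (e.g.
// v_pk_fma_f16 on subtargets with VOP3P), making a 2-wide vector call no more
// expensive than its scalar form.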
static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { … }

InstructionCost
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) { … }

InstructionCost GCNTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) { … }

InstructionCost
GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                       std::optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) { … }

InstructionCost
GCNTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                   FastMathFlags FMF,
                                   TTI::TargetCostKind CostKind) { … }

InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                               TTI::TargetCostKind CostKind,
                                               unsigned Index, Value *Op0,
                                               Value *Op1) { … }
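
// Divergence hooks. Values that may differ between lanes of a wave (e.g.
// workitem IDs, atomic results) are sources of divergence; values produced
// once per wave (e.g. readfirstlane) are always uniform.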
bool GCNTTIImpl::isInlineAsmSourceOfDivergence(
    const CallInst *CI, ArrayRef<unsigned> Indices) const { … }

bool GCNTTIImpl::isReadRegisterSourceOfDivergence(
    const IntrinsicInst *ReadReg) const { … }

bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { … }
bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { … }
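
// InferAddressSpaces support: report which operands of an intrinsic are flat
// pointers, and rewrite the intrinsic once such a pointer is proven to point
// into a specific address space.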
bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                            Intrinsic::ID IID) const { … }

Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                    Value *OldV,
                                                    Value *NewV) const { … }
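
// Shuffle cost. On subtargets with packed 16-bit ALUs, some narrow 16-bit
// shuffles can be folded into the packed-instruction encoding and are
// (nearly) free.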
InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                           VectorType *VT, ArrayRef<int> Mask,
                                           TTI::TargetCostKind CostKind,
                                           int Index, VectorType *SubTp,
                                           ArrayRef<const Value *> Args,
                                           const Instruction *CxtI) { … }
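
// Inlining across functions compiled for incompatible subtarget features is
// unsound; compare caller and callee feature bits after masking out
// InlineFeatureIgnoreList.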
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const { … }
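
// Passing a pointer to a caller alloca into a callee often lets SROA remove
// the alloca after inlining, so such calls earn a threshold bonus, sized by
// the helpers below and bounded by ArgAllocaCost and ArgAllocaCutoff.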
static unsigned adjustInliningThresholdUsingCallee(const CallBase *CB,
                                                   const SITargetLowering *TLI,
                                                   const GCNTTIImpl *TTIImpl) { … }

static unsigned getCallArgsTotalAllocaSize(const CallBase *CB,
                                           const DataLayout &DL) { … }

unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const { … }

unsigned GCNTTIImpl::getCallerAllocaCost(const CallBase *CB,
                                         const AllocaInst *AI) const { … }
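
// The GCN unrolling/peeling overrides largely defer to the shared
// AMDGPUTTIImpl logic defined earlier in this file.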
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP,
                                         OptimizationRemarkEmitter *ORE) { … }

void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                       TTI::PeelingPreferences &PP) { … }
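
// Cost of a 64-bit ALU operation relative to the 32-bit baseline: full, half,
// or quarter rate depending on the subtarget.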
int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const { … }
std::pair<InstructionCost, MVT>
GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const { … }
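
// Software-prefetch tuning: how far ahead of a load to issue a prefetch, and
// which address spaces are worth prefetching from at all.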
unsigned GCNTTIImpl::getPrefetchDistance() const { … }
bool GCNTTIImpl::shouldPrefetchAddressSpace(unsigned AS) const { … }