#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#define DEBUG_TYPE …
usingnamespacellvm;
namespace {
/// Boolean command-line flag `disable-promote-alloca-to-vector`, off by
/// default. Per its description, setting it disables promotion of allocas to
/// vectors.
static cl::opt<bool>
DisablePromoteAllocaToVector("disable-promote-alloca-to-vector",
cl::desc("Disable promote alloca to vector"),
cl::init(false));
/// Boolean command-line flag `disable-promote-alloca-to-lds`, off by default.
/// Per its description, setting it disables promotion of allocas to LDS.
static cl::opt<bool>
DisablePromoteAllocaToLDS("disable-promote-alloca-to-lds",
cl::desc("Disable promote alloca to LDS"),
cl::init(false));
/// Unsigned command-line option `amdgpu-promote-alloca-to-vector-limit`:
/// the maximum byte size for which alloca-to-vector promotion is considered.
/// NOTE(review): the default of 0 presumably means "no explicit override"
/// rather than "never promote" -- confirm at the use site.
static cl::opt<unsigned> PromoteAllocaToVectorLimit(
"amdgpu-promote-alloca-to-vector-limit",
cl::desc("Maximum byte size to consider promote alloca to vector"),
cl::init(0));
/// Unsigned command-line option `promote-alloca-vector-loop-user-weight`,
/// defaulting to 4: the bonus weight given to alloca users located inside a
/// loop when profitable allocas are sorted.
static cl::opt<unsigned>
LoopUserWeight("promote-alloca-vector-loop-user-weight",
cl::desc("The bonus weight of users of allocas within loop "
"when sorting profitable allocas"),
cl::init(4));
/// Implementation class for alloca promotion. Its members defined out of line
/// later in this file include run(), sortAllocasToPromote(),
/// tryPromoteAllocaToVector(), tryPromoteAllocaToLDS(),
/// hasSufficientLocalMem(), collectUsesWithPtrTypes(),
/// binaryOpIsDerivedFromSameAlloca(), getLocalSizeYZ() and getWorkitemID().
/// NOTE(review): presumably shared by the legacy and new pass-manager entry
/// points -- confirm.
class AMDGPUPromoteAllocaImpl { … };
/// FunctionPass subclass for alloca promotion.
/// NOTE(review): presumably the legacy pass-manager wrapper that may promote
/// to either a vector or LDS, in contrast to AMDGPUPromoteAllocaToVector --
/// confirm.
class AMDGPUPromoteAlloca : public FunctionPass { … };
/// FunctionPass subclass registered below under DEBUG_TYPE "-to-vector" with
/// the description "AMDGPU promote alloca to vector".
/// NOTE(review): presumably performs only the vector promotion and never the
/// LDS promotion -- confirm.
class AMDGPUPromoteAllocaToVector : public FunctionPass { … };
/// Returns an unsigned VGPR count for function \p F on target machine \p TM.
/// NOTE(review): presumably the maximum number of VGPRs available to \p F on
/// its subtarget -- confirm what the value is derived from.
unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) { … }
}
// Out-of-line definitions of the static ID members of the two FunctionPass
// subclasses above.
// NOTE(review): in LLVM's legacy pass manager it is the address of ID, not
// its value, that identifies a pass -- confirm nothing relies on the value.
char AMDGPUPromoteAlloca::ID = …;
char AMDGPUPromoteAllocaToVector::ID = …;
// Legacy pass registration.
// NOTE(review): presumably registers AMDGPUPromoteAlloca, mirroring the
// "-to-vector" registration that follows -- confirm.
INITIALIZE_PASS_BEGIN(…)
// Registers AMDGPUPromoteAllocaToVector under the argument
// DEBUG_TYPE "-to-vector" with the description
// "AMDGPU promote alloca to vector", and declares a dependency on
// LoopInfoWrapperPass. The trailing `false, false` are the macro's
// CFG-only and is-analysis flags.
INITIALIZE_PASS_BEGIN(AMDGPUPromoteAllocaToVector, DEBUG_TYPE "-to-vector",
"AMDGPU promote alloca to vector", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteAllocaToVector, DEBUG_TYPE "-to-vector",
"AMDGPU promote alloca to vector", false, false)
// Definitions of the llvm-namespace `char &` pass ID references.
// NOTE(review): presumably bound to AMDGPUPromoteAlloca::ID and
// AMDGPUPromoteAllocaToVector::ID respectively, so other code can name these
// passes without seeing the class definitions -- confirm.
char &llvm::AMDGPUPromoteAllocaID = …;
char &llvm::AMDGPUPromoteAllocaToVectorID = …;
/// New pass-manager entry point of AMDGPUPromoteAllocaPass for function \p F.
/// \p AM is the function analysis manager. Returns the analyses that remain
/// valid after the pass.
/// NOTE(review): presumably delegates to AMDGPUPromoteAllocaImpl::run with
/// LDS promotion enabled -- confirm.
PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
FunctionAnalysisManager &AM) { … }
/// New pass-manager entry point of AMDGPUPromoteAllocaToVectorPass for
/// function \p F. \p AM is the function analysis manager. Returns the
/// analyses that remain valid after the pass.
/// NOTE(review): presumably delegates to AMDGPUPromoteAllocaImpl::run with
/// LDS promotion disabled -- confirm.
PreservedAnalyses
AMDGPUPromoteAllocaToVectorPass::run(Function &F, FunctionAnalysisManager &AM) { … }
/// Factory function returning a FunctionPass pointer.
/// NOTE(review): presumably a newly allocated AMDGPUPromoteAlloca whose
/// ownership passes to the caller / pass manager -- confirm.
FunctionPass *llvm::createAMDGPUPromoteAlloca() { … }
/// Factory function returning a FunctionPass pointer.
/// NOTE(review): presumably a newly allocated AMDGPUPromoteAllocaToVector
/// whose ownership passes to the caller / pass manager -- confirm.
FunctionPass *llvm::createAMDGPUPromoteAllocaToVector() { … }
/// File-local helper taking an alloca and an output vector of Use pointers.
/// NOTE(review): presumably appends the uses reachable from \p Alloca to
/// \p Uses; whether it follows derived pointers (GEPs, casts) and whether it
/// clears \p Uses first must be confirmed against the body.
static void collectAllocaUses(AllocaInst &Alloca,
SmallVectorImpl<Use *> &Uses) { … }
/// Takes the candidate list \p Allocas by mutable reference.
/// NOTE(review): presumably reorders it in place by promotion profitability.
/// The LoopUserWeight option's description says users inside loops get a
/// bonus weight "when sorting profitable allocas" -- confirm the exact
/// scoring and whether the sort is stable.
void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
SmallVectorImpl<AllocaInst *> &Allocas) { … }
/// Main driver for function \p F.
/// NOTE(review): \p PromoteToLDS presumably selects whether LDS promotion is
/// attempted in addition to vector promotion, and the result presumably
/// reports whether \p F was modified -- confirm both.
bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) { … }
/// Per-instruction record for memory transfer intrinsics. It is the mapped
/// type of the DenseMap<MemTransferInst *, MemTransferInfo> that
/// promoteAllocaUserToVector() receives as TransferInfo.
/// NOTE(review): presumably holds the source and destination vector indices
/// of the transfer -- confirm the fields.
struct MemTransferInfo { … };
/// Predicate on a memset intrinsic \p I, given the alloca \p AI and the data
/// layout \p DL.
/// NOTE(review): presumably true only when the memset can be handled by the
/// vector promotion (e.g. it covers the whole alloca) -- confirm the exact
/// conditions.
static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
const DataLayout &DL) { … }
/// Returns a Value for pointer \p Ptr, given \p GEPIdx, a read-only map from
/// GEP instructions to values.
/// NOTE(review): presumably looks \p Ptr up in \p GEPIdx (after stripping
/// casts) to get its vector element index. What is returned for a pointer
/// that is not a mapped GEP must be confirmed.
static Value *
calculateVectorIndex(Value *Ptr,
const std::map<GetElementPtrInst *, Value *> &GEPIdx) { … }
/// Returns a Value for \p GEP, given the base \p Alloca, the vector element
/// type \p VecElemTy and the data layout \p DL.
/// NOTE(review): presumably the element index into the promoted vector that
/// \p GEP addresses, or nullptr when the GEP cannot be expressed as one --
/// confirm, including whether new instructions may be created.
static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
Type *VecElemTy, const DataLayout &DL) { … }
/// Handles one user instruction of an alloca being promoted to a vector.
///
/// \param Inst          the user instruction to process.
/// \param DL            the module's data layout.
/// \param VectorTy      the fixed vector type involved in the promotion.
/// \param VecStoreSize  NOTE(review): presumably the store size of
///                      \p VectorTy in bytes -- confirm units.
/// \param ElementSize   NOTE(review): presumably the size of one vector
///                      element in bytes -- confirm units.
/// \param TransferInfo  per-MemTransferInst records (mutable).
/// \param GEPVectorIdx  map from GEP instructions to values (mutable).
/// \param CurVal        NOTE(review): presumably the current value of the
///                      promoted vector at \p Inst -- confirm.
/// \param DeferredLoads output list of loads.
///                      NOTE(review): presumably loads that cannot be
///                      rewritten until later -- confirm.
/// \returns a Value.
///          NOTE(review): presumably the new vector value when \p Inst
///          writes to the alloca and nullptr otherwise -- confirm.
static Value *promoteAllocaUserToVector(
Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
unsigned VecStoreSize, unsigned ElementSize,
DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
std::map<GetElementPtrInst *, Value *> &GEPVectorIdx, Value *CurVal,
SmallVectorImpl<LoadInst *> &DeferredLoads) { … }
/// Predicate on a load/store access type \p AccessTy against the vector type
/// \p VecTy, using the data layout \p DL.
/// NOTE(review): presumably true when an access of \p AccessTy can be
/// rewritten as operations on \p VecTy (compatible sizes) -- confirm the
/// exact rule.
static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy,
const DataLayout &DL) { … }
/// Applies the callback \p Fn, which takes an Instruction pointer, over
/// \p WorkList, a container type supplied as a template parameter.
/// NOTE(review): the visiting order, and whether some instructions are
/// visited in a separate pass from others, must be confirmed against the
/// body. \p Fn is a std::function taken by value.
template <typename InstContainer>
static void forEachWorkListItem(const InstContainer &WorkList,
std::function<void(Instruction *)> Fn) { … }
/// Attempts to promote \p Alloca to a vector.
/// NOTE(review): presumably returns true when the promotion was performed
/// and false when the alloca was left untouched -- confirm, including how
/// DisablePromoteAllocaToVector and PromoteAllocaToVectorLimit are applied.
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) { … }
/// Returns a pair of Values, given the IR builder \p Builder.
/// NOTE(review): judging by the name, presumably the workgroup local size in
/// the Y and Z dimensions, in that order, emitted through \p Builder --
/// confirm the order and how the values are obtained.
std::pair<Value *, Value *>
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) { … }
/// Returns a Value, given the IR builder \p Builder and an index \p N.
/// NOTE(review): presumably the workitem ID for dimension \p N
/// (0 = X, 1 = Y, 2 = Z), emitted through \p Builder -- confirm the valid
/// range of \p N.
Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
unsigned N) { … }
/// Predicate on a call instruction \p CI.
/// NOTE(review): presumably true for calls (likely a fixed set of
/// intrinsics) that do not block promotion of the alloca they use -- confirm
/// which calls are accepted.
static bool isCallPromotable(CallInst *CI) { … }
/// Const predicate on instruction \p Inst and two of its operand indices,
/// \p OpIdx0 and \p OpIdx1, relative to \p BaseAlloca and \p Val.
/// NOTE(review): judging by the name, presumably checks that both operands
/// of a two-pointer instruction (compare, select, ...) derive from
/// \p BaseAlloca, with \p Val being the operand already known to -- confirm.
bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
Value *BaseAlloca, Value *Val, Instruction *Inst, int OpIdx0,
int OpIdx1) const { … }
/// Const member taking \p BaseAlloca, a value \p Val and an output vector
/// \p WorkList.
/// NOTE(review): presumably walks the users of \p Val, appending
/// pointer-typed users to \p WorkList, and returns false when a user is
/// found that prevents promotion -- confirm. \p WorkList may be partially
/// filled on failure.
bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
Value *BaseAlloca, Value *Val, std::vector<Value *> &WorkList) const { … }
/// Predicate for function \p F. It is a non-const member.
/// NOTE(review): presumably reports whether enough local memory (LDS)
/// remains for \p F to allow LDS promotion, and may update cached state on
/// the object -- confirm.
bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) { … }
/// Attempts to promote the alloca \p I to LDS.
/// NOTE(review): \p SufficientLDS is presumably the result of
/// hasSufficientLocalMem() for the enclosing function, and the return value
/// presumably reports whether the promotion was performed -- confirm,
/// including how DisablePromoteAllocaToLDS is applied.
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
bool SufficientLDS) { … }