#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE …
using namespace llvm;
using namespace llvm::PatternMatch;
namespace {
static cl::opt<bool> WidenLoads(
"amdgpu-codegenprepare-widen-constant-loads",
cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,
cl::init(false));
static cl::opt<bool> Widen16BitOps(
"amdgpu-codegenprepare-widen-16-bit-ops",
cl::desc("Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,
cl::init(true));
static cl::opt<bool>
BreakLargePHIs("amdgpu-codegenprepare-break-large-phis",
cl::desc("Break large PHI nodes for DAGISel"),
cl::ReallyHidden, cl::init(true));
static cl::opt<bool>
ForceBreakLargePHIs("amdgpu-codegenprepare-force-break-large-phis",
cl::desc("For testing purposes, always break large "
"PHIs even if it isn't profitable."),
cl::ReallyHidden, cl::init(false));
static cl::opt<unsigned> BreakLargePHIsThreshold(
"amdgpu-codegenprepare-break-large-phis-threshold",
cl::desc("Minimum type size in bits for breaking large PHI nodes"),
cl::ReallyHidden, cl::init(32));
static cl::opt<bool> UseMul24Intrin(
"amdgpu-codegenprepare-mul24",
cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,
cl::init(true));
static cl::opt<bool> ExpandDiv64InIR(
"amdgpu-codegenprepare-expand-div64",
cl::desc("Expand 64-bit division in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,
cl::init(false));
static cl::opt<bool> DisableIDivExpand(
"amdgpu-codegenprepare-disable-idiv-expansion",
cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,
cl::init(false));
static cl::opt<bool> DisableFDivExpand(
"amdgpu-codegenprepare-disable-fdiv-expansion",
cl::desc("Prevent expanding floating point division in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,
cl::init(false));
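// Visitor that performs the IR adjustments ahead of instruction selection:
// widening uniform 16-bit ops, forming mul24, expanding integer and FP
// division, and breaking up large PHI nodes. The FunctionPass below is a
// thin legacy-PM wrapper around this implementation.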
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> { … };
class AMDGPUCodeGenPrepare : public FunctionPass { … };
} // namespace
bool AMDGPUCodeGenPrepareImpl::run(Function &F) { … }
unsigned AMDGPUCodeGenPrepareImpl::getBaseElementBitWidth(const Type *T) const { … }
Type *AMDGPUCodeGenPrepareImpl::getI32Ty(IRBuilder<> &B, const Type *T) const { … }
bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const { … }
bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const { … }
bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const { … }
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const { … }
static bool promotedOpIsNSW(const Instruction &I) { … }
static bool promotedOpIsNUW(const Instruction &I) { … }
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const { … }
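// Promote uniform 16-bit binary ops, compares, and selects to 32 bits;
// scalar (SALU) instructions only operate on 32-bit values, so the narrower
// forms would otherwise be legalized late. Gated by the Widen16BitOps
// option above.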
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const { … }
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(ICmpInst &I) const { … }
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(SelectInst &I) const { … }
bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
IntrinsicInst &I) const { … }
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const { … }
unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const { … }
static void extractValues(IRBuilder<> &Builder,
SmallVectorImpl<Value *> &Values, Value *V) { … }
static Value *insertValues(IRBuilder<> &Builder,
Type *Ty,
SmallVectorImpl<Value *> &Values) { … }
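// Replace a 32-bit multiply with the 24-bit multiply intrinsics when both
// operands are known (via numBitsUnsigned/numBitsSigned) to fit in 24 bits.
// Gated by the UseMul24Intrin option above.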
bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const { … }
static SelectInst *findSelectThroughCast(Value *V, CastInst *&Cast) { … }
bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const { … }
std::pair<Value *, Value *>
AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
Value *Src) const { … }
Value *AMDGPUCodeGenPrepareImpl::emitRcpIEEE1ULP(IRBuilder<> &Builder,
Value *Src,
bool IsNegative) const { … }
Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
Value *RHS,
FastMathFlags FMF) const { … }
Value *AMDGPUCodeGenPrepareImpl::emitSqrtIEEE2ULP(IRBuilder<> &Builder,
Value *Src,
FastMathFlags FMF) const { … }
static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
bool IsNegative) { … }
bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
FastMathFlags DivFMF,
FastMathFlags SqrtFMF) const { … }
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
const FastMathFlags SqrtFMF, const Instruction *CtxI) const { … }
Value *
AMDGPUCodeGenPrepareImpl::optimizeWithRcp(IRBuilder<> &Builder, Value *Num,
Value *Den, FastMathFlags FMF,
const Instruction *CtxI) const { … }
Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast(
IRBuilder<> &Builder, Value *Num, Value *Den, float ReqdAccuracy) const { … }
Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
FastMathFlags SqrtFMF, Value *RsqOp, const Instruction *FDivInst,
float ReqdDivAccuracy) const { … }
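// Lower fdiv into rcp, fdiv.fast, or a frexp-scaled divide depending on the
// fast-math flags and the !fpmath accuracy requirement; a divide by sqrt is
// folded into rsq when canOptimizeWithRsq allows it.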
bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) { … }
static bool hasUnsafeFPMath(const Function &F) { … }
static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
Value *LHS, Value *RHS) { … }
static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) { … }
int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
Value *Den, unsigned AtLeast,
bool IsSigned) const { … }
Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
BinaryOperator &I, Value *Num,
Value *Den, bool IsDiv,
bool IsSigned) const { … }
Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
IRBuilder<> &Builder, BinaryOperator &I, Value *Num, Value *Den,
unsigned DivBits, bool IsDiv, bool IsSigned) const { … }
bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
Value *Num,
Value *Den) const { … }
static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) { … }
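// Expand a 32-bit (or narrower) integer divide/remainder into an inline
// sequence built around the hardware reciprocal, taking the 24-bit path
// from expandDivRem24 when the operands are small enough.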
Value *AMDGPUCodeGenPrepareImpl::expandDivRem32(IRBuilder<> &Builder,
BinaryOperator &I, Value *X,
Value *Y) const { … }
Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
BinaryOperator &I, Value *Num,
Value *Den) const { … }
void AMDGPUCodeGenPrepareImpl::expandDivRem64(BinaryOperator &I) const { … }
bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) { … }
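// Widen sub-dword loads from constant address spaces to 32 bits when the
// extra bytes are known dereferenceable (see canWidenScalarExtLoad).
// Gated by the WidenLoads option above.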
bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) { … }
bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) { … }
bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) { … }
static bool areInSameBB(const Value *A, const Value *B) { … }
static bool isInterestingPHIIncomingValue(const Value *V) { … }
static void collectPHINodes(const PHINode &I,
SmallPtrSet<const PHINode *, 8> &SeenPHIs) { … }
bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) { … }
class VectorSlice { … };
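// Break PHI nodes of large vector types into slices (see VectorSlice) so
// that DAGISel does not have to copy a huge value across basic blocks.
// Thresholds and heuristics are controlled by the break-large-phis options
// above.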
bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) { … }
static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
const AMDGPUTargetMachine &TM, unsigned AS) { … }
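// Rewrite address space casts whose source pointer is known to be non-null
// (per isPtrKnownNeverNull) so the usual null-pointer check in the cast
// expansion can be dropped.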
bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { … }
bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) { … }
bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) { … }
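// Recognize the fract idiom minnum(x - floor(x), 0x1.fffffep-1), matched
// from visitMinNum, and rewrite it with the hardware fract operation when
// NaN handling allows.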
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) { … }
Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
Value *FractArg) { … }
bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) { … }
static bool isOneOrNegOne(const Value *Val) { … }
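// Rewrite f32 sqrt calls: use the native instruction when the accuracy and
// denormal requirements allow it, otherwise emit the expanded IEEE-accurate
// sequence (see emitSqrtIEEE2ULP).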
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) { … }
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) { … }
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) { … }
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
FunctionAnalysisManager &FAM) { … }
INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
false, false)
char AMDGPUCodeGenPrepare::ID = …;
FunctionPass *llvm::createAMDGPUCodeGenPreparePass() { … }