#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Target/TargetMachine.h"
usingnamespacellvm;
#include "AMDGPUGenCallingConv.inc"
// Command-line option (default: on). When enabled, 64-bit integer division
// whose operands dynamically fit in 32 bits is bypassed with a cheaper
// 32-bit division path.
static cl::opt<bool> AMDGPUBypassSlowDiv(
"amdgpu-bypass-slow-div",
cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
cl::init(true));
// NOTE(review): all function bodies in this view are elided ("…"); the
// comments below are inferred from names and the TargetLowering hook
// contracts — confirm against the full source.

// Returns an EVT suitable for representing VT in memory operations.
EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { … }
// Number of bits required to hold Op as an unsigned value (presumably via
// DAG known-bits analysis — body elided).
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { … }
// Number of bits required to hold Op as a signed value.
unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { … }
// Constructor: configures operation actions / legality for the subtarget.
// Init list and body are elided in this view.
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: … { … }
// True when the sign of a zero produced by Op may be ignored (e.g. under
// no-signed-zeros fast-math — body elided, confirm).
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { … }
LLVM_READNONE
// Whether an fneg can be folded into an operation with opcode Opc.
static bool fnegFoldsIntoOpcode(unsigned Opc) { … }
// Node-level variant of fnegFoldsIntoOpcode.
static bool fnegFoldsIntoOp(const SDNode *N) { … }
LLVM_READONLY
// Whether N must be encoded as VOP3 for type VT (VOP3 allows source
// modifiers — presumed, body elided).
static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) { … }
LLVM_READONLY
// Whether this select node can absorb source modifiers.
static bool selectSupportsSourceMods(const SDNode *N) { … }
LLVM_READONLY
// Whether N's operation accepts source modifiers (neg/abs).
static bool hasSourceMods(const SDNode *N) { … }
// True if enough of N's users accept source modifiers to make folding
// profitable, given CostThreshold. (Body elided.)
bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
unsigned CostThreshold) { … }
// --- TargetLowering legality / cost hooks (bodies elided in this view;
// --- see llvm/CodeGen/TargetLowering.h for each hook's contract). ---
EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const { … }
// Type used for vector-element index operands.
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { … }
bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const { … }
// Whether Imm can be materialized as an inline FP immediate for VT.
bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const { … }
bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { … }
// Whether narrowing the width of an extending load is profitable here.
bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
ISD::LoadExtType ExtTy,
EVT NewVT) const { … }
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
const SelectionDAG &DAG,
const MachineMemOperand &MMO) const { … }
bool AMDGPUTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { … }
bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { … }
// Whether N's result is uniform (scalar) regardless of divergent operands.
bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { … }
// Produces the negated form of Op when profitable; Cost reports whether the
// negation is cheaper/neutral/more expensive.
SDValue AMDGPUTargetLowering::getNegatedExpression(
SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize,
NegatibleCost &Cost, unsigned Depth) const { … }
// fabs/fneg are free on AMDGPU via source modifiers for supported types
// (presumed — bodies elided).
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { … }
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { … }
bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
unsigned NumElem,
unsigned AS) const { … }
bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const { … }
// Truncation / zero-extension cost queries (EVT and IR-type overloads).
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { … }
bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { … }
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { … }
bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { … }
bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { … }
// DAGCombiner query: should a binop be commuted across a shift of N?
bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
const SDNode* N, CombineLevel Level) const { … }
// --- Calling convention selection and call/return lowering (bodies
// --- elided in this view). ---

// GlobalISel call-lowering variants: pick the CC assignment function for
// calls / returns under the given calling convention.
CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) { … }
CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool IsVarArg) { … }
// Assigns locations to the formal arguments of a compute (kernel) function.
void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const { … }
// Lowers the outgoing return values of the current function.
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const { … }
// SelectionDAG variants of the CC assignment function selectors.
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) { … }
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool IsVarArg) { … }
// Adds a token dependency so stores to the clobbered fixed stack slot
// ClobberedFI are ordered with the call chain (presumed — body elided).
SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const { … }
// Emits a diagnostic for an unsupported call and produces placeholder
// values so lowering can continue.
SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals,
StringRef Reason) const { … }
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const { … }
// --- Custom operation lowering (bodies elided in this view). ---

SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const { … }
// Central dispatch for operations marked Custom in the constructor.
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const { … }
// Provides replacement results for nodes with illegal result types.
void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const { … }
// Look through an fneg to its operand; presumably returns SDValue() when
// Val is not an fneg (body elided — confirm).
static SDValue peekFNeg(SDValue Val) { … }
// Look through sign-manipulating FP ops (fneg/fabs — presumed).
static SDValue peekFPSignOps(SDValue Val) { … }
// Combine select+setcc into the legacy min/max nodes when safe.
SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
SDValue False, SDValue CC, DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
SDValue LHS, SDValue RHS,
SDValue True, SDValue False,
SDValue CC,
DAGCombinerInfo &DCI) const { … }
// --- 64-bit value splitting utilities. ---
// Returns {lo, hi} 32-bit halves of a 64-bit value.
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const { … }
// Returns the {lo, hi} destination types for splitting vector type VT.
std::pair<EVT, EVT>
AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const { … }
// Splits vector N into {lo, hi} parts of the given types.
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
const EVT &LoVT, const EVT &HiVT,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SelectionDAG &DAG) const { … }
// --- Integer division and FP rounding expansions. ---
// 24-bit division fast path; Sign selects signed vs unsigned.
SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
bool Sign) const { … }
void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const { … }
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { … }
// Extracts the biased exponent field from the high half of an f64.
static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL,
SelectionDAG &DAG) { … }
SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { … }
// --- Transcendental (log/exp) and int<->fp lowering (bodies elided). ---

// Whether Src is provably never an f32 denormal.
static bool valueIsKnownNeverF32Denorm(SDValue Src) { … }
// Whether approximate (afn) lowering is permitted for this node/DAG.
bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
SDNodeFlags Flags) { … }
// Whether f32 denormal inputs require special handling for Src.
bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
SDValue Src,
SDNodeFlags Flags) { … }
// Predicate: |Src| < smallest normal f32 (presumed from name — confirm).
SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG,
SDValue Src,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
SDNodeFlags Flags) const { … }
// Returns {scaled input, scale flag} for log lowering with denormal inputs.
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
SDValue Src, SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const { … }
// Builds a fused/contracted multiply-add X*Y + C.
static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X,
SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) { … }
// Shared lowering for log / log10 (via log2 scaling — presumed).
SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const { … }
// Fast/unsafe log expansion; IsLog10 selects the base-10 constant.
SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
SelectionDAG &DAG, bool IsLog10,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const { … }
// Fast/unsafe exp and exp10 expansions.
SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { … }
// Opcode classification for count-leading/trailing-zero nodes.
static bool isCtlzOpc(unsigned Opc) { … }
static bool isCttzOpc(unsigned Opc) { … }
SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const { … }
// Int -> f32 / f64 conversion expansions; Signed selects sint vs uint.
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
bool Signed) const { … }
SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
bool Signed) const { … }
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG,
bool Signed) const { … }
SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const { … }
// Whether Op fits in 24 bits unsigned / signed (for mul24 formation).
static bool isU24(SDValue Op, SelectionDAG &DAG) { … }
static bool isI24(SDValue Op, SelectionDAG &DAG) { … }
// Simplifies the operands of a mul24-family node.
static SDValue simplifyMul24(SDNode *Node24,
TargetLowering::DAGCombinerInfo &DCI) { … }
// Constant-folds a bitfield-extract of Src0 at (Offset, Width).
template <typename IntTy>
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
uint32_t Width, const SDLoc &DL) { … }
// --- DAG combines (bodies elided in this view). ---

// Whether any user of Val is a volatile memory operation.
static bool hasVolatileUser(SDNode *Val) { … }
// Whether a load/store of VT is worth rewriting to an equivalent type.
bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { … }
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
// Combine for AssertSext/AssertZext nodes.
SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
SDNode *N, DAGCombinerInfo &DCI) const { … }
// Applies a 32-bit binop with constants (ValLo, ValHi) to the two halves
// of a 64-bit LHS (presumed from name/signature — body elided).
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
uint32_t ValLo, uint32_t ValHi) const { … }
// Shift combines.
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performTruncateCombine(
SDNode *N, DAGCombinerInfo &DCI) const { … }
// Builds a mul24 (signed or unsigned) of N0 and N1 at the given width.
static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
SDValue N0, SDValue N1, unsigned Size, bool Signed) { … }
// If V is an add of +1, returns the other operand (presumed — confirm).
static SDValue getAddOneOp(const SDNode *V) { … }
// Multiply combines (including mul24 formation and hi/lo variants).
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue
AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
// Emits a find-first-bit node with the given target opcode.
SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
unsigned Opc) const { … }
// select(cond, ctlz/cttz pattern) -> ffbh/ffbl-style combine.
SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const { … }
// Pushes a unary op through a select: op(select(c,a,b)) ->
// select(c, op(a), op(b)) (presumed — body elided).
static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
unsigned Op,
const SDLoc &SL,
SDValue Cond,
SDValue N1,
SDValue N2) { … }
// Folds a free op (e.g. fneg/fabs) out of both arms of a select.
SDValue
AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
SDValue N) const { … }
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
// Whether APF equals 0.5/pi (the inline-immediate inv-2pi constant).
static bool isInv2Pi(const APFloat &APF) { … }
// Cost of negating an FP constant (free if the negated form is also an
// inline immediate — presumed).
TargetLowering::NegatibleCost
AMDGPUTargetLowering::getConstantNegateCost(const ConstantFPSDNode *C) const { … }
bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const { … }
bool AMDGPUTargetLowering::isConstantCheaperToNegate(SDValue N) const { … }
// Maps a min/max opcode to its dual (fmin<->fmax etc.).
static unsigned inverseMinMax(unsigned Opc) { … }
bool AMDGPUTargetLowering::shouldFoldFNegIntoSrc(SDNode *N, SDValue N0) { … }
SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
// Top-level target combine dispatch, called by the DAGCombiner.
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
// --- Argument materialization, node names, and analysis hooks (bodies
// --- elided in this view). ---

// Creates a live-in copy of Reg in register class RC; RawReg presumably
// skips any conversion of the value (body elided — confirm).
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
Register Reg, EVT VT,
const SDLoc &SL,
bool RawReg) const { … }
// Returns (creating if needed) a fixed stack object at Offset.
static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
int64_t Offset) { … }
// Loads / stores an argument value passed via the stack.
SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
EVT VT,
const SDLoc &SL,
int64_t Offset) const { … }
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
SDValue ArgVal,
int64_t Offset) const { … }
// Materializes a preloaded input argument described by Arg.
SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
const TargetRegisterClass *RC,
EVT VT, const SDLoc &SL,
const ArgDescriptor &Arg) const { … }
// Byte offset of an implicit kernel parameter past the explicit kernarg
// segment (two overloads: explicit size vs. derived from MF).
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
uint64_t ExplicitKernArgSize, const ImplicitParameter Param) const { … }
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const MachineFunction &MF, const ImplicitParameter Param) const { … }
#define NODE_NAME_CASE(node) …
// Debug name for an AMDGPUISD target node opcode.
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { … }
// Reciprocal square-root / reciprocal estimate hooks used by the
// DAGCombiner's division/sqrt expansion.
SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const { … }
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const { … }
// Maps a workitem-id intrinsic ID to its dimension index (x/y/z — presumed).
static unsigned workitemIntrinsicDim(unsigned ID) { … }
// Known-bits / sign-bits / NaN analysis for AMDGPU target nodes.
void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { … }
unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const { … }
// GlobalISel counterpart of the sign-bits computation.
unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
GISelKnownBits &Analysis, Register R,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const { … }
bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const { … }
bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const { … }
// CodeGenPrepare hook: operands of I worth sinking into its block.
bool AMDGPUTargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const { … }