#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#include "AMDGPUGenCallingConv.inc"
static cl::opt<bool> AMDGPUBypassSlowDiv(
"amdgpu-bypass-slow-div",
cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
cl::init(true));
EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { … }
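// Conservative upper bound on the number of bits needed to represent Op as an
// unsigned value; the signed variant below is analogous. Both feed the 24-bit
// multiply checks.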
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { … }
unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { … }
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: … { … }
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { … }
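// Returns true if (fneg (Opc ...)) can be absorbed by the operation itself,
// e.g. by negating its operands with source modifiers or switching to the
// inverse min/max opcode, so no separate fneg is needed.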
LLVM_READNONE
static bool fnegFoldsIntoOpcode(unsigned Opc) { … }
static bool fnegFoldsIntoOp(const SDNode *N) { … }
LLVM_READONLY
static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) { … }
LLVM_READONLY
static bool selectSupportsSourceMods(const SDNode *N) { … }
LLVM_READONLY
static bool hasSourceMods(const SDNode *N) { … }
bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
unsigned CostThreshold) { … }
EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const { … }
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { … }
bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const { … }
bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const { … }
bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { … }
bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
ISD::LoadExtType ExtTy,
EVT NewVT) const { … }
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
const SelectionDAG &DAG,
const MachineMemOperand &MMO) const { … }
bool AMDGPUTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { … }
bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { … }
bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { … }
SDValue AMDGPUTargetLowering::getNegatedExpression(
SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize,
NegatibleCost &Cost, unsigned Depth) const { … }
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { … }
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { … }
bool AMDGPUTargetLowering::storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
unsigned NumElem,
unsigned AS) const { … }
bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const { … }
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { … }
bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { … }
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { … }
bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { … }
bool AMDGPUTargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT,
EVT DestVT) const { … }
bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
const SDNode* N, CombineLevel Level) const { … }
CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) { … }
CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool IsVarArg) { … }
void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const { … }
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const { … }
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) { … }
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool IsVarArg) { … }
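// Build a TokenFactor that chains in any loads from the fixed stack slot
// ClobberedFI, so an outgoing call argument stored to that slot does not
// clobber a value that is still being read.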
SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const { … }
SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals,
StringRef Reason) const { … }
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const { … }
SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const { … }
void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const { … }
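// Look through an fneg (peekFNeg), or through fneg and fabs (peekFPSignOps),
// to the underlying value when matching the legacy min/max patterns below.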
static SDValue peekFNeg(SDValue Val) { … }
static SDValue peekFPSignOps(SDValue Val) { … }
SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
SDValue False, SDValue CC, DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
SDValue LHS, SDValue RHS,
SDValue True, SDValue False,
SDValue CC,
DAGCombinerInfo &DCI) const { … }
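// Helpers for splitting a 64-bit value into its two 32-bit halves, or for
// extracting only the low or high half.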
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const { … }
std::pair<EVT, EVT>
AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const { … }
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
const EVT &LoVT, const EVT &HiVT,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SelectionDAG &DAG) const { … }
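// Lower a 32-bit division/remainder whose operands are known to fit in 24
// bits through the fast float-reciprocal sequence instead of the full
// expansion.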
SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
bool Sign) const { … }
void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const { … }
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { … }
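// Extract the exponent field from the high 32 bits of an f64; shared by the
// trunc/round style lowerings below.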
static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL,
SelectionDAG &DAG) { … }
SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { … }
static bool valueIsKnownNeverF32Denorm(SDValue Src) { … }
bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
SDNodeFlags Flags) { … }
bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
SDValue Src,
SDNodeFlags Flags) { … }
SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG,
SDValue Src,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
SDNodeFlags Flags) const { … }
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
SDValue Src, SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const { … }
static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X,
SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) { … }
SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
SelectionDAG &DAG, bool IsLog10,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const { … }
SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { … }
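// Match both the plain and zero-undef forms of the count-leading-zero and
// count-trailing-zero opcodes so they share lowering and combine code.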
static bool isCtlzOpc(unsigned Opc) { … }
static bool isCttzOpc(unsigned Opc) { … }
SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
bool Signed) const { … }
SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
bool Signed) const { … }
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG,
bool Signed) const { … }
SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op,
SelectionDAG &DAG) const { … }
SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const { … }
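// Operand fits in 24 bits when interpreted as unsigned (isU24) or signed
// (isI24); such operands qualify for the hardware 24-bit multiply paths used
// by the mul combines below.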
static bool isU24(SDValue Op, SelectionDAG &DAG) { … }
static bool isI24(SDValue Op, SelectionDAG &DAG) { … }
static SDValue simplifyMul24(SDNode *Node24,
TargetLowering::DAGCombinerInfo &DCI) { … }
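// Constant fold a bitfield extract of Src0 at the given bit offset and width.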
template <typename IntTy>
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
uint32_t Width, const SDLoc &DL) { … }
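// True when some user of this value is a volatile memory operation.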
static bool hasVolatileUser(SDNode *Val) { … }
bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { … }
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
SDNode *N, DAGCombinerInfo &DCI) const { … }
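// Split a 64-bit bitwise operation with a constant operand into two 32-bit
// operations on the low and high halves; ValLo/ValHi are the halves of the
// constant.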
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
uint32_t ValLo, uint32_t ValHi) const { … }
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performTruncateCombine(
SDNode *N, DAGCombinerInfo &DCI) const { … }
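// Emit a 24-bit multiply of N0 and N1, producing either a 32-bit result or a
// Size-bit result assembled from the low and high halves of the product.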
static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
SDValue N0, SDValue N1, unsigned Size, bool Signed) { … }
static SDValue getAddOneOp(const SDNode *V) { … }
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue
AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
unsigned Opc) const { … }
SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const { … }
static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
unsigned Op,
const SDLoc &SL,
SDValue Cond,
SDValue N1,
SDValue N2) { … }
SDValue
AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
SDValue N) const { … }
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
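// Recognize the 1/(2*pi) constant, which has a dedicated inline-immediate
// encoding on some subtargets; this matters when deciding whether negating a
// floating-point constant is profitable.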
static bool isInv2Pi(const APFloat &APF) { … }
TargetLowering::NegatibleCost
AMDGPUTargetLowering::getConstantNegateCost(const ConstantFPSDNode *C) const { … }
bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const { … }
bool AMDGPUTargetLowering::isConstantCheaperToNegate(SDValue N) const { … }
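// Map an fmin/fmax style opcode to its counterpart, used when an fneg is
// pushed through a min/max.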
static unsigned inverseMinMax(unsigned Opc) { … }
bool AMDGPUTargetLowering::shouldFoldFNegIntoSrc(SDNode *N, SDValue N0) { … }
SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const { … }
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
Register Reg, EVT VT,
const SDLoc &SL,
bool RawReg) const { … }
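// Return an existing fixed stack object matching this size and offset, or
// create an immutable one, so stack-passed argument accesses share a frame
// index.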
static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
int64_t Offset) { … }
SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
EVT VT,
const SDLoc &SL,
int64_t Offset) const { … }
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
SDValue ArgVal,
int64_t Offset) const { … }
SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
const TargetRegisterClass *RC,
EVT VT, const SDLoc &SL,
const ArgDescriptor &Arg) const { … }
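// Byte offset of the given implicit parameter within the implicit argument
// area that follows the explicit kernel arguments.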
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
uint64_t ExplicitKernArgSize, const ImplicitParameter Param) const { … }
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const MachineFunction &MF, const ImplicitParameter Param) const { … }
#define NODE_NAME_CASE(node) …
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { … }
SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const { … }
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const { … }
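// Map an amdgcn workitem-id intrinsic to its dimension index (x=0, y=1, z=2)
// for the known-bits computation below.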
static unsigned workitemIntrinsicDim(unsigned ID) { … }
void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { … }
unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const { … }
unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
GISelKnownBits &Analysis, Register R,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const { … }
bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const { … }
bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const { … }
bool AMDGPUTargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const { … }