AMDGPUISelDAGToDAG.cpp | Explore in Territory

//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600RegisterInfo.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"

#ifdef EXPENSIVE_CHECKS
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif

#define DEBUG_TYPE …

usingnamespacellvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
static SDValue stripBitcast(SDValue Val) { … }

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) { … }

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) { … }

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(…)

/// This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOptLevel OptLevel) { … }

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM,
                                       CodeGenOptLevel OptLevel)
    : … { … }

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { … }

bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const { … }

bool AMDGPUDAGToDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) { … }

void AMDGPUDAGToDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { … }

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { … }

void AMDGPUDAGToDAGISel::PreprocessISelDAG() { … }

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { … }

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const { … }

SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
                                         SDValue Glue) const { … }

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { … }

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { … }

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const { … }

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { … }

void AMDGPUDAGToDAGISel::Select(SDNode *N) { … }

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { … }

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const { … }

static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) { … }

bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                                    SDValue &RHS) const { … }

StringRef AMDGPUDAGToDAGISelLegacy::getPassName() const { … }

AMDGPUISelDAGToDAGPass::AMDGPUISelDAGToDAGPass(TargetMachine &TM)
    : … { … }

PreservedAnalyses
AMDGPUISelDAGToDAGPass::run(MachineFunction &MF,
                            MachineFunctionAnalysisManager &MFAM) { … }

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) { … }

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) { … }

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const { … }

// FIXME: Should only handle uaddo_carry/usubo_carry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { … }

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { … }

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { … }

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) { … }

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const { … }

// Return whether the operation has NoUnsignedWrap property.
static bool isNoUnsignedWrap(SDValue Addr) { … }

// Check that the base address of flat scratch load/store in the form of `base +
// offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
// requirement). We always treat the first operand as the base address here.
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const { … }

// Check address value in SGPR/VGPR are legal for flat scratch in the form
// of: SGPR + VGPR.
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const { … }

// Check address value in SGPR/VGPR are legal for flat scratch in the form
// of: SGPR + VGPR + Imm.
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const { … }

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const { … }

bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const { … }

bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const { … }

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const { … }

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset) const { … }

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { … }

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const { … }

static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) { … }

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset
                                           ) const { … }

bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
                                          SDValue &SOffset) const { … }

// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) { … }

bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const { … }

bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
                                            SDValue &VAddr,
                                            SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
                                             SDValue &VAddr,
                                             SDValue &Offset) const { … }

// If this matches zero_extend i32:x, return x
static SDValue matchZExtFromI32(SDValue Op) { … }

// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
                                           SDValue Addr,
                                           SDValue &SAddr,
                                           SDValue &VOffset,
                                           SDValue &Offset) const { … }

static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) { … }

// Match (32-bit SGPR base) + sext(imm offset)
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
                                            SDValue &SAddr,
                                            SDValue &Offset) const { … }

// Check whether the flat scratch SVS swizzle bug affects this access.
bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
    SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const { … }

bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
                                             SDValue &VAddr, SDValue &SAddr,
                                             SDValue &Offset) const  { … }

// For unbuffered smem loads, it is illegal for the Immediate Offset to be
// negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
// Handle the case where the Immediate Offset + SOffset is negative.
bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     bool Imm32Only,
                                                     bool IsBuffer,
                                                     int64_t ImmOffset) const { … }

// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
// not null) offset. If Imm32Only is true, match only 32-bit immediate
// offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue *SOffset, SDValue *Offset,
                                          bool Imm32Only, bool IsBuffer,
                                          bool HasSOffset,
                                          int64_t ImmOffset) const { … }

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { … }

// Match a base and an immediate (if Offset is not null) or an SGPR (if
// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
// true, match only 32-bit immediate offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
                                              SDValue *SOffset, SDValue *Offset,
                                              bool Imm32Only, bool IsBuffer,
                                              bool HasSOffset,
                                              int64_t ImmOffset) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue *SOffset, SDValue *Offset,
                                    bool Imm32Only) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &SOffset) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
                                           SDValue &SOffset,
                                           SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
                                               SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
                                                 SDValue &Offset) const { … }

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const { … }

SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) { … }

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { … }

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { … }

static SDValue combineBallotPattern(SDValue VCMP, bool &Negate) { … }

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { … }

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) { … }

static unsigned gwsIntrinToOpcode(unsigned IntrID) { … }

void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { … }

void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) { … }

void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) { … }

bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
    SDValue In, SDValue &Src, SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { … }

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
                                               SDValue &SrcMods,
                                               bool OpSel) const { … }

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
                                           SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods, SDValue &Clamp,
                                          SDValue &Omod) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
                                            SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const { … }

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
                                                  SDValue &Src) const { … }

static MachineSDNode *buildRegSequence32(SmallVectorImpl<SDValue> &Elts,
                                         llvm::SelectionDAG *CurDAG,
                                         const SDLoc &DL) { … }

static MachineSDNode *buildRegSequence16(SmallVectorImpl<SDValue> &Elts,
                                         llvm::SelectionDAG *CurDAG,
                                         const SDLoc &DL) { … }

static MachineSDNode *buildRegSequence(SmallVectorImpl<SDValue> &Elts,
                                       llvm::SelectionDAG *CurDAG,
                                       const SDLoc &DL, unsigned ElementSize) { … }

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<SDValue> &Elts, SDValue &Src,
                                 llvm::SelectionDAG *CurDAG, const SDLoc &DL,
                                 unsigned ElementSize) { … }

// Check all f16 elements for modifiers while looking through b32 and v2b16
// build vector, stop if element does not satisfy ModifierCheck.
static void
checkWMMAElementsModifiersF16(BuildVectorSDNode *BV,
                              std::function<bool(SDValue)> ModifierCheck) { … }

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
                                              SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
                                                 SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
                                                 SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const { … }

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
                                            SDValue &IndexKey) const { … }

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
                                             SDValue &IndexKey) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const { … }

// The return value is not whether the match is possible (which it always is),
// but whether or not it a conversion is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
                                                  SDValue &SrcMods) const { … }

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const { … }

SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const { … }

bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { … }

bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const { … }

void AMDGPUDAGToDAGISel::PostprocessISelDAG() { … }

AMDGPUDAGToDAGISelLegacy::AMDGPUDAGToDAGISelLegacy(TargetMachine &TM,
                                                   CodeGenOptLevel OptLevel)
    : … { … }

char AMDGPUDAGToDAGISelLegacy::ID = …;
llvm/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp