//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPULegalizerInfo.h"

#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;

// Hack until load/store selection patterns support any tuple of legal types.
static cl::opt<bool> EnableNewLegality(
  "amdgpu-global-isel-new-legality",
  cl::desc("Use GlobalISel desired legality, rather than try to use"
           "rules compatible with selection patterns"),
  cl::init(false),
  cl::ReallyHidden);

static constexpr unsigned MaxRegisterSize = 1024;

// Round the number of vector elements up to the next power of two.
static LLT getPow2VectorType(LLT Ty) {}

// Round the scalar bit width up to the next power of two.
static LLT getPow2ScalarType(LLT Ty) {}
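
// A minimal sketch of the rounding these two helpers perform (assuming
// Log2_32_Ceil from llvm/Support/MathExtras.h; not necessarily the exact
// upstream bodies):
//
//   static LLT getPow2VectorType(LLT Ty) {
//     unsigned NElts = Ty.getNumElements();
//     unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
//     return Ty.changeElementCount(ElementCount::getFixed(Pow2NElts));
//   }
//
//   static LLT getPow2ScalarType(LLT Ty) {
//     unsigned Bits = Ty.getSizeInBits();
//     unsigned Pow2Bits = 1 << Log2_32_Ceil(Bits);
//     return LLT::scalar(Pow2Bits);
//   }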

/// \returns true if this is an odd-sized vector which should be widened by
/// adding an additional element. This is mostly to handle <3 x s16> ->
/// <4 x s16>. This excludes s1 vectors, which should always be scalarized.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {}
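
// A hedged sketch of the predicate described above (illustrative only; the
// upstream body may differ in detail):
//
//   return [=](const LegalityQuery &Query) {
//     const LLT Ty = Query.Types[TypeIdx];
//     if (!Ty.isVector())
//       return false;
//     const unsigned EltSize = Ty.getElementType().getSizeInBits();
//     // Odd element count, sub-32-bit (but not s1) elements, and a total size
//     // that isn't already a multiple of 32 bits.
//     return Ty.getNumElements() % 2 != 0 && EltSize > 1 && EltSize < 32 &&
//            Ty.getSizeInBits() % 32 != 0;
//   };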

static LegalityPredicate sizeIsMultipleOf32(unsigned TypeIdx) {}

static LegalityPredicate isWideVec16(unsigned TypeIdx) {}

static LegalizeMutation oneMoreElement(unsigned TypeIdx) {}

static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {}

// Increase the number of vector elements until the total size reaches the next
// multiple of 32 bits.
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {}
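
// A hedged sketch of this mutation (assuming sub-32-bit elements; not
// necessarily the exact upstream body):
//
//   return [=](const LegalityQuery &Query) {
//     const LLT Ty = Query.Types[TypeIdx];
//     const LLT EltTy = Ty.getElementType();
//     const unsigned EltSize = EltTy.getSizeInBits();
//     const unsigned Size = Ty.getSizeInBits();
//     const unsigned NextMul32 = (Size + 31) / 32;  // round size up to 32-bit units
//     const unsigned NewNumElts = (NextMul32 * 32 + EltSize - 1) / EltSize;
//     return std::pair(TypeIdx, LLT::fixed_vector(NewNumElts, EltTy));
//   };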

// Increase the number of vector elements to reach the next legal RegClass.
static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) {}

static LLT getBufferRsrcScalarType(const LLT Ty) {}

static LLT getBufferRsrcRegisterType(const LLT Ty) {}

static LLT getBitcastRegisterType(const LLT Ty) {}

static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) {}

static LegalizeMutation bitcastToVectorElement32(unsigned TypeIdx) {}

static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {}

static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {}

static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {}

static bool isRegisterSize(unsigned Size) {}

static bool isRegisterVectorElementType(LLT EltTy) {}

static bool isRegisterVectorType(LLT Ty) {}

// TODO: replace all uses of isRegisterType with isRegisterClassType
static bool isRegisterType(LLT Ty) {}

// Any combination of 32- or 64-bit elements up to the maximum register size,
// and multiples of v2s16.
static LegalityPredicate isRegisterType(unsigned TypeIdx) {}

// RegisterType that doesn't have a corresponding RegClass.
// TODO: Once `isRegisterType` is replaced with `isRegisterClassType` this
// should be removed.
static LegalityPredicate isIllegalRegisterType(unsigned TypeIdx) {}

static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {}

static const LLT S1 = LLT::scalar(1);
static const LLT S8 = LLT::scalar(8);
static const LLT S16 = LLT::scalar(16);
static const LLT S32 = LLT::scalar(32);
static const LLT F32 = LLT::float32();
static const LLT S64 = LLT::scalar(64);
static const LLT F64 = LLT::float64();
static const LLT S96 = LLT::scalar(96);
static const LLT S128 = LLT::scalar(128);
static const LLT S160 = LLT::scalar(160);
static const LLT S192 = LLT::scalar(192);
static const LLT S224 = LLT::scalar(224);
static const LLT S256 = LLT::scalar(256);
static const LLT S512 = LLT::scalar(512);
static const LLT S1024 = LLT::scalar(1024);
static const LLT MaxScalar = LLT::scalar(MaxRegisterSize);

static const LLT V2S8 = LLT::fixed_vector(2, 8);
static const LLT V2S16 = LLT::fixed_vector(2, 16);
static const LLT V4S16 = LLT::fixed_vector(4, 16);
static const LLT V6S16 = LLT::fixed_vector(6, 16);
static const LLT V8S16 = LLT::fixed_vector(8, 16);
static const LLT V10S16 = LLT::fixed_vector(10, 16);
static const LLT V12S16 = LLT::fixed_vector(12, 16);
static const LLT V16S16 = LLT::fixed_vector(16, 16);

static const LLT V2F16 = LLT::fixed_vector(2, LLT::float16());
static const LLT V2BF16 = V2F16; // FIXME

static const LLT V2S32 = LLT::fixed_vector(2, 32);
static const LLT V3S32 = LLT::fixed_vector(3, 32);
static const LLT V4S32 = LLT::fixed_vector(4, 32);
static const LLT V5S32 = LLT::fixed_vector(5, 32);
static const LLT V6S32 = LLT::fixed_vector(6, 32);
static const LLT V7S32 = LLT::fixed_vector(7, 32);
static const LLT V8S32 = LLT::fixed_vector(8, 32);
static const LLT V9S32 = LLT::fixed_vector(9, 32);
static const LLT V10S32 = LLT::fixed_vector(10, 32);
static const LLT V11S32 = LLT::fixed_vector(11, 32);
static const LLT V12S32 = LLT::fixed_vector(12, 32);
static const LLT V16S32 = LLT::fixed_vector(16, 32);
static const LLT V32S32 = LLT::fixed_vector(32, 32);

static const LLT V2S64 = LLT::fixed_vector(2, 64);
static const LLT V3S64 = LLT::fixed_vector(3, 64);
static const LLT V4S64 = LLT::fixed_vector(4, 64);
static const LLT V5S64 = LLT::fixed_vector(5, 64);
static const LLT V6S64 = LLT::fixed_vector(6, 64);
static const LLT V7S64 = LLT::fixed_vector(7, 64);
static const LLT V8S64 = LLT::fixed_vector(8, 64);
static const LLT V16S64 = LLT::fixed_vector(16, 64);

static const LLT V2S128 = LLT::fixed_vector(2, 128);
static const LLT V4S128 = LLT::fixed_vector(4, 128);

static std::initializer_list<LLT> AllScalarTypes =;

static std::initializer_list<LLT> AllS16Vectors{};

static std::initializer_list<LLT> AllS32Vectors =;

static std::initializer_list<LLT> AllS64Vectors =;

// Checks whether a type is in the list of legal register types.
static bool isRegisterClassType(LLT Ty) {}

static LegalityPredicate isRegisterClassType(unsigned TypeIdx) {}

// If we have a truncating store or an extending load with a data size larger
// than 32 bits, we need to reduce to a 32-bit type.
static LegalityPredicate isWideScalarExtLoadTruncStore(unsigned TypeIdx) {}
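
// A hedged sketch of this predicate (illustrative; the memory type is taken
// from the first memory-operand descriptor of the query):
//
//   return [=](const LegalityQuery &Query) {
//     const LLT Ty = Query.Types[TypeIdx];
//     return !Ty.isVector() && Ty.getSizeInBits() > 32 &&
//            Query.MMODescrs[0].MemoryTy.getSizeInBits() < Ty.getSizeInBits();
//   };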

// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
// handle some operations by just promoting the register during
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS,
                                    bool IsLoad, bool IsAtomic) {}

static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
                                 const LegalityQuery &Query) {}

// The newer buffer intrinsic forms take their resource arguments as
// pointers in address space 8, aka s128 values. However, in order to not break
// SelectionDAG, the underlying operations have to continue to take v4i32
// arguments. Therefore, we convert resource pointers (or vectors of them)
// to integer values here.
static bool hasBufferRsrcWorkaround(const LLT Ty) {}
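
// A hedged sketch of the check (assuming AMDGPUAS::BUFFER_RESOURCE is the
// address-space-8 enumerator; illustrative only):
//
//   if (Ty.isPointer() && Ty.getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE)
//     return true;
//   if (Ty.isVector()) {
//     const LLT ElemTy = Ty.getElementType();
//     return ElemTy.isPointer() &&
//            ElemTy.getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE;
//   }
//   return false;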

// The current selector can't handle <6 x s16>, <8 x s16>, s96, s128, etc., so
// work around this. Eventually it should ignore the type for loads and only
// care about the size. Return true in cases where we will work around this for
// now by bitcasting.
static bool loadStoreBitcastWorkaround(const LLT Ty) {}

static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query) {}

/// Return true if a load or store of the type should be lowered with a bitcast
/// to a different type.
static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
                                       const LLT MemTy) {}

/// Return true if we should legalize a load by widening an odd-sized memory
/// access up to the alignment. Note that in this case the memory access itself
/// changes, not the size of the result register.
static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
                            uint64_t AlignInBits, unsigned AddrSpace,
                            unsigned Opcode) {}

static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query,
                            unsigned Opcode) {}

/// Mutates IR (typically a load instruction) to use a <4 x s32> as the initial
/// type of the operand `Idx` and then to transform it to a `p8` via bitcasts
/// and inttoptr. In addition, handle vectors of p8. Returns the new type.
static LLT castBufferRsrcFromV4I32(MachineInstr &MI, MachineIRBuilder &B,
                                   MachineRegisterInfo &MRI, unsigned Idx) {}

/// Cast a buffer resource (an address space 8 pointer) into a 4xi32, which is
/// the form in which the value must be in order to be passed to the low-level
/// representations used for MUBUF/MTBUF intrinsics. This is a hack, which is
/// needed in order to account for the fact that we can't define a register
/// class for s128 without breaking SelectionDAG.
static Register castBufferRsrcToV4I32(Register Pointer, MachineIRBuilder &B) {}
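
// Conceptually this is a p8 -> s128 -> <4 x s32> conversion. A hedged sketch
// using MachineIRBuilder (the exact upstream sequence may instead unmerge and
// re-merge 32-bit pieces):
//
//   Register Scalar = B.buildPtrToInt(S128, Pointer).getReg(0);
//   return B.buildBitcast(V4S32, Scalar).getReg(0);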

static void castBufferRsrcArgToV4I32(MachineInstr &MI, MachineIRBuilder &B,
                                     unsigned Idx) {}

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
                                         const GCNTargetMachine &TM)
  :{}

bool AMDGPULegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {}

Register AMDGPULegalizerInfo::getSegmentAperture(
  unsigned AS,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

/// Return true if the value is a known valid address, such that a null check is
/// not necessary.
static bool isKnownNonNull(Register Val, MachineRegisterInfo &MRI,
                           const AMDGPUTargetMachine &TM, unsigned AddrSpace) {}

bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFroundeven(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFceil(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFrem(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

static MachineInstrBuilder extractF64Exponent(Register Hi,
                                              MachineIRBuilder &B) {}

bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeITOFP(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B, bool Signed) const {}

// TODO: Copied from DAG implementation. Verify logic and document how this
// actually works.
bool AMDGPULegalizerInfo::legalizeFPTOI(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        MachineIRBuilder &B,
                                        bool Signed) const {}

bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
                                               MachineInstr &MI) const {}

bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeSinCos(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
                                                  MachineIRBuilder &B,
                                                  const GlobalValue *GV,
                                                  int64_t Offset,
                                                  unsigned GAFlags) const {}

// Emit an ABS32_LO / ABS32_HI relocation stub.
void AMDGPULegalizerInfo::buildAbsGlobalAddress(
    Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
    MachineRegisterInfo &MRI) const {}

bool AMDGPULegalizerInfo::legalizeGlobalValue(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

static LLT widenToNextPowerOf2(LLT Ty) {}

bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
                                       MachineInstr &MI) const {}

bool AMDGPULegalizerInfo::legalizeStore(LegalizerHelper &Helper,
                                        MachineInstr &MI) const {}

bool AMDGPULegalizerInfo::legalizeFMad(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
  MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {}

/// Return true if it's known that \p Src can never be an f32 denormal value.
static bool valueIsKnownNeverF32Denorm(const MachineRegisterInfo &MRI,
                                       Register Src) {}

static bool allowApproxFunc(const MachineFunction &MF, unsigned Flags) {}

static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
                                   unsigned Flags) {}

std::pair<Register, Register>
AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src,
                                       unsigned Flags) const {}

bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
                                        MachineIRBuilder &B) const {}

static Register getMad(MachineIRBuilder &B, LLT Ty, Register X, Register Y,
                       Register Z, unsigned Flags) {}

bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
                                             MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
                                             Register Src, bool IsLog10,
                                             unsigned Flags) const {}

bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
                                        MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
                                             Register X, unsigned Flags) const {}

bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
                                       MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI,
                                       MachineIRBuilder &B) const {}

// Find a source register, ignoring any possible source modifiers.
static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) {}

bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &B) const {}

// Turn an illegal packed v2s16 build vector into bit operations.
// TODO: This should probably be a bitcast action in LegalizerHelper.
bool AMDGPULegalizerInfo::legalizeBuildVector(
  MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {}

// Build a big integer multiply or multiply-add using MAD_64_32 instructions.
//
// Source and accumulation registers must all be 32 bits wide.
//
// TODO: When the multiply is uniform, we should produce a code sequence
// that is better suited to instruction selection on the SALU. Instead of
// the outer loop going over parts of the result, the outer loop should go
// over parts of one of the factors. This should result in instruction
// selection that makes full use of S_ADDC_U32 instructions.
void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper,
                                        MutableArrayRef<Register> Accum,
                                        ArrayRef<Register> Src0,
                                        ArrayRef<Register> Src1,
                                        bool UsePartialMad64_32,
                                        bool SeparateOddAlignedProducts) const {}
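
// As a worked illustration of the MAD_64_32 decomposition (hedged; this shows
// the arithmetic, not the generated MIR): for a 64 x 64 -> 64 multiply with
// x = x1:x0 and y = y1:y0 split into 32-bit parts,
//
//   T0 = mad_u64_u32(x0, y0, 0)      // full 64-bit x0*y0
//   T1 = mad_u64_u32(x1, y0, T0.hi)  // accumulate x1*y0 into the high half
//   T2 = mad_u64_u32(x0, y1, T1.lo)  // accumulate x0*y1 into the high half
//   result = { T0.lo, T2.lo }        // low dword, high dword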

// Custom narrowing of wide multiplies using wide multiply-add instructions.
//
// TODO: If the multiply is followed by an addition, we should attempt to
// integrate it to make better use of V_MAD_U64_U32's multiply-add capabilities.
bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper,
                                      MachineInstr &MI) const {}

// Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to
// ctlz/cttz_zero_undef. This allows us to fix up the result for the zero input
// case with a single min instruction instead of a compare+select.
bool AMDGPULegalizerInfo::legalizeCTLZ_CTTZ(MachineInstr &MI,
                                            MachineRegisterInfo &MRI,
                                            MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI,
                                                  MachineRegisterInfo &MRI,
                                                  MachineIRBuilder &B) const {}

// Check that this is a G_XOR x, -1
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {}
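
// A hedged sketch of the check (getIConstantVRegSExtVal is the GlobalISel
// utility for reading a constant vreg; illustrative only):
//
//   if (MI.getOpcode() != TargetOpcode::G_XOR)
//     return false;
//   auto ConstVal = getIConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI);
//   return ConstVal && *ConstVal == -1;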

// Return the use branch instruction, or null if the usage is invalid.
static MachineInstr *
verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br,
                  MachineBasicBlock *&UncondBrTarget, bool &Negated) {}

bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
                                         const ArgDescriptor *Arg,
                                         const TargetRegisterClass *ArgRC,
                                         LLT ArgTy) const {}

bool AMDGPULegalizerInfo::loadInputValue(
    Register DstReg, MachineIRBuilder &B,
    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {}

bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {}

static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI,
                                int64_t C) {}

bool AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {}

Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
                                                     int64_t Offset) const {}

/// Legalize a value that's loaded from kernel arguments. This is only used by
/// legacy intrinsics.
bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI,
                                                      MachineIRBuilder &B,
                                                      uint64_t Offset,
                                                      Align Alignment) const {}

bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
                                       MachineRegisterInfo &MRI,
                                       MachineIRBuilder &B) const {}

void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
                                                        Register DstDivReg,
                                                        Register DstRemReg,
                                                        Register X,
                                                        Register Y) const {}

// Build integer reciprocal sequence around V_RCP_IFLAG_F32
//
// Return lo, hi of result
//
// %cvt.lo = G_UITOFP Val.lo
// %cvt.hi = G_UITOFP Val.hi
// %mad = G_FMAD %cvt.hi, 2**32, %cvt.lo
// %rcp = G_AMDGPU_RCP_IFLAG %mad
// %mul1 = G_FMUL %rcp, 0x5f7ffffc
// %mul2 = G_FMUL %mul1, 2**(-32)
// %trunc = G_INTRINSIC_TRUNC %mul2
// %mad2 = G_FMAD %trunc, -(2**32), %mul1
// return {G_FPTOUI %mad2, G_FPTOUI %trunc}
static std::pair<Register, Register> emitReciprocalU64(MachineIRBuilder &B,
                                                       Register Val) {}

void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B,
                                                        Register DstDivReg,
                                                        Register DstRemReg,
                                                        Register Numer,
                                                        Register Denom) const {}

bool AMDGPULegalizerInfo::legalizeUnsignedDIV_REM(MachineInstr &MI,
                                                  MachineRegisterInfo &MRI,
                                                  MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeSignedDIV_REM(MachineInstr &MI,
                                                MachineRegisterInfo &MRI,
                                                MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI,
                                                   MachineRegisterInfo &MRI,
                                                   MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &B) const {}

static constexpr unsigned SPDenormModeBitField =;

// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
// to enable denorm mode. When 'Enable' is false, disable denorm mode.
static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
                               const GCNSubtarget &ST,
                               SIModeRegisterDefaults Mode) {}

bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFSQRTF16(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFSQRTF32(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        MachineIRBuilder &B) const {}

// Expand llvm.amdgcn.rsq.clamp on targets that don't support the instruction.
// FIXME: Why do we handle this one but not other removed instructions?
//
// Reciprocal square root.  The clamp prevents infinite results, clamping
// infinities to max_float.  D.f = 1.0 / sqrt(S0.f), result clamped to
// +-max_float.
bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
                                                    MachineRegisterInfo &MRI,
                                                    MachineIRBuilder &B) const {}

// TODO: Fix pointer type handling
bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
                                         MachineInstr &MI,
                                         Intrinsic::ID IID) const {}

bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
                                            MachineRegisterInfo &MRI,
                                            MachineIRBuilder &B) const {}

/// To create a buffer resource from a 64-bit pointer, mask off the upper 32
/// bits of the pointer and replace them with the stride argument, then
/// merge_values everything together. In the common case of a raw buffer (the
/// stride component is 0), we can just AND off the upper half.
bool AMDGPULegalizerInfo::legalizePointerAsRsrcIntrin(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::getLDSKernelId(Register DstReg,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeLDSKernelId(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B,
                                              unsigned AddrSpace) const {}

// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
// offset (the offset that is included in bounds checking and swizzling, to be
// split between the instruction's voffset and immoffset fields) and soffset
// (the offset that is excluded from bounds checking and swizzling, to go in
// the instruction's soffset field).  This function takes the first kind of
// offset and figures out how to split it between voffset and immoffset.
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
                                        Register OrigOffset) const {}
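
// An illustrative use (hedged; the width of the immediate field, and therefore
// the exact split point, is subtarget-dependent):
//
//   Register Off = B.buildConstant(S32, 4096 + 4).getReg(0);
//   auto [VOffsetReg, ImmOffset] = splitBufferOffsets(B, Off);
//   // The part that fits in the instruction's immediate field ends up in
//   // ImmOffset; the remainder (here 4096) stays in the VOffsetReg register.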

/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
                                             MachineRegisterInfo &MRI,
                                             Register Reg,
                                             bool ImageStore) const {}

Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
                                                 Register VData, LLT MemTy,
                                                 bool IsFormat) const {}

bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
                                              LegalizerHelper &Helper,
                                              bool IsTyped,
                                              bool IsFormat) const {}

static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
                            Register VIndex, Register VOffset, Register SOffset,
                            unsigned ImmOffset, unsigned Format,
                            unsigned AuxiliaryData, MachineMemOperand *MMO,
                            bool IsTyped, bool HasVIndex, MachineIRBuilder &B) {}

bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
                                             LegalizerHelper &Helper,
                                             bool IsFormat,
                                             bool IsTyped) const {}

static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {}

bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
                                               MachineIRBuilder &B,
                                               Intrinsic::ID IID) const {}

/// Turn a set of s16-typed registers in \p AddrRegs into a dword-sized
/// vector with s16-typed elements.
static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI,
                                      SmallVectorImpl<Register> &PackedAddrs,
                                      unsigned ArgOffset,
                                      const AMDGPU::ImageDimIntrinsicInfo *Intr,
                                      bool IsA16, bool IsG16) {}

/// Convert from separate vaddr components to a single vector address register,
/// and replace the remaining operands with $noreg.
static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
                                     int DimIdx, int NumVAddrs) {}

/// Rewrite image intrinsics to use register layouts expected by the subtarget.
///
/// Depending on the subtarget, load/store with 16-bit element data need to be
/// rewritten to use the low half of 32-bit registers, or directly use a packed
/// layout. 16-bit addresses should also sometimes be packed into 32-bit
/// registers.
///
/// We don't want to directly select image instructions just yet, but also want
/// to expose all register repacking to the legalizer/combiners. We also don't
/// want a selected instruction entering RegBankSelect. In order to avoid
/// defining a multitude of intermediate image instructions, directly hack on
/// the intrinsic's arguments. In cases like a16 addresses, this requires
/// padding now-unnecessary arguments with $noreg.
bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
    MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
    const AMDGPU::ImageDimIntrinsicInfo *Intr) const {}

bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper,
                                              MachineInstr &MI) const {}

bool AMDGPULegalizerInfo::legalizeSBufferPrefetch(LegalizerHelper &Helper,
                                                  MachineInstr &MI) const {}

// TODO: Move to selection
bool AMDGPULegalizerInfo::legalizeTrap(MachineInstr &MI,
                                       MachineRegisterInfo &MRI,
                                       MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeTrapEndpgm(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeTrapHsa(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeDebugTrap(MachineInstr &MI,
                                            MachineRegisterInfo &MRI,
                                            MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
                                               MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
                                            MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI,
                                         MachineIRBuilder &B) const {}

static constexpr unsigned FPEnvModeBitField =;

static constexpr unsigned FPEnvTrapBitField =;

bool AMDGPULegalizerInfo::legalizeGetFPEnv(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeSetFPEnv(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {}

bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                            MachineInstr &MI) const {}