// File: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
//

#include "SIFoldOperands.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOperand.h"

#define DEBUG_TYPE
usingnamespacellvm;

namespace {

// Presumably records one pending fold: the use instruction/operand index and
// the operand (imm, FI, or reg) to fold into it — confirm against upstream.
// FIXME(review): struct body is empty in this copy — members are missing.
struct FoldCandidate {};

class SIFoldOperandsImpl {};

// Legacy pass-manager wrapper for the operand-folding implementation.
// `static char ID` is reconstructed from the out-of-line
// `char SIFoldOperandsLegacy::ID` definition below; the original class body
// here was empty.
// FIXME(review): constructor and runOnMachineFunction override are missing in
// this copy and must be restored from upstream.
class SIFoldOperandsLegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid.
};

} // End anonymous namespace.

INITIALIZE_PASS()

char SIFoldOperandsLegacy::ID =;

char &llvm::SIFoldOperandsLegacyID =;

// Presumably resolves the register class of operand \p MO using \p MRI /
// \p TRI — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
static const TargetRegisterClass *getRegOpRC(const MachineRegisterInfo &MRI,
                                             const TargetRegisterInfo &TRI,
                                             const MachineOperand &MO) {}

// Map multiply-accumulate opcode to corresponding multiply-add opcode if any.
// FIXME(review): empty body — implementation appears stripped from this copy.
static unsigned macToMad(unsigned Opc) {}

// TODO: Add heuristic that the frame index might not fit in the addressing mode
// immediate offset to avoid materializing in loops.
// Presumably: can frame-index operand \p OpToFold fold into operand \p OpNo of
// \p UseMI? — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::frameIndexMayFold(
    const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) const {}

// Factory for the legacy pass (presumably `new SIFoldOperandsLegacy()` —
// confirm against upstream).
// FIXME(review): empty body — implementation appears stripped from this copy.
FunctionPass *llvm::createSIFoldOperandsLegacyPass() {}

// Presumably checks whether \p Fold's immediate can be used with op_sel
// modifiers on the use instruction — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {}

// Presumably attempts the immediate fold described by \p Fold via op_sel —
// confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {}

// Presumably commits a queued fold by rewriting the use operand in place —
// confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {}

// Presumably: is \p MI already recorded as a use in \p FoldList? — confirm
// against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
                              const MachineInstr *MI) {}

// Presumably appends a new candidate (\p MI operand \p OpNo <- \p FoldOp) to
// \p FoldList; \p Commuted / \p ShrinkOp presumably record whether the use was
// commuted and a shrink opcode — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                MachineInstr *MI, unsigned OpNo,
                                MachineOperand *FoldOp, bool Commuted = false,
                                int ShrinkOp = -1) {}

// Presumably validates that \p OpToFold can legally replace operand \p OpNo of
// \p MI (possibly after commuting) and, if so, queues it on \p FoldList —
// confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryAddToFoldList(
    SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
    MachineOperand *OpToFold) const {}

// Presumably: is it safe to fold into use operand \p UseMO of \p MI? —
// confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::isUseSafeToFold(const MachineInstr &MI,
                                         const MachineOperand &UseMO) const {}

// Find a def of the UseReg, check if it is a reg_sequence and find initializers
// for each subreg, tracking it to foldable inline immediate if possible.
// Returns true on success.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::getRegSeqInit(
    SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
    Register UseReg, uint8_t OpTy) const {}

// Presumably tries to fold \p OpToFold as an inline immediate into AGPR-class
// operand \p UseOpIdx of \p UseMI, queueing on \p FoldList — confirm against
// upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryToFoldACImm(
    const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
    SmallVectorImpl<FoldCandidate> &FoldList) const {}

// Presumably the main per-use folding driver: tries to fold \p OpToFold into
// operand \p UseOpIdx of \p UseMI, accumulating candidates in \p FoldList and
// copies to rewrite in \p CopiesToReplace — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
void SIFoldOperandsImpl::foldOperand(
    MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
    SmallVectorImpl<FoldCandidate> &FoldList,
    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {}

// Presumably constant-evaluates binary \p Opcode on \p LHS / \p RHS into
// \p Result, returning false for unsupported opcodes — confirm against
// upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
                                  uint32_t LHS, uint32_t RHS) {}

// Presumably returns the scalar or vector 32-bit move opcode — confirm
// against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
static unsigned getMovOpc(bool IsScalar) {}

// Presumably rewrites \p MI in place to the instruction described by
// \p NewDesc — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {}

// Presumably returns \p Op if it is an immediate, or the immediate operand of
// its materializing def if traceable; null-like result otherwise — confirm
// against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
MachineOperand *
SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {}

// Try to simplify operations with a constant that may appear after instruction
// selection.
// TODO: See if a frame index with a fixed offset can fold.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {}

// Try to fold an instruction into a simpler one
// (presumably targets V_CNDMASK-style selects, per the name — confirm against
// upstream).
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {}

// Presumably folds away masking operations that only zero already-zero high
// bits — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {}

// Presumably folds \p OpToFold (def'd by \p MI) into all eligible uses —
// confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
                                         MachineOperand &OpToFold) const {}

// Presumably handles copy-like instructions whose source can be propagated;
// \p CurrentKnownM0Val presumably tracks the last known value written to m0 —
// confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
    MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {}

// Clamp patterns are canonically selected to v_max_* instructions, so only
// handle them.
// Presumably returns the clamped source operand if \p MI is a clamp pattern,
// else nullptr — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
const MachineOperand *
SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {}

// FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
// Presumably folds a clamp (v_max_*) use into the defining instruction's
// clamp bit — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {}

// Presumably maps a multiply-by-constant (\p Opc, \p Val) to the matching
// output-modifier encoding — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
static int getOModValue(unsigned Opc, int64_t Val) {}

// FIXME: Does this really not support denormals with f16?
// FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
// handled, so will anything other than that break?
// Presumably returns the source operand and omod encoding if \p MI is an
// output-modifier pattern — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
std::pair<const MachineOperand *, int>
SIFoldOperandsImpl::isOMod(const MachineInstr &MI) const {}

// FIXME: Does this need to check IEEE bit on function?
// Presumably folds an omod-pattern use of \p MI's result into \p MI's output
// modifiers — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {}

// Try to fold a reg_sequence with vgpr output and agpr inputs into an
// instruction which can take an agpr. So far that means a store.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {}

/// Checks whether \p Copy is a AGPR -> VGPR copy. Returns `true` on success and
/// stores the AGPR register in \p OutReg and the subreg in \p OutSubReg
// FIXME(review): empty body — implementation appears stripped from this copy.
static bool isAGPRCopy(const SIRegisterInfo &TRI,
                       const MachineRegisterInfo &MRI, const MachineInstr &Copy,
                       Register &OutReg, unsigned &OutSubReg) {}

// Try to hoist an AGPR to VGPR copy across a PHI.
// This should allow folding of an AGPR into a consumer which may support it.
//
// Example 1: LCSSA PHI
//      loop:
//        %1:vreg = COPY %0:areg
//      exit:
//        %2:vreg = PHI %1:vreg, %loop
//  =>
//      loop:
//      exit:
//        %1:areg = PHI %0:areg, %loop
//        %2:vreg = COPY %1:areg
//
// Example 2: PHI with multiple incoming values:
//      entry:
//        %1:vreg = GLOBAL_LOAD(..)
//      loop:
//        %2:vreg = PHI %1:vreg, %entry, %5:vreg, %loop
//        %3:areg = COPY %2:vreg
//        %4:areg = (instr using %3:areg)
//        %5:vreg = COPY %4:areg
//  =>
//      entry:
//        %1:vreg = GLOBAL_LOAD(..)
//        %2:areg = COPY %1:vreg
//      loop:
//        %3:areg = PHI %2:areg, %entry, %X:areg,
//        %4:areg = (instr using %3:areg)
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &PHI) {}

// Attempt to convert VGPR load to an AGPR load.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &MI) {}

// tryFoldPhiAGPR will aggressively try to create AGPR PHIs.
// For GFX90A and later, this is pretty much always a good thing, but for GFX908
// there's cases where it can create a lot more AGPR-AGPR copies, which are
// expensive on this architecture due to the lack of V_ACCVGPR_MOV.
//
// This function looks at all AGPR PHIs in a basic block and collects their
// operands. Then, it checks for register that are used more than once across
// all PHIs and caches them in a VGPR. This prevents ExpandPostRAPseudo from
// having to create one VGPR temporary per use, which can get very messy if
// these PHIs come from a broken-up large PHI (e.g. 32 AGPR phis, one per vector
// element).
//
// Example
//      a:
//        %in:agpr_256 = COPY %foo:vgpr_256
//      c:
//        %x:agpr_32 = ..
//      b:
//        %0:areg = PHI %in.sub0:agpr_32, %a, %x, %c
//        %1:areg = PHI %in.sub0:agpr_32, %a, %y, %c
//        %2:areg = PHI %in.sub0:agpr_32, %a, %z, %c
//  =>
//      a:
//        %in:agpr_256 = COPY %foo:vgpr_256
//        %tmp:vgpr_32 = V_ACCVGPR_READ_B32_e64 %in.sub0:agpr_32
//        %tmp_agpr:agpr_32 = COPY %tmp
//      c:
//        %x:agpr_32 = ..
//      b:
//        %0:areg = PHI %tmp_agpr, %a, %x, %c
//        %1:areg = PHI %tmp_agpr, %a, %y, %c
//        %2:areg = PHI %tmp_agpr, %a, %z, %c
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {}

// Presumably the pass entry point: iterates \p MF applying the fold routines
// above; returns whether anything changed — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
bool SIFoldOperandsImpl::run(MachineFunction &MF) {}

// New pass-manager entry point (SIFoldOperandsPass is declared in
// SIFoldOperands.h). Presumably delegates to SIFoldOperandsImpl::run and
// reports preserved analyses — confirm against upstream.
// FIXME(review): empty body — implementation appears stripped from this copy.
PreservedAnalyses SIFoldOperandsPass::run(MachineFunction &MF,
                                          MachineFunctionAnalysisManager &) {}