MipsSEISelLowering.cpp | Explore in Territory

//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

usingnamespacellvm;

#define DEBUG_TYPE …

static cl::opt<bool>
UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                    cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : … { … }

const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) { … }

const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const { … }

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { … }

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { … }

SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { … }

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { … }

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const { … }

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) { … }

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is a ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { … }

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) { … }

// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) { … }

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) { … }

static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) { … }

static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) { … }

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) { … }

static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) { … }

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) { … }

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) { … }


static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) { … }

static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { … }

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { … }

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { … }

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) { … }

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { … }

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const { … }

bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const { … }

void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const { … }

SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { … }

SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { … }

SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const { … }

SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const { … }

static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { … }

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { … }

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { … }

// Lower an MSA copy intrinsic into the specified SelectionDAG node
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { … }

static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { … }

static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) { … }

static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) { … }

static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) { … }

static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { … }

static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { … }

static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { … }

SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const { … }

static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                const MipsSubtarget &Subtarget) { … }

SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const { … }

static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                 const MipsSubtarget &Subtarget) { … }

SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const { … }

// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { … }

static bool isConstantOrUndef(const SDValue Op) { … }

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { … }

// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
// backend.
//
// Lowers according to the following rules:
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
//   2 less than or equal to 64 and the value fits into a signed 10-bit
//   immediate
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
//   is a power of 2 less than or equal to 64 and the value does not fit into a
//   signed 10-bit immediate
// - Non-constant splats are legal as-is.
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
// - All others are illegal and must be expanded.
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const { … }

// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above forms.
//
// For example:
//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
//                                 i32 7, i32 6, i32 5, i32 4>
// is lowered to:
//   (SHF_H $w0, $w1, 27)
// where the 27 comes from:
//   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
                                       SmallVector<int, 16> Indices,
                                       SelectionDAG &DAG) { … }

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) { … }

// Determine whether VECTOR_SHUFFLE is a SPLATI.
//
// It is a SPLATI when the mask is:
//   <x, x, x, ...>
// where x is any valid index.
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above form.
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
                                    SmallVector<int, 16> Indices,
                                    SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into ILVEV (if possible).
//
// ILVEV interleaves the even elements from each vector.
//
// It is possible to lower into ILVEV when the mask consists of two of the
// following forms interleaved:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 0, 2, 2, 4, 4, ...>
//   <0, n, 2, n+2, 4, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into ILVOD (if possible).
//
// ILVOD interleaves the odd elements from each vector.
//
// It is possible to lower into ILVOD when the mask consists of two of the
// following forms interleaved:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
// For example:
//   <1, 1, 3, 3, 5, 5, ...>
//   <1, n+1, 3, n+3, 5, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into ILVR (if possible).
//
// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
// each vector.
//
// It is possible to lower into ILVR when the mask consists of two of the
// following forms interleaved:
//   <0, 1, 2, ...>
//   <n, n+1, n+2, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 0, 1, 1, 2, 2, ...>
//   <0, n, 1, n+1, 2, n+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into ILVL (if possible).
//
// ILVL interleaves consecutive elements from the left (highest-indexed) half
// of each vector.
//
// It is possible to lower into ILVL when the mask consists of two of the
// following forms interleaved:
//   <x, x+1, x+2, ...>
//   <n+x, n+x+1, n+x+2, ...>
// where n is the number of elements in the vector and x is half n.
// For example:
//   <x, x, x+1, x+1, x+2, x+2, ...>
//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into PCKEV (if possible).
//
// PCKEV copies the even elements of each vector into the result vector.
//
// It is possible to lower into PCKEV when the mask consists of two of the
// following forms concatenated:
//   <0, 2, 4, ...>
//   <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
// For example:
//   <0, 2, 4, ..., 0, 2, 4, ...>
//   <0, 2, 4, ..., n, n+2, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into PCKOD (if possible).
//
// PCKOD copies the odd elements of each vector into the result vector.
//
// It is possible to lower into PCKOD when the mask consists of two of the
// following forms concatenated:
//   <1, 3, 5, ...>
//   <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
// For example:
//   <1, 3, 5, ..., 1, 3, 5, ...>
//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into VSHF.
//
// This mostly consists of converting the shuffle indices in Indices into a
// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
// if the type is v8i16 and all the indices are less than 8 then the second
// operand is unused and can be replaced with anything. We choose to replace it
// with the used operand since this reduces the number of instructions overall.
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        const SmallVector<int, 16> &Indices,
                                        SelectionDAG &DAG) { … }

// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
// indices in the shuffle.
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const { … }

MachineBasicBlock *
MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
                                   MachineBasicBlock *BB) const { … }

MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { … }

// Emit the COPY_FW pseudo instruction.
//
// copy_fw_pseudo $fd, $ws, n
// =>
// copy_u_w $rt, $ws, $n
// mtc1     $rt, $fd
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is never valid
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const { … }

// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode which is the only supported mode in MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const { … }

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
                                    MachineBasicBlock *BB) const { … }

// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $fs, n
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
                                    MachineBasicBlock *BB) const { … }

// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const { … }

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const { … }

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const { … }

// Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
//  copy_u.h $rtemp,$wd[0]
//  sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would over write the memory after
// the destination. It would require half floats be allocated 16 bytes(!) of
// space.
MachineBasicBlock *
MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
                                       MachineBasicBlock *BB) const { … }

// Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register.
//
// LD_F16 MSA128F16:$wd, mem_simm10:$addr
// =>
//  lh $rtemp, $addr
//  fill.h $wd, $rtemp
//
// Safety: We can't use ld.h & co as they over-read from the source.
// Additionally, if the address is not modulo 16, 2 cases can occur:
//  a) Segmentation fault as the load instruction reads from a memory page
//     memory it's not supposed to.
//  b) The load crosses an implementation specific boundary, requiring OS
//     intervention.
MachineBasicBlock *
MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
                                       MachineBasicBlock *BB) const { … }

// Emit the FPROUND_PSEUDO instruction.
//
// Round an FGR64Opnd, FGR32Opnd to an f16.
//
// Safety: Cycle the operand through the GPRs so the result always ends up
//         the correct MSA register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship.
//
// For FPG32Opnd:
//
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
// =>
//  mfc1 $rtemp, $fs
//  fill.w $rtemp, $wtemp
//  fexdo.w $wd, $wtemp, $wtemp
//
// For FPG64Opnd on mips32r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
//  mfc1 $rtemp, $fs
//  fill.w $rtemp, $wtemp
//  mfhc1 $rtemp2, $fs
//  insert.w $wtemp[1], $rtemp2
//  insert.w $wtemp[3], $rtemp2
//  fexdo.w $wtemp2, $wtemp, $wtemp
//  fexdo.h $wd, $temp2, $temp2
//
// For FGR64Opnd on mips64r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
//  dmfc1 $rtemp, $fs
//  fill.d $rtemp, $wtemp
//  fexdo.w $wtemp2, $wtemp, $wtemp
//  fexdo.h $wd, $wtemp2, $wtemp2
//
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
//              undef bits are "just right" and the exception enable bits are
//              set. By using fill.w to replicate $fs into all elements over
//              insert.w for one element, we avoid that potiential case. If
//              fexdo.[hw] causes an exception in, the exception is valid and it
//              occurs for all elements.
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const { … }

// Emit the FPEXTEND_PSEUDO instruction.
//
// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
//
// Safety: Cycle the result through the GPRs so the result always ends up
//         the correct floating point register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able across
//        register classes which have a sub/super register class relationship. I
//        haven't checked.
//
// For FGR32Opnd:
//
// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
// =>
//  fexupr.w $wtemp, $ws
//  copy_s.w $rtemp, $ws[0]
//  mtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips64:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
//  fexupr.w $wtemp, $ws
//  fexupr.d $wtemp2, $wtemp
//  copy_s.d $rtemp, $wtemp2s[0]
//  dmtc1 $rtemp, $fd
//
// For FGR64Opnd on Mips32:
//
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
// =>
//  fexupr.w $wtemp, $ws
//  fexupr.d $wtemp2, $wtemp
//  copy_s.w $rtemp, $wtemp2[0]
//  mtc1 $rtemp, $ftemp
//  copy_s.w $rtemp2, $wtemp2[1]
//  $fd = mthc1 $rtemp2, $ftemp
MachineBasicBlock *
MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          bool IsFGR64) const { … }

// Emit the FEXP2_W_1 pseudo instructions.
//
// fexp2_w_1_pseudo $wd, $wt
// =>
// ldi.w $ws, 1
// fexp2.w $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const { … }

// Emit the FEXP2_D_1 pseudo instructions.
//
// fexp2_d_1_pseudo $wd, $wt
// =>
// ldi.d $ws, 1
// fexp2.d $wd, $ws, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const { … }
llvm/llvm/lib/Target/Mips/MipsSEISelLowering.cpp