//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Subclass of MipsTargetLowering specialized for mips32/64. // //===----------------------------------------------------------------------===// #include "MipsSEISelLowering.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsMips.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include <algorithm> #include <cassert> #include <cstdint> #include <iterator> #include <utility> usingnamespacellvm; #define DEBUG_TYPE … static cl::opt<bool> UseMipsTailCalls("mips-tail-calls", cl::Hidden, cl::desc("MIPS: permit tail calls."), cl::init(false)); static cl::opt<bool> 
NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), cl::desc("Expand double precision loads and " "stores to their single precision " "counterparts")); MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI) : … { … } const MipsTargetLowering * llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI) { … } const TargetRegisterClass * MipsSETargetLowering::getRepRegClassFor(MVT VT) const { … } // Enable MSA support for the given integer type and register class. void MipsSETargetLowering:: addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { … } // Enable MSA support for the given floating-point type and register class. void MipsSETargetLowering:: addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { … } SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { … } bool MipsSETargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { … } SDValue MipsSETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { … } // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT // // Performs the following transformations: // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its // sign/zero-extension is completely overwritten by the new one performed by // the ISD::AND. // - Removes redundant zero extensions performed by an ISD::AND. static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { … } // Determine if the specified node is a constant vector splat. // // Returns true and sets Imm if: // * N is an ISD::BUILD_VECTOR representing a constant splat // // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. 
The // differences are that it assumes the MSA has already been checked and the // arbitrary requirement for a maximum of 32-bit integers isn't applied (and // must not be in order for binsri.d to be selectable). static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { … } // Test whether the given node is an all-ones build_vector. static bool isVectorAllOnes(SDValue N) { … } // Test whether N is the bitwise inverse of OfNode. static bool isBitwiseInverse(SDValue N, SDValue OfNode) { … } // Perform combines where ISD::OR is the root node. // // Performs the following transformations: // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit // vector type. static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { … } static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, SelectionDAG &DAG, const MipsSubtarget &Subtarget) { … } static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { … } static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget) { … } static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, const MipsSubtarget &Subtarget) { … } static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { … } // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold // constant splats into MipsISD::SHRA_DSP for DSPr2. // // Performs the following transformations: // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its // sign/zero-extension is completely overwritten by the new one performed by // the ISD::SRA and ISD::SHL nodes. 
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL // sequence. // // See performDSPShiftCombine for more information about the transformation // used for DSPr2. static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { … } static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { … } static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { … } static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { … } static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { … } static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget) { … } SDValue MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { … } MachineBasicBlock * MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { … } bool MipsSETargetLowering::isEligibleForTailCallOptimization( const CCState &CCInfo, unsigned NextStackOffset, const MipsFunctionInfo &FI) const { … } void MipsSETargetLowering:: getOpndList(SmallVectorImpl<SDValue> &Ops, std::deque<std::pair<unsigned, SDValue>> &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { … } SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { … } SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { … } SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op, SelectionDAG &DAG) const { … } SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const { … } static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { … } static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { … } // This function expands mips intrinsic 
nodes which have 64-bit input operands // or output values. // // out64 = intrinsic-node in64 // => // lo = copy (extract-element (in64, 0)) // hi = copy (extract-element (in64, 1)) // mips-specific-node // v0 = copy lo // v1 = copy hi // out64 = merge-values (v0, v1) // static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { … } // Lower an MSA copy intrinsic into the specified SelectionDAG node static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { … } static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { … } static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned = false) { … } static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, bool BigEndian, SelectionDAG &DAG) { … } static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc, SDValue Imm, bool BigEndian) { … } static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { … } static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { … } static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { … } SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { … } static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget) { … } SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { … } static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget) { … } SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { … } // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. // // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We // choose to sign-extend but we could have equally chosen zero-extend. 
The // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT // result into this node later (possibly changing it to a zero-extend in the // process). SDValue MipsSETargetLowering:: lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { … } static bool isConstantOrUndef(const SDValue Op) { … } static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { … } // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the // backend. // // Lowers according to the following rules: // - Constant splats are legal as-is as long as the SplatBitSize is a power of // 2 less than or equal to 64 and the value fits into a signed 10-bit // immediate // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize // is a power of 2 less than or equal to 64 and the value does not fit into a // signed 10-bit immediate // - Non-constant splats are legal as-is. // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. // - All others are illegal and must be expanded. SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { … } // Lower VECTOR_SHUFFLE into SHF (if possible). // // SHF splits the vector into blocks of four elements, then shuffles these // elements according to a <4 x i2> constant (encoded as an integer immediate). // // It is therefore possible to lower into SHF when the mask takes the form: // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> // When undef's appear they are treated as if they were whatever value is // necessary in order to fit the above forms. 
// // For example: // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, // <8 x i32> <i32 3, i32 2, i32 1, i32 0, // i32 7, i32 6, i32 5, i32 4> // is lowered to: // (SHF_H $w0, $w1, 27) // where the 27 comes from: // 3 + (2 << 2) + (1 << 4) + (0 << 6) static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } /// Determine whether a range fits a regular pattern of values. /// This function accounts for the possibility of jumping over the End iterator. template <typename ValType> static bool fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl<ValType>::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride) { … } // Determine whether VECTOR_SHUFFLE is a SPLATI. // // It is a SPLATI when the mask is: // <x, x, x, ...> // where x is any valid index. // // When undef's appear in the mask they are treated as if they were whatever // value is necessary in order to fit the above form. static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into ILVEV (if possible). // // ILVEV interleaves the even elements from each vector. // // It is possible to lower into ILVEV when the mask consists of two of the // following forms interleaved: // <0, 2, 4, ...> // <n, n+2, n+4, ...> // where n is the number of elements in the vector. // For example: // <0, 0, 2, 2, 4, 4, ...> // <0, n, 2, n+2, 4, n+4, ...> // // When undef's appear in the mask they are treated as if they were whatever // value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into ILVOD (if possible). // // ILVOD interleaves the odd elements from each vector. 
// // It is possible to lower into ILVOD when the mask consists of two of the // following forms interleaved: // <1, 3, 5, ...> // <n+1, n+3, n+5, ...> // where n is the number of elements in the vector. // For example: // <1, 1, 3, 3, 5, 5, ...> // <1, n+1, 3, n+3, 5, n+5, ...> // // When undef's appear in the mask they are treated as if they were whatever // value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into ILVR (if possible). // // ILVR interleaves consecutive elements from the right (lowest-indexed) half of // each vector. // // It is possible to lower into ILVR when the mask consists of two of the // following forms interleaved: // <0, 1, 2, ...> // <n, n+1, n+2, ...> // where n is the number of elements in the vector. // For example: // <0, 0, 1, 1, 2, 2, ...> // <0, n, 1, n+1, 2, n+2, ...> // // When undef's appear in the mask they are treated as if they were whatever // value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into ILVL (if possible). // // ILVL interleaves consecutive elements from the left (highest-indexed) half // of each vector. // // It is possible to lower into ILVL when the mask consists of two of the // following forms interleaved: // <x, x+1, x+2, ...> // <n+x, n+x+1, n+x+2, ...> // where n is the number of elements in the vector and x is half n. // For example: // <x, x, x+1, x+1, x+2, x+2, ...> // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> // // When undef's appear in the mask they are treated as if they were whatever // value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into PCKEV (if possible). 
// // PCKEV copies the even elements of each vector into the result vector. // // It is possible to lower into PCKEV when the mask consists of two of the // following forms concatenated: // <0, 2, 4, ...> // <n, n+2, n+4, ...> // where n is the number of elements in the vector. // For example: // <0, 2, 4, ..., 0, 2, 4, ...> // <0, 2, 4, ..., n, n+2, n+4, ...> // // When undef's appear in the mask they are treated as if they were whatever // value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into PCKOD (if possible). // // PCKOD copies the odd elements of each vector into the result vector. // // It is possible to lower into PCKOD when the mask consists of two of the // following forms concatenated: // <1, 3, 5, ...> // <n+1, n+3, n+5, ...> // where n is the number of elements in the vector. // For example: // <1, 3, 5, ..., 1, 3, 5, ...> // <1, 3, 5, ..., n+1, n+3, n+5, ...> // // When undef's appear in the mask they are treated as if they were whatever // value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, SmallVector<int, 16> Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into VSHF. // // This mostly consists of converting the shuffle indices in Indices into a // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, // if the type is v8i16 and all the indices are less than 8 then the second // operand is unused and can be replaced with anything. We choose to replace it // with the used operand since this reduces the number of instructions overall. 
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, const SmallVector<int, 16> &Indices, SelectionDAG &DAG) { … } // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the // indices in the shuffle. SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { … } MachineBasicBlock * MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, MachineBasicBlock *BB) const { … } MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { … } // Emit the COPY_FW pseudo instruction. // // copy_fw_pseudo $fd, $ws, n // => // copy_u_w $rt, $ws, $n // mtc1 $rt, $fd // // When n is zero, the equivalent operation can be performed with (potentially) // zero instructions due to register overlaps. This optimization is never valid // for lane 1 because it would require FR=0 mode which isn't supported by MSA. MachineBasicBlock * MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the COPY_FD pseudo instruction. // // copy_fd_pseudo $fd, $ws, n // => // splati.d $wt, $ws, $n // copy $fd, $wt:sub_64 // // When n is zero, the equivalent operation can be performed with (potentially) // zero instructions due to register overlaps. This optimization is always // valid because FR=1 mode which is the only supported mode in MSA. MachineBasicBlock * MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the INSERT_FW pseudo instruction. // // insert_fw_pseudo $wd, $wd_in, $n, $fs // => // subreg_to_reg $wt:sub_lo, $fs // insve_w $wd[$n], $wd_in, $wt[0] MachineBasicBlock * MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the INSERT_FD pseudo instruction. 
// // insert_fd_pseudo $wd, $wd_in, $n, $fs // => // subreg_to_reg $wt:sub_64, $fs // insve_d $wd[$n], $wd_in, $wt[0] MachineBasicBlock * MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. // // For integer: // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) // => // (SLL $lanetmp1, $lane, <log2size>) // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) // (NEG $lanetmp2, $lanetmp1) // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) // // For floating point: // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) // => // (SUBREG_TO_REG $wt, $fs, <subreg>) // (SLL $lanetmp1, $lane, <log2size>) // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) // (NEG $lanetmp2, $lanetmp1) // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, bool IsFP) const { … } // Emit the FILL_FW pseudo instruction. // // fill_fw_pseudo $wd, $fs // => // implicit_def $wt1 // insert_subreg $wt2:subreg_lo, $wt1, $fs // splati.w $wd, $wt2[0] MachineBasicBlock * MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the FILL_FD pseudo instruction. // // fill_fd_pseudo $wd, $fs // => // implicit_def $wt1 // insert_subreg $wt2:subreg_64, $wt1, $fs // splati.d $wd, $wt2[0] MachineBasicBlock * MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA // register. // // STF16 MSA128F16:$wd, mem_simm10:$addr // => // copy_u.h $rtemp,$wd[0] // sh $rtemp, $addr // // Safety: We can't use st.h & co as they would overwrite the memory after // the destination. It would require half floats be allocated 16 bytes(!) of // space. 
MachineBasicBlock * MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA register. // // LD_F16 MSA128F16:$wd, mem_simm10:$addr // => // lh $rtemp, $addr // fill.h $wd, $rtemp // // Safety: We can't use ld.h & co as they over-read from the source. // Additionally, if the address is not modulo 16, 2 cases can occur: // a) Segmentation fault as the load instruction reads from a memory page // it's not supposed to. // b) The load crosses an implementation specific boundary, requiring OS // intervention. MachineBasicBlock * MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the FPROUND_PSEUDO instruction. // // Round an FGR64Opnd, FGR32Opnd to an f16. // // Safety: Cycle the operand through the GPRs so the result always ends up in // the correct MSA register. // // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register // (which they can be, as the MSA registers are defined to alias the // FPU's 64 bit and 32 bit registers) the result can be accessed using // the correct register class. That requires operands be tie-able across // register classes which have a sub/super register class relationship. 
// // For FGR32Opnd: // // FPROUND MSA128F16:$wd, FGR32Opnd:$fs // => // mfc1 $rtemp, $fs // fill.w $wtemp, $rtemp // fexdo.w $wd, $wtemp, $wtemp // // For FGR64Opnd on mips32r2+: // // FPROUND MSA128F16:$wd, FGR64Opnd:$fs // => // mfc1 $rtemp, $fs // fill.w $wtemp, $rtemp // mfhc1 $rtemp2, $fs // insert.w $wtemp[1], $rtemp2 // insert.w $wtemp[3], $rtemp2 // fexdo.w $wtemp2, $wtemp, $wtemp // fexdo.h $wd, $wtemp2, $wtemp2 // // For FGR64Opnd on mips64r2+: // // FPROUND MSA128F16:$wd, FGR64Opnd:$fs // => // dmfc1 $rtemp, $fs // fill.d $wtemp, $rtemp // fexdo.w $wtemp2, $wtemp, $wtemp // fexdo.h $wd, $wtemp2, $wtemp2 // // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the // undef bits are "just right" and the exception enable bits are // set. By using fill.w to replicate $fs into all elements over // insert.w for one element, we avoid that potential case. If // fexdo.[hw] causes an exception, the exception is valid and it // occurs for all elements. MachineBasicBlock * MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, MachineBasicBlock *BB, bool IsFGR64) const { … } // Emit the FPEXTEND_PSEUDO instruction. // // Expand an f16 to either an FGR32Opnd or FGR64Opnd. // // Safety: Cycle the result through the GPRs so the result always ends up in // the correct floating point register. // // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register // (which they can be, as the MSA registers are defined to alias the // FPU's 64 bit and 32 bit registers) the result can be accessed using // the correct register class. That requires operands be tie-able across // register classes which have a sub/super register class relationship. I // haven't checked. 
// // For FGR32Opnd: // // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws // => // fexupr.w $wtemp, $ws // copy_s.w $rtemp, $wtemp[0] // mtc1 $rtemp, $fd // // For FGR64Opnd on Mips64: // // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws // => // fexupr.w $wtemp, $ws // fexupr.d $wtemp2, $wtemp // copy_s.d $rtemp, $wtemp2[0] // dmtc1 $rtemp, $fd // // For FGR64Opnd on Mips32: // // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws // => // fexupr.w $wtemp, $ws // fexupr.d $wtemp2, $wtemp // copy_s.w $rtemp, $wtemp2[0] // mtc1 $rtemp, $ftemp // copy_s.w $rtemp2, $wtemp2[1] // $fd = mthc1 $rtemp2, $ftemp MachineBasicBlock * MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, MachineBasicBlock *BB, bool IsFGR64) const { … } // Emit the FEXP2_W_1 pseudo instructions. // // fexp2_w_1_pseudo $wd, $wt // => // ldi.w $ws, 1 // fexp2.w $wd, $ws, $wt MachineBasicBlock * MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, MachineBasicBlock *BB) const { … } // Emit the FEXP2_D_1 pseudo instructions. // // fexp2_d_1_pseudo $wd, $wt // => // ldi.d $ws, 1 // fexp2.d $wd, $ws, $wt MachineBasicBlock * MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, MachineBasicBlock *BB) const { … }