//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the following peephole optimizations at the MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
// The mov pseudo instruction could be expanded into multiple mov instructions
// later. In this case, we could try to split the constant operand of the mov
// instruction into two immediates which can be directly encoded into
// *Wri/*Xri instructions. This yields two AND/ADD/SUB instructions instead of
// multiple `mov` + `and/add/sub` instructions. (A worked example follows at
// the end of this comment block.)
//
// 4. Remove a redundant ORRWrs generated by a zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If a 32-bit form of an AArch64 instruction defines the source operand of
//    the ORRWrs, we can remove the ORRWrs because the upper 32 bits of the
//    source operand are already set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//    ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// 6. %intermediate:gpr32 = COPY %src:fpr128
//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//    ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//    In cases where a source FPR is copied to a GPR in order to be copied
//    to a destination FPR, we can directly copy the values between the FPRs,
//    eliminating the use of the integer unit. When we match a pattern of
//    INSvi[X]gpr that is preceded by a chain of COPY instructions from an FPR
//    source, we use INSvi[X]lane to replace the COPY & INSvi[X]gpr
//    instructions.
//
// 7. If an MI implicitly sets the high 64 bits of its result to zero, remove
//    the redundant `mov 0` of the high 64 bits. For example,
//
//    %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//    %2:fpr64 = MOVID 0
//    %4:fpr128 = IMPLICIT_DEF
//    %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
//    %6:fpr128 = IMPLICIT_DEF
//    %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//    %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
//    ==>
//    %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//    %6:fpr128 = IMPLICIT_DEF
//    %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//
// 8. Remove redundant CSELs that select between identical registers, by
//    replacing them with unconditional moves.
//
// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a
//    32-bit LSR or LSL alias of UBFM.
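//
// As an illustrative sketch of (1)-(3) (the concrete values below are chosen
// for exposition and are not taken from any particular test): 0x00ff00ef is
// not a valid AArch64 logical immediate, but 0x00ff00ef == 0x00ff00ff &
// 0xffffffef, and both factors are valid logical immediates, so a
// MOVi32imm + ANDWrr pair can be rewritten as two ANDWri instructions:
//
//    %1:gpr32 = MOVi32imm 16711919          ; 0x00ff00ef
//    %2:gpr32 = ANDWrr %0:gpr32, %1:gpr32
//    ==>
//    %3:gpr32 = ANDWri %0:gpr32, <encoding of 0x00ff00ff>
//    %2:gpr32 = ANDWri %3:gpr32, <encoding of 0xffffffef>
//
// (The <encoding of ...> operands stand for the encoded logical-immediate
// forms that the pass computes via AArch64_AM::isLogicalImmediate /
// AArch64_AM::encodeLogicalImmediate.) Similarly for (2)/(3), an ADD/SUB
// immediate that does not fit in 12 bits can often be split into a shifted
// and an unshifted 12-bit part, e.g. 0x111222 == (0x111 << 12) + 0x222,
// giving ADDXri + ADDXri.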
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE …

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass { … };

char AArch64MIPeepholeOpt::ID = …;

} // end anonymous namespace

INITIALIZE_PASS(…)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { … }

template <typename T>
bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) { … }

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { … }

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
                                       MachineInstr &MI) { … }

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs,
                                         MachineInstr &MI) { … }

// Checks if the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) { … }

template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI, SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) { … }

bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) { … }

// All instructions that set an FPR64 implicitly zero the top bits of the
// register.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) { … }

bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) { … }

// Across a basic block we might have an i32 extract from a value that only
// operates on the upper bits (for example an sxtw). We can replace the COPY
// with a new version skipping the sxtw (see the illustrative note at the end
// of this file).
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { … }

FunctionPass *llvm::createAArch64MIPeepholeOptPass() { … }
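
// Illustrative note for visitCopy (the virtual register numbers and block
// layout are invented for exposition, not taken from any particular test):
// the low 32 bits of an SXTW result equal the low 32 bits of its source, so
// a cross-block sub_32 COPY of the extended value can read from the original
// register instead, making the sign-extend dead if it has no other users:
//
//   bb.0:
//     %1:gpr64 = SBFMXri %0:gpr64, 0, 31   ; sxtw alias
//     ...
//   bb.1:
//     %2:gpr32 = COPY %1.sub_32
//   ==>
//   bb.1:
//     %2:gpr32 = COPY %0.sub_32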