//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the following peephole optimizations at the MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
// The mov pseudo instruction could be expanded into multiple mov instructions
// later. In this case, we could try to split the constant operand of the mov
// instruction into two immediates which can be directly encoded into
// *Wri/*Xri instructions. This yields two AND/ADD/SUB instructions instead of
// multiple `mov` + `and/add/sub` instructions. (A worked example follows at
// the end of this comment block.)
//
// 4. Remove a redundant ORRWrs generated by a zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If a 32-bit form of an AArch64 instruction defines the source operand of
//    the ORRWrs, we can remove the ORRWrs because the upper 32 bits of the
//    source operand are already set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//    ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// 6. %intermediate:gpr32 = COPY %src:fpr128
//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//    ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//    In cases where a source FPR is copied to a GPR in order to be copied
//    to a destination FPR, we can directly copy the values between the FPRs,
//    eliminating the use of the integer unit. When we match a pattern of
//    INSvi[X]gpr that is preceded by a chain of COPY instructions from an FPR
//    source, we use INSvi[X]lane to replace the COPY & INSvi[X]gpr
//    instructions.
//
// 7. If an MI implicitly sets the high 64 bits of its result to zero, remove
//    the redundant `mov 0` of the high 64 bits. For example,
//
//    %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//    %2:fpr64 = MOVID 0
//    %4:fpr128 = IMPLICIT_DEF
//    %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
//    %6:fpr128 = IMPLICIT_DEF
//    %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//    %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
//    ==>
//    %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//    %6:fpr128 = IMPLICIT_DEF
//    %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//
// 8. Remove redundant CSELs that select between identical registers, by
//    replacing them with unconditional moves.
//
// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a
//    32-bit LSR or LSL alias of UBFM.
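//
// As an illustrative sketch of (1)-(3) (the concrete values below are chosen
// for exposition and are not taken from any particular test): 0x00ff00ef is
// not a valid AArch64 logical immediate, but 0x00ff00ef == 0x00ff00ff &
// 0xffffffef, and both factors are valid logical immediates, so a
// MOVi32imm + ANDWrr pair can be rewritten as two ANDWri instructions:
//
//    %1:gpr32 = MOVi32imm 16711919          ; 0x00ff00ef
//    %2:gpr32 = ANDWrr %0:gpr32, %1:gpr32
//    ==>
//    %3:gpr32 = ANDWri %0:gpr32, <encoding of 0x00ff00ff>
//    %2:gpr32 = ANDWri %3:gpr32, <encoding of 0xffffffef>
//
// (The <encoding of ...> operands stand for the encoded logical-immediate
// forms that the pass computes via AArch64_AM::isLogicalImmediate /
// AArch64_AM::encodeLogicalImmediate.) Similarly for (2)/(3), an ADD/SUB
// immediate that does not fit in 12 bits can often be split into a shifted
// and an unshifted 12-bit part, e.g. 0x111222 == (0x111 << 12) + 0x222,
// giving ADDXri + ADDXri.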
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE …

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass { … };

char AArch64MIPeepholeOpt::ID = …;

} // end anonymous namespace

INITIALIZE_PASS(…)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { … }

template <typename T>
bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) { … }

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { … }

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
                                       MachineInstr &MI) { … }

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs,
                                         MachineInstr &MI) { … }

// Checks if the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) { … }

template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI, SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) { … }

bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) { … }

// All instructions that set an FPR64 implicitly zero the top bits of the
// register.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) { … }

bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) { … }

// Across a basic block we might have an i32 extract from a value that only
// operates on the upper bits (for example an sxtw). We can replace the COPY
// with a new version skipping the sxtw (see the illustrative note at the end
// of this file).
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) { … }

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { … }

FunctionPass *llvm::createAArch64MIPeepholeOptPass() { … }
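
// Illustrative note for visitCopy (the virtual register numbers and block
// layout are invented for exposition, not taken from any particular test):
// the low 32 bits of an SXTW result equal the low 32 bits of its source, so
// a cross-block sub_32 COPY of the extended value can read from the original
// register instead, making the sign-extend dead if it has no other users:
//
//   bb.0:
//     %1:gpr64 = SBFMXri %0:gpr64, 0, 31   ; sxtw alias
//     ...
//   bb.1:
//     %2:gpr32 = COPY %1.sub_32
//   ==>
//   bb.1:
//     %2:gpr32 = COPY %0.sub_32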