//===-- AArch64AdvSIMDScalar.cpp - Use AdvSIMD scalar instructions --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// When profitable, replace GPR targeting i64 instructions with their
// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined
// as minimizing the number of cross-class register copies.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TODO: Graph based predicate heuristics.
// Walking the instruction list linearly will get many, perhaps most, of
// the cases, but to do a truly thorough job of this, we need a more
// holistic approach.
//
// This optimization is very similar in spirit to the register allocator's
// spill placement, only here we're determining where to place cross-class
// register copies rather than spills. As such, a similar approach is
// called for.
//
// We want to build up a set of graphs of all instructions which are candidates
// for transformation along with instructions which generate their inputs and
// consume their outputs. For each edge in the graph, we assign a weight
// based on whether there is a copy required there (weight zero if not) and
// the block frequency of the block containing the defining or using
// instruction, whichever is less. Our optimization is then a graph problem
// to minimize the total weight of all the graphs, then transform instructions
// and add or remove copy instructions as called for to implement the
// solution.
//===----------------------------------------------------------------------===// #include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64RegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE … // Allow forcing all i64 operations with equivalent SIMD instructions to use // them. For stress-testing the transformation function. static cl::opt<bool> TransformAll("aarch64-simd-scalar-force-all", cl::desc("Force use of AdvSIMD scalar instructions everywhere"), cl::init(false), cl::Hidden); STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used"); STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); #define AARCH64_ADVSIMD_NAME … namespace { class AArch64AdvSIMDScalar : public MachineFunctionPass { … }; char AArch64AdvSIMDScalar::ID = …; } // end anonymous namespace INITIALIZE_PASS(…) static bool isGPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { … } static bool isFPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { … } // getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 // copy instruction. Return nullptr if the instruction is not a copy. static MachineOperand *getSrcFromCopy(MachineInstr *MI, const MachineRegisterInfo *MRI, unsigned &SubReg) { … } // getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent // that we're considering transforming to, return that AdvSIMD opcode. For all // others, return the original opcode. 
static unsigned getTransformOpcode(unsigned Opc) { … } static bool isTransformable(const MachineInstr &MI) { … } // isProfitableToTransform - Predicate function to determine whether an // instruction should be transformed to its equivalent AdvSIMD scalar // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. bool AArch64AdvSIMDScalar::isProfitableToTransform( const MachineInstr &MI) const { … } static MachineInstr *insertCopy(const TargetInstrInfo *TII, MachineInstr &MI, unsigned Dst, unsigned Src, bool IsKill) { … } // transformInstruction - Perform the transformation of an instruction // to its equivalent AdvSIMD scalar instruction. Update inputs and outputs // to be the correct register class, minimizing cross-class copies. void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { … } // processMachineBasicBlock - Main optimization loop. bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { … } // runOnMachineFunction - Pass entry point from PassManager. bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { … } // createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine // to add the pass to the PassManager. FunctionPass *llvm::createAArch64AdvSIMDScalar() { … }