//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // The Cortex-A15 processor employs a tracking scheme in its register renaming // in order to process each instruction's micro-ops speculatively and // out-of-order with appropriate forwarding. The ARM architecture allows VFP // instructions to read and write 32-bit S-registers. Each S-register // corresponds to one half (upper or lower) of an overlaid 64-bit D-register. // // There are several instruction patterns which can be used to provide this // capability which can provide higher performance than other, potentially more // direct patterns, specifically around when one micro-op reads a D-register // operand that has recently been written as one or more S-register results. // // This file defines a pre-regalloc pass which looks for SPR producers which // are going to be used by DPR (or QPR) consumers and creates the more // optimized access pattern. 
// //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMSubtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <map> #include <set> using namespace llvm; #define DEBUG_TYPE … namespace { struct A15SDOptimizer : public MachineFunctionPass { … }; char A15SDOptimizer::ID = …; } // end anonymous namespace // Returns true if this is a use of a SPR register. // NOTE(review): the signature takes an arbitrary TargetRegisterClass, so the // check is presumably made against TRC rather than SPR specifically -- confirm. bool A15SDOptimizer::usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC) { … } // NOTE(review): undocumented in the original. Presumably maps SReg to the // lane it occupies within its overlaid D-register -- confirm against the body. unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { … } // Get the subreg type that is most likely to be coalesced // for an SPR register that will be used in a VDUP32d pseudo. unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { … } // MI is known to be dead. Figure out what instructions // are also made dead by this and mark them for removal. void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { … } // Creates the more optimized patterns and generally does all the code // transformations in this pass. unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { … } // Return true if this MachineInstr inserts a scalar (SPR) value into // a D or Q register. bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { … } // Looks through full copies to get the instruction that defines the input // operand for MI. MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { … } // Look through full copies and PHIs to get the set of non-copy MachineInstrs // that can produce MI. 
void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, SmallVectorImpl<MachineInstr*> &Outs) { … } // Return the DPR virtual registers that are read by this machine instruction // (if any). SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { … } // Creates a DPR register from an SPR one by using a VDUP. unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Reg, unsigned Lane, bool QPR) { … } // Creates a SPR register from a DPR by copying the value in lane 0. // NOTE(review): the signature takes a Lane argument, so the lane is presumably // caller-selected rather than fixed at 0 -- confirm against the body. unsigned A15SDOptimizer::createExtractSubreg( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned DReg, unsigned Lane, const TargetRegisterClass *TRC) { … } // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. unsigned A15SDOptimizer::createRegSequence( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Reg1, unsigned Reg2) { … } // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) // and merges them into one DPR register. unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1) { … } // NOTE(review): undocumented in the original. Presumably inserts ToInsert // into lane Lane of DReg and returns the resulting register -- confirm. unsigned A15SDOptimizer::createInsertSubreg( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) { … } // NOTE(review): undocumented in the original. Presumably emits an implicit // definition before InsertBefore and returns the new register -- confirm. unsigned A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL) { … } // This function inserts instructions in order to optimize interactions between // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all // lanes, and then using VEXT instructions to recompose the result. 
unsigned A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { … }

// Handles a single machine instruction.
// NOTE(review): the body is not visible here; the bool result presumably
// reports whether MI was modified -- confirm.
bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { … }

// Runs the optimizer over the machine function Fn.
// NOTE(review): A15SDOptimizer derives from MachineFunctionPass, so this is
// presumably the per-function entry point and returns true when Fn was
// changed -- confirm against the body.
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { … }

// Factory in the llvm namespace that hands back this pass as a FunctionPass.
// NOTE(review): presumably returns a newly allocated A15SDOptimizer -- confirm.
FunctionPass *llvm::createA15SDOptimizerPass() { … }