//===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // For best-case performance on Cortex-A57, we should try to use a balanced // mix of odd and even D-registers when performing a critical sequence of // independent, non-quadword FP/ASIMD floating-point multiply or // multiply-accumulate operations. // // This pass attempts to detect situations where the register allocation may // adversely affect this load balancing and to change the registers used so as // to better utilize the CPU. // // Ideally we'd just take each multiply or multiply-accumulate in turn and // allocate it alternating even or odd registers. However, multiply-accumulates // are most efficiently performed in the same functional unit as their // accumulation operand. Therefore this pass tries to find maximal sequences // ("Chains") of multiply-accumulates linked via their accumulation operand, // and assign them all the same "color" (oddness/evenness). // // This optimization affects S-register and D-register floating point // multiplies and FMADD/FMAs, as well as vector (floating point only) muls and // FMADD/FMA. Q register instructions (and 128-bit vector instructions) are // not affected. 
//===----------------------------------------------------------------------===// #include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" usingnamespacellvm; #define DEBUG_TYPE … // Enforce the algorithm to use the scavenged register even when the original // destination register is the correct color. Used for testing. static cl::opt<bool> TransformAll("aarch64-a57-fp-load-balancing-force-all", cl::desc("Always modify dest registers regardless of color"), cl::init(false), cl::Hidden); // Never use the balance information obtained from chains - return a specific // color always. Used for testing. static cl::opt<unsigned> OverrideBalance("aarch64-a57-fp-load-balancing-override", cl::desc("Ignore balance information, always return " "(1: Even, 2: Odd)."), cl::init(0), cl::Hidden); //===----------------------------------------------------------------------===// // Helper functions // Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs? static bool isMul(MachineInstr *MI) { … } // Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs? static bool isMla(MachineInstr *MI) { … } //===----------------------------------------------------------------------===// namespace { /// A "color", which is either even or odd. Yes, these aren't really colors /// but the algorithm is conceptually doing two-color graph coloring. 
enum class Color { … };
#ifndef NDEBUG
// Human-readable names for the two colors. Only present when NDEBUG is not
// defined.
// NOTE(review): presumably indexed by the Color value in debug output -
// confirm that the enumerator order matches { Even, Odd }.
static const char *ColorNames[2] = { "Even", "Odd" };
#endif

// Forward declaration; Chain is defined in the second anonymous namespace
// further down in this file.
class Chain;

/// The machine-function pass that implements the load balancing described in
/// the file header. Its member functions are defined out of line below.
class AArch64A57FPLoadBalancing : public MachineFunctionPass { … };
}

// Storage for the pass's static ID member.
char AArch64A57FPLoadBalancing::ID = …;

// Register the pass under DEBUG_TYPE with the description
// "AArch64 A57 FP Load-Balancing".
INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
                      "AArch64 A57 FP Load-Balancing", false, false)
INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
                    "AArch64 A57 FP Load-Balancing", false, false)

namespace {
/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
///   fmul def d0, ?
///   fmla def d1, ?, ?, killed d0
///   fmla def d2, ?, ?, killed d1
///
/// There may be other instructions interleaved in the sequence that
/// do not belong to the chain. These other instructions must not use
/// the "chain" register at any point.
///
/// For simplicity, we currently only support chains where the "chain"
/// operand is killed at each link in the chain.
///
/// A chain has three important instructions - Start, Last and Kill.
///   * Start is the first instruction in the chain.
///   * Last is the final instruction in the chain.
///   * Kill may or may not be defined. If defined, Kill is the instruction
///     where the outgoing value of the Last instruction is killed.
///     This information is important because, if we know the outgoing value
///     is killed with no intervening uses, we can safely change its register.
///
/// Without a kill instruction, we must assume the outgoing value escapes
/// beyond our model and either must not change its register or must
/// create a fixup FMOV to keep the old register value consistent.
/// class Chain { … }; } // end anonymous namespace //===----------------------------------------------------------------------===// bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) { … } bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) { … } Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L) { … } bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB, int &Parity) { … } int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB) { … } bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, MachineBasicBlock &MBB) { … } void AArch64A57FPLoadBalancing::scanInstruction( MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains, std::vector<std::unique_ptr<Chain>> &AllChains) { … } void AArch64A57FPLoadBalancing:: maybeKillChain(MachineOperand &MO, unsigned Idx, std::map<unsigned, Chain*> &ActiveChains) { … } Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) { … } // Factory function used by AArch64TargetMachine to add the pass to the passmanager. FunctionPass *llvm::createAArch64A57FPLoadBalancing() { … }