//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Perform peephole optimizations on the machine code: // // - Optimize Extensions // // Optimization of sign / zero extension instructions. It may be extended to // handle other instructions with similar properties. // // On some targets, some instructions, e.g. X86 sign / zero extension, may // leave the source value in the lower part of the result. This optimization // will replace some uses of the pre-extension value with uses of the // sub-register of the results. // // - Optimize Comparisons // // Optimization of comparison instructions. For instance, in this code: // // sub r1, 1 // cmp r1, 0 // bz L1 // // If the "sub" instruction all ready sets (or could be modified to set) the // same flag that the "cmp" instruction sets and that "bz" uses, then we can // eliminate the "cmp" instruction. // // Another instance, in this code: // // sub r1, r3 | sub r1, imm // cmp r3, r1 or cmp r1, r3 | cmp r1, imm // bge L1 // // If the branch instruction can use flag from "sub", then we can replace // "sub" with "subs" and eliminate the "cmp" instruction. // // - Optimize Loads: // // Loads that can be folded into a later instruction. A load is foldable // if it loads to virtual registers and the virtual register defined has // a single use. // // - Optimize Copies and Bitcast (more generally, target specific copies): // // Rewrite copies and bitcasts to avoid cross register bank copies // when possible. // E.g., Consider the following example, where capital and lower // letters denote different register file: // b = copy A <-- cross-bank copy // C = copy b <-- cross-bank copy // => // b = copy A <-- cross-bank copy // C = copy A <-- same-bank copy // // E.g., for bitcast: // b = bitcast A <-- cross-bank copy // C = bitcast b <-- cross-bank copy // => // b = bitcast A <-- cross-bank copy // C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> #include <memory> #include <utility> usingnamespacellvm; RegSubRegPair; RegSubRegPairAndIdx; #define DEBUG_TYPE … // Optimize Extensions static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization")); static cl::opt<bool> DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); /// Specifiy whether or not the value tracking looks through /// complex instructions. When this is true, the value tracker /// bails on everything that is not a copy or a bitcast. static cl::opt<bool> DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); static cl::opt<bool> DisableNAPhysCopyOpt( "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization")); // Limit the number of PHI instructions to process // in PeepholeOptimizer::getNextSource. static cl::opt<unsigned> RewritePHILimit( "rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup")); // Limit the length of recurrence chain when evaluating the benefit of // commuting operands. static cl::opt<unsigned> MaxRecurrenceChain( "recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands")); STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { class ValueTrackerResult; class RecurrenceInstr; class PeepholeOptimizer : public MachineFunctionPass, private MachineFunction::Delegate { … }; /// Helper class to hold instructions that are inside recurrence cycles. /// The recurrence cycle is formulated around 1) a def operand and its /// tied use operand, or 2) a def operand and a use operand that is commutable /// with another use operand which is tied to the def operand. In the latter /// case, index of the tied use operand and the commutable use operand are /// maintained with CommutePair. class RecurrenceInstr { … }; /// Helper class to hold a reply for ValueTracker queries. /// Contains the returned sources for a given search and the instructions /// where the sources were tracked from. class ValueTrackerResult { … }; /// Helper class to track the possible sources of a value defined by /// a (chain of) copy related instructions. /// Given a definition (instruction and definition index), this class /// follows the use-def chain to find successive suitable sources. /// The given source can be used to rewrite the definition into /// def = COPY src. /// /// For instance, let us consider the following snippet: /// v0 = /// v2 = INSERT_SUBREG v1, v0, sub0 /// def = COPY v2.sub0 /// /// Using a ValueTracker for def = COPY v2.sub0 will give the following /// suitable sources: /// v2.sub0 and v0. /// Then, def can be rewritten into def = COPY v0. class ValueTracker { … }; } // end anonymous namespace char PeepholeOptimizer::ID = …; char &llvm::PeepholeOptimizerID = …; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) /// If instruction is a copy-like instruction, i.e. it reads a single register /// and writes a single register and it does not modify the source, and if the /// source value is preserved as a sub-register of the result, then replace all /// reachable uses of the source with the subreg of the result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. bool PeepholeOptimizer:: optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, SmallPtrSetImpl<MachineInstr*> &LocalMIs) { … } /// If the instruction is a compare and the previous instruction it's comparing /// against already sets (or could be modified to set) the same flag as the /// compare, then we can remove the comparison and use the flag from the /// previous instruction. bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { … } /// Optimize a select instruction. bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { … } /// Check if a simpler conditional branch can be generated. bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) { … } /// Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. /// When true is returned, the \p RewriteMap can be used by the client to /// retrieve all Def -> Use along the way up to the next source. Any found /// Use that is not itself a key for another entry, is the next source to /// use. During the search for the next source, multiple sources can be found /// given multiple incoming sources of a PHI instruction. In this case, we /// look in each PHI source for the next source; all found next sources must /// share the same register file as \p Reg and \p SubReg. The client should /// then be capable to rewrite all intermediate PHIs to get the next source. /// \return False if no alternative sources are available. True otherwise. bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap) { … } /// Insert a PHI instruction with incoming edges \p SrcRegs that are /// guaranteed to have the same register class. This is necessary whenever we /// successfully traverse a PHI instruction and find suitable sources coming /// from its edges. By inserting a new PHI, we provide a rewritten PHI def /// suitable to be used in a new COPY instruction. static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl<RegSubRegPair> &SrcRegs, MachineInstr &OrigPHI) { … } namespace { /// Interface to query instructions amenable to copy rewriting. class Rewriter { … }; /// Rewriter for COPY instructions. class CopyRewriter : public Rewriter { … }; /// Helper class to rewrite uncoalescable copy like instructions /// into new COPY (coalescable friendly) instructions. class UncoalescableRewriter : public Rewriter { … }; /// Specialized rewriter for INSERT_SUBREG instruction. class InsertSubregRewriter : public Rewriter { … }; /// Specialized rewriter for EXTRACT_SUBREG instruction. class ExtractSubregRewriter : public Rewriter { … }; /// Specialized rewriter for REG_SEQUENCE instruction. class RegSequenceRewriter : public Rewriter { … }; } // end anonymous namespace /// Get the appropriated Rewriter for \p MI. /// \return A pointer to a dynamically allocated Rewriter or nullptr if no /// rewriter works for \p MI. static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) { … } /// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find /// the new source to use for rewrite. If \p HandleMultipleSources is true and /// multiple sources for a given \p Def are found along the way, we found a /// PHI instructions that needs to be rewritten. /// TODO: HandleMultipleSources should be removed once we test PHI handling /// with coalescable copies. static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources = true) { … } /// Optimize generic copy instructions to avoid cross register bank copy. /// The optimization looks through a chain of copies and tries to find a source /// that has a compatible register class. /// Two register classes are considered to be compatible if they share the same /// register bank. /// New copies issued by this optimization are register allocator /// friendly. This optimization does not remove any copy as it may /// overconstrain the register allocator, but replaces some operands /// when possible. /// \pre isCoalescableCopy(*MI) is true. /// \return True, when \p MI has been rewritten. False otherwise. bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { … } /// Rewrite the source found through \p Def, by using the \p RewriteMap /// and create a new COPY instruction. More info about RewriteMap in /// PeepholeOptimizer::findNextSource. Right now this is only used to handle /// Uncoalescable copies, since they are copy like instructions that aren't /// recognized by the register allocator. MachineInstr & PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def, RewriteMapTy &RewriteMap) { … } /// Optimize copy-like instructions to create /// register coalescer friendly instruction. /// The optimization tries to kill-off the \p MI by looking /// through a chain of copies to find a source that has a compatible /// register class. /// If such a source is found, it replace \p MI by a generic COPY /// operation. /// \pre isUncoalescableCopy(*MI) is true. /// \return True, when \p MI has been optimized. In that case, \p MI has /// been removed from its parent. /// All COPY instructions created, are inserted in \p LocalMIs. bool PeepholeOptimizer::optimizeUncoalescableCopy( MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { … } /// Check whether MI is a candidate for folding into a later instruction. /// We only fold loads to virtual registers and the virtual register defined /// has a single user. bool PeepholeOptimizer::isLoadFoldable( MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) { … } bool PeepholeOptimizer::isMoveImmediate( MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, DenseMap<Register, MachineInstr *> &ImmDefMIs) { … } /// Try folding register operands that are defined by move immediate /// instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. bool PeepholeOptimizer::foldImmediate( MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) { … } // FIXME: This is very simple and misses some cases which should be handled when // motivating examples are found. // // The copy rewriting logic should look at uses as well as defs and be able to // eliminate copies across blocks. // // Later copies that are subregister extracts will also not be eliminated since // only the first copy is considered. // // e.g. // %1 = COPY %0 // %2 = COPY %0:sub1 // // Should replace %2 uses with %1:sub1 bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) { … } bool PeepholeOptimizer::isNAPhysCopy(Register Reg) { … } bool PeepholeOptimizer::foldRedundantNAPhysCopy( MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) { … } /// \bried Returns true if \p MO is a virtual register operand. static bool isVirtualRegisterOperand(MachineOperand &MO) { … } bool PeepholeOptimizer::findTargetRecurrence( Register Reg, const SmallSet<Register, 2> &TargetRegs, RecurrenceCycle &RC) { … } /// Phi instructions will eventually be lowered to copy instructions. /// If phi is in a loop header, a recurrence may formulated around the source /// and destination of the phi. For such case commuting operands of the /// instructions in the recurrence may enable coalescing of the copy instruction /// generated from the phi. For example, if there is a recurrence of /// /// LoopHeader: /// %1 = phi(%0, %100) /// LoopLatch: /// %0<def, tied1> = ADD %2<def, tied0>, %1 /// /// , the fact that %0 and %2 are in the same tied operands set makes /// the coalescing of copy instruction generated from the phi in /// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and /// %2 have overlapping live range. This introduces additional move /// instruction to the final assembly. However, if we commute %2 and /// %1 of ADD instruction, the redundant move instruction can be /// avoided. bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { … } bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { … } ValueTrackerResult ValueTracker::getNextSourceFromCopy() { … } ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { … } ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { … } ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { … } ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { … } ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { … } /// Explore each PHI incoming operand and return its sources. ValueTrackerResult ValueTracker::getNextSourceFromPHI() { … } ValueTrackerResult ValueTracker::getNextSourceImpl() { … } ValueTrackerResult ValueTracker::getNextSource() { … }