//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the AArch64ConditionalCompares pass which reduces // branching and code size by using the conditional compare instructions CCMP, // CCMN, and FCMP. // // The CFG transformations for forming conditional compares are very similar to // if-conversion, and this pass should run immediately before the early // if-conversion pass. // //===----------------------------------------------------------------------===// #include "AArch64.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" usingnamespacellvm; #define DEBUG_TYPE … // Absolute maximum number of instructions allowed per speculated block. // This bypasses all other heuristics, so it should be set fairly high. static cl::opt<unsigned> BlockInstrLimit( "aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block.")); // Stress testing mode - disable heuristics. static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11")); STATISTIC(NumConsidered, "Number of ccmps considered"); STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)"); STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)"); STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)"); STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)"); STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)"); STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)"); STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)"); STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)"); STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)"); STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)"); STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)"); STATISTIC(NumConverted, "Number of ccmp instructions created"); STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); //===----------------------------------------------------------------------===// // SSACCmpConv //===----------------------------------------------------------------------===// // // The SSACCmpConv class performs ccmp-conversion on SSA form machine code // after determining if it is possible. The class contains no heuristics; // external code should be used to determine when ccmp-conversion is a good // idea. // // CCmp-formation works on a CFG representing chained conditions, typically // from C's short-circuit || and && operators: // // From: Head To: Head // / | CmpBB // / | / | // | CmpBB / | // | / | Tail | // | / | | | // Tail | | | // | | | | // ... ... ... ... // // The Head block is terminated by a br.cond instruction, and the CmpBB block // contains compare + br.cond. Tail must be a successor of both. // // The cmp-conversion turns the compare instruction in CmpBB into a conditional // compare, and merges CmpBB into Head, speculatively executing its // instructions. The AArch64 conditional compare instructions have an immediate // operand that specifies the NZCV flag values when the condition is false and // the compare isn't executed. This makes it possible to chain compares with // different condition codes. // // Example: // // if (a == 5 || b == 17) // foo(); // // Head: // cmp w0, #5 // b.eq Tail // CmpBB: // cmp w1, #17 // b.eq Tail // ... // Tail: // bl _foo // // Becomes: // // Head: // cmp w0, #5 // ccmp w1, #17, 4, ne ; 4 = nZcv // b.eq Tail // ... // Tail: // bl _foo // // The ccmp condition code is the one that would cause the Head terminator to // branch to CmpBB. // // FIXME: It should also be possible to speculate a block on the critical edge // between Head and Tail, just like if-converting a diamond. // // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion). namespace { class SSACCmpConv { … }; } // end anonymous namespace // Check that all PHIs in Tail are selecting the same value from Head and CmpBB. // This means that no if-conversion is required when merging CmpBB into Head. bool SSACCmpConv::trivialTailPHIs() { … } // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply // removing the CmpBB operands. The Head operands will be identical. void SSACCmpConv::updateTailPHIs() { … } // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares // are still writing virtual registers without any uses. bool SSACCmpConv::isDeadDef(unsigned DstReg) { … } // Parse a condition code returned by analyzeBranch, and compute the CondCode // corresponding to TBB. // Return static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) { … } MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { … } /// Determine if all the instructions in MBB can safely /// be speculated. The terminators are not considered. /// /// Only CmpMI is allowed to clobber the flags. /// bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI) { … } /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential /// candidate for cmp-conversion. Fill out the internal state. /// bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { … } void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { … } int SSACCmpConv::expectedCodeSizeDelta() const { … } //===----------------------------------------------------------------------===// // AArch64ConditionalCompares Pass //===----------------------------------------------------------------------===// namespace { class AArch64ConditionalCompares : public MachineFunctionPass { … }; } // end anonymous namespace char AArch64ConditionalCompares::ID = …; INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) FunctionPass *llvm::createAArch64ConditionalCompares() { … } void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { … } /// Update the dominator tree after if-conversion erased some blocks. void AArch64ConditionalCompares::updateDomTree( ArrayRef<MachineBasicBlock *> Removed) { … } /// Update LoopInfo after if-conversion. void AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) { … } /// Invalidate MachineTraceMetrics before if-conversion. void AArch64ConditionalCompares::invalidateTraces() { … } /// Apply cost model and heuristics to the if-conversion in IfConv. /// Return true if the conversion is a good idea. /// bool AArch64ConditionalCompares::shouldConvert() { … } bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { … } bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { … }