//===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// This pass performs exec mask handling peephole optimizations which need /// to be done before register allocation to reduce register pressure. /// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" usingnamespacellvm; #define DEBUG_TYPE … namespace { class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { … }; } // End anonymous namespace. INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) char SIOptimizeExecMaskingPreRA::ID = …; char &llvm::SIOptimizeExecMaskingPreRAID = …; FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() { … } // See if there is a def between \p AndIdx and \p SelIdx that needs to live // beyond \p AndIdx. static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx, SlotIndex SelIdx) { … } // FIXME: Why do we bother trying to handle physical registers here? 
static bool isDefBetween(const SIRegisterInfo &TRI, LiveIntervals *LIS, Register Reg, const MachineInstr &Sel, const MachineInstr &And) { … } // Optimize sequence // %sel = V_CNDMASK_B32_e64 0, 1, %cc // %cmp = V_CMP_NE_U32 1, %sel // $vcc = S_AND_B64 $exec, %cmp // S_CBRANCH_VCC[N]Z // => // $vcc = S_ANDN2_B64 $exec, %cc // S_CBRANCH_VCC[N]Z // // It is the negation pattern inserted by DAGCombiner::visitBRCOND() in // rebuildSetCC(). We start with S_CBRANCH to avoid exhaustive search, but // only the first 3 instructions are really needed. S_AND_B64 with exec is a // required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive // lanes. // // Returns true on success. bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { … } // Optimize sequence // %dst = S_OR_SAVEEXEC %src // ... instructions not modifying exec ... // %tmp = S_AND $exec, %dst // $exec = S_XOR_term $exec, %tmp // => // %dst = S_OR_SAVEEXEC %src // ... instructions not modifying exec ... // $exec = S_XOR_term $exec, %dst // // Clean up potentially unnecessary code added for safety during // control flow lowering. // // Return whether any changes were made to MBB. bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) { … } bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { … }