//===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else /// structures and waterfall loops. /// /// When we do structurization, we usually transform an if-else into two /// successive if-then (with a flow block to do predicate inversion). Consider a /// simple case after structurization: A divergent value %a was defined before /// if-else and used in both THEN (use in THEN is optional) and ELSE part: /// bb.if: /// %a = ... /// ... /// bb.then: /// ... = op %a /// ... // %a can be dead here /// bb.flow: /// ... /// bb.else: /// ... = %a /// ... /// bb.endif /// /// As the register allocator has no idea of the thread control flow, it will just /// assume %a would be alive in the whole range of bb.then because of a later /// use in bb.else. On the AMDGPU architecture, a VGPR is accessed with respect /// to the exec mask. For this if-else case, the lanes active in bb.then will be /// inactive in bb.else, and vice versa. So it is safe to say that %a is dead /// after the last use in bb.then until the end of the block. The reason is /// that the instructions in bb.then will only overwrite lanes that will never be /// accessed in bb.else. /// /// This pass aims to tell the register allocator that %a is in fact dead, /// by inserting a phi-node in bb.flow saying that %a is undef when coming /// from bb.then, and then replacing the uses in bb.else with the result of /// the newly inserted phi. /// /// Two key conditions must be met to ensure correctness: /// 1.) 
The def-point should be in the same loop level as the if-else-endif to make /// sure the second loop iteration still gets correct data. /// 2.) There should be no further uses after the IF-ELSE region. /// /// /// Waterfall loops get inserted around instructions that use divergent values /// but can only be executed with a uniform value. For example, an indirect call /// to a divergent address: /// bb.start: /// %a = ... /// %fun = ... /// ... /// bb.loop: /// call %fun (%a) /// ... // %a can be dead here /// loop %bb.loop /// /// The loop block is executed multiple times, but it is run exactly once for /// each active lane. Similar to the if-else case, the register allocator /// assumes that %a is live throughout the loop as it is used again in the next /// iteration. If %a is a VGPR that is unused after the loop, it does not need /// to be live after its last use in the loop block. By inserting a phi-node at /// the start of bb.loop that is undef when coming from bb.loop, the register /// allocator knows that the value of %a does not need to be preserved across /// iterations of the loop. 
/// // //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/InitializePasses.h" usingnamespacellvm; #define DEBUG_TYPE … namespace { class SIOptimizeVGPRLiveRange : public MachineFunctionPass { … }; } // end anonymous namespace // Check whether the MBB is an else flow block and get the branching target which // is the Endif block MachineBasicBlock * SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { … } void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( MachineBasicBlock *Flow, MachineBasicBlock *Endif, SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { … } /// Find the instructions (excluding phi) in \p MBB that use \p Reg. void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( Register Reg, MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Uses) const { … } /// Collect the killed registers in the ELSE region which are not alive through /// the whole THEN region. void SIOptimizeVGPRLiveRange::collectCandidateRegisters( MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, SmallVectorImpl<Register> &CandidateRegs) const { … } /// Collect the registers used in the waterfall loop block that are defined /// before it. 
void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, SmallSetVector<Register, 16> &CandidateRegs, SmallSetVector<MachineBasicBlock *, 2> &Blocks, SmallVectorImpl<MachineInstr *> &Instructions) const { … } // Re-calculate the liveness of \p Reg in the THEN-region void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { … } void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( Register Reg, Register NewReg, MachineBasicBlock *Flow, MachineBasicBlock *Endif, SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { … } void SIOptimizeVGPRLiveRange::optimizeLiveRange( Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { … } void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( Register Reg, MachineBasicBlock *LoopHeader, SmallSetVector<MachineBasicBlock *, 2> &Blocks, SmallVectorImpl<MachineInstr *> &Instructions) const { … } char SIOptimizeVGPRLiveRange::ID = …; INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE, "SI Optimize VGPR LiveRange", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass) INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE, "SI Optimize VGPR LiveRange", false, false) char &llvm::SIOptimizeVGPRLiveRangeID = …; FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() { … } bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { … }