//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// Copies from VGPR to SGPR registers are illegal and the register coalescer /// will sometimes generate these illegal copies in situations like this: /// /// Register Class <vsrc> is the union of <vgpr> and <sgpr> /// /// BB0: /// %0 <sgpr> = SCALAR_INST /// %1 <vsrc> = COPY %0 <sgpr> /// ... /// BRANCH %cond BB1, BB2 /// BB1: /// %2 <vgpr> = VECTOR_INST /// %3 <vsrc> = COPY %2 <vgpr> /// BB2: /// %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vrsc>, <%bb.1> /// %5 <vgpr> = VECTOR_INST %4 <vsrc> /// /// /// The coalescer will begin at BB0 and eliminate its copy, then the resulting /// code will look like this: /// /// BB0: /// %0 <sgpr> = SCALAR_INST /// ... /// BRANCH %cond BB1, BB2 /// BB1: /// %2 <vgpr> = VECTOR_INST /// %3 <vsrc> = COPY %2 <vgpr> /// BB2: /// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1> /// %5 <vgpr> = VECTOR_INST %4 <sgpr> /// /// Now that the result of the PHI instruction is an SGPR, the register /// allocator is now forced to constrain the register class of %3 to /// <sgpr> so we end up with final code like this: /// /// BB0: /// %0 <sgpr> = SCALAR_INST /// ... /// BRANCH %cond BB1, BB2 /// BB1: /// %2 <vgpr> = VECTOR_INST /// %3 <sgpr> = COPY %2 <vgpr> /// BB2: /// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1> /// %5 <vgpr> = VECTOR_INST %4 <sgpr> /// /// Now this code contains an illegal copy from a VGPR to an SGPR. /// /// In order to avoid this problem, this pass searches for PHI instructions /// which define a <vsrc> register and constrains its definition class to /// <vgpr> if the user of the PHI's definition register is a vector instruction. /// If the PHI's definition class is constrained to <vgpr> then the coalescer /// will be unable to perform the COPY removal from the above example which /// ultimately led to the creation of an illegal COPY. //===----------------------------------------------------------------------===// #include "SIFixSGPRCopies.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" usingnamespacellvm; #define DEBUG_TYPE … static cl::opt<bool> EnableM0Merge( "amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), cl::init(true)); namespace { class V2SCopyInfo { … }; class SIFixSGPRCopies { … }; class SIFixSGPRCopiesLegacy : public MachineFunctionPass { … }; } // end anonymous namespace INITIALIZE_PASS_BEGIN(SIFixSGPRCopiesLegacy, DEBUG_TYPE, "SI Fix SGPR copies", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_END(SIFixSGPRCopiesLegacy, DEBUG_TYPE, "SI Fix SGPR copies", false, false) char SIFixSGPRCopiesLegacy::ID = …; char &llvm::SIFixSGPRCopiesLegacyID = …; FunctionPass *llvm::createSIFixSGPRCopiesLegacyPass() { … } static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) { … } static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI) { … } static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI) { … } static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII) { … } // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. // // SGPRx = ... // SGPRy = REG_SEQUENCE SGPRx, sub0 ... // VGPRz = COPY SGPRy // // ==> // // VGPRx = COPY SGPRx // VGPRz = REG_SEQUENCE VGPRx, sub0 // // This exposes immediate folding opportunities when materializing 64-bit // immediates. static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII, MachineRegisterInfo &MRI) { … } static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm) { … } template <class UnaryPredicate> bool searchPredecessors(const MachineBasicBlock *MBB, const MachineBasicBlock *CutOff, UnaryPredicate Predicate) { … } // Checks if there is potential path From instruction To instruction. // If CutOff is specified and it sits in between of that path we ignore // a higher portion of the path and report it is not reachable. static bool isReachable(const MachineInstr *From, const MachineInstr *To, const MachineBasicBlock *CutOff, MachineDominatorTree &MDT) { … } // Return the first non-prologue instruction in the block. static MachineBasicBlock::iterator getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) { … } // Hoist and merge identical SGPR initializations into a common predecessor. // This is intended to combine M0 initializations, but can work with any // SGPR. A VGPR cannot be processed since we cannot guarantee vector // executioon. static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII) { … } bool SIFixSGPRCopies::run(MachineFunction &MF) { … } void SIFixSGPRCopies::processPHINode(MachineInstr &MI) { … } bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR( MachineOperand &MaybeVGPRConstMO, Register DstReg, MachineBasicBlock *BlockToInsertTo, MachineBasicBlock::iterator PointToInsertTo) { … } bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI, MachineBasicBlock::iterator &I) { … } void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) { … } // The main function that computes the VGPR to SGPR copy score // and determines copy further lowering way: v_readfirstlane_b32 or moveToVALU bool SIFixSGPRCopies::needToBeConvertedToVALU(V2SCopyInfo *Info) { … } void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) { … } void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) { … } PreservedAnalyses SIFixSGPRCopiesPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { … }