//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // In NVPTX, NVPTXFrameLowering emits the following instructions at the // beginning of a MachineFunction. // // mov %SPL, %depot // cvta.local %SP, %SPL // // Because a Frame Index is a generic address and alloca can only return a // generic pointer, without this pass the instructions producing an alloca'ed // address will be based on %SP. NVPTXLowerAlloca tends to help replace stores // and loads on this address with their .local versions, but this may introduce // a lot of cvta.to.local instructions. Performance can be improved if we avoid // casting the address back and forth and directly calculate the local address // based on %SPL. This peephole pass optimizes these cases. // // For example, it will transform the following pattern // %0 = LEA_ADDRi64 %VRFrame64, 4 // %1 = cvta_to_local_64 %0 // // into // %1 = LEA_ADDRi64 %VRFrameLocal64, 4 // // %VRFrameLocal64 is the virtual register name of %SPL. // //===----------------------------------------------------------------------===// #include "NVPTX.h" #include "NVPTXRegisterInfo.h" #include "NVPTXSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" usingnamespacellvm; #define DEBUG_TYPE … namespace llvm { void initializeNVPTXPeepholePass(PassRegistry &); } namespace { struct NVPTXPeephole : public MachineFunctionPass { … }; } char NVPTXPeephole::ID = …; INITIALIZE_PASS(…) static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { … } static void 
CombineCVTAToLocal(MachineInstr &Root) { … } bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { … } MachineFunctionPass *llvm::createNVPTXPeephole() { … }