//===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===---------------------------------------------------------------------===// // // This pass analyzes vector computations and removes unnecessary // doubleword swaps (xxswapd instructions). This pass is performed // only for little-endian VSX code generation. // // For this specific case, loads and stores of v4i32, v4f32, v2i64, // and v2f64 vectors are inefficient. These are implemented using // the lxvd2x and stxvd2x instructions, which invert the order of // doublewords in a vector register. Thus code generation inserts // an xxswapd after each such load, and prior to each such store. // // The extra xxswapd instructions reduce performance. The purpose // of this pass is to reduce the number of xxswapd instructions // required for correctness. // // The primary insight is that much code that operates on vectors // does not care about the relative order of elements in a register, // so long as the correct memory order is preserved. If we have a // computation where all input values are provided by lxvd2x/xxswapd, // all outputs are stored using xxswapd/stxvd2x, and all intermediate // computations are lane-insensitive (independent of element order), // then all the xxswapd instructions associated with the loads and // stores may be removed without changing observable semantics. // // This pass uses standard equivalence class infrastructure to create // maximal webs of computations fitting the above description. Each // such web is then optimized by removing its unnecessary xxswapd // instructions. // // There are some lane-sensitive operations for which we can still // permit the optimization, provided we modify those operations // accordingly.
Such operations are identified as using "special // handling" within this module. // //===---------------------------------------------------------------------===// #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" usingnamespacellvm; #define DEBUG_TYPE … namespace { // A PPCVSXSwapEntry is created for each machine instruction that // is relevant to a vector computation. struct PPCVSXSwapEntry { … }; enum SHValues { … }; struct PPCVSXSwapRemoval : public MachineFunctionPass { … }; // Initialize data structures for this pass. In particular, clear the // swap vector and allocate the equivalence class mapping before // processing each function. void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) { … } // Create an entry in the swap vector for each instruction that mentions // a full vector register, recording various characteristics of the // instructions there. bool PPCVSXSwapRemoval::gatherVectorInstructions() { … } // Add an entry to the swap vector and swap map, and make a // singleton equivalence class for the entry. int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI, PPCVSXSwapEntry& SwapEntry) { … } // This is used to find the "true" source register for an // XXPERMDI instruction, since MachineCSE does not handle the // "copy-like" operations (Copy and SubregToReg). Returns // the original SrcReg unless it is the target of a copy-like // operation, in which case we chain backwards through all // such operations to the ultimate source register. 
If a // physical register is encountered, we stop the search and // flag the swap entry indicated by VecIdx (the original // XXPERMDI) as mentioning a physical register. unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, unsigned VecIdx) { … } // Generate equivalence classes for related computations (webs) by // def-use relationships of virtual registers. Mention of a physical // register terminates the generation of equivalence classes as this // indicates a use of a parameter, definition of a return value, use // of a value returned from a call, or definition of a parameter to a // call. Computations with physical register mentions are flagged // as such so their containing webs will not be optimized. void PPCVSXSwapRemoval::formWebs() { … } // Walk the swap vector entries looking for conditions that prevent their // containing computations from being optimized. When such conditions are // found, mark the representative of the computation's equivalence class // as rejected. void PPCVSXSwapRemoval::recordUnoptimizableWebs() { … } // Walk the swap vector entries looking for swaps fed by permuting loads // and swaps that feed permuting stores. If the containing computation // has not been marked rejected, mark each such swap for removal. // (Removal is delayed in case optimization has disturbed the pattern, // such that multiple loads feed the same swap, etc.) void PPCVSXSwapRemoval::markSwapsForRemoval() { … } // Create an xxswapd instruction and insert it prior to the given point. // MI is used to determine basic block and debug loc information. // FIXME: When inserting a swap, we should check whether SrcReg is // defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2; If so, // then instead we should generate a copy from Reg to DstReg. 
void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint, unsigned DstReg, unsigned SrcReg) { … } // The identified swap entry requires special handling to allow its // containing computation to be optimized. Perform that handling // here. // FIXME: Additional opportunities will be phased in with subsequent // patches. void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { … } // Walk the swap vector and replace each entry marked for removal with // a copy operation. bool PPCVSXSwapRemoval::removeSwaps() { … } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // For debug purposes, dump the contents of the swap vector. LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() { for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; int ID = SwapVector[EntryIdx].VSEId; dbgs() << format("%6d", ID); dbgs() << format("%6d", EC->getLeaderValue(ID)); dbgs() << format(" %bb.%3d", MI->getParent()->getNumber()); dbgs() << format(" %14s ", TII->getName(MI->getOpcode()).str().c_str()); if (SwapVector[EntryIdx].IsLoad) dbgs() << "load "; if (SwapVector[EntryIdx].IsStore) dbgs() << "store "; if (SwapVector[EntryIdx].IsSwap) dbgs() << "swap "; if (SwapVector[EntryIdx].MentionsPhysVR) dbgs() << "physreg "; if (SwapVector[EntryIdx].MentionsPartialVR) dbgs() << "partialreg "; if (SwapVector[EntryIdx].IsSwappable) { dbgs() << "swappable "; switch(SwapVector[EntryIdx].SpecialHandling) { default: dbgs() << "special:**unknown**"; break; case SH_NONE: break; case SH_EXTRACT: dbgs() << "special:extract "; break; case SH_INSERT: dbgs() << "special:insert "; break; case SH_NOSWAP_LD: dbgs() << "special:load "; break; case SH_NOSWAP_ST: dbgs() << "special:store "; break; case SH_SPLAT: dbgs() << "special:splat "; break; case SH_XXPERMDI: dbgs() << "special:xxpermdi "; break; case SH_COPYWIDEN: dbgs() << "special:copywiden "; break; } } if (SwapVector[EntryIdx].WebRejected) dbgs() << 
"rejected "; if (SwapVector[EntryIdx].WillRemove) dbgs() << "remove "; dbgs() << "\n"; // For no-asserts builds. (void)MI; (void)ID; } dbgs() << "\n"; } #endif } // end default namespace INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE, "PowerPC VSX Swap Removal", false, false) INITIALIZE_PASS_END(PPCVSXSwapRemoval, DEBUG_TYPE, "PowerPC VSX Swap Removal", false, false) char PPCVSXSwapRemoval::ID = …; FunctionPass* llvm::createPPCVSXSwapRemovalPass() { … }