llvm/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp

//===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass analyzes vector computations and removes unnecessary
// doubleword swaps (xxswapd instructions).  This pass is performed
// only for little-endian VSX code generation.
//
// For this specific case, loads and stores of v4i32, v4f32, v2i64,
// and v2f64 vectors are inefficient.  These are implemented using
// the lxvd2x and stxvd2x instructions, which invert the order of
// doublewords in a vector register.  Thus code generation inserts
// an xxswapd after each such load, and prior to each such store.
//
// The extra xxswapd instructions reduce performance.  The purpose
// of this pass is to reduce the number of xxswapd instructions
// required for correctness.
//
// The primary insight is that much code that operates on vectors
// does not care about the relative order of elements in a register,
// so long as the correct memory order is preserved.  If we have a
// computation where all input values are provided by lxvd2x/xxswapd,
// all outputs are stored using xxswapd/lxvd2x, and all intermediate
// computations are lane-insensitive (independent of element order),
// then all the xxswapd instructions associated with the loads and
// stores may be removed without changing observable semantics.
//
// This pass uses standard equivalence class infrastructure to create
// maximal webs of computations fitting the above description.  Each
// such web is then optimized by removing its unnecessary xxswapd
// instructions.
//
// There are some lane-sensitive operations for which we can still
// permit the optimization, provided we modify those operations
// accordingly.  Such operations are identified as using "special
// handling" within this module.
//
//===---------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

usingnamespacellvm;

#define DEBUG_TYPE

namespace {

// A PPCVSXSwapEntry is created for each machine instruction that
// is relevant to a vector computation.
struct PPCVSXSwapEntry {};

enum SHValues {};

struct PPCVSXSwapRemoval : public MachineFunctionPass {};

// Initialize data structures for this pass.  In particular, clear the
// swap vector and allocate the equivalence class mapping before
// processing each function.
void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {}

// Create an entry in the swap vector for each instruction that mentions
// a full vector register, recording various characteristics of the
// instructions there.
bool PPCVSXSwapRemoval::gatherVectorInstructions() {}

// Add an entry to the swap vector and swap map, and make a
// singleton equivalence class for the entry.
int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
                                  PPCVSXSwapEntry& SwapEntry) {}

// This is used to find the "true" source register for an
// XXPERMDI instruction, since MachineCSE does not handle the
// "copy-like" operations (Copy and SubregToReg).  Returns
// the original SrcReg unless it is the target of a copy-like
// operation, in which case we chain backwards through all
// such operations to the ultimate source register.  If a
// physical register is encountered, we stop the search and
// flag the swap entry indicated by VecIdx (the original
// XXPERMDI) as mentioning a physical register.
unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
                                             unsigned VecIdx) {}

// Generate equivalence classes for related computations (webs) by
// def-use relationships of virtual registers.  Mention of a physical
// register terminates the generation of equivalence classes as this
// indicates a use of a parameter, definition of a return value, use
// of a value returned from a call, or definition of a parameter to a
// call.  Computations with physical register mentions are flagged
// as such so their containing webs will not be optimized.
void PPCVSXSwapRemoval::formWebs() {}

// Walk the swap vector entries looking for conditions that prevent their
// containing computations from being optimized.  When such conditions are
// found, mark the representative of the computation's equivalence class
// as rejected.
void PPCVSXSwapRemoval::recordUnoptimizableWebs() {}

// Walk the swap vector entries looking for swaps fed by permuting loads
// and swaps that feed permuting stores.  If the containing computation
// has not been marked rejected, mark each such swap for removal.
// (Removal is delayed in case optimization has disturbed the pattern,
// such that multiple loads feed the same swap, etc.)
void PPCVSXSwapRemoval::markSwapsForRemoval() {}

// Create an xxswapd instruction and insert it prior to the given point.
// MI is used to determine basic block and debug loc information.
// FIXME: When inserting a swap, we should check whether SrcReg is
// defined by another swap:  SrcReg = XXPERMDI Reg, Reg, 2;  If so,
// then instead we should generate a copy from Reg to DstReg.
void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
                                   MachineBasicBlock::iterator InsertPoint,
                                   unsigned DstReg, unsigned SrcReg) {}

// The identified swap entry requires special handling to allow its
// containing computation to be optimized.  Perform that handling
// here.
// FIXME: Additional opportunities will be phased in with subsequent
// patches.
void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {}

// Walk the swap vector and replace each entry marked for removal with
// a copy operation.
bool PPCVSXSwapRemoval::removeSwaps() {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// For debug purposes, dump the contents of the swap vector.
LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() {

  for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {

    MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
    int ID = SwapVector[EntryIdx].VSEId;

    dbgs() << format("%6d", ID);
    dbgs() << format("%6d", EC->getLeaderValue(ID));
    dbgs() << format(" %bb.%3d", MI->getParent()->getNumber());
    dbgs() << format("  %14s  ", TII->getName(MI->getOpcode()).str().c_str());

    if (SwapVector[EntryIdx].IsLoad)
      dbgs() << "load ";
    if (SwapVector[EntryIdx].IsStore)
      dbgs() << "store ";
    if (SwapVector[EntryIdx].IsSwap)
      dbgs() << "swap ";
    if (SwapVector[EntryIdx].MentionsPhysVR)
      dbgs() << "physreg ";
    if (SwapVector[EntryIdx].MentionsPartialVR)
      dbgs() << "partialreg ";

    if (SwapVector[EntryIdx].IsSwappable) {
      dbgs() << "swappable ";
      switch(SwapVector[EntryIdx].SpecialHandling) {
      default:
        dbgs() << "special:**unknown**";
        break;
      case SH_NONE:
        break;
      case SH_EXTRACT:
        dbgs() << "special:extract ";
        break;
      case SH_INSERT:
        dbgs() << "special:insert ";
        break;
      case SH_NOSWAP_LD:
        dbgs() << "special:load ";
        break;
      case SH_NOSWAP_ST:
        dbgs() << "special:store ";
        break;
      case SH_SPLAT:
        dbgs() << "special:splat ";
        break;
      case SH_XXPERMDI:
        dbgs() << "special:xxpermdi ";
        break;
      case SH_COPYWIDEN:
        dbgs() << "special:copywiden ";
        break;
      }
    }

    if (SwapVector[EntryIdx].WebRejected)
      dbgs() << "rejected ";
    if (SwapVector[EntryIdx].WillRemove)
      dbgs() << "remove ";

    dbgs() << "\n";

    // For no-asserts builds.
    (void)MI;
    (void)ID;
  }

  dbgs() << "\n";
}
#endif

} // end default namespace

INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
                      "PowerPC VSX Swap Removal", false, false)
INITIALIZE_PASS_END(PPCVSXSwapRemoval, DEBUG_TYPE,
                    "PowerPC VSX Swap Removal", false, false)

char PPCVSXSwapRemoval::ID =;
FunctionPass*
llvm::createPPCVSXSwapRemovalPass() {}