llvm/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

//===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds instructions to enable whole quad mode (strict or non-strict)
/// for pixel shaders, and strict whole wavefront mode for all programs.
///
/// The "strict" prefix indicates that inactive lanes do not take part in
/// control flow, specifically an inactive lane enabled by a strict WQM/WWM will
/// always be enabled irrespective of control flow decisions. Conversely, in
/// non-strict WQM, inactive lanes may take part in control flow decisions.
///
/// Whole quad mode is required for derivative computations, but it interferes
/// with shader side effects (stores and atomics). This pass ensures that WQM
/// is enabled when necessary, but disabled around stores and atomics.
///
/// When necessary, this pass creates a function prolog
///
///   S_MOV_B64 LiveMask, EXEC
///   S_WQM_B64 EXEC, EXEC
///
/// to enter WQM at the top of the function and surrounds blocks of Exact
/// instructions by
///
///   S_AND_SAVEEXEC_B64 Tmp, LiveMask
///   ...
///   S_MOV_B64 EXEC, Tmp
///
/// We also compute when a sequence of instructions requires strict whole
/// wavefront mode (StrictWWM) and insert instructions to save and restore it:
///
///   S_OR_SAVEEXEC_B64 Tmp, -1
///   ...
///   S_MOV_B64 EXEC, Tmp
///
/// When a sequence of instructions requires strict whole quad mode (StrictWQM)
/// we use a similar save and restore mechanism and force whole quad mode for
/// those instructions:
///
///  S_MOV_B64 Tmp, EXEC
///  S_WQM_B64 EXEC, EXEC
///  ...
///  S_MOV_B64 EXEC, Tmp
///
/// In order to avoid excessive switching during sequences of Exact
/// instructions, the pass first analyzes which instructions must be run in WQM
/// (aka which instructions produce values that lead to derivative
/// computations).
///
/// Basic blocks are always exited in WQM as long as some successor needs WQM.
///
/// There is room for improvement given better control flow analysis:
///
///  (1) at the top level (outside of control flow statements, and as long as
///      kill hasn't been used), one SGPR can be saved by recovering WQM from
///      the LiveMask (this is implemented for the entry block).
///
///  (2) when entire regions (e.g. if-else blocks or entire loops) only
///      consist of exact and don't-care instructions, the switch only has to
///      be done at the entry and exit points rather than potentially in each
///      block of the region.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/raw_ostream.h"

usingnamespacellvm;

#define DEBUG_TYPE

namespace {

// NOTE(review): no enumerators are declared here, although the PrintState
// printer in this namespace uses StateWQM, StateStrictWWM, StateStrictWQM and
// StateExact as bit flags — confirm where those are declared.
enum {};

// Wrapper type accepted by the debug-only operator<< in this namespace.
// NOTE(review): that printer reads PS.State and printInfo() constructs a
// PrintState from a state value, but no member or constructor is declared
// here — confirm.
struct PrintState {};

#ifndef NDEBUG
/// Print the state mask wrapped by \p PS to \p OS as the names of its set
/// bits, joined by '|' (for example "WQM|Exact"). Bits are emitted in the
/// order they appear in the lookup table; an empty mask prints nothing.
/// Asserts that every set bit corresponds to a known state.
static raw_ostream &operator<<(raw_ostream &OS, const PrintState &PS) {
  static const std::pair<char, const char *> Mapping[] = {
      std::pair(StateWQM, "WQM"), std::pair(StateStrictWWM, "StrictWWM"),
      std::pair(StateStrictWQM, "StrictWQM"), std::pair(StateExact, "Exact")};

  // Bits that have not been printed yet.
  char Remaining = PS.State;
  for (const auto &[Bit, Name] : Mapping) {
    if (!(Remaining & Bit))
      continue;

    OS << Name;
    Remaining &= ~Bit;

    // Only emit a separator when another state name will follow.
    if (Remaining)
      OS << '|';
  }

  // Anything left over is a bit with no entry in the table.
  assert(Remaining == 0);
  return OS;
}
#endif

// NOTE(review): presumably the per-instruction record kept in Instructions;
// printInfo() reads Needs and OutNeeds from those entries, but no members are
// declared here — confirm.
struct InstrInfo {};

// NOTE(review): presumably the per-block record kept in Blocks; printInfo()
// reads InNeeds, Needs and OutNeeds from those entries, but no members are
// declared here — confirm.
struct BlockInfo {};

// Element type of the std::vector worklists passed to the mark* and
// propagate* methods of SIWholeQuadMode.
// NOTE(review): no members are declared here — confirm what an item carries.
struct WorkItem {};

// The whole-quad-mode pass, implemented as a MachineFunctionPass.
// NOTE(review): the class body is empty here, although ID, printInfo() and
// many other members are defined out of class later in this file — confirm
// the member declarations.
class SIWholeQuadMode : public MachineFunctionPass {};

} // end anonymous namespace

// Pass identification. LLVM identifies a pass by the address of this
// variable, so the stored value itself is irrelevant; 0 is the convention.
char SIWholeQuadMode::ID = 0;

// Pass registration under the name "SI Whole Quad Mode". The pass declares
// dependencies on LiveIntervalsWrapperPass, MachineDominatorTreeWrapperPass
// and MachinePostDominatorTreeWrapperPass.
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
                    false)

char &llvm::SIWholeQuadModeID =;

/// Factory for the whole-quad-mode pass.
/// \returns a newly allocated SIWholeQuadMode instance; the caller (normally
/// a pass manager) is expected to take ownership of it.
FunctionPass *llvm::createSIWholeQuadModePass() { return new SIWholeQuadMode; }

#ifndef NDEBUG
/// Debug dump of the analysis results to dbgs(). For every entry in Blocks
/// this prints the block reference with its InNeeds/Needs/OutNeeds masks,
/// followed by each instruction of the block that has an entry in
/// Instructions, together with its Needs/OutNeeds masks.
LLVM_DUMP_METHOD void SIWholeQuadMode::printInfo() {
  for (const auto &Entry : Blocks) {
    auto &MBB = *Entry.first;
    const auto &BI = Entry.second;

    // Block header line with the block-level state masks.
    dbgs() << "\n" << printMBBReference(MBB) << ":\n";
    dbgs() << "  InNeeds = " << PrintState(BI.InNeeds)
           << ", Needs = " << PrintState(BI.Needs)
           << ", OutNeeds = " << PrintState(BI.OutNeeds) << "\n\n";

    for (const MachineInstr &MI : MBB) {
      auto It = Instructions.find(&MI);
      // Instructions without a recorded entry are not printed.
      if (It == Instructions.end())
        continue;

      const auto &II = It->second;
      dbgs() << "  " << MI << "    Needs = " << PrintState(II.Needs)
             << ", OutNeeds = " << PrintState(II.OutNeeds) << '\n';
    }
  }
}
#endif

/// Mark \p MI with the state flag(s) in \p Flag.
/// NOTE(review): the body is empty here; presumably newly marked instructions
/// are queued on \p Worklist for propagation — confirm.
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
                                      std::vector<WorkItem> &Worklist) {}

/// Mark all relevant definitions of register \p Reg in usage \p UseMI.
/// \p LR is a live range and \p SubReg a sub-register index, presumably
/// narrowing which definitions of \p Reg count as relevant — confirm.
/// NOTE(review): the body is empty here, so what "relevant" means and how
/// \p Flag and \p Worklist are used cannot be checked.
void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
                               Register Reg, unsigned SubReg, char Flag,
                               std::vector<WorkItem> &Worklist) {}

/// Mark the definition(s) feeding operand \p Op of \p MI with \p Flag.
/// NOTE(review): the body is empty here; the description is inferred from the
/// name and from the sibling markDefs/markInstructionUses helpers — confirm.
void SIWholeQuadMode::markOperand(const MachineInstr &MI,
                                  const MachineOperand &Op, char Flag,
                                  std::vector<WorkItem> &Worklist) {}

/// Mark all instructions defining the uses in \p MI with \p Flag.
/// NOTE(review): the body is empty here; presumably this visits the use
/// operands of \p MI and queues work on \p Worklist — confirm.
void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
                                          std::vector<WorkItem> &Worklist) {}

// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
// NOTE(review): declared to return char (presumably a mask of the states
// found — confirm), but the body is empty; flowing off the end of a non-void
// function is undefined behavior.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                       std::vector<WorkItem> &Worklist) {}

/// Propagate state requirements for the single instruction \p MI.
/// NOTE(review): the body is empty here; presumably follow-up items are
/// pushed onto \p Worklist — confirm.
void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
                                           std::vector<WorkItem>& Worklist) {}

/// Propagate state requirements at the granularity of the block \p MBB.
/// NOTE(review): the body is empty here; presumably follow-up items are
/// pushed onto \p Worklist — confirm.
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
                                     std::vector<WorkItem>& Worklist) {}

/// Run the analysis over \p MF.
/// NOTE(review): declared to return char (presumably a state mask — confirm),
/// but the body is empty; flowing off the end of a non-void function is
/// undefined behavior.
char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {}

/// Preserve SCC around an insertion point \p Before in \p MBB.
/// NOTE(review): inferred from the name only; the body is empty, and as a
/// non-void function it must return an iterator — flowing off the end is
/// undefined behavior. Confirm which position the result denotes.
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Before) {}

/// Split \p BB, presumably at \p TermMI — confirm.
/// NOTE(review): the body is empty although a MachineBasicBlock pointer must
/// be returned; flowing off the end of a non-void function is undefined
/// behavior. Confirm which block the result refers to.
MachineBasicBlock *SIWholeQuadMode::splitBlock(MachineBasicBlock *BB,
                                               MachineInstr *TermMI) {}

/// Lower the kill instruction \p MI in \p MBB (F32 variant, going by the
/// name).
/// NOTE(review): the body is empty although a MachineInstr pointer must be
/// returned; flowing off the end of a non-void function is undefined
/// behavior. Confirm what the returned instruction is.
MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
                                            MachineInstr &MI) {}

/// Lower the kill instruction \p MI in \p MBB (I1 variant, going by the
/// name). \p IsWQM presumably tells whether the instruction executes in
/// WQM — confirm.
/// NOTE(review): the body is empty although a MachineInstr pointer must be
/// returned; flowing off the end of a non-void function is undefined
/// behavior.
MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
                                           MachineInstr &MI, bool IsWQM) {}

// Replace (or supplement) instructions accessing live mask.
// This can only happen once all the live mask registers have been created
// and the execute state (WQM/StrictWWM/Exact) of instructions is known.
// NOTE(review): the body is empty here, so which instructions of MBB are
// rewritten cannot be checked — confirm.
void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {}

// Return an iterator in the (inclusive) range [First, Last] at which
// instructions can be safely inserted, keeping in mind that some of the
// instructions we want to add necessarily clobber SCC.
// PreferLast presumably biases the choice towards Last, and SaveSCC
// presumably allows saving SCC when no SCC-free point exists — confirm.
// NOTE(review): the body is empty although an iterator must be returned;
// flowing off the end of a non-void function is undefined behavior.
MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
    MachineBasicBlock::iterator Last, bool PreferLast, bool SaveSCC) {}

/// Switch to Exact mode before \p Before in \p MBB.
/// NOTE(review): the body is empty here. Presumably this emits the
/// S_AND_SAVEEXEC_B64 sequence described in the file header, with \p SaveWQM
/// receiving the previous EXEC value — confirm.
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator Before,
                              Register SaveWQM) {}

/// Switch to whole quad mode before \p Before in \p MBB.
/// NOTE(review): the body is empty here. Presumably \p SavedWQM, when valid,
/// holds a previously saved WQM EXEC value to restore — confirm.
void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator Before,
                            Register SavedWQM) {}

/// Enter the strict mode selected by \p StrictStateNeeded before \p Before in
/// \p MBB.
/// NOTE(review): the body is empty here. Presumably \p SaveOrig receives the
/// EXEC value to restore later, as in the save/restore sequences described in
/// the file header — confirm.
void SIWholeQuadMode::toStrictMode(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator Before,
                                   Register SaveOrig, char StrictStateNeeded) {}

/// Leave the strict mode \p CurrentStrictState before \p Before in \p MBB.
/// NOTE(review): the body is empty here. Presumably EXEC is restored from
/// \p SavedOrig and \p NonStrictState names the state being returned to —
/// confirm.
void SIWholeQuadMode::fromStrictMode(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator Before,
                                     Register SavedOrig, char NonStrictState,
                                     char CurrentStrictState) {}

/// Process the block \p MBB; \p IsEntry presumably flags the function's entry
/// block — confirm.
/// NOTE(review): the body is empty here, so the transformation applied cannot
/// be checked.
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {}

/// Lower live mask queries.
/// NOTE(review): declared to return bool (presumably whether anything
/// changed — confirm), but the body is empty; flowing off the end of a
/// non-void function is undefined behavior.
bool SIWholeQuadMode::lowerLiveMaskQueries() {}

/// Lower copy instructions.
/// NOTE(review): declared to return bool (presumably whether anything
/// changed — confirm), but the body is empty; flowing off the end of a
/// non-void function is undefined behavior.
bool SIWholeQuadMode::lowerCopyInstrs() {}

/// Lower kill instructions; \p IsWQM is presumably forwarded to the per-kill
/// lowering helpers — confirm.
/// NOTE(review): declared to return bool (presumably whether anything
/// changed — confirm), but the body is empty; flowing off the end of a
/// non-void function is undefined behavior.
bool SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {}

/// Lower the single INIT_EXEC-style instruction \p MI.
/// NOTE(review): inferred from the name and from lowerInitExecInstrs; the
/// body is empty here — confirm.
void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {}

/// Lower INIT_EXEC instructions. Return a suitable insert point in \p Entry
/// for instructions that depend on EXEC.
/// \p Changed is an in/out flag, presumably set when any instruction was
/// lowered — confirm.
/// NOTE(review): the body is empty although an iterator must be returned;
/// flowing off the end of a non-void function is undefined behavior.
MachineBasicBlock::iterator
SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {}

/// Pass entry point, run on \p MF.
/// NOTE(review): declared to return bool (by MachineFunctionPass convention,
/// whether the function was modified — confirm), but the body is empty;
/// flowing off the end of a non-void function is undefined behavior.
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {}