MVETPAndVPTOptimisationsPass.cpp | Explore in Territory

//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass does a few optimisations related to Tail predicated loops
/// and MVE VPT blocks before register allocation is performed. For VPT blocks
/// the goal is to maximize the sizes of the blocks that will be created by the
/// MVE VPT Block Insertion pass (which runs after register allocation). For
/// tail predicated loops we transform the loop into something that will
/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
///
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <cassert>

usingnamespacellvm;

#define DEBUG_TYPE …

static cl::opt<bool>
MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
    cl::desc("Enable merging Loop End and Dec instructions."),
    cl::init(true));

static cl::opt<bool>
SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
    cl::desc("Enable setting lr as a predicate in tail predication regions."),
    cl::init(true));

namespace {
class MVETPAndVPTOptimisations : public MachineFunctionPass { … };

char MVETPAndVPTOptimisations::ID = …;

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
                      "ARM MVE TailPred and VPT Optimisations pass", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
                    "ARM MVE TailPred and VPT Optimisations pass", false, false)

static MachineInstr *LookThroughCOPY(MachineInstr *MI,
                                     MachineRegisterInfo *MRI) { … }

// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
// at the moment, returning a t2DoLoopStart in LoopStart.
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
                               MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
                               MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { … }

static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) { … }

// The Hardware Loop insertion and ISel Lowering produce the pseudos for the
// start of a while loop:
//   %a:gprlr = t2WhileLoopSetup %Cnt
//   t2WhileLoopStart %a, %BB
// We want to convert those to a single instruction which, like t2LoopEndDec and
// t2DoLoopStartTP is both a terminator and produces a value:
//   %a:grplr: t2WhileLoopStartLR %Cnt, %BB
//
// Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
// t2WhileLoopStart are not valid past regalloc.
bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) { … }

// Return true if this instruction is invalid in a low overhead loop, usually
// because it clobbers LR.
static bool IsInvalidTPInstruction(MachineInstr &MI) { … }

// Starting from PreHeader, search for invalid instructions back until the
// LoopStart block is reached. If invalid instructions are found, the loop start
// is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
// return the new DLS LoopStart if updated.
MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
    MachineBasicBlock *PreHeader, MachineInstr *LoopStart) { … }

// This function converts loops with t2LoopEnd and t2LoopEnd instructions into
// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
// will be valid to be used for the low overhead loop, which means nothing else
// is using LR (especially calls) and there are no superfluous copies in the
// loop. The t2LoopEndDec is a branching terminator that produces a value (the
// decrement) around the loop edge, which means we need to be careful that they
// will be valid to allocate without any spilling.
bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { … }

// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
// instruction, making the backend ARMLowOverheadLoops passes job of finding the
// VCTP operand much simpler.
bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
                                              MachineDominatorTree *DT) { … }

// Returns true if Opcode is any VCMP Opcode.
static bool IsVCMP(unsigned Opcode) { … }

// Returns true if a VCMP with this Opcode can have its operands swapped.
// There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
// and VCMPr instructions (since the r is always on the right).
static bool CanHaveSwappedOperands(unsigned Opcode) { … }

// Returns the CondCode of a VCMP Instruction.
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) { … }

// Returns true if Cond is equivalent to a VPNOT instruction on the result of
// Prev. Cond and Prev must be VCMPs.
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) { … }

// Returns true if Instr writes to VCCR.
static bool IsWritingToVCCR(MachineInstr &Instr) { … }

// Transforms
//    <Instr that uses %A ('User' Operand)>
// Into
//    %K = VPNOT %Target
//    <Instr that uses %K ('User' Operand)>
// And returns the newly inserted VPNOT.
// This optimization is done in the hopes of preventing spills/reloads of VPR by
// reducing the number of VCCR values with overlapping lifetimes.
MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
    MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
    Register Target) { … }

// Moves a VPNOT before its first user if an instruction that uses Reg is found
// in-between the VPNOT and its user.
// Returns true if there is at least one user of the VPNOT in the block.
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator Iter,
                                     Register Reg) { … }

// This optimisation attempts to reduce the number of overlapping lifetimes of
// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
// this replaces
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %Bar = (some op that uses %A)
// With
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %TMP2:vccr = VPNOT %B
//    %Bar = (some op that uses %A)
bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { … }

// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { … }

bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
                                                    MachineDominatorTree *DT) { … }

// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
// somewhat blunt approximation to allow tail predicated with vpsel
// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
// different semantics under tail predication. Until that is modelled we just
// convert to a VMOVT (via a predicated VORR) instead.
bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { … }

// Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
// the instruction may be removable as a noop.
bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { … }

bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { … }

/// createMVETPAndVPTOptimisationsPass
FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() { … }
llvm/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp