
//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
// The pass runs after the PrologEpilogInserter, where we emit the CFI
// instructions. To preserve the correctness of the unwind information, the
// pass should not change the relative order of any two instructions when one
// of them has the FrameSetup/FrameDestroy flag, or, alternatively, it should
// apply an ad-hoc fix to the unwind information.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pairs generated from unscaled instructions");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformations "
                                   "that failed the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of constant offsets folded into index addresses");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we form
// pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// The LdStConstLimit limits how far we search for const offset instructions
// when we form index address load/store instructions.
static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
                                        cl::init(10), cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);
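
// All four options above are hidden cl::opts, so they can still be set on the
// llc command line for experimentation, e.g. (illustrative invocation):
//   llc -mtriple=aarch64 -aarch64-load-store-scan-limit=32 \
//       -aarch64-load-store-renaming=false input.ll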

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

using LdStPairFlags = struct LdStPairFlags {};
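
// The struct body is elided here; a sketch of the state it carries upstream
// (field names from LLVM main, and the exact set may vary by version):
//   bool MergeForward = false;          // direction of the merge: true means
//                                       // remove the first insn and replace
//                                       // the second with the pair
//   int SExtIdx = -1;                   // index of the pair result that must
//                                       // be sign-extended, or -1 for none
//   std::optional<MCPhysReg> RenameReg; // register to use when renaming is
//                                       // needed to make the pair legal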

struct AArch64LoadStoreOpt : public MachineFunctionPass {};

char AArch64LoadStoreOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isNarrowStore(unsigned Opc) {}
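
// A sketch of the check above, assuming the upstream opcode set: a "narrow"
// store is a byte or half-word store, in scaled or unscaled form.
//   switch (Opc) {
//   default:
//     return false;
//   case AArch64::STRBBui:
//   case AArch64::STURBBi:
//   case AArch64::STRHHui:
//   case AArch64::STURHHi:
//     return true;
//   }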

// These instructions set a memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {}

static unsigned getMatchingWideOpcode(unsigned Opc) {}

static unsigned getMatchingPairOpcode(unsigned Opc) {}
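
// The mapping above takes a single load/store opcode to its paired
// equivalent; a sketch of a few upstream cases:
//   AArch64::STRXui / AArch64::STURXi -> AArch64::STPXi
//   AArch64::LDRWui / AArch64::LDURWi -> AArch64::LDPWi
//   AArch64::LDRSui / AArch64::LDURSi -> AArch64::LDPSi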

static bool isMatchingStore(MachineInstr &LoadInst,
                            MachineInstr &StoreInst) {}

static unsigned getPreIndexedOpcode(unsigned Opc) {}

static unsigned getBaseAddressOpcode(unsigned Opc) {}

static unsigned getPostIndexedOpcode(unsigned Opc) {}
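
// Together these opcode mappings let the pass fold a base-register update
// into the memory access itself, e.g.:
//   post-index: ldr x0, [x20]; add x20, x20, #24 ==> ldr x0, [x20], #24
//   pre-index:  add x20, x20, #24; ldr x0, [x20] ==> ldr x0, [x20, #24]!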

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {}

// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {}
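
// For example, for a 64-bit pair such as LDPXpre, Scale is 8 and the scaled
// immediate range is [-64, 63] (byte offsets -512..504); single pre/post-
// indexed accesses take an unscaled 9-bit signed offset, so Scale is 1 and
// the range is [-256, 255].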

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {}

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {}

static bool isPromotableZeroStoreInst(MachineInstr &MI) {}

static bool isPromotableLoadFromStore(MachineInstr &MI) {}

static bool isMergeableLdStUpdate(MachineInstr &MI) {}

// Make sure this is a reg+reg Ld/St
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {}

static bool isRewritableImplicitDef(unsigned Opc) {}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {}

// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true iff Fn returns true for all
// visited instructions. Stops after visiting Limit instructions.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {}

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {}
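
// A minimal sketch of the bounds check above: paired instructions take a
// 7-bit signed scaled immediate, so after converting an unscaled byte offset
// into units of the access size the result must fit in [-64, 63]:
//   if (IsUnscaled) {
//     if (Offset % OffsetStride) // offset must be a multiple of the stride
//       return false;
//     Offset /= OffsetStride;
//   }
//   return Offset <= 63 && Offset >= -64;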

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {}
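
// A minimal sketch of the computation, assuming PowOf2 is a power of two;
// this is the standard round-up-to-a-multiple bit trick:
//   return (Num + PowOf2 - 1) & ~(PowOf2 - 1);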

static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {}

bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {}

static bool needsWinCFI(const MachineFunction *MF) {}

// Returns true if FirstMI and MI are candidates for merging or pairing.
// Otherwise, returns false.
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {}

static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {}

static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
                 const TargetRegisterInfo *TRI) {}

// We want to merge the second load into the first by rewriting the usages of
// the same reg between first (incl.) and second (excl.). We don't need to care
// about any insns before FirstLoad or after SecondLoad.
// 1. The second load writes a new value into the same reg.
//    - The renaming cannot impact later uses of the reg.
//    - The second load always trashes the value written by the first load,
//      which means the reg must be killed before the second load.
// 2. The first load must be a def of the same reg, so we don't need to look
//    into anything before it.
static bool canRenameUntilSecondLoad(
    MachineInstr &FirstLoad, MachineInstr &SecondLoad,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {}
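
// For example (sketch): in
//   ldr w8, [x0]
//   ... uses of w8, after which w8 is killed ...
//   ldr w8, [x0, #4]
// the second load cannot be hoisted past the intervening uses of w8.
// Renaming the first definition of w8 (and its uses in between) to a free
// register, say w9, makes the pair legal:
//   ldp w9, w8, [x0]
//   ... uses of w9 ...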

// Check if we can find a physical register for renaming \p Reg. This register
// must:
// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
//   defined registers up to the point where the renamed register will be used,
// * not be used in \p UsedInBetween; UsedInBetween must contain all accessed
//   registers in the range the rename register will be used, and
// * be available in all used register classes (checked using RequiredClasses).
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {}

// For store pairs: returns a register from FirstMI to the beginning of the
// block that can be renamed.
// For load pairs: returns a register from FirstMI to MI that can be renamed.
static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
    Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {}

/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {}

static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {}

std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
    bool IsForward, bool IsPreIdx, bool MergeEither) {}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                                          MachineBasicBlock::iterator Update,
                                          unsigned Offset, int Scale) {}

bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {}

bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {}

MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {}

MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {}

bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {}
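
// For example, a load that reads from a preceding store to the same address
//   str w1, [x0, #4]
//   ldr w2, [x0, #4]
// can be rewritten to forward the stored value with a register move:
//   str w1, [x0, #4]
//   mov w2, w1
// (When the load covers only part of the stored value, the pass extracts the
// relevant bits instead of using a plain move.)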

// Merge adjacent zero stores into a wider store.
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {}
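
// For example, two adjacent 32-bit zero stores
//   str wzr, [x0]
//   str wzr, [x0, #4]
// become a single 64-bit zero store:
//   str xzr, [x0]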

// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {}
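
// For example:
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// becomes
//   ldp x0, x1, [x2]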

bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {}

bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
                                              int Scale) {}

bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {}
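
// A sketch of the per-block order of work, roughly following upstream:
// promote loads that read from a preceding store, merge adjacent narrow zero
// stores (only when EnableNarrowZeroStOpt is set), pair adjacent
// loads/stores, fold base-register updates into pre-/post-indexed forms, and
// finally fold constant offsets into register-offset loads/stores.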

bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {}

// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads
// and stores near one another? Note: The pre-RA instruction scheduler already
// has hooks to try and schedule pairable loads/stores together to improve
// pairing opportunities. Thus, a pre-RA pairing pass may not be worth the
// effort.

// FIXME: When pairing store instructions it's very possible for this pass to
// hoist a store with a KILL marker above another use (without a KILL marker).
// The resulting IR is invalid, but nothing uses the KILL markers after this
// pass, so it's never caused a problem in practice.

/// createAArch64LoadStoreOptimizationPass - returns an instance of the
/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {}