//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetFrameLowering
// class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas in the diagram below are optional, i.e.
// it's possible to create a function so that a particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until the main
// function body, after the prologue has run. However, it's depicted here for
// completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | (Win64 only) varargs from reg     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved gpr registers        | <--.
// |                                   |    | On Darwin platforms these
// |- - - - - - - - - - - - - - - - - -|    | callee saves are swapped,
// | prev_lr                           |    | (frame record first)
// | prev_fp                           | <--'
// | async context if needed           |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |   <hazard padding>                |
// |-----------------------------------|
// |                                   |
// | callee-saved fp/simd/SVE regs     |
// |                                   |
// |-----------------------------------|
// |                                   |
// |        SVE stack objects          |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// | local variables of fixed size     |
// | including spill slots             |
// |   <FPR>                           |
// |   <hazard padding>                |
// |   <GPR>                           |
// |-----------------------------------| <- bp (not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers (fp,
// bp, sp) must be computable at compile time. The sizes of the areas with a
// dotted background cannot be computed at compile time, so if those areas are
// present and non-empty, all three of fp, bp, and sp must be set up in order
// to access every part of the frame.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
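//
// As a hypothetical illustration of those rules (not generated code): in a
// function that has both a VLA and a 32-byte-aligned local, each frame area
// is reached through the one pointer whose distance to it is a compile-time
// constant:
//   * incoming stack arguments   -> addressed relative to fp (fixed offset,
//                                   unaffected by realignment or VLAs)
//   * fixed-size locals          -> addressed relative to bp (sp moves as the
//                                   VLA grows, and the realignment gap makes
//                                   their fp-relative offset unknown)
//   * outgoing call arguments    -> addressed relative to sp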
//
// For Darwin platforms the frame-record (fp, lr) is stored at the top of the
// callee-saved area, since the unwind encoding does not allow for encoding
// this dynamically and existing tools depend on this layout. For other
// platforms, the frame-record is stored at the bottom of the (gpr)
// callee-saved area to allow SVE stack objects (allocated directly below the
// callee-saves, if available) to be accessed directly from the frame pointer.
// The SVE spill/fill instructions have VL-scaled addressing modes such as:
//   ldr z8, [fp, #-7, mul vl]
// For SVE, the vector length (VL) is not known at compile time, so
// '#-7, mul vl' is an offset that can only be evaluated at runtime. With this
// layout, we don't need to add an unscaled offset to the frame pointer before
// accessing the SVE object in the frame.
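//
// As a worked example (illustrative numbers): for the z-register load above,
// the immediate is scaled by the vector length in bytes, so with a 256-bit VL
// '#-7, mul vl' resolves to -7 * 32 = -224 bytes, while with a 512-bit VL the
// same encoding resolves to -7 * 64 = -448 bytes at runtime.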
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls. If we do have VLAs, then
// the stack pointer must be decremented and incremented around each call to
// make space for the arguments below the VLAs.
//
// FIXME: also explain the redzone concept.
//
// About stack hazards: Under some SME contexts, a coprocessor with its own
// separate cache can be used for FP operations. This can create hazards if the
// CPU and the SME unit try to access the same area of memory, including if the
// access is to an area of the stack. To try to alleviate this we attempt to
// introduce extra padding into the stack frame between FP and GPR accesses,
// controlled by the StackHazardSize option. Without changing the layout of the
// stack frame in the diagram above, a stack object of size StackHazardSize is
// added between GPR and FPR CSRs. Another is added to the stack objects
// section, and stack objects are sorted so that FPR > Hazard padding slot >
// GPRs (where possible). Unfortunately some things are not handled well (VLA
// area, arguments on the stack, objects with both GPR and FPR accesses), but
// if those are controlled by the user then the entire stack frame becomes GPR
// at the start/end with FPR in the middle, surrounded by Hazard padding.
//
// An example of the prologue:
//
//     .globl __foo
//     .align 2
//  __foo:
// Ltmp0:
//     .cfi_startproc
//     .cfi_personality 155, ___gxx_personality_v0
// Leh_func_begin:
//     .cfi_lsda 16, Lexception33
//
//     stp  xA, xB, [sp, #-offset]!
//     ...
//     stp  x28, x27, [sp, #offset-32]
//     stp  fp, lr, [sp, #offset-16]
//     add  fp, sp, #offset - 16
//     sub  sp, sp, #1360
//
// The Stack:
//       +-------------------------------------------+
// 10000 | ........ | ........ | ........ | ........ |
// 10004 | ........ | ........ | ........ | ........ |
//       +-------------------------------------------+
// 10008 | ........ | ........ | ........ | ........ |
// 1000c | ........ | ........ | ........ | ........ |
//       +===========================================+
// 10010 |                X28 Register               |
// 10014 |                X28 Register               |
//       +-------------------------------------------+
// 10018 |                X27 Register               |
// 1001c |                X27 Register               |
//       +===========================================+
// 10020 |                Frame Pointer              |
// 10024 |                Frame Pointer              |
//       +-------------------------------------------+
// 10028 |                Link Register              |
// 1002c |                Link Register              |
//       +===========================================+
// 10030 | ........ | ........ | ........ | ........ |
// 10034 | ........ | ........ | ........ | ........ |
//       +-------------------------------------------+
// 10038 | ........ | ........ | ........ | ........ |
// 1003c | ........ | ........ | ........ | ........ |
//       +-------------------------------------------+
//
// The value of sp after each instruction:
//
//     sp == 10030         ::    >>initial value<<
//     sp == 10020         ::  stp fp, lr, [sp, #-16]!
//     fp == sp == 10020   ::  mov fp, sp
//     sp == 10010         ::  stp x28, x27, [sp, #-16]!
//     sp == 10010         ::    >>final value<<
//
// The frame pointer (w29) points to address 10020. If we use an offset of
// '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
// for w27, and -32 for w28:
//
// Ltmp1:
//     .cfi_def_cfa w29, 16
// Ltmp2:
//     .cfi_offset w30, -8
// Ltmp3:
//     .cfi_offset w29, -16
// Ltmp4:
//     .cfi_offset w27, -24
// Ltmp5:
//     .cfi_offset w28, -32
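//
// To spell out the arithmetic behind those offsets: the CFA is defined as
// w29 + 16 == 10030, so each save slot's CFI offset is its address minus the
// CFA: w30 at 10028 gives -8, w29 at 10020 gives -16, w27 at 10018 gives -24,
// and w28 at 10010 gives -32.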
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE …

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool> StackTaggingMergeSetTag(
    "stack-tagging-merge-settag",
    cl::desc("merge settag instructions in the function epilogue"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
                                       cl::desc("sort stack allocations"),
                                       cl::init(true), cl::Hidden);

cl::opt<bool> EnableHomogeneousPrologEpilog(
    "homogeneous-prolog-epilog", cl::Hidden,
    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                         cl::init(0), cl::Hidden);

// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
static cl::opt<unsigned>
    StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
                          cl::Hidden);

// Whether to insert padding into non-streaming functions (for testing).
static cl::opt<bool>
    StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
                              cl::init(false), cl::Hidden);

static cl::opt<bool> DisableMultiVectorSpillFill(
    "aarch64-disable-multivector-spill-fill",
    cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
    cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
/// tail call to a function that uses less stack space for arguments) or
/// negative (for a tail call to a function that needs more stack space than we
/// do for arguments).
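/// For example (illustrative numbers only): if our incoming stack arguments
/// occupy 32 bytes and we make a guaranteed tail call to a function whose
/// stack arguments occupy 16 bytes, 16 bytes can be reclaimed; a tail call to
/// a function needing 48 bytes of stack arguments would yield -16.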
static int64_t getArgumentStackToRestore(MachineFunction &MF,
                                         MachineBasicBlock &MBB) { … }

static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);

/// Returns true if homogeneous prolog or epilog code can be emitted for the
/// size optimization. If possible, a frame helper call is injected. When an
/// Exit block is given, the check is for the epilog.
bool AArch64FrameLowering::homogeneousPrologEpilog(
    MachineFunction &MF, MachineBasicBlock *Exit) const { … }

/// Returns true if CSRs should be paired.
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const { … }

/// This is the biggest offset to the stack pointer we can encode in AArch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exceptions here are vector stores/loads, which cannot encode
/// any displacements (see estimateRSStackSizeLimit(),
/// isAArch64FrameOffsetLegal()).
static const unsigned DefaultSafeSPDisplacement = …;

/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { … }

TargetStackID::Value
AArch64FrameLowering::getStackIDForScalableVectors() const { … }

/// Returns the size of the fixed object area (allocated next to sp on entry).
/// On Win64 this may include a var args area and an UnwindHelp object for EH.
static unsigned getFixedObjectSize(const MachineFunction &MF,
                                   const AArch64FunctionInfo *AFI,
                                   bool IsWin64, bool IsFunclet) { … }

/// Returns the size of the entire SVE stack frame (callee-saves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) { … }

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { … }

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { … }

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool AArch64FrameLowering::hasReservedCallFrame(
    const MachineFunction &MF) const { … }

MachineBasicBlock::iterator
AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const { … }

void AArch64FrameLowering::emitCalleeSavedGPRLocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { … }

void AArch64FrameLowering::emitCalleeSavedSVELocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { … }

static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator InsertPt,
                               unsigned DwarfReg) { … }

void AArch64FrameLowering::resetCFIToInitialState(
    MachineBasicBlock &MBB) const { … }

static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    bool SVE) { … }

void AArch64FrameLowering::emitCalleeSavedGPRRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { … }

void AArch64FrameLowering::emitCalleeSavedSVERestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { … }

// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
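// For example: the architectural maximum SVE vector length is 2048 bits, i.e.
// vscale <= 16, so a StackOffset whose scalable component is N can correspond
// to at most N * 16 bytes at runtime.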
static int64_t upperBound(StackOffset Size) { … }

void AArch64FrameLowering::allocateStackSpace(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) const { … }

static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) { … }

void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                                MachineBasicBlock &MBB) const { … }

static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
                                   const MachineBasicBlock &MBB) { … }

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { … }

bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const { … }

static bool windowsRequiresStackProbe(MachineFunction &MF,
                                      uint64_t StackSizeInBytes) { … }

static bool needsWinCFI(const MachineFunction &MF) { … }

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, uint64_t StackBumpBytes) const { … }

bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
    MachineBasicBlock &MBB, unsigned StackBumpBytes) const { … }

// Given a load or a store instruction, generate the appropriate SEH unwind
// code on Windows.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             MachineInstr::MIFlag Flag) { … }

// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) { … }

bool requiresGetVGCall(MachineFunction &MF) { … }

static bool requiresSaveVG(MachineFunction &MF) { … }

bool isVGInstruction(MachineBasicBlock::iterator MBBI) { … }

// Convert a callee-save register save/restore instruction to do the stack
// pointer decrement/increment that allocates/deallocates the callee-save
// stack area, by rewriting the store/load to its pre/post-increment form.
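// For example (illustrative only), a prologue that allocates a 48-byte
// callee-save area:
//   sub sp, sp, #48
//   stp x29, x30, [sp]
// is folded into the single pre-increment store:
//   stp x29, x30, [sp, #-48]!
// and the matching epilogue load becomes the post-increment form:
//   ldp x29, x30, [sp], #48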
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
    MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
    int CFAOffset = 0) { … }

// Fix up callee-save register save/restore instructions to take into account
// a combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              uint64_t LocalStackSize,
                                              bool NeedsWinCFI,
                                              bool *HasWinCFI) { … }

static bool isTargetWindows(const MachineFunction &MF) { … }

// Convenience function to determine whether I is an SVE callee save.
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { … }

static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
                                        MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, bool NeedsWinCFI,
                                        bool NeedsUnwindInfo) { … }

static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
                                        MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL) { … }

// Define the current CFA rule to use the provided FP.
static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL, unsigned FixedObject) { … }

#ifndef NDEBUG
/// Collect live registers from the end of \p MI's parent up to (including) \p
/// MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
                                LivePhysRegs &LiveRegs) {
  MachineBasicBlock &MBB = *MI.getParent();
  // Initialize with the block's live-outs and step backwards to (and
  // including) MI. The loop variable is named Inst to avoid shadowing \p MI.
  LiveRegs.init(TRI);
  LiveRegs.addLiveOuts(MBB);
  for (const MachineInstr &Inst :
       reverse(make_range(MI.getIterator(), MBB.instr_end())))
    LiveRegs.stepBackward(Inst);
}
#endif

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const { … }

static bool isFuncletReturnInstr(const MachineInstr &MI) { … }

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const { … }

bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const { … }

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
StackOffset
AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             Register &FrameReg) const { … }

StackOffset
AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
                                                   int FI) const { … }

StackOffset AArch64FrameLowering::getNonLocalFrameIndexReference(
    const MachineFunction &MF, int FI) const { … }

static StackOffset getFPOffset(const MachineFunction &MF,
                               int64_t ObjectOffset) { … }

static StackOffset getStackOffset(const MachineFunction &MF,
                                  int64_t ObjectOffset) { … }

// TODO: This function currently does not work for scalable vectors.
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
                                                 int FI) const { … }

StackOffset AArch64FrameLowering::resolveFrameIndexReference(
    const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
    bool ForSimm) const { … }

StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
    const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
    Register &FrameReg, bool PreferFP, bool ForSimm) const { … }

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { … }

static bool produceCompactUnwindFrame(MachineFunction &MF) { … }

static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
                                             bool NeedsWinCFI, bool IsFirst,
                                             const TargetRegisterInfo *TRI) { … }

/// Returns true if Reg1 and Reg2 cannot be paired using an ldp/stp
/// instruction. Windows CFI requires that only consecutive registers can be
/// paired. LR and FP need to be allocated together when the frame needs to
/// save the frame-record. This means any other register pairing with LR is
/// invalid.
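/// For example, when a frame record is required, pairing LR with an arbitrary
/// GPR (say, an (x19, lr) stp) would be rejected, since LR must instead be
/// paired with FP to form the contiguous (fp, lr) frame record.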
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
                                      bool UsesWinAAPCS, bool NeedsWinCFI,
                                      bool NeedsFrameRecord, bool IsFirst,
                                      const TargetRegisterInfo *TRI) { … }

namespace {

struct RegPairInfo { … };

} // end anonymous namespace

unsigned findFreePredicateReg(BitVector &SavedRegs) { … }

// The multi-vector LD/ST instructions are available only on SME or SVE2p1
// targets.
bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget,
                                MachineFunction &MF) { … }

static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
    bool NeedsFrameRecord) { … }

bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { … }

bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MutableArrayRef<CalleeSavedInfo> CSI,
    const TargetRegisterInfo *TRI) const { … }

// Return the FrameID for an MMO.
static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
                                        const MachineFrameInfo &MFI) { … }

// Return the FrameID for a Load/Store instruction by looking at the first
// MMO.
static std::optional<int> getLdStFrameID(const MachineInstr &MI,
                                         const MachineFrameInfo &MFI) { … }

// Check if a hazard slot is needed for the current function, and if so create
// one for it. The index is stored in
// AArch64FunctionInfo->StackHazardSlotIndex, which can be used to determine
// whether any hazard padding is needed.
void AArch64FrameLowering::determineStackHazardSlot(
    MachineFunction &MF, BitVector &SavedRegs) const { … }

void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const { … }

bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *RegInfo,
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const { … }

bool AArch64FrameLowering::enableStackSlotScavenging(
    const MachineFunction &MF) const { … }

/// Returns true if there are any SVE callee saves.
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, int &Min,
                                      int &Max) { … }

// Process all the SVE stack objects and determine offsets for each object. If
// AssignOffsets is true, the offsets get assigned. Fills in the first and
// last callee-saved frame indices into Min/MaxCSFrameIndex, respectively.
// Returns the size of the SVE stack area.
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
                                              int &MinCSFrameIndex,
                                              int &MaxCSFrameIndex,
                                              bool AssignOffsets) { … }

int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
    MachineFrameInfo &MFI) const { … }

int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
    MachineFrameInfo &MFI, int &MinCSFrameIndex,
    int &MaxCSFrameIndex) const { … }

void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const { … }

namespace {

struct TagStoreInstr { … };

class TagStoreEdit { … };

void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) { … }

void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) { … }

// Check if *II is a register update that can be merged into the STGloop that
// ends at (Reg + Size). *TotalOffset is set to the required adjustment to Reg
// after the end of the loop.
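// For example (hypothetical sequence): if an STGloop tags the range
// [x0, x0 + 256) and is immediately followed by 'add x0, x0, #256', the
// pointer update duplicates the loop's own final increment and can be merged
// into it.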
bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
                       int64_t Size, int64_t *TotalOffset) { … }

void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
                  SmallVectorImpl<MachineMemOperand *> &MemRefs) { … }

void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
                            const AArch64FrameLowering *TFI,
                            bool TryMergeSPUpdate) { … }

bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
                                        int64_t &Size, bool &ZeroData) { … }

// Detect a run of memory tagging instructions for adjacent stack frame slots,
// and replace them with a shorter instruction sequence:
// * replace STG + STG with ST2G
// * replace STGloop + STGloop with STGloop
// This code needs to run when stack slot offsets are already known, but before
// FrameIndex operands in STG instructions are eliminated.
MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
                                                const AArch64FrameLowering *TFI,
                                                RegScavenger *RS) { … }

} // namespace

MachineBasicBlock::iterator
emitVGSaveRestore(MachineBasicBlock::iterator II,
                  const AArch64FrameLowering *TFI) { … }

void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const { … }

/// For Win64 AArch64 EH, the offset to the UnwindHelp object is from the SP
/// before the update. This is easily retrieved as it is exactly the offset
/// that is set in processFunctionBeforeFrameFinalized.
StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
    const MachineFunction &MF, int FI, Register &FrameReg,
    bool IgnoreSPUpdates) const { … }

/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
/// the parent's frame pointer.
unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
    const MachineFunction &MF) const { … }

/// Funclets only need to account for the space for the callee-saved registers,
/// as the locals are accounted for in the parent's stack frame.
unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
    const MachineFunction &MF) const { … }

namespace {

struct FrameObject { … };

class GroupBuilder { … };

bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) { … }

} // namespace

void AArch64FrameLowering::orderFrameObjects(
    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { … }

/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
/// least every ProbeSize bytes. Returns an iterator to the first instruction
/// after the loop. The difference between SP and TargetReg must be an exact
/// multiple of ProbeSize.
MachineBasicBlock::iterator
AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
    MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
    Register TargetReg) const { … }

void AArch64FrameLowering::inlineStackProbeFixed(
    MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
    StackOffset CFAOffset) const { … }

void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const { … }

struct StackAccess { … };

static inline raw_ostream &operator<<(raw_ostream &OS,
                                      const StackAccess &SA) { … }

void AArch64FrameLowering::emitRemarks(
    const MachineFunction &MF,
    MachineOptimizationRemarkEmitter *ORE) const { … }