//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains the ARM implementation of TargetFrameLowering class. // //===----------------------------------------------------------------------===// // // This file contains the ARM implementation of TargetFrameLowering class. // // On ARM, stack frames are structured as follows: // // The stack grows downward. // // All of the individual frame areas on the frame below are optional, i.e. it's // possible to create a function so that the particular area isn't present // in the frame. // // At function entry, the "frame" looks as follows: // // | | Higher address // |-----------------------------------| // | | // | arguments passed on the stack | // | | // |-----------------------------------| <- sp // | | Lower address // // // After the prologue has run, the frame has the following general structure. // Technically the last frame area (VLAs) isn't created until the // main function body runs, after the prologue has finished. However, it's depicted here // for completeness. // // | | Higher address // |-----------------------------------| // | | // | arguments passed on the stack | // | | // |-----------------------------------| <- (sp at function entry) // | | // | varargs from registers | // | | // |-----------------------------------| // | | // | prev_lr | // | prev_fp | // | (a.k.a. 
"frame record") | // | | // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11) // | | // | callee-saved gpr registers | // | | // |-----------------------------------| // | | // | callee-saved fp/simd regs | // | | // |-----------------------------------| // |.empty.space.to.make.part.below....| // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at // |.the.standard.8-byte.alignment.....| compile time; if present) // |-----------------------------------| // | | // | local variables of fixed size | // | including spill slots | // |-----------------------------------| <- base pointer (not defined by ABI, // |.variable-sized.local.variables....| LLVM chooses r6) // |.(VLAs)............................| (size of this area is unknown at // |...................................| compile time) // |-----------------------------------| <- sp // | | Lower address // // // To access the data in a frame, a constant offset from one of the // pointers (fp, bp, sp) must be computable at compile time. The size // of the areas with a dotted background cannot be computed at compile time // if they are present, so all three of fp, bp and sp must be set up // in order to access all contents in the frame areas, // assuming all of the frame areas are non-empty. // // For most functions, some of the frame areas are empty. For those functions, // it may not be necessary to set up fp or bp: // * A base pointer is definitely needed when there are both VLAs and local // variables with more-than-default alignment requirements. // * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // // In some cases when a base pointer is not strictly needed, it is generated // anyway when offsets from the frame pointer to access local variables become // so large that the offset can't be encoded in the immediate fields of loads // or stores. 
// // The frame pointer might be chosen to be r7 or r11, depending on the target // architecture and operating system. See ARMSubtarget::getFramePointerReg for // details. // // Outgoing function arguments must be at the bottom of the stack frame when // calling another function. If we do not have variable-sized stack objects, we // can allocate a "reserved call frame" area at the bottom of the local // variable area, large enough for all outgoing calls. If we do have VLAs, then // the stack pointer must be decremented and incremented around each call to // make space for the arguments below the VLAs. // //===----------------------------------------------------------------------===// #include "ARMFrameLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "Utils/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" 
#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> #include <iterator> #include <utility> #include <vector> #define DEBUG_TYPE … usingnamespacellvm; static cl::opt<bool> SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog")); static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs); ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) : … { … } bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const { … } /// Returns true if the target can safely skip saving callee-saved registers /// for noreturn nounwind functions. bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const { … } /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { … } /// isFPReserved - Return true if the frame pointer register should be /// considered a reserved register within the scope of the specified function. bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const { … } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for /// add/sub sp brackets around call sites. 
Returns true if the call frame is /// included as part of the stack frame. bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { … } /// canSimplifyCallFramePseudos - If there is a reserved call frame, the /// call frame pseudos can be simplified. Unlike most targets, having a FP /// is not sufficient here since we still may reference some objects via SP /// even when FP is available in Thumb2 mode. bool ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { … } // Returns how much of the incoming argument stack area we should clean up in an // epilogue. For the C calling convention this will be 0, for guaranteed tail // call conventions it can be positive (a normal return or a tail call to a // function that uses less stack space for arguments) or negative (for a tail // call to a function that needs more stack space than us for arguments). static int getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB) { … } static bool needsWinCFI(const MachineFunction &MF) { … } // Given a load or a store instruction, generate an appropriate unwinding SEH // code on Windows. 
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags) { … } static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) { … } static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags) { … } static void emitRegPlusImmediate( bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { … } static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { … } static int sizeOfSPAdjustment(const MachineInstr &MI) { … } static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes) { … } namespace { struct StackAdjustingInsts { … }; } // end anonymous namespace /// Emit an instruction sequence that will align the address in /// register Reg by zero-ing out the lower bits. For versions of the /// architecture that support Neon, this must be done in a single /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a /// single instruction. That function only gets called when optimizing /// spilling of D registers on a core with the Neon instruction set /// present. 
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction) { … } /// We need the offset of the frame pointer relative to other MachineFrameInfo /// offsets which are encoded relative to SP at function begin. /// See also emitPrologue() for how the FP is set up. /// Unfortunately we cannot determine this value in determineCalleeSaves() yet /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use /// this to produce a conservative estimate that we check in an assert() later. static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF) { … } void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { … } void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { … } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for /// debug info. It's the same as what we use for resolving the code-gen /// references for now. FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. 
StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const { … } int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const { … } void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef<CalleeSavedInfo> CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, bool (*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { … } void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, bool (*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const { … } /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers /// starting from d8. Also insert stack realignment code and leave the stack /// pointer pointing to the d8 spill slot. static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) { … } /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an /// iterator to the following instruction. static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs) { … } /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers /// starting from d8. These instructions are assumed to execute while the /// stack is still aligned, unlike the code inserted by emitPopInst. 
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) { … } bool ARMFrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { … } bool ARMFrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { … } // FIXME: Make generic? static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII) { … } /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these /// instructions will require a scratch register during their expansion later. // FIXME: Move to TII? static unsigned estimateRSStackSizeLimit(MachineFunction &MF, const TargetFrameLowering *TFI, bool &HasNonSPFrameIndex) { … } // In functions that realign the stack, it can be an advantage to spill the // callee-saved vector registers after realigning the stack. The vst1 and vld1 // instructions take alignment hints that can improve performance. static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { … } bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { … } bool ARMFrameLowering::requiresAAPCSFrameRecord( const MachineFunction &MF) const { … } // Thumb1 may require a spill when storing to a frame index through FP (or any // access with execute-only), for cases where FP is a high register (R11). This // scans the function for cases where this may happen. 
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI) { … } void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { … } void ARMFrameLowering::updateLRRestored(MachineFunction &MF) { … } void ARMFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { … } void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const { … } bool ARMFrameLowering::assignCalleeSavedSpillSlots( MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { … } const TargetFrameLowering::SpillSlot * ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { … } MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { … } /// Get the minimum constant for ARM that is greater than or equal to the /// argument. In ARM, constants can have any value that can be produced by /// rotating an 8-bit value to the right by an even number of bits within a /// 32-bit word. static uint32_t alignToARMConstant(uint32_t Value) { … } // The stack limit in the TCB is set to this many bytes above the actual // stack limit. static const uint64_t kSplitStackAvailable = …; // Adjust the function prologue to enable split stacks. This currently only // supports Android and Linux. // // The ABI of the segmented stack prologue is chosen somewhat arbitrarily, but // must be well defined in order to allow for consistent implementations of the // __morestack helper function. The ABI is also not a normal ABI in that it // doesn't follow the normal calling conventions because this allows the // prologue of each function to be optimized further. 
// // Currently, the ABI looks like this (when calling __morestack): // // * r4 holds the minimum stack size requested for this function call // * r5 holds the stack size of the arguments to the function // * the beginning of the function is 3 instructions after the call to // __morestack // // Implementations of __morestack should use r4 to allocate a new stack, r5 to // place the arguments onto the new stack, and the 3-instruction knowledge to // jump directly to the body of the function when working on the new stack. // // An old (and possibly no longer compatible) implementation of __morestack for // ARM can be found at [1]. // // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S void ARMFrameLowering::adjustForSegmentedStacks( MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { … }