//===- ARM64.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Arch/ARM64Common.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"

#include "lld/Common/ErrorHandler.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;

namespace {

struct ARM64 : ARM64Common { … };

} // namespace

// Random notes on reloc types:
// ADDEND always pairs with BRANCH26, PAGE21, or PAGEOFF12
// POINTER_TO_GOT: ld64 supports a 4-byte pc-relative form as well as an 8-byte
// absolute version of this relocation. The semantics of the absolute relocation
// are weird -- it results in the value of the GOT slot being written, instead
// of the address. Let's not support it unless we find a real-world use case.

static constexpr std::array<RelocAttrs, 11> relocAttrsArray{ … };

static constexpr uint32_t stubCode[] = …;

void ARM64::writeStub(uint8_t *buf8, const Symbol &sym,
                      uint64_t pointerVA) const { … }

static constexpr uint32_t stubHelperHeaderCode[] = …;

void ARM64::writeStubHelperHeader(uint8_t *buf8) const { … }

static constexpr uint32_t stubHelperEntryCode[] = …;

void ARM64::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym,
                                 uint64_t entryVA) const { … }

static constexpr uint32_t objcStubsFastCode[] = …;

static constexpr uint32_t objcStubsSmallCode[] = …;

void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
                                 uint64_t &stubOffset, uint64_t selrefVA,
                                 Symbol *objcMsgSend) const { … }

// A thunk is the relaxed variation of stubCode. We don't need the
// extra indirection through a lazy pointer because the target address
// is known at link time.
static constexpr uint32_t thunkCode[] = …;

void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) { … }

// Just a single direct branch to the target function.
static constexpr uint32_t icfSafeThunkCode[] = …;

void ARM64::initICFSafeThunkBody(InputSection *thunk,
                                 InputSection *branchTarget) const { … }

uint32_t ARM64::getICFSafeThunkSize() const { … }

ARM64::ARM64() : … { … }

namespace {

struct Adrp { … };

struct Add { … };

enum ExtendType { … };

struct Ldr { … };

} // namespace

static bool parseAdrp(uint32_t insn, Adrp &adrp) { … }

static bool parseAdd(uint32_t insn, Add &add) { … }

static bool parseLdr(uint32_t insn, Ldr &ldr) { … }

static bool isValidAdrOffset(int32_t delta) { … }

static void writeAdr(void *loc, uint32_t dest, int32_t delta) { … }

static void writeNop(void *loc) { … }

static bool isLiteralLdrEligible(const Ldr &ldr) { … }

static void writeLiteralLdr(void *loc, const Ldr &ldr) { … }

static bool isImmediateLdrEligible(const Ldr &ldr) { … }

static void writeImmediateLdr(void *loc, const Ldr &ldr) { … }

// Transforms a pair of adrp+add instructions into an adr instruction if the
// target is within the +/- 1 MiB range allowed by the adr's 21-bit signed
// immediate offset.
//
// adrp xN, _foo@PAGE
// add xM, xN, _foo@PAGEOFF
// ->
// adr xM, _foo
// nop
static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
                         uint64_t offset1, uint64_t offset2) { … }
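// Illustrative sketch only: sketchAdrpAddToAdr is a hypothetical helper, not
// part of this file, showing the bit-level idea behind the rewrite described
// above. It takes the raw buffer locations of the two instructions, the VA of
// the first one, and the final VA of _foo; the elided applyAdrpAdd presumably
// works through the parseAdrp/parseAdd and writeAdr/writeNop helpers declared
// earlier instead.
[[maybe_unused]] static void sketchAdrpAddToAdr(uint8_t *loc1, uint8_t *loc2,
                                                uint64_t insnVA,
                                                uint64_t targetVA) {
  int64_t delta = targetVA - insnVA;
  // adr encodes a signed 21-bit byte offset, i.e. +/- 1 MiB.
  if (delta < -(1LL << 20) || delta >= (1LL << 20))
    return; // out of range; keep the original adrp+add pair
  uint32_t add = read32le(loc2);
  uint32_t rd = add & 0x1f; // xM, the destination register of the add
  uint32_t immlo = delta & 0x3;
  uint32_t immhi = (delta >> 2) & 0x7ffff;
  // adr xM, _foo: bit 31 = 0, immlo in bits 30:29, 0b10000 in bits 28:24,
  // immhi in bits 23:5, Rd in bits 4:0.
  write32le(loc1, 0x10000000 | (immlo << 29) | (immhi << 5) | rd);
  write32le(loc2, 0xd503201f); // nop
}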
// Transforms two adrp instructions into a single adrp if their referent
// addresses are located on the same 4096-byte page.
//
// adrp xN, _foo@PAGE
// adrp xN, _bar@PAGE
// ->
// adrp xN, _foo@PAGE
// nop
static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec,
                          uint64_t offset1, uint64_t offset2) { … }

// Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
// load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
// as ldr can encode a signed 19-bit offset that gets multiplied by 4.
//
// adrp xN, _foo@PAGE
// ldr xM, [xN, _foo@PAGEOFF]
// ->
// nop
// ldr xM, _foo
static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec,
                         uint64_t offset1, uint64_t offset2) { … }

// GOT loads are emitted by the compiler as a pair of adrp and ldr instructions,
// but they may be changed to adrp+add by relaxGotLoad(). This hint performs
// the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed.
static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec,
                            uint64_t offset1, uint64_t offset2) { … }

// Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
// address: it loads directly if the target is close enough, or relaxes to an
// adr(p)+ldr sequence if it's not.
//
// adrp x0, _foo@PAGE
// add x1, x0, _foo@PAGEOFF
// ldr x2, [x1, #off]
static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
                            uint64_t offset1, uint64_t offset2,
                            uint64_t offset3) { … }

// Relaxes a GOT-indirect load.
// If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
// the GOT entry can be loaded with a single literal ldr instruction.
// If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
// we perform the AdrpAddLdr transformation.
static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
                               uint64_t offset1, uint64_t offset2,
                               uint64_t offset3) { … }

static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) { … }

template <typename Callback>
static void forEachHint(ArrayRef<uint8_t> data, Callback callback) { … }

// On RISC architectures like arm64, materializing a memory address generally
// takes multiple instructions. If the referenced symbol is located close enough
// in memory, fewer instructions are needed.
//
// Linker optimization hints record where addresses are computed. After
// addresses have been assigned, if possible, we change them to a shorter
// sequence of instructions. The size of the binary is not modified; the
// eliminated instructions are replaced with NOPs. This still leads to faster
// code as the CPU can skip over NOPs quickly.
//
// LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which
// points to a sequence of ULEB128-encoded numbers. Each entry specifies a
// transformation kind, and 2 or 3 addresses where the instructions are located.
void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const {
  …
}

TargetInfo *macho::createARM64TargetInfo() { … }
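// A hedged sketch of walking the ULEB128-encoded hint stream described in the
// comment above applyOptimizationHints(). sketchWalkHints is a hypothetical
// name, and the record layout -- <kind, address count, addresses...>, with a
// zero kind assumed to terminate the list -- mirrors that comment rather than
// any spec text. decodeULEB128() comes from llvm/Support/LEB128.h, which is
// already included; the file's own readValue()/forEachHint() are the real
// (elided) equivalents.
template <typename Fn>
static void sketchWalkHints(ArrayRef<uint8_t> data, Fn callback) {
  const uint8_t *p = data.begin(), *end = data.end();
  while (p < end) {
    unsigned len = 0;
    uint64_t kind = decodeULEB128(p, &len, end);
    p += len;
    if (kind == 0) // assumed end-of-list marker
      break;
    uint64_t count = decodeULEB128(p, &len, end);
    p += len;
    SmallVector<uint64_t, 3> addrs;
    for (uint64_t i = 0; i < count && p < end; ++i) {
      addrs.push_back(decodeULEB128(p, &len, end));
      p += len;
    }
    callback(kind, ArrayRef<uint64_t>(addrs));
  }
}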