//===- SyntheticSections.h -------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H #define LLD_MACHO_SYNTHETIC_SECTIONS_H #include "Config.h" #include "ExportTrie.h" #include "InputSection.h" #include "OutputSection.h" #include "OutputSegment.h" #include "Target.h" #include "Writer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <unordered_map> namespace llvm { class DWARFUnit; } // namespace llvm namespace lld::macho { class Defined; class DylibSymbol; class LoadCommand; class ObjFile; class UnwindInfoSection; class SyntheticSection : public OutputSection { … }; // All sections in __LINKEDIT should inherit from this. class LinkEditSection : public SyntheticSection { … }; // The header of the Mach-O file, which must have a file offset of zero. class MachHeaderSection final : public SyntheticSection { … }; // A hidden section that exists solely for the purpose of creating the // __PAGEZERO segment, which is used to catch null pointer dereferences. class PageZeroSection final : public SyntheticSection { … }; // This is the base class for the GOT and TLVPointer sections, which are nearly // functionally identical -- they will both be populated by dyld with addresses // to non-lazily-loaded dylib symbols. The main difference is that the // TLVPointerSection stores references to thread-local variables. 
class NonLazyPointerSectionBase : public SyntheticSection { … }; class GotSection final : public NonLazyPointerSectionBase { … }; class TlvPointerSection final : public NonLazyPointerSectionBase { … }; struct Location { … }; // Stores rebase opcodes, which tell dyld where absolute addresses have been // encoded in the binary. If the binary is not loaded at its preferred address, // dyld has to rebase these addresses by adding an offset to them. class RebaseSection final : public LinkEditSection { … }; struct BindingEntry { … }; BindingsMap; // Stores bind opcodes for telling dyld which symbols to load non-lazily. class BindingSection final : public LinkEditSection { … }; // Stores bind opcodes for telling dyld which weak symbols need coalescing. // There are two types of entries in this section: // // 1) Non-weak definitions: This is a symbol definition that weak symbols in // other dylibs should coalesce to. // // 2) Weak bindings: These tell dyld that a given symbol reference should // coalesce to a non-weak definition if one is found. Note that unlike the // entries in the BindingSection, the bindings here only refer to these // symbols by name, but do not specify which dylib to load them from. class WeakBindingSection final : public LinkEditSection { … }; // The following sections implement lazy symbol binding -- very similar to the // PLT mechanism in ELF. // // ELF's .plt section is broken up into two sections in Mach-O: StubsSection // and StubHelperSection. Calls to functions in dylibs will end up calling into // StubsSection, which contains indirect jumps to addresses stored in the // LazyPointerSection (the counterpart to ELF's .plt.got). // // We will first describe how non-weak symbols are handled. // // At program start, the LazyPointerSection contains addresses that point into // one of the entry points in the middle of the StubHelperSection. The code in // StubHelperSection will push on the stack an offset into the // LazyBindingSection. 
The push is followed by a jump to the beginning of the // StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder. // dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in // the GOT. // // The stub binder will look up the bind opcodes in the LazyBindingSection at // the given offset. The bind opcodes will tell the binder to update the // address in the LazyPointerSection to point to the symbol, so that subsequent // calls don't have to redo the symbol resolution. The binder will then jump to // the resolved symbol. // // With weak symbols, the situation is slightly different. Since there is no // "weak lazy" lookup, function calls to weak symbols are always non-lazily // bound. We emit both regular non-lazy bindings and weak bindings, so // that the weak bindings may overwrite the non-lazy bindings if an // appropriate symbol is found at runtime. However, the bound addresses will // still be written (non-lazily) into the LazyPointerSection. // // Symbols are always bound eagerly when chained fixups are used. In that case, // StubsSection contains indirect jumps to addresses stored in the GotSection. // The GOT directly contains the fixup entries, which will be replaced by the // addresses of the target symbols on load. LazyPointerSection and // StubHelperSection are not used. class StubsSection final : public SyntheticSection { … }; class StubHelperSection final : public SyntheticSection { … }; class ObjCSelRefsHelper { … }; // Objective-C stubs are hoisted objc_msgSend calls per selector called in the // program. Apple Clang produces undefined symbols for each stub, such as // '_objc_msgSend$foo', which are then synthesized by the linker. The stubs // load the particular selector 'foo' from __objc_selrefs, passing it as the // selector argument (the second argument) of the objc_msgSend call, and then // jump to objc_msgSend. The actual stub contents are mirrored from ld64. 
class ObjCStubsSection final : public SyntheticSection { … }; // Note that this section may also be targeted by non-lazy bindings. In // particular, this happens when branch relocations target weak symbols. class LazyPointerSection final : public SyntheticSection { … }; class LazyBindingSection final : public LinkEditSection { … }; // Stores a trie that describes the set of exported symbols. class ExportSection final : public LinkEditSection { … }; // Stores 'data in code' entries that describe the locations of data regions // inside code sections. This is used by llvm-objdump to distinguish jump tables // and stop them from being disassembled as instructions. class DataInCodeSection final : public LinkEditSection { … }; // Stores ULEB128 delta encoded addresses of functions. class FunctionStartsSection final : public LinkEditSection { … }; // Stores the strings referenced by the symbol table. class StringTableSection final : public LinkEditSection { … }; struct SymtabEntry { … }; struct StabsEntry { … }; // Symbols of the same type must be laid out contiguously: we choose to emit // all local symbols first, then external symbols, and finally undefined // symbols. For each symbol type, the LC_DYSYMTAB load command will record the // range (start index and total number) of those symbols in the symbol table. class SymtabSection : public LinkEditSection { … }; template <class LP> SymtabSection *makeSymtabSection(StringTableSection &); // The indirect symbol table is a list of 32-bit integers that serve as indices // into the (actual) symbol table. The indirect symbol table is a // concatenation of several sub-arrays of indices, each sub-array belonging to // a separate section. The starting offset of each sub-array is stored in the // reserved1 header field of the respective section. // // These sub-arrays provide symbol information for sections that store // contiguous sequences of symbol references. These references can be pointers // (e.g. 
those in the GOT and TLVP sections) or assembly sequences (e.g. // function stubs). class IndirectSymtabSection final : public LinkEditSection { … }; // The code signature comes at the very end of the linked output file. class CodeSignatureSection final : public LinkEditSection { … }; class CStringSection : public SyntheticSection { … }; class DeduplicatedCStringSection final : public CStringSection { … }; /* * This section contains deduplicated literal values. The 16-byte values are * laid out first, followed by the 8- and then the 4-byte ones. */ class WordLiteralSection final : public SyntheticSection { … }; class ObjCImageInfoSection final : public SyntheticSection { … }; // This section stores 32-bit __TEXT segment offsets of initializer functions. // // The compiler stores pointers to initializers in __mod_init_func. These need // to be fixed up at load time, which takes time and dirties memory. // Synthesizing InitOffsetsSection from them lets this data live in the // read-only __TEXT segment instead. This section is used by default when // chained fixups are enabled. // // There is no similar counterpart to __mod_term_func, as that section is // deprecated, and static destructors are instead handled by registering them // via __cxa_atexit from an autogenerated initializer function (see D121736). class InitOffsetsSection final : public SyntheticSection { … }; // This SyntheticSection is for the __objc_methlist section, which contains // relative method lists if the -objc_relative_method_lists option is enabled. class ObjCMethListSection final : public SyntheticSection { … }; // Chained fixups are a replacement for classic dyld opcodes. In this format, // most of the metadata necessary for binding symbols and rebasing addresses is // stored directly in the memory location that will have the fixup applied. // // The fixups form singly linked lists, each one covering a single page in // memory. 
The __LINKEDIT,__chainfixups section stores the page offset of the // first fixup of each page; the rest can be found by walking the chain using // the offset that is embedded in each entry. // // This setup allows pages to be relocated lazily at page-in time and without // being dirtied. The kernel can discard and load them again as needed. This // technique, called page-in linking, was introduced in macOS 13. // // The benefits of this format are: // - smaller __LINKEDIT segment, as most of the fixup information is stored in // the data segment // - faster startup, since not all relocations need to be done upfront // - slightly lower memory usage, as fewer pages are dirtied // // Userspace x86_64 and arm64 binaries have two types of fixup entries: // - Rebase entries contain an absolute address, to which the object's load // address will be added to get the final value. This is used for loading // the address of a symbol defined in the same binary. // - Binding entries are mostly used for symbols imported from other dylibs, // but for weakly bound and interposable symbols as well. They are looked up // by a (symbol name, library) pair stored in __chainfixups. This import // entry also encodes whether the import is weak (i.e. if the symbol is // missing, it should be set to null instead of producing a load error). // The fixup encodes an ordinal associated with the import, and an optional // addend. // // The entries are tightly packed 64-bit bitfields. One of the bits specifies // which kind of fixup to interpret them as. // // LLD generates the fixup data in 5 stages: // 1. While scanning relocations, we make a note of each location that needs // a fixup by calling addRebase() or addBinding(). During this scan, we assign // a unique ordinal to each (symbol name, library, addend) import tuple. // 2. After addresses have been assigned to all sections, and thus the memory // layout of the linked image is final, finalizeContents() is called. 
Here, // the page offsets of the chain start entries are calculated. // 3. ChainedFixupsSection::writeTo() writes the page start offsets and the // imports table to the output file. // 4. Each section's fixup entries are encoded and written to disk in // ConcatInputSection::writeTo(), but without writing the offsets that form // the chain. // 5. Finally, the fixups of each page (which might correspond to multiple // sections) are linked together in Writer::buildFixupChains(). class ChainedFixupsSection final : public LinkEditSection { … }; void writeChainedRebase(uint8_t *buf, uint64_t targetVA); void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); struct InStruct { … }; extern InStruct in; extern std::vector<SyntheticSection *> syntheticSections; void createSyntheticSymbols(); } // namespace lld::macho #endif