//===- ConcatOutputSection.cpp --------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "ConcatOutputSection.h" #include "Config.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/CommonLinkerContext.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/TimeProfiler.h" using namespace llvm; using namespace llvm::MachO; using namespace lld; using namespace lld::macho; MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections; void ConcatOutputSection::addInput(ConcatInputSection *input) { … } // Branch-range extension can be implemented in two ways, either through ... // // (1) Branch islands: Single branch instructions (also of limited range), // that might be chained in multiple hops to reach the desired // destination. On ARM64, as many as 16 branch islands might be needed to // hop between opposite ends of a 2 GiB program. LD64 uses branch islands // exclusively, even when it needs excessive hops. // // (2) Thunks: Instruction(s) to load the destination address into a scratch // register, followed by a register-indirect branch. Thunks are // constructed to reach any arbitrary address, so need not be // chained. Although thunks need not be chained, a program might need // multiple thunks to the same destination distributed throughout a large // program so that all call sites can have one within range. // // The optimal approach is to mix islands for destinations within two hops, // and use thunks for destinations at greater distance. For now, we only // implement thunks. TODO: Add support for branch islands.
// // Internally -- as expressed in LLD's data structures -- a // branch-range-extension thunk consists of: // // (1) new Defined symbol for the thunk named // <FUNCTION>.thunk.<SEQUENCE>, which references ... // (2) new InputSection, which contains ... // (3.1) new data for the instructions to load & branch to the far address + // (3.2) new Relocs on instructions to load the far address, which reference ... // (4.1) existing Defined symbol for the real function in __text, or // (4.2) existing DylibSymbol for the real function in a dylib // // Nearly-optimal thunk-placement algorithm features: // // * Single pass: O(n) in the number of call sites. // // * Accounts for the exact space overhead of thunks - no heuristics // // * Exploits the full range of call instructions - forward & backward // // Data: // // * DenseMap<Symbol *, ThunkInfo> thunkMap: Maps the function symbol // to its thunk bookkeeper. // // * struct ThunkInfo (bookkeeper): Call instructions have limited range, and // distant call sites might be unable to reach the same thunk, so multiple // thunks are necessary to serve all call sites in a very large program. A // ThunkInfo stores state for all thunks associated with a particular // function: // (a) thunk symbol, // (b) input section containing stub code, and // (c) sequence number for the active thunk incarnation. // When an old thunk goes out of range, we increment the sequence number and // create a new thunk named <FUNCTION>.thunk.<SEQUENCE>. // // * A thunk consists of // (a) a Defined symbol pointing to // (b) an InputSection holding machine code (similar to a MachO stub), and // (c) relocs referencing the real function for fixing up the stub code. // // * std::vector<InputSection *> TextOutputSection::thunks: A vector parallel // to the inputs vector. We store new thunks via cheap vector append, rather // than costly insertion into the inputs vector.
// // Control Flow: // // * During address assignment, TextOutputSection::finalize() examines call // sites by ascending address and creates thunks. When a function is beyond // the range of a call site, we need a thunk. Place it at the largest // available forward address from the call site. Call sites increase // monotonically and thunks are always placed as far forward as possible; // thus, we place thunks at monotonically increasing addresses. Once a thunk // is placed, it and all previous input-section addresses are final. // // * TextOutputSection::finalize() and TextOutputSection::writeTo() merge // the inputs and thunks vectors (both ordered by ascending address), which // is simple and cheap. DenseMap<Symbol *, ThunkInfo> lld::macho::thunkMap; // Determine whether we need thunks, which depends on the target arch -- RISC // (e.g., ARM) generally does because it has limited-range branch/call // instructions, whereas CISC (e.g., x86) generally doesn't. RISC only needs // thunks for programs so large that branch source & destination addresses // might differ by more than the range of branch instruction(s). bool TextOutputSection::needsThunks() const { … } // Since __stubs is placed after __text, we must estimate the address // beyond which stubs are within range of a simple forward branch. // This is called exactly once, when the last input section has been finalized. uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const { … } void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) { … } void ConcatOutputSection::finalizeContents() { … } void TextOutputSection::finalize() { … } void ConcatOutputSection::writeTo(uint8_t *buf) const { … } void TextOutputSection::writeTo(uint8_t *buf) const { … } void ConcatOutputSection::finalizeFlags(InputSection *input) { … } ConcatOutputSection * ConcatOutputSection::getOrCreateForInput(const InputSection *isec) { … } NamePair macho::maybeRenameSection(NamePair key) { … }