//===- Writer.cpp ---------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Writer.h" #include "COFFLinkerContext.h" #include "CallGraphSort.h" #include "Config.h" #include "DLL.h" #include "InputFiles.h" #include "LLDMapFile.h" #include "MapFile.h" #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include <algorithm> #include <cstdio> #include <map> #include <memory> #include <utility> usingnamespacellvm; usingnamespacellvm::COFF; usingnamespacellvm::object; usingnamespacellvm::support; usingnamespacellvm::support::endian; usingnamespacelld; usingnamespacelld::coff; /* To re-generate DOSProgram: $ cat > /tmp/DOSProgram.asm org 0 ; Copy cs to ds. push cs pop ds ; Point ds:dx at the $-terminated string. mov dx, str ; Int 21/AH=09h: Write string to standard output. mov ah, 0x9 int 0x21 ; Int 21/AH=4Ch: Exit with return code (in AL). 
mov ax, 0x4C01 int 0x21 str: db 'This program cannot be run in DOS mode.$' align 8, db 0 $ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin $ xxd -i /tmp/DOSProgram.bin */ static unsigned char dosProgram[] = …; static_assert …; static const int dosStubSize = …; static_assert …; static const int numberOfDataDirectory = …; namespace { class DebugDirectoryChunk : public NonSectionChunk { … }; class CVDebugRecordChunk : public NonSectionChunk { … }; class ExtendedDllCharacteristicsChunk : public NonSectionChunk { … }; // PartialSection represents a group of chunks that contribute to an // OutputSection. Collating a collection of PartialSections of the same name and // characteristics constitutes the OutputSection. class PartialSectionKey { … }; struct ChunkRange { … }; // The writer writes a SymbolTable result to a file. class Writer { … }; } // anonymous namespace void lld::coff::writeResult(COFFLinkerContext &ctx) { … } void OutputSection::addChunk(Chunk *c) { … } void OutputSection::insertChunkAtStart(Chunk *c) { … } void OutputSection::setPermissions(uint32_t c) { … } void OutputSection::merge(OutputSection *other) { … } // Write the section header to a given buffer. void OutputSection::writeHeaderTo(uint8_t *buf, bool isDebug) { … } void OutputSection::addContributingPartialSection(PartialSection *sec) { … } // Check whether the target address S is in range from a relocation // of type relType at address P. bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin, MachineTypes machine) { … } // Return the last thunk for the given target if it is in range, // or create a new one. std::pair<Defined *, bool> Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target, uint64_t p, uint16_t type, int margin, MachineTypes machine) { … } // This checks all relocations, and for any relocation which isn't in range // it adds a thunk after the section chunk that contains the relocation. 
// If the latest thunk for the specific target is in range, that is used // instead of creating a new thunk. All range checks are done with the // specified margin, to make sure that relocations that originally are in // range, but only barely, also get thunks - in case other added thunks make // the target go out of range. // // After adding thunks, we verify that all relocations are in range (with // no extra margin requirements). If this fails, we restart (throwing away // the previously created thunks) and retry with a wider margin. bool Writer::createThunks(OutputSection *os, int margin) { … } // Create a code map for CHPE metadata. void Writer::createECCodeMap() { … } // Verify that all relocations are in range, with no extra margin requirements. bool Writer::verifyRanges(const std::vector<Chunk *> chunks) { … } // Assign addresses and add thunks if necessary. void Writer::finalizeAddresses() { … } void Writer::writePEChecksum() { … } // The main function of the writer. void Writer::run() { … } static StringRef getOutputSectionName(StringRef name) { … } // For /order. void Writer::sortBySectionOrder(std::vector<Chunk *> &chunks) { … } // Change the characteristics of existing PartialSections that belong to the // section Name to Chars. void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) { … } // Sort concrete section chunks from GNU import libraries. // // GNU binutils doesn't use short import files, but instead produces import // libraries that consist of object files, with section chunks for the .idata$* // sections. These are linked just as regular static libraries. Each import // library consists of one header object, one object file for every imported // symbol, and one trailer object. 
In order for the .idata tables/lists to // be formed correctly, the section chunks within each .idata$* section need // to be grouped by library, and sorted alphabetically within each library // (which makes sure the header comes first and the trailer last). bool Writer::fixGnuImportChunks() { … } // Add generated idata chunks, for imported symbols and DLLs, and a // terminator in .idata$2. void Writer::addSyntheticIdata() { … } // Locate the first Chunk and size of the import directory list and the // IAT. void Writer::locateImportTables() { … } // Return whether a SectionChunk's suffix (the dollar and any trailing // suffix) should be removed and sorted into the main suffixless // PartialSection. static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name, bool isMinGW) { … } void Writer::sortSections() { … } // Create output section objects and add them to OutputSections. void Writer::createSections() { … } void Writer::createMiscChunks() { … } // Create .idata section for the DLL-imported symbol table. // The format of this section is inherently Windows-specific. // The IdataContents class abstracts away the details for us, // so we just let it create chunks and add them to the section. void Writer::createImportTables() { … } void Writer::appendImportThunks() { … } void Writer::createExportTable() { … } void Writer::removeUnusedSections() { … } // The Windows loader doesn't seem to like empty sections, // so we remove them, if any. void Writer::removeEmptySections() { … } void Writer::assignOutputSectionIndices() { … } size_t Writer::addEntryToStringTable(StringRef str) { … } std::optional<coff_symbol16> Writer::createSymbol(Defined *def) { … } void Writer::createSymbolAndStringTable() { … } void Writer::mergeSections() { … } // EC targets may have chunks of various architectures mixed together at this // point. Group code chunks of the same architecture together by sorting chunks // by their EC range type. 
void Writer::sortECChunks() { … } // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { … } template <typename PEHeaderTy> void Writer::writeHeader() { … } void Writer::openFile(StringRef path) { … } void Writer::createSEHTable() { … } // Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set // cannot contain duplicates. Therefore, the set is uniqued by Chunk and the // symbol's offset into that Chunk. static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) { … } // Given a symbol, add it to the GFIDs table if it is a live, defined, function // symbol in an executable section. static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms, Symbol *s) { … } // Visit all relocations from all section contributions of this object file and // mark the relocation target as address-taken. void Writer::markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols) { … } // Create the guard function id table. This is a table of RVAs of all // address-taken functions. It is sorted and uniqued, just like the safe SEH // table. void Writer::createGuardCFTables() { … } // Take a list of input sections containing symbol table indices and add those // symbols to a vector. The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. void Writer::getSymbolsFromSections(ObjFile *file, ArrayRef<SectionChunk *> symIdxChunks, std::vector<Symbol *> &symbols) { … } // Take a list of input sections containing symbol table indices and add those // symbols to an RVA table. void Writer::markSymbolsForRVATable(ObjFile *file, ArrayRef<SectionChunk *> symIdxChunks, SymbolRVASet &tableSymbols) { … } // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. 
void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym, bool hasFlag) { … } // Create CHPE metadata chunks. void Writer::createECChunks() { … } // MinGW specific. Gather all relocations that are imported from a DLL even // though the code didn't expect it to, produce the table that the runtime // uses for fixing them up, and provide the synthetic symbols that the // runtime uses for finding the table. void Writer::createRuntimePseudoRelocs() { … } // MinGW specific. // The MinGW .ctors and .dtors lists have sentinels at each end; // a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end. // There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__ // and __DTOR_LIST__ respectively. void Writer::insertCtorDtorSymbols() { … } // Handles /section options to allow users to overwrite // section attributes. void Writer::setSectionPermissions() { … } // Set symbols used by ARM64EC metadata. void Writer::setECSymbols() { … } // Write section contents to a mmap'ed file. void Writer::writeSections() { … } void Writer::writeBuildId() { … } // Sort .pdata section contents according to PE/COFF spec 5.5. template <typename T> void Writer::sortExceptionTable(ChunkRange &exceptionTable) { … } // Sort .pdata section contents according to PE/COFF spec 5.5. void Writer::sortExceptionTables() { … } // The CRT section contains, among other things, the array of function // pointers that initialize every global variable that is not trivially // constructed. The CRT calls them one after the other prior to invoking // main(). 
// // As per C++ spec, 3.6.2/2.3, // "Variables with ordered initialization defined within a single // translation unit shall be initialized in the order of their definitions // in the translation unit" // // It is therefore critical to sort the chunks containing the function // pointers in the order that they are listed in the object file (top to // bottom), otherwise global objects might not be initialized in the // correct order. void Writer::sortCRTSectionChunks(std::vector<Chunk *> &chunks) { … } OutputSection *Writer::findSection(StringRef name) { … } uint32_t Writer::getSizeOfInitializedData() { … } // Add base relocations to .reloc section. void Writer::addBaserels() { … } // Add addresses to .reloc section. Note that addresses are grouped by page. void Writer::addBaserelBlocks(std::vector<Baserel> &v) { … } PartialSection *Writer::createPartialSection(StringRef name, uint32_t outChars) { … } PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) { … } void Writer::fixTlsAlignment() { … } void Writer::prepareLoadConfig() { … } template <typename T> void Writer::prepareLoadConfig(T *loadConfig) { … } template <typename T> void Writer::checkLoadConfigGuardData(const T *loadConfig) { … }