Writer.cpp | Explore in Territory

//===- Writer.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Writer.h"
#include "COFFLinkerContext.h"
#include "CallGraphSort.h"
#include "Config.h"
#include "DLL.h"
#include "InputFiles.h"
#include "LLDMapFile.h"
#include "MapFile.h"
#include "PDB.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Timer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <cstdio>
#include <map>
#include <memory>
#include <utility>

using namespace llvm;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::coff;

/* To re-generate DOSProgram:
$ cat > /tmp/DOSProgram.asm
org 0
        ; Copy cs to ds.
        push cs
        pop ds
        ; Point ds:dx at the $-terminated string.
        mov dx, str
        ; Int 21/AH=09h: Write string to standard output.
        mov ah, 0x9
        int 0x21
        ; Int 21/AH=4Ch: Exit with return code (in AL).
        mov ax, 0x4C01
        int 0x21
str:
        db 'This program cannot be run in DOS mode.$'
align 8, db 0
$ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin
$ xxd -i /tmp/DOSProgram.bin
*/
static unsigned char dosProgram[] = …;
static_assert …;

static const int dosStubSize = …;
static_assert …;

static const int numberOfDataDirectory = …;

namespace {

class DebugDirectoryChunk : public NonSectionChunk { … };

class CVDebugRecordChunk : public NonSectionChunk { … };

class ExtendedDllCharacteristicsChunk : public NonSectionChunk { … };

// PartialSection represents a group of chunks that contribute to an
// OutputSection. Collating a collection of PartialSections of same name and
// characteristics constitutes the OutputSection.
class PartialSectionKey { … };

struct ChunkRange { … };

// The writer writes a SymbolTable result to a file.
class Writer { … };
} // anonymous namespace

void lld::coff::writeResult(COFFLinkerContext &ctx) { … }

void OutputSection::addChunk(Chunk *c) { … }

void OutputSection::insertChunkAtStart(Chunk *c) { … }

void OutputSection::setPermissions(uint32_t c) { … }

void OutputSection::merge(OutputSection *other) { … }

// Write the section header to a given buffer.
void OutputSection::writeHeaderTo(uint8_t *buf, bool isDebug) { … }

void OutputSection::addContributingPartialSection(PartialSection *sec) { … }

// Check whether the target address S is in range from a relocation
// of type relType at address P.
bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
                       MachineTypes machine) { … }

// Return the last thunk for the given target if it is in range,
// or create a new one.
std::pair<Defined *, bool>
Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target,
                 uint64_t p, uint16_t type, int margin, MachineTypes machine) { … }

// This checks all relocations, and for any relocation which isn't in range
// it adds a thunk after the section chunk that contains the relocation.
// If the latest thunk for the specific target is in range, that is used
// instead of creating a new thunk. All range checks are done with the
// specified margin, to make sure that relocations that originally are in
// range, but only barely, also get thunks - in case other added thunks make
// the target go out of range.
//
// After adding thunks, we verify that all relocations are in range (with
// no extra margin requirements). If this fails, we restart (throwing away
// the previously created thunks) and retry with a wider margin.
bool Writer::createThunks(OutputSection *os, int margin) { … }

// Create a code map for CHPE metadata.
void Writer::createECCodeMap() { … }

// Verify that all relocations are in range, with no extra margin requirements.
bool Writer::verifyRanges(const std::vector<Chunk *> chunks) { … }

// Assign addresses and add thunks if necessary.
void Writer::finalizeAddresses() { … }

void Writer::writePEChecksum() { … }

// The main function of the writer.
void Writer::run() { … }

static StringRef getOutputSectionName(StringRef name) { … }

// For /order.
void Writer::sortBySectionOrder(std::vector<Chunk *> &chunks) { … }

// Change the characteristics of existing PartialSections that belong to the
// section Name to Chars.
void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) { … }

// Sort concrete section chunks from GNU import libraries.
//
// GNU binutils doesn't use short import files, but instead produces import
// libraries that consist of object files, with section chunks for the .idata$*
// sections. These are linked just as regular static libraries. Each import
// library consists of one header object, one object file for every imported
// symbol, and one trailer object. In order for the .idata tables/lists to
// be formed correctly, the section chunks within each .idata$* section need
// to be grouped by library, and sorted alphabetically within each library
// (which makes sure the header comes first and the trailer last).
bool Writer::fixGnuImportChunks() { … }

// Add generated idata chunks, for imported symbols and DLLs, and a
// terminator in .idata$2.
void Writer::addSyntheticIdata() { … }

void Writer::appendECImportTables() { … }

// Locate the first Chunk and size of the import directory list and the
// IAT.
void Writer::locateImportTables() { … }

// Return whether a SectionChunk's suffix (the dollar and any trailing
// suffix) should be removed and sorted into the main suffixless
// PartialSection.
static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name,
                                     bool isMinGW) { … }

void Writer::sortSections() { … }

// Create output section objects and add them to OutputSections.
void Writer::createSections() { … }

void Writer::createMiscChunks() { … }

// Create .idata section for the DLL-imported symbol table.
// The format of this section is inherently Windows-specific.
// IdataContents class abstracted away the details for us,
// so we just let it create chunks and add them to the section.
void Writer::createImportTables() { … }

void Writer::appendImportThunks() { … }

void Writer::createExportTable() { … }

void Writer::removeUnusedSections() { … }

// The Windows loader doesn't seem to like empty sections,
// so we remove them if any.
void Writer::removeEmptySections() { … }

void Writer::assignOutputSectionIndices() { … }

size_t Writer::addEntryToStringTable(StringRef str) { … }

std::optional<coff_symbol16> Writer::createSymbol(Defined *def) { … }

void Writer::createSymbolAndStringTable() { … }

void Writer::mergeSections() { … }

// EC targets may have chunks of various architectures mixed together at this
// point. Group code chunks of the same architecture together by sorting chunks
// by their EC range type.
void Writer::sortECChunks() { … }

// Visits all sections to assign incremental, non-overlapping RVAs and
// file offsets.
void Writer::assignAddresses() { … }

template <typename PEHeaderTy> void Writer::writeHeader() { … }

void Writer::openFile(StringRef path) { … }

void Writer::createSEHTable() { … }

// Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set
// cannot contain duplicates. Therefore, the set is uniqued by Chunk and the
// symbol's offset into that Chunk.
static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) { … }

// Given a symbol, add it to the GFIDs table if it is a live, defined, function
// symbol in an executable section.
static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms,
                                         Symbol *s) { … }

// Visit all relocations from all section contributions of this object file and
// mark the relocation target as address-taken.
void Writer::markSymbolsWithRelocations(ObjFile *file,
                                        SymbolRVASet &usedSymbols) { … }

// Create the guard function id table. This is a table of RVAs of all
// address-taken functions. It is sorted and uniqued, just like the safe SEH
// table.
void Writer::createGuardCFTables() { … }

// Take a list of input sections containing symbol table indices and add those
// symbols to a vector. The challenge is that symbol RVAs are not known and
// depend on the table size, so we can't directly build a set of integers.
void Writer::getSymbolsFromSections(ObjFile *file,
                                    ArrayRef<SectionChunk *> symIdxChunks,
                                    std::vector<Symbol *> &symbols) { … }

// Take a list of input sections containing symbol table indices and add those
// symbols to an RVA table.
void Writer::markSymbolsForRVATable(ObjFile *file,
                                    ArrayRef<SectionChunk *> symIdxChunks,
                                    SymbolRVASet &tableSymbols) { … }

// Replace the absolute table symbol with a synthetic symbol pointing to
// tableChunk so that we can emit base relocations for it and resolve section
// relative relocations.
void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
                              StringRef countSym, bool hasFlag) { … }

// Create CHPE metadata chunks.
void Writer::createECChunks() { … }

// MinGW specific. Gather all relocations that are imported from a DLL even
// though the code didn't expect it to, produce the table that the runtime
// uses for fixing them up, and provide the synthetic symbols that the
// runtime uses for finding the table.
void Writer::createRuntimePseudoRelocs() { … }

// MinGW specific.
// The MinGW .ctors and .dtors lists have sentinels at each end;
// a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end.
// There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__
// and __DTOR_LIST__ respectively.
void Writer::insertCtorDtorSymbols() { … }

// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() { … }

// Set symbols used by ARM64EC metadata.
void Writer::setECSymbols() { … }

// Write section contents to a mmap'ed file.
void Writer::writeSections() { … }

void Writer::writeBuildId() { … }

// Sort .pdata section contents according to PE/COFF spec 5.5.
template <typename T>
void Writer::sortExceptionTable(ChunkRange &exceptionTable) { … }

// Sort .pdata section contents according to PE/COFF spec 5.5.
void Writer::sortExceptionTables() { … }

// The CRT section contains, among other things, the array of function
// pointers that initialize every global variable that is not trivially
// constructed. The CRT calls them one after the other prior to invoking
// main().
//
// As per C++ spec, 3.6.2/2.3,
// "Variables with ordered initialization defined within a single
// translation unit shall be initialized in the order of their definitions
// in the translation unit"
//
// It is therefore critical to sort the chunks containing the function
// pointers in the order that they are listed in the object file (top to
// bottom), otherwise global objects might not be initialized in the
// correct order.
void Writer::sortCRTSectionChunks(std::vector<Chunk *> &chunks) { … }

OutputSection *Writer::findSection(StringRef name) { … }

uint32_t Writer::getSizeOfInitializedData() { … }

// Add base relocations to .reloc section.
void Writer::addBaserels() { … }

// Add addresses to .reloc section. Note that addresses are grouped by page.
void Writer::addBaserelBlocks(std::vector<Baserel> &v) { … }

PartialSection *Writer::createPartialSection(StringRef name,
                                             uint32_t outChars) { … }

PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) { … }

void Writer::fixTlsAlignment() { … }

void Writer::prepareLoadConfig() { … }

template <typename T> void Writer::prepareLoadConfig(T *loadConfig) { … }

template <typename T>
void Writer::checkLoadConfigGuardData(const T *loadConfig) { … }
llvm/lld/COFF/Writer.cpp