UnwindInfoSection.cpp | Explore in Territory

//===- UnwindInfoSection.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "UnwindInfoSection.h"
#include "InputSection.h"
#include "Layout.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"

#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Parallel.h"

#include "mach-o/compact_unwind_encoding.h"

#include <numeric>

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;

#define COMMON_ENCODINGS_MAX …
#define COMPACT_ENCODINGS_MAX …

#define SECOND_LEVEL_PAGE_BYTES …
#define SECOND_LEVEL_PAGE_WORDS …
#define REGULAR_SECOND_LEVEL_ENTRIES_MAX …
#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX …

#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS …
#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK …

static_assert …;

constexpr uint64_t DWARF_SECTION_OFFSET = …;

// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
// optimizes space and exception-time lookup.  Most DWARF unwind
// entries can be replaced with Compact Unwind entries, but the ones
// that cannot are retained in DWARF form.
//
// This comment will address macro-level organization of the pre-link
// and post-link compact unwind tables. For micro-level organization
// pertaining to the bitfield layout of the 32-bit compact unwind
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
//
// Important clarifying factoids:
//
// * __LD,__compact_unwind is the compact unwind format for compiler
// output and linker input. It is never a final output. It could be
// an intermediate output with the `-r` option which retains relocs.
//
// * __TEXT,__unwind_info is the compact unwind format for final
// linker output. It is never an input.
//
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
//
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
// level) by ascending address, and the pages are referenced by an
// index (1st level) in the section header.
//
// * Following the headers in __TEXT,__unwind_info, the bulk of the
// section contains a vector of compact unwind entries
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
// Adjacent entries with the same encoding can be folded to great
// advantage, achieving a 3-order-of-magnitude reduction in the
// number of entries.
//
// Refer to the definition of unwind_info_section_header in
// compact_unwind_encoding.h for an overview of the format we are encoding
// here.

// TODO(gkm): how do we align the 2nd-level pages?

// The various fields in the on-disk representation of each compact unwind
// entry.
#define FOR_EACH_CU_FIELD …

CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD);

#undef FOR_EACH_CU_FIELD

// LLD's internal representation of a compact unwind entry.
struct CompactUnwindEntry { … };

EncodingMap;

struct SecondLevelPage { … };

// UnwindInfoSectionImpl allows us to avoid cluttering our header file with a
// lengthy definition of UnwindInfoSection.
class UnwindInfoSectionImpl final : public UnwindInfoSection { … };

UnwindInfoSection::UnwindInfoSection()
    : … { … }

// Record function symbols that may need entries emitted in __unwind_info, which
// stores unwind data for address ranges.
//
// Note that if several adjacent functions have the same unwind encoding and
// personality function and no LSDA, they share one unwind entry. For this to
// work, functions without unwind info need explicit "no unwind info" unwind
// entries -- otherwise the unwinder would attribute to them the unwind info of
// the nearest preceding function in the image that has unwind info. Thus, we
// add function symbols for each unique address regardless of whether they have
// associated unwind info.
void UnwindInfoSection::addSymbol(const Defined *d) { … }

void UnwindInfoSectionImpl::prepare() { … }

// Compact unwind relocations have different semantics, so we handle them in a
// separate code path from regular relocations. First, we do not wish to add
// rebase opcodes for __LD,__compact_unwind, because that section doesn't
// actually end up in the final binary. Second, personality pointers always
// reside in the GOT and must be treated specially.
void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { … }

Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) { … }

// We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
void UnwindInfoSectionImpl::relocateCompactUnwind(
    std::vector<CompactUnwindEntry> &cuEntries) { … }

// There should only be a handful of unique personality pointers, so we can
// encode them as 2-bit indices into a small array.
void UnwindInfoSectionImpl::encodePersonalities() { … }

static bool canFoldEncoding(compact_unwind_encoding_t encoding) { … }

// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame.
void UnwindInfoSectionImpl::finalize() { … }

// All inputs are relocated and output addresses are known, so write!

void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { … }

UnwindInfoSection *macho::makeUnwindInfoSection() { … }
llvm/lld/MachO/UnwindInfoSection.cpp