llvm/lld/MachO/EhFrame.h

//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_MACHO_EH_FRAME_H
#define LLD_MACHO_EH_FRAME_H

#include "InputSection.h"
#include "Relocations.h"

#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"

/*
 * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
 * is closely coupled with other file parsing logic; EhFrame.h just contains a
 * few helpers.
 */

/*
 * === The EH frame format ===
 *
 * EH frames can either be Common Information Entries (CIEs) or Frame
 * Description Entries (FDEs). CIEs contain information that is common amongst
 * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
 * entries together form a forest of two-level trees, with CIEs as the roots
 * and FDEs as the leaves. Note that a CIE must precede the FDEs which point
 * to it.
 *
 * A CIE comprises the following fields in order:
 * 1.   Length of the entry (4 or 12 bytes)
 * 2.   CIE offset (4 bytes; always 0 for CIEs)
 * 3.   CIE version (byte)
 * 4.   Null-terminated augmentation string
 * 5-8. LEB128 values that we don't care about
 * 9.   Augmentation data, to be interpreted using the aug string
 * 10.  DWARF instructions (ignored by LLD)
 *
 * An FDE comprises the following fields in order:
 * 1. Length of the entry (4 or 12 bytes)
 * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
 * 3. Function address (pointer-sized pcrel offset)
 * 4. (Optional) Augmentation data length
 * 5. (Optional) LSDA address (pointer-sized pcrel offset)
 * 6. DWARF instructions (ignored by LLD)
 */
namespace lld::macho {

// One of the EH frame helpers this header provides (the bulk of the parsing
// logic lives in InputFiles.cpp).
// NOTE(review): presumably used to read fields out of CIE/FDE entries -- this
// is inferred from the name only. No members are declared in this definition,
// so the actual interface should be confirmed before relying on this comment.
class EhReader {};

// Helper associated with turning the implicit relocations found in EH frames
// into explicit Relocs.
//
// The EH frame format, when emitted by llvm-mc, consists of a number of
// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
// pcrel offsets in the section data. The offsets refer to the locations of
// symbols in the input object file. When we ingest these EH frames, we convert
// these implicit relocations into explicit Relocs.
//
// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
// However, we need this operation to be cross-platform, and ARM does not have a
// similar relocation that is applicable. We therefore use the more verbose (but
// more generic) subtractor relocation to encode these pcrel values. ld64
// appears to do something similar -- its `-r` output contains these explicit
// subtractor relocations.
//
// NOTE(review): no members are declared in this definition, so how the
// conversion is exposed to callers cannot be documented from here -- confirm
// against the implementation.
class EhRelocator {};

} // namespace lld::macho

#endif