ScriptParser.cpp | Explore in Territory

//===- ScriptParser.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a recursive-descent parser for linker scripts.
// Parsed results are stored to Config and Script global objects.
//
//===----------------------------------------------------------------------===//

#include "ScriptParser.h"
#include "Config.h"
#include "Driver.h"
#include "InputFiles.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "ScriptLexer.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Target.h"
#include "lld/Common/CommonLinkerContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/TimeProfiler.h"
#include <cassert>
#include <limits>
#include <optional>
#include <vector>

using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;

namespace {
class ScriptParser final : ScriptLexer { … };
} // namespace

static StringRef unquote(StringRef s) { … }

// Some operations only support one non-absolute value. Move the
// absolute one to the right-hand side for convenience.
static void moveAbsRight(ExprValue &a, ExprValue &b) { … }

static ExprValue add(ExprValue a, ExprValue b) { … }

static ExprValue sub(ExprValue a, ExprValue b) { … }

static ExprValue bitAnd(ExprValue a, ExprValue b) { … }

static ExprValue bitXor(ExprValue a, ExprValue b) { … }

static ExprValue bitOr(ExprValue a, ExprValue b) { … }

void ScriptParser::readDynamicList() { … }

void ScriptParser::readVersionScript() { … }

void ScriptParser::readVersionScriptCommand() { … }

void ScriptParser::readVersion() { … }

void ScriptParser::readLinkerScript() { … }

void ScriptParser::readDefsym() { … }

void ScriptParser::readNoCrossRefs(bool to) { … }

void ScriptParser::addFile(StringRef s) { … }

void ScriptParser::readAsNeeded() { … }

void ScriptParser::readEntry() { … }

void ScriptParser::readExtern() { … }

void ScriptParser::readGroup() { … }

void ScriptParser::readInclude() { … }

void ScriptParser::readInput() { … }

void ScriptParser::readOutput() { … }

void ScriptParser::readOutputArch() { … }

static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) { … }

// Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(default, big, little). Choose
// big if -EB is specified, little if -EL is specified, or default if neither is
// specified.
void ScriptParser::readOutputFormat() { … }

void ScriptParser::readPhdrs() { … }

void ScriptParser::readRegionAlias() { … }

void ScriptParser::readSearchDir() { … }

// This reads an overlay description. Overlays are used to describe output
// sections that use the same virtual memory range and would otherwise fail
// the linker's section sanity checks.
// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() { … }

SectionClassDesc *ScriptParser::readSectionClassDescription() { … }

StringRef ScriptParser::readSectionClassName() { … }

void ScriptParser::readOverwriteSections() { … }

void ScriptParser::readSections() { … }

void ScriptParser::readTarget() { … }

static int precedence(StringRef op) { … }

StringMatcher ScriptParser::readFilePatterns() { … }

SortSectionPolicy ScriptParser::peekSortKind() { … }

SortSectionPolicy ScriptParser::readSortKind() { … }

// Reads SECTIONS command contents in the following form:
//
// <contents> ::= <elem>*
// <elem>     ::= <exclude>? <glob-pattern>
// <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
//
// For example,
//
// *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
//
// is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
// The semantics of that is section .foo in any file, section .bar in
// any file but a.o, and section .baz in any file but b.o.
SmallVector<SectionPattern, 0> ScriptParser::readInputSectionsList() { … }

// Reads contents of "SECTIONS" directive. That directive contains a
// list of glob patterns for input sections. The grammar is as follows.
//
// <patterns> ::= <section-list>
//              | <sort> "(" <section-list> ")"
//              | <sort> "(" <sort> "(" <section-list> ")" ")"
//
// <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
//              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
//
// <section-list> is parsed by readInputSectionsList().
InputSectionDescription *
ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags,
                                    uint64_t withoutFlags) { … }

InputSectionDescription *
ScriptParser::readInputSectionDescription(StringRef tok) { … }

void ScriptParser::readSort() { … }

Expr ScriptParser::readAssert() { … }

#define ECase …
constexpr std::pair<const char *, unsigned> typeMap[] = …;
#undef ECase

// Tries to read the special directive for an output section definition, which
// can be one of the following: "(NOLOAD)", "(COPY)", "(INFO)", "(OVERLAY)", or
// "(TYPE=<value>)".
bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok) { … }

// Reads an expression and/or the special directive for an output
// section definition. The directive is one of the following: "(NOLOAD)",
// "(COPY)", "(INFO)" or "(OVERLAY)".
//
// An output section name can be followed by an address expression
// and/or a directive. This grammar is not LL(1) because "(" can be
// interpreted as either the beginning of an expression or the beginning
// of a directive.
//
// https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
// https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
void ScriptParser::readSectionAddressType(OutputSection *cmd) { … }

static Expr checkAlignment(Expr e, std::string &loc) { … }

OutputDesc *ScriptParser::readOverlaySectionDescription() { … }

OutputDesc *ScriptParser::readOutputSectionDescription(StringRef outSec) { … }

// Reads a `=<fillexp>` expression and returns its value as a big-endian number.
// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
// We do not support using symbols in such expressions.
//
// When reading a hexstring, ld.bfd handles it as a blob of arbitrary
// size, while ld.gold always handles it as a 32-bit big-endian number.
// We are compatible with ld.gold because it's easier to implement.
// Also, we require that expressions with operators be wrapped in
// parentheses. We do this to resolve the ambiguity when parsing scripts like:
// SECTIONS { .foo : { ... } =120+3 /DISCARD/ : { ... } }
std::array<uint8_t, 4> ScriptParser::readFill() { … }

SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) { … }

// Replace each whitespace sequence (including \n) with a single space. The
// output is used by -Map.
static void squeezeSpaces(std::string &str) { … }

SymbolAssignment *ScriptParser::readAssignment(StringRef tok) { … }

StringRef ScriptParser::readName() { … }

SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) { … }

// This is an operator-precedence parser to parse a linker
// script expression.
Expr ScriptParser::readExpr() { … }

Expr ScriptParser::combine(StringRef op, Expr l, Expr r) { … }

// This is a part of the operator-precedence parser. This function
// assumes that the remaining token stream starts with an operator.
Expr ScriptParser::readExpr1(Expr lhs, int minPrec) { … }

Expr ScriptParser::getPageSize() { … }

Expr ScriptParser::readConstant() { … }

// Parses tok as an integer. It recognizes hexadecimal (prefixed with
// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
// have "K" (Ki) or "M" (Mi) suffixes.
static std::optional<uint64_t> parseInt(StringRef tok) { … }

ByteCommand *ScriptParser::readByteCommand(StringRef tok) { … }

static std::optional<uint64_t> parseFlag(StringRef tok) { … }

// Reads the '(' <flags> ')' list of section flags in
// INPUT_SECTION_FLAGS '(' <flags> ')' in the
// following form:
// <flags> ::= <flag>
//           | <flags> & <flag>
// <flag>  ::= Recognized flag name, or integer value of flag.
// If the first character of <flag> is a ! then this means without flag,
// otherwise with flag.
// Example: SHF_EXECINSTR & !SHF_WRITE means with flag SHF_EXECINSTR and
// without flag SHF_WRITE.
std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() { … }

StringRef ScriptParser::readParenName() { … }

static void checkIfExists(const OutputSection &osec, StringRef location) { … }

static bool isValidSymbolName(StringRef s) { … }

Expr ScriptParser::readPrimary() { … }

Expr ScriptParser::readTernary(Expr cond) { … }

Expr ScriptParser::readParenExpr() { … }

SmallVector<StringRef, 0> ScriptParser::readOutputSectionPhdrs() { … }

// Read a program header type name. The next token must be a
// name of a program header type or a constant (e.g. "0x3").
unsigned ScriptParser::readPhdrType() { … }

// Reads an anonymous version declaration.
void ScriptParser::readAnonymousDeclaration() { … }

// Reads a non-anonymous version definition,
// e.g. "VerStr { global: foo; bar; local: *; };".
void ScriptParser::readVersionDeclaration(StringRef verStr) { … }

bool elf::hasWildcard(StringRef s) { … }

// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
std::pair<SmallVector<SymbolVersion, 0>, SmallVector<SymbolVersion, 0>>
ScriptParser::readSymbols() { … }

// Reads an "extern C++" directive, e.g.,
// "extern "C++" { ns::*; "f(int, double)"; };"
//
// The last semicolon is optional. E.g. this is OK:
// "extern "C++" { ns::*; "f(int, double)" };"
SmallVector<SymbolVersion, 0> ScriptParser::readVersionExtern() { … }

Expr ScriptParser::readMemoryAssignment(StringRef s1, StringRef s2,
                                        StringRef s3) { … }

// Parse the MEMORY command as specified in:
// https://sourceware.org/binutils/docs/ld/MEMORY.html
//
// MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
void ScriptParser::readMemory() { … }

// This function parses the attributes used to match against section
// flags when placing output sections in a memory region. These flags
// are only used when an explicit memory region name is not used.
void ScriptParser::readMemoryAttributes(uint32_t &flags, uint32_t &invFlags,
                                        uint32_t &negFlags,
                                        uint32_t &negInvFlags) { … }

void elf::readLinkerScript(MemoryBufferRef mb) { … }

void elf::readVersionScript(MemoryBufferRef mb) { … }

void elf::readDynamicList(MemoryBufferRef mb) { … }

void elf::readDefsym(MemoryBufferRef mb) { … }
llvm/lld/ELF/ScriptParser.cpp