//===- ExportTrie.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This is a partial implementation of the Mach-O export trie format. It's // essentially a symbol table encoded as a compressed prefix trie, meaning that // the common prefixes of each symbol name are shared for a more compact // representation. The prefixes are stored on the edges of the trie, and one // edge can represent multiple characters. For example, given two exported // symbols _bar and _baz, we will have a trie like this (terminal nodes are // marked with an asterisk): // // +-+-+ // | | // root node // +-+-+ // | // | _ba // | // +-+-+ // | | // +-+-+ // r / \ z // / \ // +-+-+ +-+-+ // | * | | * | // +-+-+ +-+-+ // // More documentation of the format can be found in // llvm/tools/obj2yaml/macho2yaml.cpp. // //===----------------------------------------------------------------------===// #include "ExportTrie.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/LEB128.h" #include <optional> usingnamespacellvm; usingnamespacelld; usingnamespacelld::macho; namespace { struct Edge { … }; struct ExportInfo { … }; } // namespace struct macho::TrieNode { … }; // For regular symbols, the node layout (excluding the children) is // // uleb128 terminalSize; // uleb128 flags; // uleb128 address; // // For re-exported symbols, the layout is // // uleb128 terminalSize; // uleb128 flags; // uleb128 ordinal; // char[] originalName; // // If libfoo.dylib is linked against libbar.dylib, and libfoo exports an alias // _foo to a symbol _bar in libbar, then originalName will be "_bar". If libfoo // re-exports _bar directly (i.e. not via an alias), then originalName will be // the empty string. // // TODO: Support aliased re-exports. (Since we don't yet support these, // originalName will always be the empty string.) // // For stub-and-resolver nodes, the layout is // // uleb128 terminalSize; // uleb128 flags; // uleb128 stubAddress; // uleb128 resolverAddress; // // TODO: Support stub-and-resolver nodes. uint32_t TrieNode::getTerminalSize() const { … } bool TrieNode::updateOffset(size_t &nextOffset) { … } void TrieNode::writeTo(uint8_t *buf) const { … } TrieBuilder::~TrieBuilder() { … } TrieNode *TrieBuilder::makeNode() { … } static int charAt(const Symbol *sym, size_t pos) { … } // Build the trie by performing a three-way radix quicksort: We start by sorting // the strings by their first characters, then sort the strings with the same // first characters by their second characters, and so on recursively. Each // time the prefixes diverge, we add a node to the trie. // // node: The most recently created node along this path in the trie (i.e. // the furthest from the root.) // lastPos: The prefix length of the most recently created node, i.e. the number // of characters along its path from the root. // pos: The string index we are currently sorting on. Note that each symbol // S contained in vec has the same prefix S[0...pos). void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec, TrieNode *node, size_t lastPos, size_t pos) { … } size_t TrieBuilder::build() { … } void TrieBuilder::writeTo(uint8_t *buf) const { … } namespace { // Parse a serialized trie and invoke a callback for each entry. class TrieParser { … }; } // namespace void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) { … } void macho::parseTrie(const uint8_t *buf, size_t size, const TrieEntryCallback &callback) { … }