//===- ICF.cpp ------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "ICF.h" #include "ConcatOutputSection.h" #include "Config.h" #include "InputSection.h" #include "SymbolTable.h" #include "Symbols.h" #include "UnwindInfoSection.h" #include "lld/Common/CommonLinkerContext.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include <atomic> usingnamespacellvm; usingnamespacelld; usingnamespacelld::macho; static constexpr bool verboseDiagnostics = …; class ICF { … }; ICF::ICF(std::vector<ConcatInputSection *> &inputs) { … } // ICF = Identical Code Folding // // We primarily fold __TEXT,__text, so this is really "code" folding, and not // "COMDAT" folding; the one data section that is also folded, // __DATA_CONST,__cfstring, is covered in the summary below. String and scalar // constant literals are deduplicated elsewhere. // // Summary of segments & sections: // // The __TEXT segment is readonly at the MMU. Some sections are already // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are // synthetic and inherently free of duplicates (__TEXT,__stubs & // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, // because doing so induces many test failures. // // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and // thus ineligible for ICF. // // The __DATA_CONST segment is read/write at the MMU, but is logically const to // the application after dyld applies fixups to pointer data. We currently // fold only the __DATA_CONST,__cfstring section. // // The __DATA segment is read/write at the MMU, and as application-writeable // data, none of its sections are eligible for ICF. 
// // Please see the large block comment in lld/ELF/ICF.cpp for an explanation // of the segregation algorithm. // // FIXME(gkm): implement keep-unique attributes // FIXME(gkm): implement address-significance tables for MachO object files // Compare "non-moving" parts of two ConcatInputSections, namely everything // except references to other ConcatInputSections. bool ICF::equalsConstant(const ConcatInputSection *ia, const ConcatInputSection *ib) { … } // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not // handled by equalsConstant(). bool ICF::equalsVariable(const ConcatInputSection *ia, const ConcatInputSection *ib) { … } // Find the first InputSection after BEGIN whose equivalence class differs size_t ICF::findBoundary(size_t begin, size_t end) { … } // Invoke FUNC on subranges with matching equivalence class void ICF::forEachClassRange(size_t begin, size_t end, llvm::function_ref<void(size_t, size_t)> func) { … } // Given a range of identical icfInputs, replace address significant functions // with a thunk that is just a direct branch to the first function in the // series. This way we keep only one main body of the function but we still // retain the address uniqueness of relevant functions by having them be a // direct branch thunk rather than containing a full copy of the actual function // body. void ICF::applySafeThunksToRange(size_t begin, size_t end) { … } // Split icfInputs into shards, then parallelize invocation of FUNC on subranges // with matching equivalence class void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) { … } void ICF::run() { … } // Split an equivalence class into smaller classes. void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { … } void macho::markSymAsAddrSig(Symbol *s) { … } void macho::markAddrSigSymbols() { … } void macho::foldIdenticalSections(bool onlyCfStrings) { … }