//===- DebugTypes.cpp -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "DebugTypes.h" #include "COFFLinkerContext.h" #include "Chunks.h" #include "Driver.h" #include "InputFiles.h" #include "PDB.h" #include "TypeMerger.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" usingnamespacellvm; usingnamespacellvm::codeview; usingnamespacelld; usingnamespacelld::coff; namespace { class TypeServerIpiSource; // The TypeServerSource class represents a PDB type server, a file referenced by // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ // files, therefore there must be only one instance per OBJ lot. The file path // is discovered from the dependent OBJ's debug type stream. The // TypeServerSource object is then queued and loaded by the COFF Driver. The // debug type stream for such PDB files will be merged first in the final PDB, // before any dependent OBJ. class TypeServerSource : public TpiSource { … }; // Companion to TypeServerSource. 
Stores the index map for the IPI stream in the // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the // invariant of one type index space per source. class TypeServerIpiSource : public TpiSource { … }; // This class represents the debug type stream of an OBJ file that depends on a // PDB type server (see TypeServerSource). class UseTypeServerSource : public TpiSource { … }; // This class represents the debug type stream of a Microsoft precompiled // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output // PDB, before any other OBJs that depend on this. Note that only MSVC generates // such files; clang does not. class PrecompSource : public TpiSource { … }; // This class represents the debug type stream of an OBJ file that depends on a // Microsoft precompiled headers OBJ (see PrecompSource). class UsePrecompSource : public TpiSource { … }; } // namespace TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f) : … { … } // Vtable key method. TpiSource::~TpiSource() { … } TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) { … } TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx, PDBInputFile *pdbInputFile) { … } TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx, ObjFile *file, TypeServer2Record ts) { … } TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) { … } TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx, ObjFile *file, PrecompRecord precomp) { … } bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const { … } void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec, ArrayRef<TiReference> typeRefs) { … } void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) { … } bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) { … } // A COFF .debug$H section is currently a clang extension. 
This function checks // if a .debug$H section is in a format that we expect / understand, so that we // can ignore any sections which are coincidentally also named .debug$H but do // not contain a format we recognize. static bool canUseDebugH(ArrayRef<uint8_t> debugH) { … } static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) { … } static ArrayRef<GloballyHashedType> getHashesFromDebugH(ArrayRef<uint8_t> debugH) { … } // Merge .debug$T for a generic object file. Error TpiSource::mergeDebugT(TypeMerger *m) { … } // Merge types from a type server PDB. Error TypeServerSource::mergeDebugT(TypeMerger *m) { … } Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() { … } Error UseTypeServerSource::mergeDebugT(TypeMerger *m) { … } static bool equalsPath(StringRef path1, StringRef path2) { … } // Find an OBJ provided on the command line by its file name. PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) { … } PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file, PrecompRecord &pr) { … } Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file, PrecompRecord &pr) { … } /// Merges a precompiled headers TPI map into the current TPI map. The /// precompiled headers object will also be loaded and remapped in the /// process. Error UsePrecompSource::mergeInPrecompHeaderObj() { … } Error UsePrecompSource::mergeDebugT(TypeMerger *m) { … } Error PrecompSource::mergeDebugT(TypeMerger *m) { … } void PrecompSource::registerMapping() { … } //===----------------------------------------------------------------------===// // Parallel GHash type merging implementation. //===----------------------------------------------------------------------===// void TpiSource::loadGHashes() { … } // Copies ghashes from a vector into an array. These are long lived, so it's // worth the time to copy these into an appropriately sized vector to reduce // memory usage. 
void TpiSource::assignGHashesFromVector( std::vector<GloballyHashedType> &&hashVec) { … } // Faster way to iterate type records. forEachTypeChecked is faster than // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops. static void forEachTypeChecked(ArrayRef<uint8_t> types, function_ref<void(const CVType &)> fn) { … } // Walk over file->debugTypes and fill in the isItemIndex bit vector. // TODO: Store this information in .debug$H so that we don't have to recompute // it. This is the main bottleneck that makes parallel ghashing with one // thread slower than single-threaded ghashing. void TpiSource::fillIsItemIndexFromDebugT() { … } void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) { … } void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords, TypeIndex beginIndex) { … } void TpiSource::remapTpiWithGHashes(GHashState *g) { … } // PDBs do not actually store global hashes, so when merging a type server // PDB we have to synthesize global hashes. To do this, we first synthesize // global hashes for the TPI stream, since it is independent, then we // synthesize hashes for the IPI stream, using the hashes for the TPI stream // as inputs. void TypeServerSource::loadGHashes() { … } // Flatten discontiguous PDB type arrays to bytes so that we can use // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from // type servers is faster than iterating all object files compiled with /Z7 with // CVTypeArray, which has high overheads due to the virtual interface of // BinaryStream::readBytes. static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) { … } // Merge types from a type server PDB. 
void TypeServerSource::remapTpiWithGHashes(GHashState *g) { … } void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) { … } void PrecompSource::loadGHashes() { … } void UsePrecompSource::loadGHashes() { … } void UsePrecompSource::remapTpiWithGHashes(GHashState *g) { … } namespace { /// A concurrent hash table for global type hashing. It is based on this paper: /// Concurrent Hash Tables: Fast and General(?)! /// https://dl.acm.org/doi/10.1145/3309206 /// /// This hash table is meant to be used in two phases: /// 1. concurrent insertions /// 2. concurrent reads /// It does not support lookup, deletion, or rehashing. It uses linear probing. /// /// The paper describes storing a key-value pair in two machine words. /// Generally, the values stored in this map are type indices, and we can use /// those values to recover the ghash key from a side table. This allows us to /// shrink the table entries further at the cost of some loads, and sidesteps /// the need for a 128-bit atomic compare-and-swap operation. /// /// During insertion, a priority function is used to decide which insertion /// should be preferred. This ensures that the output is deterministic. For /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred. /// class GHashCell; struct GHashTable { … }; /// A ghash table cell for deduplicating types from TpiSources. class GHashCell { … }; } // namespace namespace lld::coff { /// This type is just a wrapper around GHashTable with external linkage so it /// can be used from a header. 
struct GHashState { … }; } // namespace lld::coff GHashTable::~GHashTable() { … } void GHashTable::init(uint32_t newTableSize) { … } uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash, GHashCell newCell) { … } TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc) : … { … } TypeMerger::~TypeMerger() = default; void TypeMerger::mergeTypesWithGHash() { … } void TypeMerger::sortDependencies() { … } /// Given the index into the ghash table for a particular type, return the type /// index for that type in the output PDB. static TypeIndex loadPdbTypeIndexFromCell(GHashState *g, uint32_t ghashCellIdx) { … } /// Free heap-allocated ghashes. void TypeMerger::clearGHashes() { … } // Fill in a TPI or IPI index map using ghashes. For each source type, use its // ghash to look up its final type index in the PDB, and store that in the map. void TpiSource::fillMapFromGHashes(GHashState *g) { … }