#ifndef LLVM_PROFILEDATA_MEMPROF_H_ #define LLVM_PROFILEDATA_MEMPROF_H_ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/Support/BLAKE3.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/HashBuilder.h" #include "llvm/Support/raw_ostream.h" #include <bitset> #include <cstdint> #include <optional> namespace llvm { namespace memprof { struct MemProfRecord; // The versions of the indexed MemProf format enum IndexedVersion : uint64_t { … }; constexpr uint64_t MinimumSupportedVersion = …; constexpr uint64_t MaximumSupportedVersion = …; // Verify that the minimum and maximum satisfy the obvious constraint. static_assert …; enum class Meta : uint64_t { … }; MemProfSchema; // Returns the full schema currently in use. MemProfSchema getFullSchema(); // Returns the schema consisting of the fields used for hot cold memory hinting. MemProfSchema getHotColdSchema(); // Holds the actual MemInfoBlock data with all fields. Contents may be read or // written partially by providing an appropriate schema to the serialize and // deserialize methods. struct PortableMemInfoBlock { … }; // A type representing the id generated by hashing the contents of the Frame. FrameId; // A type representing the id to index into the frame array. LinearFrameId; // Describes a call frame for a dynamic allocation context. The contents of // the frame are populated by symbolizing the stack depot call frame from the // compiler runtime. struct Frame { … }; // A type representing the index into the table of call stacks. CallStackId; // A type representing the index into the call stack array. LinearCallStackId; // Holds allocation information in a space efficient format where frames are // represented using unique identifiers. struct IndexedAllocationInfo { … }; // Holds allocation information with frame contents inline. The type should // be used for temporary in-memory instances. struct AllocationInfo { … }; // Holds the memprof profile information for a function. The internal // representation stores frame ids for efficiency. This representation should // be used in the profile conversion and manipulation tools. struct IndexedMemProfRecord { … }; // Holds the memprof profile information for a function. The internal // representation stores frame contents inline. This representation should // be used for small amount of temporary, in memory instances. struct MemProfRecord { … }; // Reads a memprof schema from a buffer. All entries in the buffer are // interpreted as uint64_t. The first entry in the buffer denotes the number of // ids in the schema. Subsequent entries are integers which map to memprof::Meta // enum class entries. After successfully reading the schema, the pointer is one // byte past the schema contents. Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer); // Trait for reading IndexedMemProfRecord data from the on-disk hash table. class RecordLookupTrait { … }; // Trait for writing IndexedMemProfRecord data to the on-disk hash table. class RecordWriterTrait { … }; // Trait for writing frame mappings to the on-disk hash table. class FrameWriterTrait { … }; // Trait for reading frame mappings from the on-disk hash table. class FrameLookupTrait { … }; // Trait for writing call stacks to the on-disk hash table. class CallStackWriterTrait { … }; // Trait for reading call stack mappings from the on-disk hash table. class CallStackLookupTrait { … }; // Compute a CallStackId for a given call stack. CallStackId hashCallStack(ArrayRef<FrameId> CS); namespace detail { // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have // to do so in one of two different ways depending on the type of the hash // table. template <typename value_type, typename IterTy> value_type DerefIterator(IterTy Iter) { … } } // namespace detail // A function object that returns a frame for a given FrameId. template <typename MapTy> struct FrameIdConverter { … }; // A function object that returns a call stack for a given CallStackId. template <typename MapTy> struct CallStackIdConverter { … }; // A function object that returns a Frame stored at a given index into the Frame // array in the profile. struct LinearFrameIdConverter { … }; // A function object that returns a call stack stored at a given index into the // call stack array in the profile. struct LinearCallStackIdConverter { … }; struct IndexedMemProfData { … }; struct FrameStat { … }; // Compute a histogram of Frames in call stacks. llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> &MemProfCallStackData); // Construct a radix tree of call stacks. // // A set of call stacks might look like: // // CallStackId 1: f1 -> f2 -> f3 // CallStackId 2: f1 -> f2 -> f4 -> f5 // CallStackId 3: f1 -> f2 -> f4 -> f6 // CallStackId 4: f7 -> f8 -> f9 // // where each fn refers to a stack frame. // // Since we expect a lot of common prefixes, we can compress the call stacks // into a radix tree like: // // CallStackId 1: f1 -> f2 -> f3 // | // CallStackId 2: +---> f4 -> f5 // | // CallStackId 3: +---> f6 // // CallStackId 4: f7 -> f8 -> f9 // // Now, we are interested in retrieving call stacks for a given CallStackId, so // we just need a pointer from a given call stack to its parent. For example, // CallStackId 2 would point to CallStackId 1 as a parent. // // We serialize the radix tree above into a single array along with the length // of each call stack and pointers to the parent call stacks. // // Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 // Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1 // ^ ^ ^ ^ // | | | | // CallStackId 4: 0 --+ | | | // CallStackId 3: 4 --------------+ | | // CallStackId 2: 7 -----------------------+ | // CallStackId 1: 11 -----------------------------------+ // // - LN indicates the length of a call stack, encoded as ordinary integer N. // // - JN indicates a pointer to the parent, encoded as -N. // // The radix tree allows us to reconstruct call stacks in the leaf-to-root // order as we scan the array from left ro right while following pointers to // parents along the way. // // For example, if we are decoding CallStackId 2, we start a forward traversal // at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When // we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3, // picking up f2 and f1. We are done after collecting 4 frames as indicated at // the beginning of the traversal. // // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into // the radix tree array, so we do not explicitly encode mappings like: // "CallStackId 1 -> 11". class CallStackRadixTreeBuilder { … }; // Verify that each CallStackId is computed with hashCallStack. This function // is intended to help transition from CallStack to CSId in // IndexedAllocationInfo. void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record); // Verify that each CallStackId is computed with hashCallStack. This function // is intended to help transition from CallStack to CSId in // IndexedAllocationInfo. void verifyFunctionProfileData( const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &FunctionProfileData); } // namespace memprof } // namespace llvm #endif // LLVM_PROFILEDATA_MEMPROF_H_