//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H #include "ErrorHandling.h" #include "ProfiledBinary.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Regex.h" #include <cstdint> #include <fstream> #include <map> usingnamespacellvm; usingnamespacesampleprof; namespace llvm { class CleanupInstaller; namespace sampleprof { // Stream based trace line iterator class TraceStream { … }; // The type of input format. enum PerfFormat { … }; // The type of perfscript content. enum PerfContent { … }; struct PerfInputFile { … }; // The parsed LBR sample entry. struct LBREntry { … }; #ifndef NDEBUG static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) { for (size_t I = 0; I < LBRStack.size(); I++) { dbgs() << "[" << I << "] "; LBRStack[I].print(); dbgs() << "\n"; } } static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) { for (size_t I = 0; I < CallStack.size(); I++) { dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n"; } } #endif // Hash interface for generic data of type T // Data should implement a \fn getHashCode and a \fn isEqual // Currently getHashCode is non-virtual to avoid the overhead of calling vtable, // i.e we explicitly calculate hash of derived class, assign to base class's // HashCode. This also provides the flexibility for calculating the hash code // incrementally(like rolling hash) during frame stack unwinding since unwinding // only changes the leaf of frame stack. \fn isEqual is a virtual function, // which will have perf overhead. 
In the future, if we redesign a better hash // function, then we can just skip this or switch to non-virtual function(like // just ignore comparison if the probability of hash conflicts is low) template <class T> class Hashable { … }; struct PerfSample { … }; // After parsing the sample, we record the samples by aggregating them // into this counter. The key stores the sample data and the value is // the sample repeat times. AggregatedCounter; SampleVector; inline bool isValidFallThroughRange(uint64_t Start, uint64_t End, ProfiledBinary *Binary) { … } // The state for the unwinder, it doesn't hold the data but only keeps the // pointer/index of the data. While unwinding, the CallStack is changed // dynamically and will be recorded as the context of the sample struct UnwindState { … }; // Base class for sample counter key with context struct ContextKey { … }; // String based context id struct StringBasedCtxKey : public ContextKey { … }; // Address-based context id struct AddrBasedCtxKey : public ContextKey { … }; // The counter of branch samples for one function indexed by the branch, // which is represented as the source and target offset pair. BranchSample; // The counter of range samples for one function indexed by the range, // which is represented as the start and end offset pair. RangeSample; // Wrapper for sample counters including range counter and branch counter struct SampleCounter { … }; // Sample counter with context to support context-sensitive profile ContextSampleCounterMap; struct FrameStack { … }; struct AddressStack { … }; /* As in hybrid sample we have a group of LBRs and the most recent sampling call stack, we can walk through those LBRs to infer more call stacks which would be used as context for profile. VirtualUnwinder is the class to do the call stack unwinding based on LBR state. Two types of unwinding are processed here: 1) LBR unwinding and 2) linear range unwinding. 
Specifically, for each LBR entry(can be classified into call, return, regular branch), LBR unwinding will replay the operation by pushing, popping or switching leaf frame towards the call stack and since the initial call stack is most recently sampled, the replay should be in anti-execution order, i.e. for the regular case, pop the call stack when LBR is call, push frame on call stack when LBR is return. After each LBR is processed, it also needs to align with the next LBR by going through instructions from previous LBR's target to current LBR's source, which is the linear unwinding. As instructions from a linear range can come from different functions due to inlining, linear unwinding will do the range splitting and record counters by the range with same inline context. Over these unwinding processes we will record each call stack as context id and LBR/linear range as sample counter for further CS profile generation. */ class VirtualUnwinder { … }; // Read perf trace to parse the events and samples. class PerfReaderBase { … }; // Read perf script to parse the events and samples. class PerfScriptReader : public PerfReaderBase { … }; /* The reader of LBR only perf script. A typical LBR sample is like: 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... ... 0x4005c8/0x4005dc/P/-/-/0 */ class LBRPerfReader : public PerfScriptReader { … }; /* Hybrid perf script includes a group of hybrid samples(LBRs + call stack), which is used to generate CS profile. An example of hybrid sample: 4005dc # call stack leaf 400634 400684 # call stack root 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries */ class HybridPerfReader : public PerfScriptReader { … }; /* Format of unsymbolized profile: [frame1 @ frame2 @ ...] # If it's a CS profile number of entries in RangeCounter from_1-to_1:count_1 from_2-to_2:count_2 ...... from_n-to_n:count_n number of entries in BranchCounter src_1->dst_1:count_1 src_2->dst_2:count_2 ...... 
src_n->dst_n:count_n [frame1 @ frame2 @ ...] # Next context ...... Note that non-CS profile doesn't have the empty `[]` context. */ class UnsymbolizedProfileReader : public PerfReaderBase { … }; } // end namespace sampleprof } // end namespace llvm #endif