PerfReader.h | Explore in Territory

//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <fstream>
#include <map>

usingnamespacellvm;
usingnamespacesampleprof;

namespace llvm {

class CleanupInstaller;

namespace sampleprof {

// Stream based trace line iterator
class TraceStream { … };

// The type of input format.
enum PerfFormat { … };

// The type of perfscript content.
enum PerfContent { … };

struct PerfInputFile { … };

// The parsed LBR sample entry.
struct LBREntry { … };

#ifndef NDEBUG
static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
  for (size_t I = 0; I < LBRStack.size(); I++) {
    dbgs() << "[" << I << "] ";
    LBRStack[I].print();
    dbgs() << "\n";
  }
}

static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
  for (size_t I = 0; I < CallStack.size(); I++) {
    dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
  }
}
#endif

// Hash interface for generic data of type T
// Data should implement a \fn getHashCode and a \fn isEqual
// Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
// i.e we explicitly calculate hash of derived class, assign to base class's
// HashCode. This also provides the flexibility for calculating the hash code
// incrementally(like rolling hash) during frame stack unwinding since unwinding
// only changes the leaf of frame stack. \fn isEqual is a virtual function,
// which will have perf overhead. In the future, if we redesign a better hash
// function, then we can just skip this or switch to non-virtual function(like
// just ignore comparison if hash conflicts probabilities is low)
template <class T> class Hashable { … };

struct PerfSample { … };
// After parsing the sample, we record the samples by aggregating them
// into this counter. The key stores the sample data and the value is
// the sample repeat times.
AggregatedCounter;

SampleVector;

inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
                                    ProfiledBinary *Binary) { … }

// The state for the unwinder, it doesn't hold the data but only keep the
// pointer/index of the data, While unwinding, the CallStack is changed
// dynamicially and will be recorded as the context of the sample
struct UnwindState { … };

// Base class for sample counter key with context
struct ContextKey { … };

// String based context id
struct StringBasedCtxKey : public ContextKey { … };

// Address-based context id
struct AddrBasedCtxKey : public ContextKey { … };

// The counter of branch samples for one function indexed by the branch,
// which is represented as the source and target offset pair.
BranchSample;
// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.
RangeSample;
// Wrapper for sample counters including range counter and branch counter
struct SampleCounter { … };

// Sample counter with context to support context-sensitive profile
ContextSampleCounterMap;

struct FrameStack { … };

struct AddressStack { … };

/*
As in hybrid sample we have a group of LBRs and the most recent sampling call
stack, we can walk through those LBRs to infer more call stacks which would be
used as context for profile. VirtualUnwinder is the class to do the call stack
unwinding based on LBR state. Two types of unwinding are processd here:
1) LBR unwinding and 2) linear range unwinding.
Specifically, for each LBR entry(can be classified into call, return, regular
branch), LBR unwinding will replay the operation by pushing, popping or
switching leaf frame towards the call stack and since the initial call stack
is most recently sampled, the replay should be in anti-execution order, i.e. for
the regular case, pop the call stack when LBR is call, push frame on call stack
when LBR is return. After each LBR processed, it also needs to align with the
next LBR by going through instructions from previous LBR's target to current
LBR's source, which is the linear unwinding. As instruction from linear range
can come from different function by inlining, linear unwinding will do the range
splitting and record counters by the range with same inline context. Over those
unwinding process we will record each call stack as context id and LBR/linear
range as sample counter for further CS profile generation.
*/
class VirtualUnwinder { … };

// Read perf trace to parse the events and samples.
class PerfReaderBase { … };

// Read perf script to parse the events and samples.
class PerfScriptReader : public PerfReaderBase { … };

/*
  The reader of LBR only perf script.
  A typical LBR sample is like:
    40062f 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
          ... 0x4005c8/0x4005dc/P/-/-/0
*/
class LBRPerfReader : public PerfScriptReader { … };

/*
  Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
  which is used to generate CS profile. An example of hybrid sample:
    4005dc    # call stack leaf
    400634
    400684    # call stack root
    0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
*/
class HybridPerfReader : public PerfScriptReader { … };

/*
   Format of unsymbolized profile:

    [frame1 @ frame2 @ ...]  # If it's a CS profile
      number of entries in RangeCounter
      from_1-to_1:count_1
      from_2-to_2:count_2
      ......
      from_n-to_n:count_n
      number of entries in BranchCounter
      src_1->dst_1:count_1
      src_2->dst_2:count_2
      ......
      src_n->dst_n:count_n
    [frame1 @ frame2 @ ...]  # Next context
      ......

Note that non-CS profile doesn't have the empty `[]` context.
*/
class UnsymbolizedProfileReader : public PerfReaderBase { … };

} // end namespace sampleprof
} // end namespace llvm

#endif
llvm/llvm/tools/llvm-profgen/PerfReader.h