//===- xray-stacks.cpp: XRay Function Call Stack Accounting ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements stack-based accounting. It takes XRay traces, and // collates statistics across these traces to show a breakdown of time spent // at various points of the stack to provide insight into which functions // spend the most time in terms of a call stack. We provide a few // sorting/filtering options for zero'ing in on the useful stacks. // //===----------------------------------------------------------------------===// #include <forward_list> #include <numeric> #include "func-id-helper.h" #include "trie-node.h" #include "xray-registry.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/XRay/Graph.h" #include "llvm/XRay/InstrumentationMap.h" #include "llvm/XRay/Trace.h" usingnamespacellvm; usingnamespacellvm::xray; static cl::SubCommand Stack("stack", "Call stack accounting"); static cl::list<std::string> StackInputs(cl::Positional, cl::desc("<xray trace>"), cl::Required, cl::sub(Stack), cl::OneOrMore); static cl::opt<bool> StackKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), cl::sub(Stack), cl::init(false)); static cl::alias StackKeepGoing2("k", cl::aliasopt(StackKeepGoing), cl::desc("Alias for -keep-going")); // TODO: Does there need to be an option to deduce tail or sibling calls? static cl::opt<std::string> StacksInstrMap( "instr_map", cl::desc("instrumentation map used to identify function ids. " "Currently supports elf file instrumentation maps."), cl::sub(Stack), cl::init("")); static cl::alias StacksInstrMap2("m", cl::aliasopt(StacksInstrMap), cl::desc("Alias for -instr_map")); static cl::opt<bool> SeparateThreadStacks("per-thread-stacks", cl::desc("Report top stacks within each thread id"), cl::sub(Stack), cl::init(false)); static cl::opt<bool> AggregateThreads("aggregate-threads", cl::desc("Aggregate stack times across threads"), cl::sub(Stack), cl::init(false)); static cl::opt<bool> DumpAllStacks("all-stacks", cl::desc("Dump sum of timings for all stacks. " "By default separates stacks per-thread."), cl::sub(Stack), cl::init(false)); static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks), cl::desc("Alias for -all-stacks")); // TODO(kpw): Add other interesting formats. Perhaps chrome trace viewer format // possibly with aggregations or just a linear trace of timings. enum StackOutputFormat { … }; static cl::opt<StackOutputFormat> StacksOutputFormat( "stack-format", cl::desc("The format that output stacks should be " "output in. Only applies with all-stacks."), cl::values( clEnumValN(HUMAN, "human", "Human readable output. Only valid without -all-stacks."), clEnumValN(FLAMETOOL, "flame", "Format consumable by Brendan Gregg's FlameGraph tool. " "Only valid with -all-stacks.")), cl::sub(Stack), cl::init(HUMAN)); // Types of values for each stack in a CallTrie. enum class AggregationType { … }; static cl::opt<AggregationType> RequestedAggregation( "aggregation-type", cl::desc("The type of aggregation to do on call stacks."), cl::values( clEnumValN( AggregationType::TOTAL_TIME, "time", "Capture the total time spent in an all invocations of a stack."), clEnumValN(AggregationType::INVOCATION_COUNT, "count", "Capture the number of times a stack was invoked. " "In flamegraph mode, this count also includes invocations " "of all callees.")), cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME)); /// A helper struct to work with formatv and XRayRecords. Makes it easier to /// use instrumentation map names or addresses in formatted output. struct format_xray_record : public FormatAdapter<XRayRecord> { … }; /// The stack command will take a set of XRay traces as arguments, and collects /// information about the stacks of instrumented functions that appear in the /// traces. We track the following pieces of information: /// /// - Total time: amount of time/cycles accounted for in the traces. /// - Stack count: number of times a specific stack appears in the /// traces. Only instrumented functions show up in stacks. /// - Cumulative stack time: amount of time spent in a stack accumulated /// across the invocations in the traces. /// - Cumulative local time: amount of time spent in each instrumented /// function showing up in a specific stack, accumulated across the traces. /// /// Example output for the kind of data we'd like to provide looks like the /// following: /// /// Total time: 3.33234 s /// Stack ID: ... /// Stack Count: 2093 /// # Function Local Time (%) Stack Time (%) /// 0 main 2.34 ms 0.07% 3.33234 s 100% /// 1 foo() 3.30000 s 99.02% 3.33 s 99.92% /// 2 bar() 30 ms 0.90% 30 ms 0.90% /// /// We can also show distributions of the function call durations with /// statistics at each level of the stack. This works by doing the following /// algorithm: /// /// 1. When unwinding, record the duration of each unwound function associated /// with the path up to which the unwinding stops. For example: /// /// Step Duration (? means has start time) /// /// push a <start time> a = ? /// push b <start time> a = ?, a->b = ? /// push c <start time> a = ?, a->b = ?, a->b->c = ? /// pop c <end time> a = ?, a->b = ?, emit duration(a->b->c) /// pop b <end time> a = ?, emit duration(a->b) /// push c <start time> a = ?, a->c = ? /// pop c <end time> a = ?, emit duration(a->c) /// pop a <end time> emit duration(a) /// /// 2. We then account for the various stacks we've collected, and for each of /// them will have measurements that look like the following (continuing /// with the above simple example): /// /// c : [<id("a->b->c"), [durations]>, <id("a->c"), [durations]>] /// b : [<id("a->b"), [durations]>] /// a : [<id("a"), [durations]>] /// /// This allows us to compute, for each stack id, and each function that /// shows up in the stack, some important statistics like: /// /// - median /// - 99th percentile /// - mean + stddev /// - count /// /// 3. For cases where we don't have durations for some of the higher levels /// of the stack (perhaps instrumentation wasn't activated when the stack was /// entered), we can mark them appropriately. /// /// Computing this data also allows us to implement lookup by call stack nodes, /// so that we can find functions that show up in multiple stack traces and /// show the statistical properties of that function in various contexts. We /// can compute information similar to the following: /// /// Function: 'c' /// Stacks: 2 / 2 /// Stack ID: ... /// Stack Count: ... /// # Function ... /// 0 a ... /// 1 b ... /// 2 c ... /// /// Stack ID: ... /// Stack Count: ... /// # Function ... /// 0 a ... /// 1 c ... /// ----------------... /// /// Function: 'b' /// Stacks: 1 / 2 /// Stack ID: ... /// Stack Count: ... /// # Function ... /// 0 a ... /// 1 b ... /// 2 c ... /// /// /// To do this we require a Trie data structure that will allow us to represent /// all the call stacks of instrumented functions in an easily traversible /// manner when we do the aggregations and lookups. For instrumented call /// sequences like the following: /// /// a() /// b() /// c() /// d() /// c() /// /// We will have a representation like so: /// /// a -> b -> c /// | | /// | +--> d /// | /// +--> c /// /// We maintain a sequence of durations on the leaves and in the internal nodes /// as we go through and process every record from the XRay trace. We also /// maintain an index of unique functions, and provide a means of iterating /// through all the instrumented call stacks which we know about. namespace { struct StackDuration { … }; } // namespace static StackDuration mergeStackDuration(const StackDuration &Left, const StackDuration &Right) { … } StackTrieNode; template <AggregationType AggType> static std::size_t GetValueForStack(const StackTrieNode *Node); // When computing total time spent in a stack, we're adding the timings from // its callees and the timings from when it was a leaf. template <> std::size_t GetValueForStack<AggregationType::TOTAL_TIME>(const StackTrieNode *Node) { … } // Calculates how many times a function was invoked. // TODO: Hook up option to produce stacks template <> std::size_t GetValueForStack<AggregationType::INVOCATION_COUNT>(const StackTrieNode *Node) { … } // Make sure there are implementations for each enum value. template <AggregationType T> struct DependentFalseType : std::false_type { … }; template <AggregationType AggType> std::size_t GetValueForStack(const StackTrieNode *Node) { … } class StackTrie { … }; static std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error, const XRayRecord &Record, const FuncIdConversionHelper &Converter) { … } static CommandRegistration Unused(&Stack, []() -> Error { … });