llvm/compiler-rt/lib/xray/xray_function_call_trie.h

//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function and
/// the path from the root to the node its stack trace. Each node in the trie
/// will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented using
/// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function
/// ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the hash
/// every time we need to update a function's call information as we're handling
/// the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
class FunctionCallTrie {};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H