BottleneckAnalysis.h | Explore in Territory

//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the bottleneck analysis view.
///
/// This view internally observes backend pressure increase events in order to
/// identify problematic data dependencies and processor resource interferences.
///
/// Example of bottleneck analysis report for a dot-product on X86 btver2:
///
/// Cycles with backend pressure increase [ 40.76% ]
/// Throughput Bottlenecks: 
///   Resource Pressure       [ 39.34% ]
///   - JFPA  [ 39.34% ]
///   - JFPU0  [ 39.34% ]
///   Data Dependencies:      [ 1.42% ]
///   - Register Dependencies [ 1.42% ]
///   - Memory Dependencies   [ 0.00% ]
///
/// According to the example, backend pressure increased during the 40.76% of
/// the simulated cycles.  In particular, the major cause of backend pressure
/// increases was the contention on floating point adder JFPA accessible from
/// pipeline resource JFPU0.
///
/// At the end of each cycle, if pressure on the simulated out-of-order buffers
/// has increased, a backend pressure event is reported.
/// In particular, this occurs when there is a delta between the number of uOps
/// dispatched and the number of uOps issued to the underlying pipelines.
///
/// The bottleneck analysis view is also responsible for identifying and
/// printing the most "critical" sequence of dependent instructions according to
/// the simulated run.
///
/// Below is the critical sequence computed for the dot-product example on
/// btver2:
///
///              Instruction                     Dependency Information
/// +----< 2.    vhaddps %xmm3, %xmm3, %xmm4
/// |
/// |    < loop carried > 
/// |
/// |      0.    vmulps	 %xmm0, %xmm0, %xmm2
/// +----> 1.    vhaddps %xmm2, %xmm2, %xmm3     ## RESOURCE interference:  JFPA [ probability: 73% ]
/// +----> 2.    vhaddps %xmm3, %xmm3, %xmm4     ## REGISTER dependency:  %xmm3
/// |
/// |    < loop carried > 
/// |
/// +----> 1.    vhaddps %xmm2, %xmm2, %xmm3     ## RESOURCE interference:  JFPA [ probability: 73% ]
///
///
/// The algorithm that computes the critical sequence is very similar to a
/// critical path analysis.
/// 
/// A dependency graph is used internally to track dependencies between nodes.
/// Nodes of the graph represent instructions from the input assembly sequence,
/// and edges of the graph represent data dependencies or processor resource
/// interferences.
///
/// Edges are dynamically 'discovered' by observing instruction state
/// transitions and backend pressure increase events. Edges are internally
/// ranked based on their "criticality". A dependency is considered to be
/// critical if it takes a long time to execute, and if it contributes to
/// backend pressure increases. Criticality is internally measured in terms of
/// cycles; it is computed for every edge in the graph as a function of the edge
/// latency and the number of backend pressure increase cycles contributed by
/// that edge.
///
/// At the end of simulation, costs are propagated to nodes through the edges of
/// the graph, and the most expensive path connecting the root-set (a
/// set of nodes with no predecessors) to a leaf node is reported as critical
/// sequence.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H

#include "Views/InstructionView.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {
namespace mca {

class PressureTracker { … };

// A dependency edge.
struct DependencyEdge { … };

// A dependency graph used by the bottleneck analysis to describe data
// dependencies and processor resource interferences between instructions.
//
// There is a node (an instance of struct DGNode) for every instruction in the
// input assembly sequence. Edges of the graph represent dependencies between
// instructions.
//
// Each edge of the graph is associated with a cost value which is used
// internally to rank dependency based on their impact on the runtime
// performance (see field DependencyEdge::Dependency::Cost). In general, the
// higher the cost of an edge, the higher the impact on performance.
//
// The cost of a dependency is a function of both the latency and the number of
// cycles where the dependency has been seen as critical (i.e. contributing to
// back-pressure increases).
//
// Loop carried dependencies are carefully expanded by the bottleneck analysis
// to guarantee that the graph stays acyclic. To this end, extra nodes are
// pre-allocated at construction time to describe instructions from "past and
// future" iterations. The graph is kept acyclic mainly because it simplifies
// the complexity of the algorithm that computes the critical sequence.
class DependencyGraph { … };

/// A view that collects and prints a few performance numbers.
class BottleneckAnalysis : public InstructionView { … };

} // namespace mca
} // namespace llvm

#endif
llvm/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h