llvm/bolt/include/bolt/Passes/DataflowAnalysis.h

//===- bolt/Passes/DataflowAnalysis.h ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_DATAFLOWANALYSIS_H
#define BOLT_PASSES_DATAFLOWANALYSIS_H

#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/Errc.h"
#include <optional>
#include <queue>

namespace llvm {
namespace bolt {

/// Represents a given program point as viewed by a dataflow analysis. This
/// point is a location that may be either an instruction or a basic block.
///  Example:
///
///    BB1:    --> ProgramPoint 1  (stored as bb *)
///      add   --> ProgramPoint 2  (stored as inst *)
///      sub   --> ProgramPoint 3  (stored as inst *)
///      jmp   --> ProgramPoint 4  (stored as inst *)
///
/// ProgramPoints allow us to attach a state to any location in the program
/// and is a core concept used in the dataflow analysis engine.
///
/// A dataflow analysis will associate a state with a program point. In
/// analyses whose direction is forward, this state tracks what happened after
/// the execution of an instruction, and the BB tracks the state of what
/// happened before the execution of the first instruction in this BB. For
/// backwards dataflow analyses, state tracks what happened before the
/// execution of a given instruction, while the state associated with a BB
/// tracks what happened after the execution of the last instruction of a BB.
class ProgramPoint {};

/// Convenience function to operate on all predecessors of a BB, as viewed
/// by a dataflow analysis. This includes throw sites if it is a landing pad.
void doForAllPreds(const BinaryBasicBlock &BB,
                   std::function<void(ProgramPoint)> Task);

/// Operates on all successors of a basic block.
void doForAllSuccs(const BinaryBasicBlock &BB,
                   std::function<void(ProgramPoint)> Task);

/// Default printer for State data.
template <typename StateTy> class StatePrinter {};

/// Printer for State data that is a BitVector of registers.
class RegStatePrinter {};

/// Base class for dataflow analyses. Depends on the type of whatever object is
/// stored as the state (StateTy) at each program point. The dataflow then
/// updates the state at each program point depending on the instruction being
/// processed, iterating until all points converge and agree on a state value.
/// Remember that depending on how you formulate your dataflow equation, this
/// may not converge and will loop indefinitely.
/// /p Backward indicates the direction of the dataflow. If false, direction is
/// forward.
///
/// Example: Compute the set of live registers at each program point.
///
///   Modelling:
///     Let State be the set of registers that are live. The kill set of a
///     point is the set of all registers clobbered by the instruction at this
///     program point. The gen set is the set of all registers read by it.
///
///       out{b} = Union (s E succs{b}) {in{s}}
///       in{b}  = (out{b} - kill{b}) U gen{b}
///
///   Template parameters:
///     StateTy = BitVector, where each index corresponds to a machine register
///     Backward = true   (live reg operates in reverse order)
///
///   Subclass implementation notes:
///     Confluence operator = union  (if a reg is alive in any succ, it is alive
///     in the current block).
///
template <typename Derived, typename StateTy, bool Backward = false,
          typename StatePrinterTy = StatePrinter<StateTy>>
class DataflowAnalysis {
  /// CRTP convenience methods
  Derived &derived() {}

  const Derived &const_derived() const {}

  mutable std::optional<unsigned> AnnotationIndex;

protected:
  const BinaryContext &BC;
  /// Reference to the function being analysed
  BinaryFunction &Func;

  /// The id of the annotation allocator to be used
  MCPlusBuilder::AllocatorIdTy AllocatorId = 0;

  /// Tracks the state at basic block start (end) if direction of the dataflow
  /// is forward (backward).
  std::unordered_map<const BinaryBasicBlock *, StateTy> StateAtBBEntry;
  /// Map a point to its previous (succeeding) point if the direction of the
  /// dataflow is forward (backward). This is used to support convenience
  /// methods to access the resulting state before (after) a given instruction,
  /// otherwise our clients need to keep "prev" pointers themselves.
  DenseMap<const MCInst *, ProgramPoint> PrevPoint;

  /// Perform any bookkeeping before dataflow starts
  void preflight() {}

  /// Sets initial state for each BB
  StateTy getStartingStateAtBB(const BinaryBasicBlock &BB) {}

  /// Sets initial state for each instruction (out set)
  StateTy getStartingStateAtPoint(const MCInst &Point) {}

  /// Computes the in set for the first instruction in a BB by applying the
  /// confluence operator to the out sets of the last instruction of each pred
  /// (in case of a backwards dataflow, we will operate on the in sets of each
  /// successor to determine the starting state of the last instruction of the
  /// current BB)
  void doConfluence(StateTy &StateOut, const StateTy &StateIn) {}

  /// In case of a forwards dataflow, compute the in set for the first
  /// instruction in a Landing Pad considering all out sets for associated
  /// throw sites.
  /// In case of a backwards dataflow, compute the in set of a invoke
  /// instruction considering in sets for the first instructions of its
  /// landing pads.
  void doConfluenceWithLP(StateTy &StateOut, const StateTy &StateIn,
                          const MCInst &Invoke) {}

  /// Returns the out set of an instruction given its in set.
  /// If backwards, computes the in set given its out set.
  StateTy computeNext(const MCInst &Point, const StateTy &Cur) {}

  /// Returns the MCAnnotation name
  StringRef getAnnotationName() const {}

  unsigned getAnnotationIndex() const {}

  /// Private getter methods accessing state in a read-write fashion
  StateTy &getOrCreateStateAt(const BinaryBasicBlock &BB) {}

  StateTy &getOrCreateStateAt(MCInst &Point) {}

  StateTy &getOrCreateStateAt(ProgramPoint Point) {}

public:
  /// Return the allocator id
  unsigned getAllocatorId() {}

  /// If the direction of the dataflow is forward, operates on the last
  /// instruction of all predecessors when performing an iteration of the
  /// dataflow equation for the start of this BB.  If backwards, operates on
  /// the first instruction of all successors.
  void doForAllSuccsOrPreds(const BinaryBasicBlock &BB,
                            std::function<void(ProgramPoint)> Task) {}

  /// We need the current binary context and the function that will be processed
  /// in this dataflow analysis.
  DataflowAnalysis(BinaryFunction &BF,
                   MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      :{}

  virtual ~DataflowAnalysis() {}

  /// Track the state at basic block start (end) if direction of the dataflow
  /// is forward (backward).
  ErrorOr<const StateTy &> getStateAt(const BinaryBasicBlock &BB) const {}

  /// Track the state at the end (start) of each MCInst in this function if
  /// the direction of the dataflow is forward (backward).
  ErrorOr<const StateTy &> getStateAt(const MCInst &Point) const {}

  /// Return the out set (in set) of a given program point if the direction of
  /// the dataflow is forward (backward).
  ErrorOr<const StateTy &> getStateAt(ProgramPoint Point) const {}

  /// Relies on a ptr map to fetch the previous instruction and then retrieve
  /// state. WARNING: Watch out for invalidated pointers. Do not use this
  /// function if you invalidated pointers after the analysis has been completed
  ErrorOr<const StateTy &> getStateBefore(const MCInst &Point) {}

  ErrorOr<const StateTy &> getStateBefore(ProgramPoint Point) {}

  /// Remove any state annotations left by this analysis
  void cleanAnnotations() {}

  /// Public entry point that will perform the entire analysis form start to
  /// end.
  void run() {}
};

/// Define an iterator for navigating the expressions calculated by a
/// dataflow analysis at each program point, when they are backed by a
/// BitVector.
class ExprIterator {};

/// Specialization of DataflowAnalysis whose state specifically stores
/// a set of instructions.
template <typename Derived, bool Backward = false,
          typename StatePrinterTy = StatePrinter<BitVector>>
class InstrsDataflowAnalysis
    : public DataflowAnalysis<Derived, BitVector, Backward, StatePrinterTy> {
public:
  /// These iterator functions offer access to the set of pointers to
  /// instructions in a given program point
  template <typename T> ExprIterator expr_begin(const T &Point) const {}
  ExprIterator expr_begin(const BitVector &BV) const {}
  ExprIterator expr_end() const {}

  /// Used to size the set of expressions/definitions being tracked by the
  /// dataflow analysis
  uint64_t NumInstrs{0};
  /// We put every MCInst we want to track (which one representing an
  /// expression/def) into a vector because we need to associate them with
  /// small numbers. They will be tracked via BitVectors throughout the
  /// dataflow analysis.
  std::vector<MCInst *> Expressions;
  /// Maps expressions defs (MCInsts) to its index in the Expressions vector
  std::unordered_map<const MCInst *, uint64_t> ExprToIdx;

  /// Return whether \p Expr is in the state set at \p Point
  bool count(ProgramPoint Point, const MCInst &Expr) const {}

  bool count(const MCInst &Point, const MCInst &Expr) const {}

  /// Return whether \p Expr is in the state set at the instr of index
  /// \p PointIdx
  bool count(unsigned PointIdx, const MCInst &Expr) const {}

  InstrsDataflowAnalysis(BinaryFunction &BF,
                         MCPlusBuilder::AllocatorIdTy AllocId = 0)
      :{}
  virtual ~InstrsDataflowAnalysis() {}
};

} // namespace bolt

/// DenseMapInfo allows us to use the DenseMap LLVM data structure to store
/// ProgramPoints.
template <> struct DenseMapInfo<bolt::ProgramPoint> {};

raw_ostream &operator<<(raw_ostream &OS, const BitVector &Val);

} // namespace llvm

#endif