//===- bolt/Passes/DataflowAnalysis.h ---------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef BOLT_PASSES_DATAFLOWANALYSIS_H #define BOLT_PASSES_DATAFLOWANALYSIS_H #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryFunction.h" #include "llvm/Support/Errc.h" #include <optional> #include <queue> namespace llvm { namespace bolt { /// Represents a given program point as viewed by a dataflow analysis. This /// point is a location that may be either an instruction or a basic block. /// Example: /// /// BB1: --> ProgramPoint 1 (stored as bb *) /// add --> ProgramPoint 2 (stored as inst *) /// sub --> ProgramPoint 3 (stored as inst *) /// jmp --> ProgramPoint 4 (stored as inst *) /// /// ProgramPoints allow us to attach a state to any location in the program /// and is a core concept used in the dataflow analysis engine. /// /// A dataflow analysis will associate a state with a program point. In /// analyses whose direction is forward, this state tracks what happened after /// the execution of an instruction, and the BB tracks the state of what /// happened before the execution of the first instruction in this BB. For /// backwards dataflow analyses, state tracks what happened before the /// execution of a given instruction, while the state associated with a BB /// tracks what happened after the execution of the last instruction of a BB. class ProgramPoint { … }; /// Convenience function to operate on all predecessors of a BB, as viewed /// by a dataflow analysis. This includes throw sites if it is a landing pad. void doForAllPreds(const BinaryBasicBlock &BB, std::function<void(ProgramPoint)> Task); /// Operates on all successors of a basic block. void doForAllSuccs(const BinaryBasicBlock &BB, std::function<void(ProgramPoint)> Task); /// Default printer for State data. template <typename StateTy> class StatePrinter { … }; /// Printer for State data that is a BitVector of registers. class RegStatePrinter { … }; /// Base class for dataflow analyses. Depends on the type of whatever object is /// stored as the state (StateTy) at each program point. The dataflow then /// updates the state at each program point depending on the instruction being /// processed, iterating until all points converge and agree on a state value. /// Remember that depending on how you formulate your dataflow equation, this /// may not converge and will loop indefinitely. /// /p Backward indicates the direction of the dataflow. If false, direction is /// forward. /// /// Example: Compute the set of live registers at each program point. /// /// Modelling: /// Let State be the set of registers that are live. The kill set of a /// point is the set of all registers clobbered by the instruction at this /// program point. The gen set is the set of all registers read by it. /// /// out{b} = Union (s E succs{b}) {in{s}} /// in{b} = (out{b} - kill{b}) U gen{b} /// /// Template parameters: /// StateTy = BitVector, where each index corresponds to a machine register /// Backward = true (live reg operates in reverse order) /// /// Subclass implementation notes: /// Confluence operator = union (if a reg is alive in any succ, it is alive /// in the current block). /// template <typename Derived, typename StateTy, bool Backward = false, typename StatePrinterTy = StatePrinter<StateTy>> class DataflowAnalysis { /// CRTP convenience methods Derived &derived() { … } const Derived &const_derived() const { … } mutable std::optional<unsigned> AnnotationIndex; protected: const BinaryContext &BC; /// Reference to the function being analysed BinaryFunction &Func; /// The id of the annotation allocator to be used MCPlusBuilder::AllocatorIdTy AllocatorId = 0; /// Tracks the state at basic block start (end) if direction of the dataflow /// is forward (backward). std::unordered_map<const BinaryBasicBlock *, StateTy> StateAtBBEntry; /// Map a point to its previous (succeeding) point if the direction of the /// dataflow is forward (backward). This is used to support convenience /// methods to access the resulting state before (after) a given instruction, /// otherwise our clients need to keep "prev" pointers themselves. DenseMap<const MCInst *, ProgramPoint> PrevPoint; /// Perform any bookkeeping before dataflow starts void preflight() { … } /// Sets initial state for each BB StateTy getStartingStateAtBB(const BinaryBasicBlock &BB) { … } /// Sets initial state for each instruction (out set) StateTy getStartingStateAtPoint(const MCInst &Point) { … } /// Computes the in set for the first instruction in a BB by applying the /// confluence operator to the out sets of the last instruction of each pred /// (in case of a backwards dataflow, we will operate on the in sets of each /// successor to determine the starting state of the last instruction of the /// current BB) void doConfluence(StateTy &StateOut, const StateTy &StateIn) { … } /// In case of a forwards dataflow, compute the in set for the first /// instruction in a Landing Pad considering all out sets for associated /// throw sites. /// In case of a backwards dataflow, compute the in set of a invoke /// instruction considering in sets for the first instructions of its /// landing pads. void doConfluenceWithLP(StateTy &StateOut, const StateTy &StateIn, const MCInst &Invoke) { … } /// Returns the out set of an instruction given its in set. /// If backwards, computes the in set given its out set. StateTy computeNext(const MCInst &Point, const StateTy &Cur) { … } /// Returns the MCAnnotation name StringRef getAnnotationName() const { … } unsigned getAnnotationIndex() const { … } /// Private getter methods accessing state in a read-write fashion StateTy &getOrCreateStateAt(const BinaryBasicBlock &BB) { … } StateTy &getOrCreateStateAt(MCInst &Point) { … } StateTy &getOrCreateStateAt(ProgramPoint Point) { … } public: /// Return the allocator id unsigned getAllocatorId() { … } /// If the direction of the dataflow is forward, operates on the last /// instruction of all predecessors when performing an iteration of the /// dataflow equation for the start of this BB. If backwards, operates on /// the first instruction of all successors. void doForAllSuccsOrPreds(const BinaryBasicBlock &BB, std::function<void(ProgramPoint)> Task) { … } /// We need the current binary context and the function that will be processed /// in this dataflow analysis. DataflowAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId = 0) : … { … } virtual ~DataflowAnalysis() { … } /// Track the state at basic block start (end) if direction of the dataflow /// is forward (backward). ErrorOr<const StateTy &> getStateAt(const BinaryBasicBlock &BB) const { … } /// Track the state at the end (start) of each MCInst in this function if /// the direction of the dataflow is forward (backward). ErrorOr<const StateTy &> getStateAt(const MCInst &Point) const { … } /// Return the out set (in set) of a given program point if the direction of /// the dataflow is forward (backward). ErrorOr<const StateTy &> getStateAt(ProgramPoint Point) const { … } /// Relies on a ptr map to fetch the previous instruction and then retrieve /// state. WARNING: Watch out for invalidated pointers. Do not use this /// function if you invalidated pointers after the analysis has been completed ErrorOr<const StateTy &> getStateBefore(const MCInst &Point) { … } ErrorOr<const StateTy &> getStateBefore(ProgramPoint Point) { … } /// Remove any state annotations left by this analysis void cleanAnnotations() { … } /// Public entry point that will perform the entire analysis form start to /// end. void run() { … } }; /// Define an iterator for navigating the expressions calculated by a /// dataflow analysis at each program point, when they are backed by a /// BitVector. class ExprIterator { … }; /// Specialization of DataflowAnalysis whose state specifically stores /// a set of instructions. template <typename Derived, bool Backward = false, typename StatePrinterTy = StatePrinter<BitVector>> class InstrsDataflowAnalysis : public DataflowAnalysis<Derived, BitVector, Backward, StatePrinterTy> { public: /// These iterator functions offer access to the set of pointers to /// instructions in a given program point template <typename T> ExprIterator expr_begin(const T &Point) const { … } ExprIterator expr_begin(const BitVector &BV) const { … } ExprIterator expr_end() const { … } /// Used to size the set of expressions/definitions being tracked by the /// dataflow analysis uint64_t NumInstrs{0}; /// We put every MCInst we want to track (which one representing an /// expression/def) into a vector because we need to associate them with /// small numbers. They will be tracked via BitVectors throughout the /// dataflow analysis. std::vector<MCInst *> Expressions; /// Maps expressions defs (MCInsts) to its index in the Expressions vector std::unordered_map<const MCInst *, uint64_t> ExprToIdx; /// Return whether \p Expr is in the state set at \p Point bool count(ProgramPoint Point, const MCInst &Expr) const { … } bool count(const MCInst &Point, const MCInst &Expr) const { … } /// Return whether \p Expr is in the state set at the instr of index /// \p PointIdx bool count(unsigned PointIdx, const MCInst &Expr) const { … } InstrsDataflowAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId = 0) : … { … } virtual ~InstrsDataflowAnalysis() { … } }; } // namespace bolt /// DenseMapInfo allows us to use the DenseMap LLVM data structure to store /// ProgramPoints. template <> struct DenseMapInfo<bolt::ProgramPoint> { … }; raw_ostream &operator<<(raw_ostream &OS, const BitVector &Val); } // namespace llvm #endif