//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This template implementation resides in a separate file so that it // does not get injected into every .cpp file that includes the // generic header. // // DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO. // // This file should only be included by files that implement a // specialization of the relvant templates. Currently these are: // - UniformityAnalysis.cpp // // Note: The DEBUG_TYPE macro should be defined before using this // file so that any use of LLVM_DEBUG is associated with the // including file rather than this file. // //===----------------------------------------------------------------------===// /// /// \file /// \brief Implementation of uniformity analysis. /// /// The algorithm is a fixed point iteration that starts with the assumption /// that all control flow and all values are uniform. Starting from sources of /// divergence (whose discovery must be implemented by a CFG- or even /// target-specific derived class), divergence of values is propagated from /// definition to uses in a straight-forward way. The main complexity lies in /// the propagation of the impact of divergent control flow on the divergence of /// values (sync dependencies). /// /// NOTE: In general, no interface exists for a transform to update /// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a /// transitive dependence, but it also does not provide an interface for /// updating itself. Given that, transforms should not preserve uniformity in /// their getAnalysisUsage() callback. /// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H #define LLVM_ADT_GENERICUNIFORMITYIMPL_H #include "llvm/ADT/GenericUniformityInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE … namespace llvm { /// Construct a specially modified post-order traversal of cycles. /// /// The ModifiedPO is contructed using a virtually modified CFG as follows: /// /// 1. The successors of pre-entry nodes (predecessors of an cycle /// entry that are outside the cycle) are replaced by the /// successors of the successors of the header. /// 2. Successors of the cycle header are replaced by the exit blocks /// of the cycle. /// /// Effectively, we produce a depth-first numbering with the following /// properties: /// /// 1. Nodes after a cycle are numbered earlier than the cycle header. /// 2. The header is numbered earlier than the nodes in the cycle. /// 3. The numbering of the nodes within the cycle forms an interval /// starting with the header. /// /// Effectively, the virtual modification arranges the nodes in a /// cycle as a DAG with the header as the sole leaf, and successors of /// the header as the roots. A reverse traversal of this numbering has /// the following invariant on the unmodified original CFG: /// /// Each node is visited after all its predecessors, except if that /// predecessor is the cycle header. /// template <typename ContextT> class ModifiedPostOrder { … }; template <typename> class DivergencePropagator; /// \class GenericSyncDependenceAnalysis /// /// \brief Locate join blocks for disjoint paths starting at a divergent branch. /// /// An analysis per divergent branch that returns the set of basic /// blocks whose phi nodes become divergent due to divergent control. /// These are the blocks that are reachable by two disjoint paths from /// the branch, or cycle exits reachable along a path that is disjoint /// from a path to the cycle latch. // --- Above line is not a doxygen comment; intentionally left blank --- // // Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis. // // The SyncDependenceAnalysis is used in the UniformityAnalysis to model // control-induced divergence in phi nodes. // // -- Reference -- // The algorithm is an extension of Section 5 of // // An abstract interpretation for SPMD divergence // on reducible control flow graphs. // Julian Rosemann, Simon Moll and Sebastian Hack // POPL '21 // // // -- Sync dependence -- // Sync dependence characterizes the control flow aspect of the // propagation of branch divergence. For example, // // %cond = icmp slt i32 %tid, 10 // br i1 %cond, label %then, label %else // then: // br label %merge // else: // br label %merge // merge: // %a = phi i32 [ 0, %then ], [ 1, %else ] // // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid // because %tid is not on its use-def chains, %a is sync dependent on %tid // because the branch "br i1 %cond" depends on %tid and affects which value %a // is assigned to. // // // -- Reduction to SSA construction -- // There are two disjoint paths from A to X, if a certain variant of SSA // construction places a phi node in X under the following set-up scheme. // // This variant of SSA construction ignores incoming undef values. // That is paths from the entry without a definition do not result in // phi nodes. // // entry // / \ // A \ // / \ Y // B C / // \ / \ / // D E // \ / // F // // Assume that A contains a divergent branch. We are interested // in the set of all blocks where each block is reachable from A // via two disjoint paths. This would be the set {D, F} in this // case. // To generally reduce this query to SSA construction we introduce // a virtual variable x and assign to x different values in each // successor block of A. // // entry // / \ // A \ // / \ Y // x = 0 x = 1 / // \ / \ / // D E // \ / // F // // Our flavor of SSA construction for x will construct the following // // entry // / \ // A \ // / \ Y // x0 = 0 x1 = 1 / // \ / \ / // x2 = phi E // \ / // x3 = phi // // The blocks D and F contain phi nodes and are thus each reachable // by two disjoins paths from A. // // -- Remarks -- // * In case of cycle exits we need to check for temporal divergence. // To this end, we check whether the definition of x differs between the // cycle exit and the cycle header (_after_ SSA construction). // // * In the presence of irreducible control flow, the fixed point is // reached only after multiple iterations. This is because labels // reaching the header of a cycle must be repropagated through the // cycle. This is true even in a reducible cycle, since the labels // may have been produced by a nested irreducible cycle. // // * Note that SyncDependenceAnalysis is not concerned with the points // of convergence in an irreducible cycle. It's only purpose is to // identify join blocks. The "diverged entry" criterion is // separately applied on join blocks to determine if an entire // irreducible cycle is assumed to be divergent. // // * Relevant related work: // A simple algorithm for global data flow analysis problems. // Matthew S. Hecht and Jeffrey D. Ullman. // SIAM Journal on Computing, 4(4):519–532, December 1975. // template <typename ContextT> class GenericSyncDependenceAnalysis { … }; /// \brief Analysis that identifies uniform values in a data-parallel /// execution. /// /// This analysis propagates divergence in a data-parallel context /// from sources of divergence to all users. It can be instantiated /// for an IR that provides a suitable SSAContext. template <typename ContextT> class GenericUniformityAnalysisImpl { … }; template <typename ImplT> void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) { … } /// Compute divergence starting with a divergent branch. template <typename ContextT> class DivergencePropagator { … }; template <typename ContextT> typename llvm::GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc; template <typename ContextT> llvm::GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis( const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI) : … { … } template <typename ContextT> auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks( const BlockT *DivTermBlock) -> const DivergenceDescriptor & { … } template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::markDivergent( const InstructionT &I) { … } template <typename ContextT> bool GenericUniformityAnalysisImpl<ContextT>::markDivergent( ConstValueRefT Val) { … } template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride( const InstructionT &Instr) { … } // Mark as divergent all external uses of values defined in \p DefCycle. // // A value V defined by a block B inside \p DefCycle may be used outside the // cycle only if the use is a PHI in some exit block, or B dominates some exit // block. Thus, we check uses as follows: // // - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle. // - For every block B inside \p DefCycle that dominates at least one exit // block, check all uses outside \p DefCycle. // // FIXME: This function does not distinguish between divergent and uniform // exits. For each divergent exit, only the values that are live at that exit // need to be propagated as divergent at their use outside the cycle. template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence( const CycleT &DefCycle) { … } template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence( const BlockT &DivExit, const CycleT &InnerDivCycle) { … } template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs( const BlockT &BB) { … } /// Mark divergent phi nodes in a join block template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes( const BlockT &JoinBlock) { … } /// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles. /// /// \return true iff \p Candidate was added to \p Cycles. template <typename CycleT> static bool insertIfNotContained(SmallVector<CycleT *> &Cycles, CycleT *Candidate) { … } /// Return the outermost cycle made divergent by branch outside it. /// /// If two paths that diverged outside an irreducible cycle join /// inside that cycle, then that whole cycle is assumed to be /// divergent. This does not apply if the cycle is reducible. template <typename CycleT, typename BlockT> static const CycleT *getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock) { … } /// Return the outermost cycle made divergent by branch inside it. /// /// This checks the "diverged entry" criterion defined in the /// docs/ConvergenceAnalysis.html. template <typename ContextT, typename CycleT, typename BlockT, typename DominatorTreeT> static const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context) { … } template <typename ContextT, typename CycleT, typename BlockT, typename DominatorTreeT> static const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context) { … } template <typename ContextT> bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent( const BlockT &ObservingBlock, const InstructionT &Def) const { … } template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence( const InstructionT &Term) { … } template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::compute() { … } template <typename ContextT> bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform( const InstructionT &Instr) const { … } template <typename ContextT> GenericUniformityInfo<ContextT>::GenericUniformityInfo( const DominatorTreeT &DT, const CycleInfoT &CI, const TargetTransformInfo *TTI) { … } template <typename ContextT> void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const { … } template <typename ContextT> bool GenericUniformityInfo<ContextT>::hasDivergence() const { … } template <typename ContextT> const typename ContextT::FunctionT & GenericUniformityInfo<ContextT>::getFunction() const { … } /// Whether \p V is divergent at its definition. template <typename ContextT> bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const { … } template <typename ContextT> bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const { … } template <typename ContextT> bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const { … } template <typename ContextT> bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) { … } /// \brief T helper function for printing. template <typename ContextT> void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const { … } template <typename ContextT> void llvm::ModifiedPostOrder<ContextT>::computeStackPO( SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI, const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) { … } template <typename ContextT> void ModifiedPostOrder<ContextT>::computeCyclePO( const CycleInfoT &CI, const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) { … } /// \brief Generically compute the modified post order. template <typename ContextT> void llvm::ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) { … } } // namespace llvm #undef DEBUG_TYPE #endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H