llvm/llvm/include/llvm/ADT/GenericUniformityImpl.h

//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This template implementation resides in a separate file so that it
// does not get injected into every .cpp file that includes the
// generic header.
//
// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
//
// This file should only be included by files that implement a
// specialization of the relvant templates. Currently these are:
// - UniformityAnalysis.cpp
//
// Note: The DEBUG_TYPE macro should be defined before using this
// file so that any use of LLVM_DEBUG is associated with the
// including file rather than this file.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implementation of uniformity analysis.
///
/// The algorithm is a fixed point iteration that starts with the assumption
/// that all control flow and all values are uniform. Starting from sources of
/// divergence (whose discovery must be implemented by a CFG- or even
/// target-specific derived class), divergence of values is propagated from
/// definition to uses in a straight-forward way. The main complexity lies in
/// the propagation of the impact of divergent control flow on the divergence of
/// values (sync dependencies).
///
/// NOTE: In general, no interface exists for a transform to update
/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
/// transitive dependence, but it also does not provide an interface for
/// updating itself. Given that, transforms should not preserve uniformity in
/// their getAnalysisUsage() callback.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
#define LLVM_ADT_GENERICUNIFORMITYIMPL_H

#include "llvm/ADT/GenericUniformityInfo.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE

namespace llvm {

/// Construct a specially modified post-order traversal of cycles.
///
/// The ModifiedPO is contructed using a virtually modified CFG as follows:
///
/// 1. The successors of pre-entry nodes (predecessors of an cycle
///    entry that are outside the cycle) are replaced by the
///    successors of the successors of the header.
/// 2. Successors of the cycle header are replaced by the exit blocks
///    of the cycle.
///
/// Effectively, we produce a depth-first numbering with the following
/// properties:
///
/// 1. Nodes after a cycle are numbered earlier than the cycle header.
/// 2. The header is numbered earlier than the nodes in the cycle.
/// 3. The numbering of the nodes within the cycle forms an interval
///    starting with the header.
///
/// Effectively, the virtual modification arranges the nodes in a
/// cycle as a DAG with the header as the sole leaf, and successors of
/// the header as the roots. A reverse traversal of this numbering has
/// the following invariant on the unmodified original CFG:
///
///    Each node is visited after all its predecessors, except if that
///    predecessor is the cycle header.
///
template <typename ContextT> class ModifiedPostOrder {};

template <typename> class DivergencePropagator;

/// \class GenericSyncDependenceAnalysis
///
/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
///
/// An analysis per divergent branch that returns the set of basic
/// blocks whose phi nodes become divergent due to divergent control.
/// These are the blocks that are reachable by two disjoint paths from
/// the branch, or cycle exits reachable along a path that is disjoint
/// from a path to the cycle latch.

// --- Above line is not a doxygen comment; intentionally left blank ---
//
// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
//
// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
// control-induced divergence in phi nodes.
//
// -- Reference --
// The algorithm is an extension of Section 5 of
//
//   An abstract interpretation for SPMD divergence
//       on reducible control flow graphs.
//   Julian Rosemann, Simon Moll and Sebastian Hack
//   POPL '21
//
//
// -- Sync dependence --
// Sync dependence characterizes the control flow aspect of the
// propagation of branch divergence. For example,
//
//   %cond = icmp slt i32 %tid, 10
//   br i1 %cond, label %then, label %else
// then:
//   br label %merge
// else:
//   br label %merge
// merge:
//   %a = phi i32 [ 0, %then ], [ 1, %else ]
//
// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
// because %tid is not on its use-def chains, %a is sync dependent on %tid
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
//
// -- Reduction to SSA construction --
// There are two disjoint paths from A to X, if a certain variant of SSA
// construction places a phi node in X under the following set-up scheme.
//
// This variant of SSA construction ignores incoming undef values.
// That is paths from the entry without a definition do not result in
// phi nodes.
//
//       entry
//     /      \
//    A        \
//  /   \       Y
// B     C     /
//  \   /  \  /
//    D     E
//     \   /
//       F
//
// Assume that A contains a divergent branch. We are interested
// in the set of all blocks where each block is reachable from A
// via two disjoint paths. This would be the set {D, F} in this
// case.
// To generally reduce this query to SSA construction we introduce
// a virtual variable x and assign to x different values in each
// successor block of A.
//
//           entry
//         /      \
//        A        \
//      /   \       Y
// x = 0   x = 1   /
//      \  /   \  /
//        D     E
//         \   /
//           F
//
// Our flavor of SSA construction for x will construct the following
//
//            entry
//          /      \
//         A        \
//       /   \       Y
// x0 = 0   x1 = 1  /
//       \   /   \ /
//     x2 = phi   E
//         \     /
//         x3 = phi
//
// The blocks D and F contain phi nodes and are thus each reachable
// by two disjoins paths from A.
//
// -- Remarks --
// * In case of cycle exits we need to check for temporal divergence.
//   To this end, we check whether the definition of x differs between the
//   cycle exit and the cycle header (_after_ SSA construction).
//
// * In the presence of irreducible control flow, the fixed point is
//   reached only after multiple iterations. This is because labels
//   reaching the header of a cycle must be repropagated through the
//   cycle. This is true even in a reducible cycle, since the labels
//   may have been produced by a nested irreducible cycle.
//
// * Note that SyncDependenceAnalysis is not concerned with the points
//   of convergence in an irreducible cycle. It's only purpose is to
//   identify join blocks. The "diverged entry" criterion is
//   separately applied on join blocks to determine if an entire
//   irreducible cycle is assumed to be divergent.
//
// * Relevant related work:
//     A simple algorithm for global data flow analysis problems.
//     Matthew S. Hecht and Jeffrey D. Ullman.
//     SIAM Journal on Computing, 4(4):519–532, December 1975.
//
template <typename ContextT> class GenericSyncDependenceAnalysis {};

/// \brief Analysis that identifies uniform values in a data-parallel
/// execution.
///
/// This analysis propagates divergence in a data-parallel context
/// from sources of divergence to all users. It can be instantiated
/// for an IR that provides a suitable SSAContext.
template <typename ContextT> class GenericUniformityAnalysisImpl {};

template <typename ImplT>
void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) {}

/// Compute divergence starting with a divergent branch.
template <typename ContextT> class DivergencePropagator {};

template <typename ContextT>
typename llvm::GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor
    llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc;

template <typename ContextT>
llvm::GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis(
    const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
    :{}

template <typename ContextT>
auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks(
    const BlockT *DivTermBlock) -> const DivergenceDescriptor & {}

template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
    const InstructionT &I) {}

template <typename ContextT>
bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
    ConstValueRefT Val) {}

template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
    const InstructionT &Instr) {}

// Mark as divergent all external uses of values defined in \p DefCycle.
//
// A value V defined by a block B inside \p DefCycle may be used outside the
// cycle only if the use is a PHI in some exit block, or B dominates some exit
// block. Thus, we check uses as follows:
//
// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
// - For every block B inside \p DefCycle that dominates at least one exit
//   block, check all uses outside \p DefCycle.
//
// FIXME: This function does not distinguish between divergent and uniform
// exits. For each divergent exit, only the values that are live at that exit
// need to be propagated as divergent at their use outside the cycle.
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
    const CycleT &DefCycle) {}

template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
    const BlockT &DivExit, const CycleT &InnerDivCycle) {}

template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
    const BlockT &BB) {}

/// Mark divergent phi nodes in a join block
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
    const BlockT &JoinBlock) {}

/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
///
/// \return true iff \p Candidate was added to \p Cycles.
template <typename CycleT>
static bool insertIfNotContained(SmallVector<CycleT *> &Cycles,
                                 CycleT *Candidate) {}

/// Return the outermost cycle made divergent by branch outside it.
///
/// If two paths that diverged outside an irreducible cycle join
/// inside that cycle, then that whole cycle is assumed to be
/// divergent. This does not apply if the cycle is reducible.
template <typename CycleT, typename BlockT>
static const CycleT *getExtDivCycle(const CycleT *Cycle,
                                    const BlockT *DivTermBlock,
                                    const BlockT *JoinBlock) {}

/// Return the outermost cycle made divergent by branch inside it.
///
/// This checks the "diverged entry" criterion defined in the
/// docs/ConvergenceAnalysis.html.
template <typename ContextT, typename CycleT, typename BlockT,
          typename DominatorTreeT>
static const CycleT *
getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
               const BlockT *JoinBlock, const DominatorTreeT &DT,
               ContextT &Context) {}

template <typename ContextT, typename CycleT, typename BlockT,
          typename DominatorTreeT>
static const CycleT *
getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
                           const BlockT *JoinBlock, const DominatorTreeT &DT,
                           ContextT &Context) {}

template <typename ContextT>
bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
    const BlockT &ObservingBlock, const InstructionT &Def) const {}

template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
    const InstructionT &Term) {}

template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::compute() {}

template <typename ContextT>
bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform(
    const InstructionT &Instr) const {}

template <typename ContextT>
GenericUniformityInfo<ContextT>::GenericUniformityInfo(
    const DominatorTreeT &DT, const CycleInfoT &CI,
    const TargetTransformInfo *TTI) {}

template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const {}

template <typename ContextT>
bool GenericUniformityInfo<ContextT>::hasDivergence() const {}

template <typename ContextT>
const typename ContextT::FunctionT &
GenericUniformityInfo<ContextT>::getFunction() const {}

/// Whether \p V is divergent at its definition.
template <typename ContextT>
bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const {}

template <typename ContextT>
bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const {}

template <typename ContextT>
bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {}

template <typename ContextT>
bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {}

/// \brief T helper function for printing.
template <typename ContextT>
void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const {}

template <typename ContextT>
void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
    SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
    const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {}

template <typename ContextT>
void ModifiedPostOrder<ContextT>::computeCyclePO(
    const CycleInfoT &CI, const CycleT *Cycle,
    SmallPtrSetImpl<const BlockT *> &Finalized) {}

/// \brief Generically compute the modified post order.
template <typename ContextT>
void llvm::ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) {}

} // namespace llvm

#undef DEBUG_TYPE

#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H