// llvm/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h

//===- Utils.h - General analysis utilities ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for various transformation utilities for
// memref's and non-loop IR structures. These are not passes by themselves but
// are used either by passes, optimization sequences, or in turn by other
// transformation utilities.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H
#define MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H

#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include <memory>
#include <optional>

namespace mlir {
class Block;
class Location;
class Operation;
class Value;

namespace affine {
class AffineForOp;
class AffineValueMap;
struct MemRefAccess;

/// LoopNestStateCollector walks loop nests and collects load and store
/// operations, and whether or not a region-holding op other than ForOp and
/// IfOp was encountered in the loop nest.
struct LoopNestStateCollector {};

/// MemRefDependenceGraph is a graph data structure where graph nodes are
/// top-level operations in a `Block` which contain load/store ops, and edges
/// are memref dependences between the nodes.
// TODO: Add a more flexible dependence graph representation.
struct MemRefDependenceGraph {};

/// Populates 'loops' with the IVs of the affine.for ops surrounding 'op',
/// ordered from the outermost 'affine.for' operation to the innermost one,
/// while not traversing outside of the surrounding affine scope.
void getAffineForIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops);

/// Populates 'ivs' with the IVs of the surrounding affine.for and
/// affine.parallel ops, ordered from the outermost one to the innermost,
/// while not traversing outside of the surrounding affine scope.
void getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs);

/// Populates 'ops' with the affine operations enclosing `op`, ordered from
/// outermost to innermost, while stopping at the boundary of the affine scope.
/// affine.for, affine.if, or affine.parallel ops comprise such surrounding
/// affine ops. `ops` is guaranteed by design to have a successive chain of
/// affine parent ops.
void getEnclosingAffineOps(Operation &op, SmallVectorImpl<Operation *> *ops);

/// Returns the nesting depth of this operation, i.e., the number of loops
/// surrounding this operation. An operation at the top level has depth zero.
unsigned getNestingDepth(Operation *op);

/// Returns whether `forOp` is a parallel loop that also contains a reduction
/// loop.
bool isLoopParallelAndContainsReduction(AffineForOp forOp);

/// Returns in 'sequentialLoops' the IVs of all sequential (i.e., non-parallel)
/// loops in the loop nest rooted at 'forOp'.
void getSequentialLoops(AffineForOp forOp,
                        llvm::SmallDenseSet<Value, 8> *sequentialLoops);

/// Enumerates the different result statuses of slice computation performed by
/// `computeSliceUnion`.
// TODO: Identify and add different kinds of failures during slice computation.
struct SliceComputationResult {};

/// ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their
/// associated operands for a set of loops within a loop nest (typically the
/// set of loops surrounding a store operation). Loop bound AffineMaps which
/// are non-null represent slices of that loop's iteration space.
struct ComputationSliceState {};

/// Computes the computation slice loop bounds for one loop nest as affine maps
/// of the other loop nest's IVs and symbols, using 'dependenceConstraints'
/// computed between 'depSourceOp' and 'depSinkOp'.
/// If 'isBackwardSlice' is true, a backwards slice is computed in which the
/// slice bounds of the loop nest surrounding 'depSourceOp' are computed in
/// terms of loop IVs and symbols of the loop nest surrounding 'depSinkOp'
/// at 'loopDepth'.
/// If 'isBackwardSlice' is false, a forward slice is computed in which the
/// slice bounds of the loop nest surrounding 'depSinkOp' are computed in terms
/// of loop IVs and symbols of the loop nest surrounding 'depSourceOp' at
/// 'loopDepth'.
/// The slice loop bounds and associated operands are returned in 'sliceState'.
//
//  Backward slice example:
//
//    affine.for %i0 = 0 to 10 {
//      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceOp'
//    }
//    affine.for %i1 = 0 to 10 {
//      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkOp'
//    }
//
//    // Backward computation slice of loop nest '%i0'.
//    affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) {
//      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceOp'
//    }
//
//  Forward slice example:
//
//    affine.for %i0 = 0 to 10 {
//      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceOp'
//    }
//    affine.for %i1 = 0 to 10 {
//      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkOp'
//    }
//
//    // Forward computation slice of loop nest '%i1'.
//    affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) {
//      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkOp'
//    }
//
void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp,
                              FlatAffineValueConstraints *dependenceConstraints,
                              unsigned loopDepth, bool isBackwardSlice,
                              ComputationSliceState *sliceState);

/// Returns the total number of iterations for the slice described by the
/// 'sliceTripCountMap' provided (the product of all per-loop trip counts).
uint64_t getSliceIterationCount(
    const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap);

/// Builds a map 'tripCountMap' from AffineForOp to constant trip count for
/// the loop nest represented by the slice loop bounds in 'slice'. Returns
/// true on success, false otherwise (if a non-constant trip count was
/// encountered).
bool buildSliceTripCountMap(
    const ComputationSliceState &slice,
    llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap);

/// Computes in 'sliceUnion' the union of all slice bounds computed at
/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
/// then verifies if it is valid. The parameter 'numCommonLoops' is the number
/// of loops common to the operations in 'opsA' and 'opsB'. If 'isBackwardSlice'
/// is true, computes slice bounds for the loop nest surrounding ops in 'opsA',
/// as a function of IVs and symbols of the loop nest surrounding ops in 'opsB'
/// at 'loopDepth'. If 'isBackwardSlice' is false, computes slice bounds for
/// the loop nest surrounding ops in 'opsB', as a function of IVs and symbols
/// of the loop nest surrounding ops in 'opsA' at 'loopDepth'. Returns
/// 'SliceComputationResult::Success' if the union was computed correctly, an
/// appropriate 'failure' otherwise.
SliceComputationResult
computeSliceUnion(ArrayRef<Operation *> opsA, ArrayRef<Operation *> opsB,
                  unsigned loopDepth, unsigned numCommonLoops,
                  bool isBackwardSlice, ComputationSliceState *sliceUnion);

/// Creates a clone of the computation contained in the loop nest surrounding
/// 'srcOpInst', slices the iteration space of the src loop based on the slice
/// bounds in 'sliceState', and inserts the computation slice at the beginning
/// of the operation block of the loop at 'dstLoopDepth' in the loop nest
/// surrounding 'dstOpInst'. Returns the top-level loop of the computation
/// slice on success, returns nullptr otherwise.
// Loop depth is a crucial optimization choice that determines where to
// materialize the results of the backward slice - presenting a trade-off b/w
// storage and redundant computation in several cases.
// TODO: Support computation slices with common surrounding loops.
AffineForOp insertBackwardComputationSlice(Operation *srcOpInst,
                                           Operation *dstOpInst,
                                           unsigned dstLoopDepth,
                                           ComputationSliceState *sliceState);

/// A region of a memref's data space; this is typically constructed by
/// analyzing load/store op's on this memref and the index space of loops
/// surrounding such op's.
// For example, the memref region for a load operation at loop depth = 1:
//
//    affine.for %i = 0 to 32 {
//      affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
//        affine.load %A[%ii]
//      }
//    }
//
// Region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineValueConstraints symbolic in %i, where
// 'm0' stands for the memref's (single) dimension being constrained.
//
struct MemRefRegion {};

/// Returns the size in bytes of a memref with an int or float element type,
/// if the memref is statically shaped; std::nullopt otherwise.
std::optional<uint64_t> getIntOrFloatMemRefSizeInBytes(MemRefType memRefType);

/// Checks a load or store op for an out-of-bound access; returns failure if
/// the access is out of bounds along any of the dimensions, success otherwise.
/// Emits a diagnostic error (with location information) if 'emitError' is
/// true.
template <typename LoadOrStoreOpPointer>
LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
                                      bool emitError = true);

/// Returns the number of surrounding loops common to both operations 'a' and
/// 'b'.
unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b);

/// Gets the memory footprint, in bytes, of all data touched in the specified
/// memory space by the loop nest rooted at 'forOp'; if the memory space is
/// unspecified (the default -1), considers all memory spaces. Returns
/// std::nullopt if the footprint cannot be determined.
std::optional<int64_t> getMemoryFootprintBytes(AffineForOp forOp,
                                               int memorySpace = -1);

/// Returns the memref's element type's size in bytes where the elemental type
/// is an int or float or a vector of such types; std::nullopt otherwise.
std::optional<int64_t> getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType);

/// Simplifies the integer set by simplifying the underlying affine expressions
/// via flattening and some simple inference; also drops any duplicate
/// constraints. Returns the simplified integer set. This method runs in time
/// linear in the number of constraints.
IntegerSet simplifyIntegerSet(IntegerSet set);

/// Returns the innermost common loop depth for the set of operations in 'ops'.
/// If 'surroundingLoops' is non-null, it is populated with the loops common to
/// all of 'ops'.
unsigned getInnermostCommonLoopDepth(
    ArrayRef<Operation *> ops,
    SmallVectorImpl<AffineForOp> *surroundingLoops = nullptr);

/// Tries to simplify the given affine.min or affine.max op 'op' to an affine
/// map with a single result and operands, taking into account the specified
/// constraint set. Returns failure if no simplified version could be found.
FailureOr<AffineValueMap>
simplifyConstrainedMinMaxOp(Operation *op,
                            FlatAffineValueConstraints constraints);

} // namespace affine
} // namespace mlir

#endif // MLIR_DIALECT_AFFINE_ANALYSIS_UTILS_H