// llvm/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h

//===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for various loop fusion utility
// methods: these are not passes by themselves but are used either by passes,
// optimization sequences, or in turn by other transformation utilities.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
#define MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H

#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"

namespace mlir {
class Operation;

namespace affine {
class AffineForOp;
struct ComputationSliceState;

/// Result type returned by 'canFuseLoops'. Per that function's documentation,
/// it either reports 'Success' or explains why fusion is not feasible.
// NOTE(review): no members are declared in the body visible here; confirm the
// full definition before relying on specific result values.
struct FusionResult {};

/// Describes the fusion strategy to be used in the Affine loop fusion
/// utilities. Currently, it is used to specialize the loop fusion utilities
/// with the assumptions made in the AffineLoopFusion pass for producer-consumer
/// and sibling fusion, while sharing a single implementation. The latter
/// strategies are also limited to scenarios where a single memref is involved
/// in the producer-consumer or sibling relationship between the candidate
/// loops. We use 'memref' to keep track of such a memref.
// TODO: Generalize utilities so that producer-consumer and sibling fusion
// strategies can be used without the assumptions made in the AffineLoopFusion
// pass.
// NOTE(review): the class body visible here declares no members, although the
// description above refers to 'memref' and 'canFuseLoops' uses
// 'FusionStrategy::Generic' as a default argument; confirm the full
// definition.
class FusionStrategy {};

/// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the
/// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult
/// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are
/// in the same block and dependences would not be violated). Otherwise
/// returns a FusionResult explaining why fusion is not feasible.
/// 'fusionStrategy' selects the fusion strategy whose assumptions are applied
/// during the check; it defaults to 'FusionStrategy::Generic'.
/// NOTE: This function is not feature complete and should only be used in
/// testing.
// NOTE(review): 'srcSlice' is passed by non-const pointer, so it is presumably
// an output that receives the computed source slice; confirm against the
// implementation.
FusionResult
canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth,
             ComputationSliceState *srcSlice,
             FusionStrategy fusionStrategy = FusionStrategy::Generic);

/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion
/// point and source slice loop bounds specified in 'srcSlice'.
/// 'isInnermostSiblingInsertionFusion' enables cleanup of a 'srcForOp' that is
/// a single-iteration reduction loop being sibling-fused into a 'dstForOp'.
/// It defaults to false.
void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
               const ComputationSliceState &srcSlice,
               bool isInnermostSiblingInsertionFusion = false);

/// LoopNestStats aggregates various per-loop statistics (e.g. loop trip count
/// and operation count) for a loop nest up until (and including) the innermost
/// loop body.
// NOTE(review): no members are declared in the body visible here; confirm the
// full definition for the exact statistics stored.
struct LoopNestStats {};

/// Collects loop nest statistics (e.g. loop trip count and operation count)
/// in 'stats' for the loop nest rooted at 'forOp'. 'stats' is an output
/// parameter. Returns true on success, false otherwise.
// TODO: Consider moving this to LoopUtils.
bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats);

/// Computes and returns the total cost of the loop nest rooted at 'forOp'
/// using 'stats'. Currently, the total cost is computed by counting the total
/// operation instance count (i.e. total number of operations in the loop body
/// * loop trip count) for the entire loop nest.
int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats);

/// Computes and returns in 'computeCost' the total compute cost of fusing the
/// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently,
/// the total cost is computed by counting the total operation instance count
/// (i.e. total number of operations in the loop body * loop trip count) for
/// the entire loop nest. 'srcStats' and 'dstStats' are the loop nest
/// statistics for the source and destination loop nests, respectively.
/// Returns true on success, false otherwise (e.g. non-constant trip counts).
bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats,
                          AffineForOp dstForOp, LoopNestStats &dstStats,
                          const ComputationSliceState &slice,
                          int64_t *computeCost);

/// Returns in 'producerConsumerMemrefs' the memrefs involved in a
/// producer-consumer dependence between write ops in 'srcOps' and read ops in
/// 'dstOps'. 'producerConsumerMemrefs' is an output parameter passed by
/// reference.
void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps,
                                   ArrayRef<Operation *> dstOps,
                                   DenseSet<Value> &producerConsumerMemrefs);

} // namespace affine
} // namespace mlir

#endif // MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H