//===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for various loop fusion utility
// methods: these are not passes by themselves but are used either by passes,
// optimization sequences, or in turn by other transformation utilities.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
#define MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H

// NOTE(review): ArrayRef and DenseSet are used in the declarations below
// without a direct include; presumably they are made visible through
// "mlir/Support/LLVM.h" -- confirm.
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"

namespace mlir {
class Operation;

namespace affine {
class AffineForOp;
struct ComputationSliceState;

/// Outcome of a fusion feasibility query; this is the type returned by
/// canFuseLoops() below.
struct FusionResult { … };

/// Describes the fusion strategy to be used in the Affine loop fusion
/// utilities. Currently, it is used to specialize the loop fusion utilities
/// with the assumptions made in the AffineLoopFusion pass for producer-consumer
/// and sibling fusion, while sharing a single implementation. The latter
/// strategies are also limited to scenarios where a single memref is involved
/// in the producer-consumer or sibling relationship between the candidate
/// loops. We use 'memref' to keep track of such a memref.
// TODO: Generalize utilities so that producer-consumer and sibling fusion
// strategies can be used without the assumptions made in the AffineLoopFusion
// pass.
class FusionStrategy { … };

/// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the
/// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult
/// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are
/// in the same block and dependences would not be violated). Otherwise
/// returns a FusionResult explaining why fusion is not feasible.
/// 'fusionStrategy' defaults to FusionStrategy::Generic when not specified.
/// NOTE: This function is not feature complete and should only be used in
/// testing.
FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
                          unsigned dstLoopDepth,
                          ComputationSliceState *srcSlice,
                          FusionStrategy fusionStrategy = FusionStrategy::Generic);

/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion
/// point and source slice loop bounds specified in 'srcSlice'.
/// `isInnermostSiblingInsertionFusion` enables cleanup of `srcForOp` that is a
/// single-iteration reduction loop being sibling-fused into a 'dstForOp'.
void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
               const ComputationSliceState &srcSlice,
               bool isInnermostSiblingInsertionFusion = false);

/// LoopNestStats aggregates various per-loop statistics (e.g. loop trip count
/// and operation count) for a loop nest up until (and including) the innermost
/// loop body.
struct LoopNestStats { … };

/// Collects loop nest statistics (e.g. loop trip count and operation count)
/// in 'stats' for the loop nest rooted at 'forOp'. Returns true on success,
/// false otherwise.
// TODO: Consider moving this to LoopUtils.
bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats);

/// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'.
/// Currently, the total cost is computed by counting the total operation
/// instance count (i.e. total number of operations in the loop body * loop
/// trip count) for the entire loop nest.
int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats);

/// Computes and returns in 'computeCost' the total compute cost of fusing the
/// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently,
/// the total cost is computed by counting the total operation instance count
/// (i.e. total number of operations in the loop body * loop trip count) for
/// the entire loop nest.
/// Returns true on success, false otherwise (e.g. non-constant trip counts).
bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats,
                          AffineForOp dstForOp, LoopNestStats &dstStats,
                          const ComputationSliceState &slice,
                          int64_t *computeCost);

/// Returns in 'producerConsumerMemrefs' the memrefs involved in a
/// producer-consumer dependence between write ops in 'srcOps' and read ops in
/// 'dstOps'.
void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps,
                                   ArrayRef<Operation *> dstOps,
                                   DenseSet<Value> &producerConsumerMemrefs);

} // namespace affine
} // namespace mlir

#endif // MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H