//===- LoopFusionUtils.cpp ---- Utilities for loop fusion ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements loop fusion transformation utility functions. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/Affine/LoopFusionUtils.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Analysis/TopologicalSortUtils.h" #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/LoopUtils.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <optional> #define DEBUG_TYPE … usingnamespacemlir; usingnamespacemlir::affine; // Gathers all load and store memref accesses in 'opA' into 'values', where // 'values[memref] == true' for each store operation. static void getLoadAndStoreMemRefAccesses(Operation *opA, DenseMap<Value, bool> &values) { … } /// Returns true if 'op' is a load or store operation which access a memref /// accessed 'values' and at least one of the access is a store operation. /// Returns false otherwise. static bool isDependentLoadOrStoreOp(Operation *op, DenseMap<Value, bool> &values) { … } // Returns the first operation in range ('opA', 'opB') which has a data // dependence on 'opA'. Returns 'nullptr' of no dependence exists. static Operation *getFirstDependentOpInRange(Operation *opA, Operation *opB) { … } // Returns the last operation 'opX' in range ('opA', 'opB'), for which there // exists a data dependence from 'opX' to 'opB'. // Returns 'nullptr' of no dependence exists. static Operation *getLastDependentOpInRange(Operation *opA, Operation *opB) { … } // Computes and returns an insertion point operation, before which the // the fused <srcForOp, dstForOp> loop nest can be inserted while preserving // dependences. Returns nullptr if no such insertion point is found. static Operation *getFusedLoopNestInsertionPoint(AffineForOp srcForOp, AffineForOp dstForOp) { … } // Gathers all load and store ops in loop nest rooted at 'forOp' into // 'loadAndStoreOps'. static bool gatherLoadsAndStores(AffineForOp forOp, SmallVectorImpl<Operation *> &loadAndStoreOps) { … } /// Returns the maximum loop depth at which we could fuse producer loop /// 'srcForOp' into consumer loop 'dstForOp' without violating data dependences. // TODO: Generalize this check for sibling and more generic fusion scenarios. // TODO: Support forward slice fusion. static unsigned getMaxLoopDepth(ArrayRef<Operation *> srcOps, ArrayRef<Operation *> dstOps) { … } // TODO: This pass performs some computation that is the same for all the depths // (e.g., getMaxLoopDepth). Implement a version of this utility that processes // all the depths at once or only the legal maximal depth for maximal fusion. FusionResult mlir::affine::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth, ComputationSliceState *srcSlice, FusionStrategy fusionStrategy) { … } /// Patch the loop body of a forOp that is a single iteration reduction loop /// into its containing block. static LogicalResult promoteSingleIterReductionLoop(AffineForOp forOp, bool siblingFusionUser) { … } /// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point /// and source slice loop bounds specified in 'srcSlice'. void mlir::affine::fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, const ComputationSliceState &srcSlice, bool isInnermostSiblingInsertion) { … } /// Collect loop nest statistics (eg. loop trip count and operation count) /// in 'stats' for loop nest rooted at 'forOp'. Returns true on success, /// returns false otherwise. bool mlir::affine::getLoopNestStats(AffineForOp forOpRoot, LoopNestStats *stats) { … } // Computes the total cost of the loop nest rooted at 'forOp'. // Currently, the total cost is computed by counting the total operation // instance count (i.e. total number of operations in the loop bodyloop // operation count * loop trip count) for the entire loop nest. // If 'tripCountOverrideMap' is non-null, overrides the trip count for loops // specified in the map when computing the total op instance count. // NOTEs: 1) This is used to compute the cost of computation slices, which are // sliced along the iteration dimension, and thus reduce the trip count. // If 'computeCostMap' is non-null, the total op count for forOps specified // in the map is increased (not overridden) by adding the op count from the // map to the existing op count for the for loop. This is done before // multiplying by the loop's trip count, and is used to model the cost of // inserting a sliced loop nest of known cost into the loop's body. // 2) This is also used to compute the cost of fusing a slice of some loop nest // within another loop. static int64_t getComputeCostHelper( Operation *forOp, LoopNestStats &stats, llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountOverrideMap, DenseMap<Operation *, int64_t> *computeCostMap) { … } /// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'. /// Currently, the total cost is computed by counting the total operation /// instance count (i.e. total number of operations in the loop body * loop /// trip count) for the entire loop nest. int64_t mlir::affine::getComputeCost(AffineForOp forOp, LoopNestStats &stats) { … } /// Computes and returns in 'computeCost', the total compute cost of fusing the /// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently, /// the total cost is computed by counting the total operation instance count /// (i.e. total number of operations in the loop body * loop trip count) for /// the entire loop nest. bool mlir::affine::getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats, AffineForOp dstForOp, LoopNestStats &dstStats, const ComputationSliceState &slice, int64_t *computeCost) { … } /// Returns in 'producerConsumerMemrefs' the memrefs involved in a /// producer-consumer dependence between write ops in 'srcOps' and read ops in /// 'dstOps'. void mlir::affine::gatherProducerConsumerMemrefs( ArrayRef<Operation *> srcOps, ArrayRef<Operation *> dstOps, DenseSet<Value> &producerConsumerMemrefs) { … }