//===- HoistPadding.cpp - Hoisting for tensor::PadOp ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements functions concerned with hoisting padding operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Transforms/Transforms.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/Support/Debug.h"

using llvm::dbgs;

#define DEBUG_TYPE …
#define DBGS() …

using namespace mlir;
using namespace mlir::linalg;
using namespace mlir::linalg::detail;

#ifndef NDEBUG
static bool debugPrintLoopInShortForm(Operation *op) {
  AsmState state(op->getParentOfType<func::FuncOp>());
  (void)state;
  if (auto forOp = dyn_cast<scf::ForOp>(op)) {
    forOp.getInductionVar().printAsOperand(dbgs(), state);
    dbgs() << " @ " << forOp.getOperation();
    return true;
  }
  return false;
}
#endif

static void debugPrintBackwardSlice(SetVector<Operation *> &backwardSlice) { … }

/// Return at most nLevels of immediately enclosing scf::ForOp loops.
/// Stops at the first parent that is not an scf::ForOp.
/// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm.
/// Control-flow and other containing ops with regions are not modeled atm.
static void
getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels,
                         SmallVector<scf::ForOp> &reverseEnclosingLoops) { … }

/// Return the immediately enclosing scf::ForOp loops, up to and including
/// `untilLoop`. Stops at the first parent that is not an scf::ForOp.
/// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm.
/// Control-flow and other containing ops with regions are not modeled atm.
static void
getEnclosingLoopsUntil(tensor::PadOp padOp, scf::ForOp untilLoop,
                       SmallVector<scf::ForOp> &reverseEnclosingLoops) { … }

// Get all the ops in the backward slice starting from `padOp` that are
// dominated by the outermost enclosing loop.
// This also requires tracking ops defining values used in the region but
// defined above.
static void computeBackwardSlice(tensor::PadOp padOp,
                                 scf::ForOp outermostEnclosingForOp,
                                 SetVector<Operation *> &backwardSlice) { … }
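// Illustrative example of the backward slice (pseudo-MLIR; all SSA names,
// shapes, and the #map below are hypothetical, for exposition only). In
// ```
//    %cst = arith.constant 0.0 : f32
//    scf.for %i = %lb to %ub step %step {
//      %off = affine.apply #map(%i)
//      %src = tensor.extract_slice %t[%off][%sz][1]
//               : tensor<?xf32> to tensor<?xf32>
//      %padded = tensor.pad %src low[0] high[%h] {
//      ^bb0(%arg0: index):
//        tensor.yield %cst : f32
//      } : tensor<?xf32> to tensor<8xf32>
//      ...
//    }
// ```
// the backward slice computed from %padded contains the affine.apply and the
// tensor.extract_slice, both dominated by the enclosing loop, while %cst,
// defined above the outermost enclosing loop, is tracked as a value used in
// the region but defined above.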
//===----------------------------------------------------------------------===//
// HoistPaddingAnalysis Implementation.
//===----------------------------------------------------------------------===//

namespace {

/// Analysis class to support tensor::PadOp hoisting across multiple enclosing
/// loops. The failure conditions are:
///   1. Pad op has a use that is not an input of a LinalgOp.
///   2. Pad op does not have a constant padding value.
///   3. There is no immediately enclosing scf::ForOp.
///   4. The backward slice from the pad op to the scf::ForOp to hoist above
///      contains an unknown op with non-index-type operands, a region, or a
///      memory effect.
///   5. The backward slice from the pad op to the scf::ForOp to hoist above is
///      empty.
///   6. The source tensor of the pad op is not defined by an extract slice op.
///   7. The source tensor of the extract slice op is not defined outside of
///      the outermost enclosing scf::ForOp.
///   8. There is no enclosing scf::ForOp that indexes the padded data.
/// Other cases succeed and will trigger hoisting of the pad op.
struct HoistPaddingAnalysis { … };

} // namespace

HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp, int numLoops)
    : … { … }

HoistPaddingAnalysis::HoistPaddingAnalysis(tensor::PadOp padOp,
                                           scf::ForOp outermostEnclosingForOp)
    : … { … }

void HoistPaddingAnalysis::enableHoistPadding(RewriterBase &rewriter) { … }

void HoistPaddingAnalysis::finalizeHoistPaddingAnalysis() { … }

LogicalResult HoistPaddingAnalysis::dropNonIndexDependencies() { … }

SmallVector<Value>
HoistPaddingAnalysis::getHoistedPackedTensorSizes(RewriterBase &rewriter,
                                                  Location loc) const { … }

static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) { … }

//===----------------------------------------------------------------------===//
// buildPackingLoopNest Implementation.
//===----------------------------------------------------------------------===//

/// Return the current iteration number in the loop, i.e.
/// (iv - lb).ceilDiv(step).
/// The returned Value is guaranteed not to depend on any loop comprised in
/// [`outer`, `forOp`].
/// Return null if such a loop-independent quantity cannot be computed.
static Value buildLoopIterationCount(RewriterBase &rewriter, scf::ForOp outer,
                                     scf::ForOp forOp) { … }

// Build a packing loop nest by iteratively traversing the backward slice and
// cloning the operations, stepping into each loop that we encounter.
// The implementation proceeds in a stack-like fashion:
//   1. Iteratively clone and step into the loops, pushing the
//      `hoistedPackedTensor` deeper in the stack.
//   2. At the innermost loop level, create a GenericOp if `transposeVector` is
//      non-empty.
//   3. At the innermost loop level, create an InsertSliceOp.
//   4. Iteratively pop and yield the result of the InsertSliceOp across the
//      cloned loops.
static FailureOr<PackingResult> buildPackingLoopNestImpl(
    RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist,
    ArrayRef<int64_t> transposeVector, RankedTensorType transposedTensorType,
    tensor::EmptyOp emptyOp, const HoistPaddingAnalysis &analysis) { … }

/// Build the packing loop nest required to hoist `opToHoist` above
/// `outermostEnclosingForOp`.
/// The loop nest is built just before `outermostEnclosingForOp`.
static FailureOr<PackingResult> buildPackingLoopNestImpl(
    RewriterBase &rewriter, IRMapping &bvm, tensor::PadOp opToHoist,
    ArrayRef<int64_t> transposeVector,
    const HoistPaddingAnalysis &analysis) { … }

/// Build the packing loop nest required to hoist `opToHoist` above
/// `outermostEnclosingForOp`.
/// The loop nest is built just before `outermostEnclosingForOp`.
FailureOr<PackingResult> mlir::linalg::detail::buildPackingLoopNest(
    RewriterBase &rewriter, tensor::PadOp opToHoist,
    scf::ForOp outermostEnclosingForOp,
    ArrayRef<int64_t> transposeVector) { … }
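// Illustrative sketch of the overall effect of the packing loop nest
// (pseudo-MLIR; all SSA names and shapes are made up, and the exact IR
// depends on the analysis results). Hoisting a pad above two loops
// conceptually rewrites
// ```
//    scf.for (%i, %j, %k)
//      %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
//      %0 = tensor.pad %st0 low[0, 0] high[...] : ... to tensor<4x8xf32>
//      compute(%0)
// ```
// into a loop nest that materializes all padded tiles once, above the
// hoisted-over loops, and replaces the pad by an extract_slice from the
// hoisted packed tensor:
// ```
//    scf.for (%i) {
//      // One padded tile per iteration of the hoisted-over loop.
//      %init = tensor.empty(%num_iters) : tensor<?x4x8xf32>
//      %packed = scf.for (%k) iter_args(%p = %init) {
//        %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
//        %0 = tensor.pad %st0 low[0, 0] high[...] : ... to tensor<4x8xf32>
//        %1 = tensor.insert_slice %0 into %p[%iter, 0, 0] ...
//        scf.yield %1 : tensor<?x4x8xf32>
//      } -> tensor<?x4x8xf32>
//      scf.for (%j, %k) {
//        %st0 = tensor.extract_slice %packed[%iter, 0, 0][1, 4, 8][1, 1, 1]
//                 : tensor<?x4x8xf32> to tensor<4x8xf32>
//        compute(%st0)
//      }
//    }
// ```
// where %iter stands for the loop-independent iteration count produced by
// buildLoopIterationCount above.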
//===----------------------------------------------------------------------===//
// hoistPaddingOnTensors Implementation.
//===----------------------------------------------------------------------===//

/// Return true if we can walk back the use-def chain from `extractSliceOp` to
/// `expectedSource`, going through DestinationStyleOpInterface inits only.
/// This is a poor man's analysis that is sufficient to check that the
/// extractSliceOp matches the tensor.pad we want to hoist.
/// In the future, it will be easier to ensure this with a matching symmetric
/// tensor.unpad op.
static bool tracesBackToExpectedValue(tensor::ExtractSliceOp extractSliceOp,
                                      Value expectedSource) { … }

/// If the original consumer of `outerSliceOp` was a `forOp` (i.e. through an
/// iter arg), propagate the `hoistedPackedTensor` value through the same iter
/// arg.
/// TODO: for multiple loops we need to track the use to the innermost loop.
///
/// Match:
/// ```
///    %outerSliceOp = tensor.extract_slice ..
///    %f = scf.for ... iter_args(%arg0 = %outerSliceOp) {
///      %hoistedPackedTensor = tensor.pad %arg0
///      %1 = compute %hoistedPackedTensor
///      %2 = tensor.extract_slice %1
///      scf.yield %2
///    }
/// ```
///
/// and rewrite as:
/// ```
///    %outerSliceOp = tensor.extract_slice ..
///    %hoistedPackedTensor = tensor.pad %outerSliceOp
///    %f = scf.for ... iter_args(%arg0 = %hoistedPackedTensor) {
///      %1 = compute %arg0
///      scf.yield %1
///    }
///    %2 = tensor.extract_slice %f
/// ```
///
/// Return null when no rewrite happened.
static tensor::ExtractSliceOp
padThroughLoopIterArg(RewriterBase &rewriter, Value paddedValueBeforeHoisting,
                      Value hoistedPackedTensor,
                      tensor::ExtractSliceOp outerSliceOp,
                      scf::ForOp forOp) { … }

/// Produce a tensor extracted from the packingResult. This can be used as a
/// replacement for `opToHoist` in callers.
static Value replaceByPackingResult(RewriterBase &rewriter,
                                    const IRMapping &bvm,
                                    tensor::PadOp opToHoist,
                                    RankedTensorType transposedTensorType,
                                    const HoistPaddingAnalysis &analysis,
                                    const PackingResult &packingResult) { … }

FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<TransposeOp> &transposeOps) { … }

FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
    tensor::PadOp opToHoist, int64_t numLoops,
    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
    SmallVectorImpl<TransposeOp> &transposeOps) { … }
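// Illustrative usage sketch (hypothetical caller code; `padOp` and the
// surrounding setup are assumptions, only the hoistPaddingOnTensors entry
// point above is part of this file's interface):
//
//   IRRewriter rewriter(padOp->getContext());
//   tensor::PadOp hoistedOp;
//   SmallVector<TransposeOp> transposeOps;
//   FailureOr<Value> replacement = hoistPaddingOnTensors(
//       rewriter, padOp, /*numLoops=*/2, /*transposeVector=*/{}, hoistedOp,
//       transposeOps);
//   if (succeeded(replacement)) {
//     // Use *replacement in place of the original pad's result, e.g.:
//     rewriter.replaceOp(padOp, *replacement);
//   }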