//===- Transforms.cpp - Linalg transformations as patterns ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic and helpers to expose Linalg transforms as
// rewrite patterns.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <type_traits>
#include <utility>

#define DEBUG_TYPE …

using namespace mlir;
using namespace mlir::linalg;

#define DBGS() …
#define DBGSNL() …

//===----------------------------------------------------------------------===//
// Transformations exposed as functional-style API calls.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// peelLoop transformation.
//===----------------------------------------------------------------------===//

/// Try to peel and canonicalize loop `op` and return the new result.
/// Also applies affine_min/max bounds simplification on the fly where
/// relevant.
// TODO: Add support for scf.parallel and affine.for loops.
SmallVector<Value> mlir::linalg::peelLoop(RewriterBase &rewriter,
                                          Operation *op) { … }

/// Peel `loops` and apply affine_min/max bounds simplification on the fly
/// where relevant.
void mlir::linalg::peelLoops(RewriterBase &rewriter,
                             ArrayRef<scf::ForOp> loops) { … }
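// Illustrative sketch (not part of the upstream implementation): peeling
// splits an scf.for whose trip count may not be a multiple of the step into a
// main loop that executes only full iterations and a remainder loop, so that
// affine_min/max expressions of the induction variable can be simplified in
// each part. Assuming a lower bound of 0 and a step of 4, the rewrite is
// roughly:
//
//   // Before:
//   scf.for %iv = %c0 to %ub step %c4 { ... }
//
//   // After (sketch):
//   %split = affine.apply affine_map<()[s0] -> (s0 - s0 mod 4)>()[%ub]
//   scf.for %iv = %c0 to %split step %c4 { ... } // full iterations only
//   scf.for %iv = %split to %ub step %c4 { ... } // at most one partial step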
//===----------------------------------------------------------------------===//
// pack transformation.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
/// Return true if `map` has 0 or 1 result that is a function of
/// AffineDimExpr(dim).
static bool hasAtMostOneResultFunctionOfDim(AffineMap map, int64_t dim) {
  bool found = false;
  for (AffineExpr e : map.getResults()) {
    if (!e.isFunctionOfDim(dim))
      continue;
    if (found)
      return false;
    found = true;
  }
  return true;
}
#endif // NDEBUG

/// Return the index of the first result of `map` that is a function of
/// AffineDimExpr(dim), std::nullopt otherwise.
static std::optional<int64_t> getFirstResultIndexFunctionOf(AffineMap map,
                                                            int64_t dim) { … }

/// Perform one step of packing of a LinalgOp's metadata along `dim` into the
/// `newDim` at `iteratorTypes.size()` by:
/// 1. Appending `iteratorTypes[newDim]`, equal to `iteratorTypes[dim]`.
/// 2. Appending a `newDim` to the domain of every indexing map.
/// 3. For each operand (i.e. for each map in `indexingMaps`), perform packing
///    by potentially adding a `newDim` result to `map`.
/// The preserved invariant is that `iteratorTypes.size()` is always equal to
/// `map.getNumDims()` for every map in `indexingMaps`.
///
/// Update `indexingMaps` and `iteratorTypes` in place as one step of the
/// update. Return a vector that records the optional packing for each operand.
/// Return failure if the packed indexing cannot be represented with a
/// LinalgOp.
///
/// Further details:
/// ================
/// The current implementation of packing (i.e. data tiling) consists of
/// rewriting a linearized strip-mined form into a higher-dimensional access.
/// e.g. consider an access `A[I][f(j, k, l)]` and packing by 4; we rewrite
/// `I` into `4 * i + ii`, where `0 <= ii < 4`.
/// The access is further rewritten as `A[i][f(j, k, l)][ii]`.
///
/// This rewrite into a higher-dimensional access is not possible for a general
/// AffineExpr in Linalg at the moment; it is restricted to an AffineDimExpr:
/// e.g. consider an access `A[I + J][f(j, k, l)]` and packing by 4; we
/// rewrite `I + J` into `4 * i + ii + J`, where `0 <= ii < 4`.
/// The rewrite of the access would be a form not representable in Linalg:
/// `A[i + (ii + J) / 4][f(j, k, l)][(ii + J) % 4]`.
/// Note however that as `J` and `ii` iterate, the accesses do not have a
/// particular alignment, so packing does not achieve alignment in this case.
///
/// In the future, we may want to consider a mixed-form that allows some
/// alignment in the presence of multiple accesses:
///   `A[I][f(j, k, l)]` and `B[I + J][f(j, k, l)]`
/// and would rewrite the accesses as:
///   `A[i][f(j, k, l)][ii]` and `B[4 * i + ii + J][f(j, k, l)]`
static FailureOr<SmallVector<std::optional<int64_t>>>
packLinalgMetadataOnce(SmallVectorImpl<AffineMap> &indexingMaps,
                       SmallVectorImpl<utils::IteratorType> &iteratorTypes,
                       int64_t dim) { … }

namespace {

/// Helper struct to encode packing along one dimension of a LinalgOp.
struct PackedOperandsDim { … };

/// Helper struct to encode packing along all dimensions of a LinalgOp.
struct PackedOperandsDimList { … };

} // namespace

FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
                                             tensor::PackOp packOp) { … }

FailureOr<LowerUnPackOpResult>
linalg::lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp) { … }

SmallVector<int64_t>
PackedOperandsDimList::extractPackedDimsForOperand(int64_t operandPos) { … }

SmallVector<OpFoldResult>
PackedOperandsDimList::extractPackSizesForOperand(int64_t operandPos) { … }

/// Implement packing of a single LinalgOp by `packedSizes`. There must be one
/// packedSizes entry per `linalgOp` iterator. Return the packed Linalg op on
/// success, failure otherwise.
FailureOr<PackResult> linalg::pack(RewriterBase &rewriter,
                                   linalg::LinalgOp linalgOp,
                                   ArrayRef<OpFoldResult> packedSizes) { … }
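// Illustrative sketch (not part of the upstream implementation): packing a
// 128x256x512 matmul along (m, n, k) with `packedSizes` = {8, 16, 32} wraps
// each operand in a tensor.pack, rewrites the computation on the packed,
// higher-dimensional domain, and unpacks the result. Shapes and SSA names
// below are hypothetical:
//
//   %pA = tensor.pack %A inner_dims_pos = [0, 1] inner_tiles = [8, 32]
//       into %bufA : tensor<128x256xf32> -> tensor<16x8x8x32xf32>
//   %pB = tensor.pack %B inner_dims_pos = [0, 1] inner_tiles = [32, 16]
//       into %bufB : tensor<256x512xf32> -> tensor<8x32x32x16xf32>
//   %pC = tensor.pack %C inner_dims_pos = [0, 1] inner_tiles = [8, 16]
//       into %bufC : tensor<128x512xf32> -> tensor<16x32x8x16xf32>
//   %pRes = linalg.generic ...   // packed op on the 6-D iteration space
//                                // (m, n, k, mm, nn, kk)
//   %res = tensor.unpack %pRes inner_dims_pos = [0, 1] inner_tiles = [8, 16]
//       into %C : tensor<16x32x8x16xf32> -> tensor<128x512xf32>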
//===----------------------------------------------------------------------===//
// packTranspose transformation.
//===----------------------------------------------------------------------===//

/// Return a copy of `tensorType` after permutation by `permutationVector`.
// Note: Should be a new method in MemRef/RankedTensor/VectorType::Builder
// but this would introduce a dependence on Dialect in IR.
// TODO: Restructure.
static RankedTensorType permuteShape(RankedTensorType tensorType,
                                     ArrayRef<int64_t> permutationVector) { … }

/// Return a new GenericOp obtained by transposing `opOperand` by the
/// permutation vector:
///   - the corresponding indexing map is transposed by `permutation`
///   - the corresponding operand value is replaced by `transposedValue`
/// `linalgOp` is replaced by the returned op in the process.
/// Asserts that `transposedValue` is of the proper transposed ShapedType.
static LinalgOp transposeOneLinalgOperandAndReplace(
    RewriterBase &rewriter, LinalgOp linalgOp, OpOperand &opOperand,
    ArrayRef<int64_t> permutation, Value transposedValue) { … }

FailureOr<PackTransposeResult>
linalg::packTranspose(RewriterBase &rewriter, tensor::PackOp packOp,
                      linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp,
                      ArrayRef<int64_t> outerPerm,
                      ArrayRef<int64_t> innerPerm) { … }

//===----------------------------------------------------------------------===//
// packMatmulGreedily transformation.
//===----------------------------------------------------------------------===//

/// Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m
/// and n are proper parallel dimensions and k is a proper reduction
/// dimension. Packing occurs by rewriting the op as a linalg.generic and
/// calling linalg::pack with `mnkPackedSizes`. The order of the packed
/// dimensions is customizable: `mnkOrder` is a permutation of {0, 1, 2} used
/// to reorder {m, n, k} into one of the 6 possible forms. The outer
/// dimensions of the operands are not permuted at this time; this is left for
/// future work.
FailureOr<PackResult>
linalg::packMatmulGreedily(RewriterBase &rewriter, LinalgOp linalgOp,
                           ArrayRef<OpFoldResult> mnkPackedSizes,
                           ArrayRef<int64_t> mnkPaddedSizesNextMultipleOf,
                           ArrayRef<int64_t> mnkOrder) { … }
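// Illustrative usage sketch (not part of the upstream implementation):
// packing a matmul with hypothetical tile sizes 8x16x32 and the identity
// (m, n, k) order. `rewriter` and `matmulOp` are assumed to be in scope, and
// the padded-sizes interpretation (0 meaning "no extra padding") is an
// assumption of this sketch:
//
//   SmallVector<OpFoldResult> mnkPackedSizes = {rewriter.getIndexAttr(8),
//                                               rewriter.getIndexAttr(16),
//                                               rewriter.getIndexAttr(32)};
//   FailureOr<PackResult> maybePacked = linalg::packMatmulGreedily(
//       rewriter, matmulOp, mnkPackedSizes,
//       /*mnkPaddedSizesNextMultipleOf=*/{0, 0, 64}, /*mnkOrder=*/{0, 1, 2});
//   if (failed(maybePacked))
//     return failure();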
//===----------------------------------------------------------------------===//
// Transformations exposed as rewrite patterns.
//===----------------------------------------------------------------------===//

LinalgTilingOptions &
mlir::linalg::LinalgTilingOptions::setTileSizes(ArrayRef<int64_t> ts) { … }

LogicalResult mlir::linalg::CopyVectorizationPattern::matchAndRewrite(
    memref::CopyOp copyOp, PatternRewriter &rewriter) const { … }

/// Fill `dest` using a FillOp if the padding value is a constant.
/// Otherwise, generate a tensor::GenerateOp.
Value GeneralizePadOpPattern::createFillOrGenerateOp(
    RewriterBase &rewriter, tensor::PadOp padOp, Value dest,
    const SmallVector<Value> &dynSizes) const { … }

LogicalResult
GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp,
                                        PatternRewriter &rewriter) const { … }

LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
    tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const { … }

/// If the padding value is set, returns a tensor.pad op for the source tensor,
/// with the output shape matching the output of `packOp`. Otherwise, returns
/// the source directly.
///
/// This method assumes that all outer dims for this pack op are 1.
static Value getPackOpSourceOrPaddedSource(OpBuilder &builder,
                                           tensor::PackOp packOp) { … }

// Normalizes a permutation on a higher rank space to its actual size, e.g.
//   perm = [1, 4, 2]
// becomes
//   norm = [0, 2, 1]
static SmallVector<int64_t>
getPackUnpackNormalizedPerm(int rank, ArrayRef<int64_t> perm) { … }

// Gets the normalized permutation implied by innerDimsPos and outerDimsPerm,
// assuming rank reduction of unit outer dims.
static SmallVector<int64_t>
getPackUnpackRankReducedPerm(ArrayRef<int64_t> shape,
                             ArrayRef<int64_t> innerDimsPos,
                             ArrayRef<int64_t> outerDimsPerm) { … }

LogicalResult GeneralizeOuterUnitDimsPackOpPattern::matchAndRewrite(
    tensor::PackOp packOp, PatternRewriter &rewriter) const { … }

LogicalResult GeneralizeOuterUnitDimsUnPackOpPattern::matchAndRewrite(
    tensor::UnPackOp unpackOp, PatternRewriter &rewriter) const { … }

// The following are patterns for downscaling convolution ops with size-1
// window dimensions.
//
// Note that we'd eventually want to write such transformations in a generic
// way, e.g., converting to linalg.generic, removing the size-1 dimensions,
// and then turning back to named ops. But for now it's fine to have a few
// patterns matching special ops to get started.

template <typename Conv2DOp, typename Conv1DOp>
FailureOr<Conv1DOp> DownscaleSizeOneWindowed2DConvolution<Conv2DOp, Conv1DOp>::
    returningMatchAndRewrite(Conv2DOp convOp,
                             PatternRewriter &rewriter) const { … }

template struct linalg::DownscaleSizeOneWindowed2DConvolution<Conv2DNhwcHwcfOp,
                                                              Conv1DNwcWcfOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<Conv2DNchwFchwOp,
                                                              Conv1DNcwFcwOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNhwcSumOp,
                                                              PoolingNwcSumOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNchwSumOp,
                                                              PoolingNcwSumOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMaxOp,
                                                              PoolingNwcMaxOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<
    PoolingNhwcMaxUnsignedOp, PoolingNwcMaxUnsignedOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNhwcMinOp,
                                                              PoolingNwcMinOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<
    PoolingNhwcMinUnsignedOp, PoolingNwcMinUnsignedOp>;
template struct linalg::DownscaleSizeOneWindowed2DConvolution<PoolingNchwMaxOp,
                                                              PoolingNcwMaxOp>;

FailureOr<DepthwiseConv1DNwcWcOp>
DownscaleDepthwiseConv2DNhwcHwcOp::returningMatchAndRewrite(
    DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const { … }

FailureOr<Conv1DOp>
DownscaleConv2DOp::returningMatchAndRewrite(Conv2DOp convOp,
                                            PatternRewriter &rewriter) const { … }

void linalg::populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
                                                  PatternBenefit benefit) { … }
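// Illustrative usage sketch (not part of the upstream file): applying the
// decomposition patterns above inside a pass, via the greedy rewrite driver.
// `getOperation()` is assumed to return the enclosing function:
//
//   RewritePatternSet patterns(&getContext());
//   linalg::populateDecomposeConvolutionPatterns(patterns);
//   if (failed(applyPatternsAndFoldGreedily(getOperation(),
//                                           std::move(patterns))))
//     signalPassFailure();
//
// With these patterns, a 2-D convolution whose H window is statically size 1,
// e.g. (shapes hypothetical, strides/dilations attributes omitted):
//
//   %0 = linalg.conv_2d_nhwc_hwcf
//          ins(%in, %filter : tensor<1x1x32x3xf32>, tensor<1x2x3x8xf32>)
//          outs(%out : tensor<1x1x31x8xf32>) -> tensor<1x1x31x8xf32>
//
// is rewritten into a 1-D convolution on rank-reduced operands (obtained via
// tensor.extract_slice / tensor.insert_slice):
//
//   %1 = linalg.conv_1d_nwc_wcf
//          ins(%in1d, %filter1d : tensor<1x32x3xf32>, tensor<2x3x8xf32>)
//          outs(%out1d : tensor<1x31x8xf32>) -> tensor<1x31x8xf32>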