//===- AffineDataCopyGeneration.cpp - Explicit memref copying pass ------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to automatically promote accessed memref regions
// to buffers in a faster memory space that is explicitly managed, with the
// necessary data movement operations performed through either regular
// point-wise load/stores or DMAs. Such explicit copying (also referred to as
// array packing/unpacking in the literature), when done on arrays that exhibit
// reuse, results in near elimination of conflict misses, TLB misses, reduced
// use of hardware prefetch streams, and reduced false sharing. It is also
// necessary for hardware with explicitly managed levels in the memory
// hierarchy, where DMAs may have to be used. This optimization is often
// performed on already tiled code.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Passes.h"

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <optional>

namespace mlir {
namespace affine {
#define GEN_PASS_DEF_AFFINEDATACOPYGENERATION
#include "mlir/Dialect/Affine/Passes.h.inc"
} // namespace affine
} // namespace mlir

#define DEBUG_TYPE …

using namespace mlir;
using namespace mlir::affine;

namespace {

/// Replaces all loads and stores on memrefs living in `slowMemorySpace` by
/// introducing copy operations to transfer data into `fastMemorySpace` and
/// rewriting the original loads/stores to instead access the allocated fast
/// memory buffers. Additional options specify the identifier corresponding to
/// the fast memory space and the amount of fast memory space available. The
/// pass traverses the nesting structure, recursing into inner levels if
/// necessary to determine at what depth copies need to be placed so that the
/// allocated buffers fit within the memory capacity provided.
// TODO: We currently can't generate copies correctly when stores
// are strided. Check for strided stores.
struct AffineDataCopyGeneration
    : public affine::impl::AffineDataCopyGenerationBase<
          AffineDataCopyGeneration> { … };

} // namespace

/// Generates copies for memrefs living in `slowMemorySpace` into newly created
/// buffers in `fastMemorySpace`, and replaces memory operations to the former
/// by the latter. Only load ops are handled for now.
std::unique_ptr<OperationPass<func::FuncOp>>
mlir::affine::createAffineDataCopyGenerationPass(
    unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace,
    int minDmaTransferSize, uint64_t fastMemCapacityBytes) { … }

std::unique_ptr<OperationPass<func::FuncOp>>
mlir::affine::createAffineDataCopyGenerationPass() { … }
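// A minimal sketch of scheduling this pass from a pass pipeline, using the
// factory declared above. It assumes a module-level PassManager `pm`; the
// memory space identifiers and capacity values below are hypothetical:
//
//   OpPassManager &funcPM = pm.nest<func::FuncOp>();
//   funcPM.addPass(affine::createAffineDataCopyGenerationPass(
//       /*slowMemorySpace=*/0, /*fastMemorySpace=*/1, /*tagMemorySpace=*/2,
//       /*minDmaTransferSize=*/1024, /*fastMemCapacityBytes=*/32 * 1024));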
/// Generate copies for this block. The block is partitioned into separate
/// ranges: each range is either a sequence of one or more operations starting
/// and ending with an affine load or store op, or just an affine.for op (which
/// could have other affine.for ops nested within).
void AffineDataCopyGeneration::runOnBlock(Block *block,
                                          DenseSet<Operation *> &copyNests) { … }

void AffineDataCopyGeneration::runOnOperation() { … }
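// An illustrative before/after sketch of the rewrite this pass performs, shown
// in affine IR. The memref shape, trip count, and fast memory space identifier
// (1) are hypothetical; the actual copy placement depth and buffer sizes are
// computed by the pass:
//
//   // Before: the load accesses %A in the slow (default) memory space.
//   affine.for %i = 0 to 256 {
//     %v = affine.load %A[%i] : memref<256xf32>
//     // ... uses of %v ...
//   }
//
//   // After: a fast buffer is allocated, data is copied in point-wise (or via
//   // DMA), and the original load is rewritten to target the buffer.
//   %buf = memref.alloc() : memref<256xf32, 1>
//   affine.for %ii = 0 to 256 {
//     %w = affine.load %A[%ii] : memref<256xf32>
//     affine.store %w, %buf[%ii] : memref<256xf32, 1>
//   }
//   affine.for %i = 0 to 256 {
//     %v = affine.load %buf[%i] : memref<256xf32, 1>
//     // ... uses of %v ...
//   }
//   memref.dealloc %buf : memref<256xf32, 1>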