//===- AffineDataCopyGeneration.cpp - Explicit memref copying pass ------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to automatically promote accessed memref regions
// to buffers in a faster memory space that is explicitly managed, with the
// necessary data movement operations performed through either regular
// point-wise load/stores or DMAs. Such explicit copying (also referred to as
// array packing/unpacking in the literature), when done on arrays that exhibit
// reuse, results in near elimination of conflict misses, TLB misses, reduced
// use of hardware prefetch streams, and reduced false sharing. It is also
// necessary for hardware with explicitly managed levels in the memory
// hierarchy, where DMAs may have to be used. This optimization is often
// performed on already tiled code.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Passes.h"

#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <optional>

namespace mlir {
namespace affine {
#define GEN_PASS_DEF_AFFINEDATACOPYGENERATION
#include "mlir/Dialect/Affine/Passes.h.inc"
} // namespace affine
} // namespace mlir

#define DEBUG_TYPE …

using namespace mlir;
using namespace mlir::affine;

namespace {

/// Replaces all loads and stores on memrefs living in `slowMemorySpace` by
/// introducing copy operations to transfer data into `fastMemorySpace` and
/// rewriting the original loads/stores to instead access the allocated fast
/// memory buffers. Additional options specify the identifier corresponding to
/// the fast memory space and the amount of fast memory space available. The
/// pass traverses the nesting structure, recursing into inner levels if
/// necessary to determine at what depth copies need to be placed so that the
/// allocated buffers fit within the memory capacity provided.
// TODO: We currently can't generate copies correctly when stores
// are strided. Check for strided stores.
struct AffineDataCopyGeneration
    : public affine::impl::AffineDataCopyGenerationBase<
          AffineDataCopyGeneration> { … };

} // namespace

/// Generates copies for memrefs living in `slowMemorySpace` into newly created
/// buffers in `fastMemorySpace`, and replaces memory operations to the former
/// by the latter. Only load ops are handled for now.
std::unique_ptr<OperationPass<func::FuncOp>>
mlir::affine::createAffineDataCopyGenerationPass(
    unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace,
    int minDmaTransferSize, uint64_t fastMemCapacityBytes) { … }

std::unique_ptr<OperationPass<func::FuncOp>>
mlir::affine::createAffineDataCopyGenerationPass() { … }
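// A minimal sketch of scheduling this pass from a pass pipeline, using the
// factory declared above. It assumes a module-level PassManager `pm`; the
// memory space identifiers and capacity values below are hypothetical:
//
//   OpPassManager &funcPM = pm.nest<func::FuncOp>();
//   funcPM.addPass(affine::createAffineDataCopyGenerationPass(
//       /*slowMemorySpace=*/0, /*fastMemorySpace=*/1, /*tagMemorySpace=*/2,
//       /*minDmaTransferSize=*/1024, /*fastMemCapacityBytes=*/32 * 1024));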
/// Generate copies for this block. The block is partitioned into separate
/// ranges: each range is either a sequence of one or more operations starting
/// and ending with an affine load or store op, or just an affine.for op (which
/// could have other affine.for ops nested within).
void AffineDataCopyGeneration::runOnBlock(Block *block,
                                          DenseSet<Operation *> &copyNests) { … }

void AffineDataCopyGeneration::runOnOperation() { … }
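// An illustrative before/after sketch of the rewrite this pass performs, shown
// in affine IR. The memref shape, trip count, and fast memory space identifier
// (1) are hypothetical; the actual copy placement depth and buffer sizes are
// computed by the pass:
//
//   // Before: the load accesses %A in the slow (default) memory space.
//   affine.for %i = 0 to 256 {
//     %v = affine.load %A[%i] : memref<256xf32>
//     // ... uses of %v ...
//   }
//
//   // After: a fast buffer is allocated, data is copied in point-wise (or via
//   // DMA), and the original load is rewritten to target the buffer.
//   %buf = memref.alloc() : memref<256xf32, 1>
//   affine.for %ii = 0 to 256 {
//     %w = affine.load %A[%ii] : memref<256xf32>
//     affine.store %w, %buf[%ii] : memref<256xf32, 1>
//   }
//   affine.for %i = 0 to 256 {
//     %v = affine.load %buf[%i] : memref<256xf32, 1>
//     // ... uses of %v ...
//   }
//   memref.dealloc %buf : memref<256xf32, 1>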