//===- AsyncRegionRewriter.cpp - Implementation of GPU async rewriters ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the GPU dialect pattern rewriters that make GPU op // within a region execute asynchronously. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/Async/IR/Async.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Utils.h" #include "mlir/IR/Builders.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/TypeSwitch.h" namespace mlir { #define GEN_PASS_DEF_GPUASYNCREGIONPASS #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" } // namespace mlir usingnamespacemlir; namespace { class GpuAsyncRegionPass : public impl::GpuAsyncRegionPassBase<GpuAsyncRegionPass> { … }; } // namespace static bool isTerminator(Operation *op) { … } static bool hasSideEffects(Operation *op) { … } // Region walk callback which makes GPU ops implementing the AsyncOpInterface // execute asynchronously. struct GpuAsyncRegionPass::ThreadTokenCallback { … }; /// Erases `executeOp` and returns a clone with additional `results`. async::ExecuteOp addExecuteResults(async::ExecuteOp executeOp, ValueRange results) { … } // Callback for `async.execute` ops which tries to push the contained // synchronous `gpu.wait` op to the dependencies of the `async.execute`. 
struct GpuAsyncRegionPass::DeferWaitCallback { … };

// Callback for `async.execute` ops which repeats !gpu.async.token results
// so that each of them is only used once.
struct GpuAsyncRegionPass::SingleTokenUseCallback { … };

// Replaces synchronous GPU ops in the op's region with asynchronous ones and
// inserts the necessary synchronization (as gpu.wait ops). Assumes sequential
// execution semantics and that no GPU ops are asynchronous yet.
void GpuAsyncRegionPass::runOnOperation() { … }

// Factory function for the pass. Returns an owning pointer to an
// `OperationPass` that runs on `func::FuncOp`.
// NOTE(review): presumably constructs a `GpuAsyncRegionPass` -- confirm
// against the body.
std::unique_ptr<OperationPass<func::FuncOp>> mlir::createGpuAsyncRegionPass() { … }