//===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the GPU dialect kernel outlining pass. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/AsmParser/AsmParser.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/RegionUtils.h" #include <limits> namespace mlir { #define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONS #define GEN_PASS_DEF_GPUKERNELOUTLINING #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" } // namespace mlir using namespace mlir; template <typename OpTy> static void createForAllDimensions(OpBuilder &builder, Location loc, SmallVectorImpl<Value> &values) { … } /// Adds operations generating block/thread ids and grid/block dimensions at the /// beginning of the `launchFuncOpBody` region. Add mapping from argument in /// entry block of `launchOpBody`, to the corresponding result value of the /// added operations. static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody, Region &launchOpBody, IRMapping &map, bool hasCluster = false) { … } /// Identifies operations that are beneficial to sink into kernels. 
These /// operations must not have side effects, as otherwise sinking (and hence /// duplicating them) is not legal. static bool isLikelyAnIndexComputation(Operation *op) { … } /// For a given operation `op`, computes whether it is beneficial to sink the /// operation into the kernel. An operation can be sunk if doing so does not /// introduce new kernel arguments. Whether a value is already available in the /// kernel (and hence does not introduce new arguments) is checked by /// querying `existingDependencies` and `availableValues`. /// If an operand is not yet available, we recursively check whether it can be /// made available by sinking its defining op. /// Operations that are identified for sinking are added to `beneficiaryOps` in /// the order they should appear in the kernel. Furthermore, `availableValues` /// is updated with results that will be available after sinking the identified /// ops. static bool extractBeneficiaryOps( Operation *op, const SetVector<Value> &existingDependencies, SetVector<Operation *> &beneficiaryOps, llvm::SmallPtrSetImpl<Value> &availableValues, llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) { … } LogicalResult mlir::sinkOperationsIntoLaunchOp( gpu::LaunchOp launchOp, llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) { … } /// Return the provided KernelDim3 as an array of i32 constants if possible. static DenseI32ArrayAttr maybeConstantDimsAttr(gpu::KernelDim3 dims) { … } /// Outline the `gpu.launch` operation body into a kernel function. Replace /// `gpu.terminator` operations by `gpu.return` in the generated function. /// Set block and grid size bounds if known. 
static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, StringRef kernelFnName, SetVector<Value> &operands) { … } gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp, StringRef kernelFnName, llvm::SmallVectorImpl<Value> &operands) { … } /// Replace `gpu.launch` operations with a `gpu.launch_func` operation /// launching `kernelFunc`. The kernel func contains the body of the /// `gpu.launch` with constant region arguments inlined. static void convertToLaunchFuncOp(gpu::LaunchOp launchOp, gpu::GPUFuncOp kernelFunc, ValueRange operands) { … } namespace { /// Pass that moves ops that are likely index computations into the gpu.launch /// body. class GpuLaunchSinkIndexComputationsPass : public impl::GpuLaunchSinkIndexComputationsBase< GpuLaunchSinkIndexComputationsPass> { … }; /// Pass that moves the kernel of each LaunchOp into its separate nested module. /// /// This pass moves the kernel code of each LaunchOp into a function created /// inside a nested module. It also creates an external function of the same /// name in the parent module. /// /// The gpu.modules are intended to be compiled to a cubin blob independently in /// a separate pass. The external functions can then be annotated with the /// symbol of the cubin accessor function. class GpuKernelOutliningPass : public impl::GpuKernelOutliningBase<GpuKernelOutliningPass> { … }; } // namespace std::unique_ptr<Pass> mlir::createGpuLauchSinkIndexComputationsPass() { … } std::unique_ptr<OperationPass<ModuleOp>> mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) { … }