//===- CreateAsyncGroups.cpp - Create async device copies -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/NVGPU/Transforms/Transforms.h"

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"

using namespace mlir;

/// Return "true" if the given vector transfer op is contiguous and suitable
/// for replacement with an async copy.
template <typename OpTy>
static bool isContiguousXferOp(OpTy op) { … }

/// Return "true" if the given op is a contiguous and suitable
/// vector.transfer_write or vector.store op.
static bool isContiguousStore(Operation *write) { … }

/// Return "true" if the given op is a contiguous and suitable
/// vector.transfer_read or vector.load op.
static bool isContiguousRead(Operation *read) { … }

namespace {
/// A vector.create_mask op and extract position.
struct TransferMask { … };
} // namespace

/// If the given vector load op has a mask that is defined by
/// vector.create_mask, return that op.
static FailureOr<TransferMask> getMaskOp(Operation *loadOp) { … }

/// Build an SSA value that represents the number of read elements.
static Value buildNumReadElements(OpBuilder &b, Location loc,
                                  Operation *readOp) { … }

/// Return "true" if a copy with the given memref and vector types can be
/// lowered to a supported async copy.
static bool resultsInSupportedAsyncCopy(MemRefType memrefType,
                                        VectorType vecType) { … }

void nvgpu::createAsyncGroups(RewriterBase &rewriter, Operation *op,
                              bool bypassL1) { … }
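
//===----------------------------------------------------------------------===//
// Illustrative sketch, not part of the upstream implementation: the kind of
// size filtering a helper such as resultsInSupportedAsyncCopy has to perform.
// cp.async transfers 4, 8, or 16 bytes per instruction, so only 1-D vector
// copies of one of those sizes can become async copies. The helper name is
// hypothetical and the exact upstream checks may differ.
//===----------------------------------------------------------------------===//
static bool isSupportedAsyncCopySizeSketch(VectorType vecType) {
  // Only 1-D vector copies are considered.
  if (vecType.getRank() != 1)
    return false;
  // Total copy size in bits; cp.async supports 32, 64, and 128 bits.
  int64_t copyBits =
      vecType.getNumElements() * vecType.getElementTypeBitWidth();
  return copyBits == 32 || copyBits == 64 || copyBits == 128;
}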
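
//===----------------------------------------------------------------------===//
// Illustrative usage sketch, not part of the upstream file: how a client might
// invoke the public entry point declared in
// mlir/Dialect/NVGPU/Transforms/Transforms.h. The helper name and the choice
// of bypassL1 are assumptions for the example; it also assumes
// mlir/IR/PatternMatch.h is available for IRRewriter.
//===----------------------------------------------------------------------===//
static void convertCopiesToAsyncGroupsSketch(Operation *funcLikeOp) {
  IRRewriter rewriter(funcLikeOp->getContext());
  // bypassL1 = true requests the cache-global variant of cp.async, which
  // bypasses the L1 cache for 16-byte copies.
  nvgpu::createAsyncGroups(rewriter, funcLikeOp, /*bypassL1=*/true);
}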