#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
#include <type_traits>
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Region.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
// File-local debug macros.
// NOTE(review): DEBUG_TYPE presumably names this file's debug category and
// DBGS()/DBGSNL() presumably wrap the debug output stream -- confirm against
// the macro bodies.
#define DEBUG_TYPE …
#define DBGS() …
#define DBGSNL() …
// GEN_PASS_DEF_CONVERTVECTORTOGPU is defined immediately before Passes.h.inc
// is included, and the include is placed inside namespace mlir.
// NOTE(review): presumably this is what provides the generated
// impl::ConvertVectorToGPUBase used by the pass struct later in this file --
// confirm against the generated header.
namespace mlir {
#define GEN_PASS_DEF_CONVERTVECTORTOGPU
#include "mlir/Conversion/Passes.h.inc"
}
usingnamespacemlir;
/// File-local helper, templated over the transfer op type. Takes the rewriter,
/// the transfer op `xferOp`, an affine `offsetMap` and `dimValues`; returns
/// void and receives `indices` by mutable reference.
/// NOTE(review): presumably fills `indices` with the indices of `xferOp`
/// adjusted by `offsetMap` applied to `dimValues` -- confirm against the body.
template <typename TransferOpType>
static void getXferIndices(RewriterBase &rewriter, TransferOpType xferOp,
AffineMap offsetMap, ArrayRef<Value> dimValues,
SmallVector<Value, 4> &indices) { … }
/// Predicate over a vector::ContractionOp, parameterized by `useNvGpu`.
/// NOTE(review): presumably returns true when `contract` can be converted to
/// an MMA matmul, with `useNvGpu` selecting which lowering's constraints
/// apply -- confirm against the body.
static bool contractSupportsMMAMatrixType(vector::ContractionOp contract,
bool useNvGpu) { … }
/// Predicate over an AffineMap named `permutationMap`.
/// NOTE(review): presumably returns true when the map describes a transposed
/// matrix load -- confirm the exact map shape accepted against the body.
static bool isTransposeMatrixLoadMap(AffineMap permutationMap) { … }
/// Returns an optional int64_t computed from the ShapedType `type`; the
/// optional return type allows for "no value".
/// NOTE(review): presumably yields the row stride of `type` when it is
/// statically known and std::nullopt otherwise -- confirm which dimension and
/// which types are handled against the body.
static std::optional<int64_t> getStaticallyKnownRowStride(ShapedType type) { … }
/// Predicate over a vector::TransferReadOp.
/// NOTE(review): presumably returns true when `readOp` can be converted to an
/// MMA matrix load -- confirm the accepted conditions against the body.
static bool transferReadSupportsMMAMatrixType(vector::TransferReadOp readOp) { … }
/// Predicate over a vector::TransferWriteOp.
/// NOTE(review): presumably returns true when `writeOp` can be converted to an
/// MMA matrix store -- confirm the accepted conditions against the body.
static bool
transferWriteSupportsMMAMatrixType(vector::TransferWriteOp writeOp) { … }
/// Predicate over an arith::ConstantOp.
/// NOTE(review): presumably returns true when `constantOp` can be represented
/// as an MMA matrix constant -- confirm the accepted conditions against the
/// body.
static bool constantSupportsMMAMatrixType(arith::ConstantOp constantOp) { … }
/// Predicate over a vector::BroadcastOp.
/// NOTE(review): presumably returns true when `broadcastOp` can be converted
/// to an MMA matrix value -- confirm the accepted conditions against the body.
static bool broadcastSupportsMMAMatrixType(vector::BroadcastOp broadcastOp) { … }
/// Predicate templated over the extension op type `ExtOpTy`.
/// NOTE(review): presumably instantiated for the integer extension ops and
/// returns true when `extOp` can be folded into an MMA-compatible operation --
/// confirm against the body and the call sites.
template <typename ExtOpTy>
static bool integerExtendSupportsMMAMatrixType(ExtOpTy extOp) { … }
/// Predicate over an arith::ExtFOp.
/// NOTE(review): presumably the floating-point counterpart of
/// integerExtendSupportsMMAMatrixType -- confirm the accepted conditions
/// against the body.
static bool fpExtendSupportsMMAMatrixType(arith::ExtFOp extOp) { … }
/// Maps an operation to an optional gpu::MMAElementwiseOp; the optional return
/// type allows for "no value".
/// NOTE(review): presumably returns the MMA elementwise kind matching `op` and
/// std::nullopt when `op` has no MMA equivalent -- confirm against the body.
static std::optional<gpu::MMAElementwiseOp>
convertElementwiseOpToMMA(Operation *op) { … }
/// Predicate over a generic Operation.
/// NOTE(review): presumably returns true when `op` is an elementwise operation
/// with an MMA equivalent (see convertElementwiseOpToMMA) -- confirm against
/// the body.
static bool elementwiseSupportsMMAMatrixType(Operation *op) { … }
/// Predicate over a vector::ExtractStridedSliceOp.
/// NOTE(review): presumably returns true when `op` can be handled by the MMA
/// conversion -- confirm the accepted conditions against the body.
static bool
extractStridedSliceSupportsMMAMatrixType(vector::ExtractStridedSliceOp op) { … }
/// Predicate over a generic Operation, parameterized by `useNvGpu`.
/// NOTE(review): presumably dispatches on the kind of `op` to the per-op
/// *SupportsMMAMatrixType predicates declared above -- confirm against the
/// body.
static bool supportsMMaMatrixType(Operation *op, bool useNvGpu) { … }
/// Returns a set of operations computed from `op` using the given backward and
/// forward slice options.
/// NOTE(review): presumably collects the combined backward/forward slice
/// reachable from `op` -- confirm traversal order and termination against the
/// body.
static SetVector<Operation *>
getSliceContract(Operation *op,
const BackwardSliceOptions &backwardSliceOptions,
const ForwardSliceOptions &forwardSliceOptions) { … }
/// Returns a set of operations computed from `op`, parameterized by
/// `useNvGpu`.
/// NOTE(review): presumably gathers the operations under `op` that should be
/// converted to MMA form -- confirm the selection criteria and ordering
/// against the body.
static SetVector<Operation *> getOpToConvert(mlir::Operation *op,
bool useNvGpu) { … }
// File-local rewrite patterns.
namespace {
/// Rewrite pattern rooted on vector::ContractionOp.
/// NOTE(review): presumably rewrites contractions into the form expected by
/// the MMA conversion -- confirm against the pattern body.
struct PrepareContractToGPUMMA
: public OpRewritePattern<vector::ContractionOp> { … };
/// Rewrite pattern rooted on vector::TransposeOp; declared `final`.
/// NOTE(review): presumably merges a transpose with the transfer_read that
/// feeds it -- confirm against the pattern body.
struct CombineTransferReadOpTranspose final
: public OpRewritePattern<vector::TransposeOp> { … };
}
/// Returns a C string derived from `op`.
/// NOTE(review): presumably the name of the MMA fragment/operand kind that
/// `op` produces or feeds -- confirm the possible return values against the
/// body.
static const char *inferFragType(Operation *op) { … }
/// Conversion step for a vector::TransferReadOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably emits the MMA matrix load for `op` and records the
/// replacement value in `valueMapping` -- confirm against the body.
static LogicalResult
convertTransferReadOp(RewriterBase &rewriter, vector::TransferReadOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for a vector::TransferWriteOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably looks up the converted source value in
/// `valueMapping` and emits the MMA matrix store for `op` -- confirm against
/// the body.
static LogicalResult
convertTransferWriteOp(RewriterBase &rewriter, vector::TransferWriteOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Returns a VectorType derived from the fragment element description
/// `regInfo`.
/// NOTE(review): presumably the per-thread register vector type of an
/// mma.sync operand -- confirm the shape/element-type computation against the
/// body.
static VectorType
getMmaSyncVectorOperandType(const nvgpu::FragmentElementInfo &regInfo) { … }
/// Conversion step for an arith::ConstantOp on the mma.sync path (per the
/// function name); returns a LogicalResult and takes `valueMapping` by mutable
/// reference.
/// NOTE(review): presumably materializes the constant in the mma.sync operand
/// layout and records it in `valueMapping` -- confirm against the body.
static LogicalResult
convertConstantOpMmaSync(RewriterBase &rewriter, arith::ConstantOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Classifies a vector::TransferReadOp; the FailureOr<bool> return type allows
/// a failure outcome in addition to true/false.
/// NOTE(review): presumably reports whether the read's permutation map is a
/// transpose, failing on maps it does not recognize -- confirm against the
/// body.
static FailureOr<bool> isTransposed(vector::TransferReadOp op) { … }
/// Conversion step for a vector::TransferReadOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably lowers `op` to ldmatrix-style loads and records
/// the result in `valueMapping` -- confirm against the body. The name is
/// spelled "creat" (not "create"); it is part of the existing interface.
static LogicalResult
creatLdMatrixCompatibleLoads(RewriterBase &rewriter, vector::TransferReadOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for a vector::TransferReadOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably the fallback lowering for reads that cannot use
/// ldmatrix -- confirm against the body.
static LogicalResult
createNonLdMatrixLoads(RewriterBase &rewriter, vector::TransferReadOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Predicate over a MemRefType.
/// NOTE(review): presumably returns true when `type` is in the GPU
/// shared/workgroup memory space -- confirm which memory-space attributes are
/// recognized against the body.
static bool isSharedMemory(MemRefType type) { … }
/// Conversion step for a vector::TransferReadOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably chooses between creatLdMatrixCompatibleLoads and
/// createNonLdMatrixLoads for `op` -- confirm the selection criteria against
/// the body.
static LogicalResult
convertTransferReadToLoads(RewriterBase &rewriter, vector::TransferReadOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for a vector::TransferWriteOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably looks up the converted value in `valueMapping` and
/// emits the stores that replace `op` -- confirm against the body.
static LogicalResult
convertTransferWriteToStores(RewriterBase &rewriter, vector::TransferWriteOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Takes an ArrayAttr and a mutable `results` vector of int64_t; returns void.
/// NOTE(review): presumably appends the integer value of each element of
/// `arrayAttr` to `results` -- confirm against the body (including whether
/// `results` is cleared first).
static void populateFromInt64AttrArray(ArrayAttr arrayAttr,
SmallVectorImpl<int64_t> &results) { … }
/// Conversion step for a vector::ExtractStridedSliceOp; returns a
/// LogicalResult and takes `valueMapping` by mutable reference.
/// NOTE(review): presumably rewrites the slice in terms of the already
/// converted source value and records the result in `valueMapping` -- confirm
/// against the body.
static LogicalResult
convertExtractStridedSlice(RewriterBase &rewriter,
vector::ExtractStridedSliceOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for a vector::ContractionOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably looks up the converted operands in `valueMapping`,
/// emits the MMA compute op and records its result -- confirm against the
/// body.
static LogicalResult
convertContractOp(RewriterBase &rewriter, vector::ContractionOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for a vector::ContractionOp on the mma.sync path (per the
/// function name); returns a LogicalResult and takes `valueMapping` by mutable
/// reference.
/// NOTE(review): presumably the mma.sync counterpart of convertContractOp --
/// confirm against the body.
static LogicalResult
convertContractOpToMmaSync(RewriterBase &rewriter, vector::ContractionOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for an arith::ConstantOp; returns a LogicalResult and takes
/// `valueMapping` by mutable reference.
/// NOTE(review): presumably materializes the constant as an MMA matrix value
/// and records it in `valueMapping` -- confirm against the body.
static LogicalResult
convertConstantOp(RewriterBase &rewriter, arith::ConstantOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for a vector::BroadcastOp; returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably materializes the broadcast as an MMA matrix value
/// and records it in `valueMapping` -- confirm against the body.
static LogicalResult
convertBroadcastOp(RewriterBase &rewriter, vector::BroadcastOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Takes an scf::ForOp and a range of new init args and returns an scf::ForOp.
/// NOTE(review): presumably builds a replacement loop carrying `newInitArgs`
/// as additional iteration arguments and retires `loop` -- confirm how the
/// body and results of the old loop are handled against the body.
static scf::ForOp replaceForOpWithNewSignature(RewriterBase &rewriter,
scf::ForOp loop,
ValueRange newInitArgs) { … }
/// Conversion step for an scf::ForOp; returns a LogicalResult and takes
/// `valueMapping` by mutable reference.
/// NOTE(review): presumably rewrites the loop so converted values are carried
/// across iterations (see replaceForOpWithNewSignature) and updates
/// `valueMapping` -- confirm against the body.
static LogicalResult convertForOp(RewriterBase &rewriter, scf::ForOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for an scf::YieldOp; returns a LogicalResult and takes
/// `valueMapping` by mutable reference.
/// NOTE(review): presumably rewrites the yield so it forwards the converted
/// values found in `valueMapping` -- confirm against the body.
static LogicalResult
convertYieldOp(RewriterBase &rewriter, scf::YieldOp op,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Conversion step for a generic elementwise operation; the MMA elementwise
/// kind is supplied by the caller as `opType`. Returns a LogicalResult and
/// takes `valueMapping` by mutable reference.
/// NOTE(review): presumably looks up the converted operands in `valueMapping`,
/// emits the MMA elementwise op of kind `opType` and records its result --
/// confirm against the body.
static LogicalResult
convertElementwiseOp(RewriterBase &rewriter, Operation *op,
gpu::MMAElementwiseOp opType,
llvm::DenseMap<Value, Value> &valueMapping) { … }
/// Public entry point (defined in namespace mlir) that receives a
/// RewritePatternSet by mutable reference and a `useNvGpu` flag.
/// NOTE(review): presumably registers the preparation patterns defined in this
/// file, with `useNvGpu` selecting which ones -- confirm against the body.
void mlir::populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns,
bool useNvGpu) { … }
/// Public entry point (defined in namespace mlir) operating on `rootOp` with
/// the given rewriter; returns a LogicalResult.
/// NOTE(review): presumably converts the supported vector operations under
/// `rootOp` to GPU MMA operations using the convert* helpers above -- confirm
/// the failure conditions against the body.
LogicalResult mlir::convertVectorToMMAOps(RewriterBase &rewriter,
Operation *rootOp) { … }
/// Public entry point (defined in namespace mlir) operating on `rootOp` with
/// the given rewriter; returns a LogicalResult.
/// NOTE(review): presumably the mma.sync counterpart of convertVectorToMMAOps
/// -- confirm the failure conditions against the body.
LogicalResult mlir::convertVectorToNVVMCompatibleMMASync(RewriterBase &rewriter,
Operation *rootOp) { … }
namespace {
/// File-local pass type deriving from impl::ConvertVectorToGPUBase, with
/// itself as the template argument.
/// NOTE(review): presumably runs the preparation patterns and then one of the
/// two conversion entry points above -- confirm against the pass body.
struct ConvertVectorToGPUPass
: public impl::ConvertVectorToGPUBase<ConvertVectorToGPUPass> { … };
}
/// Public factory (defined in namespace mlir) returning an owning pointer to a
/// Pass, parameterized by `useNvGpu`.
/// NOTE(review): presumably constructs a ConvertVectorToGPUPass configured
/// with `useNvGpu` -- confirm against the body.
std::unique_ptr<Pass> mlir::createConvertVectorToGPUPass(bool useNvGpu) { … }