#include "mlir/Dialect/NVGPU/Utils/MMAUtils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
usingnamespacemlir;
usingnamespacemlir::nvgpu;
/// File-local compile-time constants.
/// NOTE(review): the initializer values are elided in this view. Judging by the
/// names only, these presumably describe the per-warp tile layout (threads that
/// cooperate on one tile row, and the number of rows in a tile) — confirm
/// against the actual values and their users before relying on this.
static constexpr int64_t kThreadsPerRow = …;
static constexpr int64_t kNumRowsPerTile = …;
/// File-local predicate over a `MatMulOperandRole` value.
/// NOTE(review): the body is elided in this view; by its name it presumably
/// reports whether `operandType` denotes the accumulator or result operand of
/// a matmul — confirm against the definition.
static bool isAccumulatorOrResult(MatMulOperandRole operandType) { … }
/// File-local helper that derives an `int64_t` from a `WarpMatrixInfo`.
/// NOTE(review): the body is elided in this view; by its name the result is
/// presumably the number of registers each thread holds for one matrix
/// fragment of `type` — confirm against the definition.
static int64_t inferNumRegistersPerMatrixFragment(const WarpMatrixInfo &type) { … }
/// File-local helper returning a two-element shape computed from an operand
/// shape, its element type, and a line size given in bits.
/// NOTE(review): the body is elided in this view; the meaning and order of the
/// two returned extents (presumably tile counts or tile dimensions) must be
/// confirmed against the definition.
static std::array<int64_t, 2> getTileShape(ArrayRef<int64_t> operandShape,
Type elementType,
int64_t lineSizeBits) { … }
/// Looks up a `vector::ContractionOp` associated with `op`; the `FailureOr`
/// return type lets the lookup fail.
/// NOTE(review): the body is elided in this view; by its name it presumably
/// searches the users of `op` for a contraction and fails when none is found —
/// confirm which user is chosen when there are several.
FailureOr<vector::ContractionOp> nvgpu::getUserContract(Operation *op) { … }
/// Builds a `WarpMatrixInfo` description for `op`, or returns failure.
/// NOTE(review): the body is elided in this view; which operation kinds are
/// accepted and how the operand role is determined must be confirmed against
/// the definition.
FailureOr<WarpMatrixInfo> nvgpu::getWarpMatrixInfo(Operation *op) { … }
/// Returns an `int64_t` derived from the warp matrix description `type`.
/// NOTE(review): the body is elided in this view; by its name the result is
/// presumably the width of one tile row in bits — confirm the set of possible
/// values against the definition.
int64_t nvgpu::inferTileWidthInBits(const WarpMatrixInfo &type) { … }
/// Produces a `FragmentElementInfo` for the warp matrix description `type`, or
/// returns failure.
/// NOTE(review): the body is elided in this view; presumably this selects the
/// per-thread register type used by `mma.sync` for the fragment and fails for
/// unsupported element types — confirm against the definition.
FailureOr<FragmentElementInfo>
nvgpu::getMmaSyncRegisterType(const WarpMatrixInfo &type) { … }
/// File-local helper that builds an `AffineMap` from a line size, element
/// type, operand shape, accumulator flag, elements-per-register count, and an
/// affine expression for the logical value id.
/// NOTE(review): the body is elided in this view; by its name the map
/// presumably takes a register index to the offset of the tile that register
/// belongs to — confirm the map's dimensions and results, and the unit of
/// `lineSize`, against the definition.
static AffineMap getRegisterIndexToTileOffsetMap(int64_t lineSize,
Type elementType,
ArrayRef<int64_t> operandShape,
bool isAccumulator,
int64_t elementsPerRegister,
AffineExpr logicalValueId) { … }
/// Builds an `AffineMap` for the fragment described by `fragmentType`, or
/// returns failure. Takes an `OpBuilder` and a `Location` in addition to the
/// fragment description.
/// NOTE(review): the body is elided in this view; by its name the map
/// presumably takes (laneId, valueId) to a coordinate within the matrix
/// operand — confirm the dimension order and failure conditions against the
/// definition.
FailureOr<AffineMap>
nvgpu::getLaneIdAndValueIdToOperandCoord(OpBuilder &builder, Location loc,
const WarpMatrixInfo &fragmentType) { … }
/// Computes `LdMatrixParams` for the warp matrix description `type` and the
/// requested `transpose` setting, or returns failure.
/// NOTE(review): the body is elided in this view; which fields are populated
/// and which operand configurations are rejected must be confirmed against
/// the definition.
FailureOr<nvgpu::LdMatrixParams>
nvgpu::getLdMatrixParams(const WarpMatrixInfo &type, bool transpose) { … }
/// Builds an `AffineMap` from the given `LdMatrixParams`, or returns failure.
/// Takes an `OpBuilder` and a `Location` in addition to the parameters.
/// NOTE(review): the body is elided in this view; by its name the map
/// presumably takes a lane id to the matrix coordinate that lane supplies to
/// `ldmatrix` — confirm the map's results and failure conditions against the
/// definition.
FailureOr<AffineMap>
nvgpu::getLaneIdToLdMatrixMatrixCoord(OpBuilder &builder, Location loc,
                                      const LdMatrixParams &params) { … }
/// Predicate over a `vector::TransferReadOp` (read overload).
/// NOTE(review): the body is elided in this view; by its name it presumably
/// reports whether the transfer read can be lowered to a warp-level matrix
/// load — confirm the exact legality conditions against the definition.
bool nvgpu::canLowerToWarpMatrixOperation(vector::TransferReadOp op) { … }
/// Predicate over a `vector::TransferWriteOp` (write overload).
/// NOTE(review): the body is elided in this view; by its name it presumably
/// reports whether the transfer write can be lowered to a warp-level matrix
/// store — confirm the exact legality conditions against the definition.
bool nvgpu::canLowerToWarpMatrixOperation(vector::TransferWriteOp op) { … }