#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#define DEBUG_TYPE …
#define DBGS() …
#define DBGSE() …
namespace mlir {
#define GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS
#include "mlir/Conversion/Passes.h.inc"
}
usingnamespacemlir;
/// File-scope compile-time integer constant.
/// NOTE(review): the initializer is not visible in this view; the name
/// suggests it relates to dropping the 4 least-significant bits of some
/// value -- confirm the value and its unit against the uses.
constexpr int exclude4LSB = …;
/// File-local helper taking an ImplicitLocOpBuilder `b` and a Value `value`,
/// and returning a Value.
/// NOTE(review): body not visible here; judging by the name it presumably
/// produces an i32 version of `value` -- confirm how non-integer or already
/// 32-bit inputs are handled.
static Value truncToI32(ImplicitLocOpBuilder &b, Value value) { … }
/// File-local helper mapping one Type (`vectorResultType`) to another Type.
/// NOTE(review): body not visible here; presumably derives the result type an
/// intrinsic produces for a given vector result type -- confirm the supported
/// element types and shapes.
static Type inferIntrinsicResultType(Type vectorResultType) { … }
/// File-local helper returning a Value built from `intrinsicResult`.
///
/// Parameters (from the signature):
///   loc                 - location handed to the helper.
///   intrinsicResultType - Type associated with the intrinsic's result.
///   resultType          - Type associated with the desired result.
///   intrinsicResult     - the Value to convert.
///   rewriter            - RewriterBase available for creating IR.
///
/// NOTE(review): body not visible here; presumably reshapes/repacks the
/// intrinsic's result into `resultType` -- confirm.
static Value convertIntrinsicResult(Location loc, Type intrinsicResultType,
Type resultType, Value intrinsicResult,
RewriterBase &rewriter) { … }
/// File-local helper turning a single `operand` Value into a
/// SmallVector<Value>, parameterised by an NVVM::MMATypes tag
/// (`operandPtxType`).
/// NOTE(review): body not visible here; presumably splits the operand into
/// the individual values an MMA intrinsic expects for the given PTX type --
/// confirm the element ordering.
static SmallVector<Value> unpackOperandVector(ImplicitLocOpBuilder &b,
Value operand,
NVVM::MMATypes operandPtxType) { … }
/// File-local predicate over an nvgpu::MBarrierGroupType.
/// NOTE(review): body not visible here; the name suggests it reports whether
/// the barrier type lives in shared memory -- confirm which property of
/// `barrierType` is inspected.
static bool isMbarrierShared(nvgpu::MBarrierGroupType barrierType) { … }
/// Out-of-line definition of nvgpu::getMbarrierMemorySpace (declared in the
/// nvgpu namespace elsewhere). Takes an MLIRContext and a barrier group type
/// and returns an Attribute.
/// NOTE(review): body not visible here; presumably the returned Attribute is
/// the memory-space attribute for the barrier, possibly null for some
/// barrier types -- confirm.
/// NOTE(review): spelled "Mbarrier" here but "MBarrier" in
/// getMBarrierMemrefType; both names are part of the external interface.
Attribute nvgpu::getMbarrierMemorySpace(MLIRContext *context,
nvgpu::MBarrierGroupType barrierType) { … }
/// Out-of-line definition of nvgpu::getMBarrierMemrefType (declared in the
/// nvgpu namespace elsewhere). Takes an MLIRContext and a barrier group type
/// and returns a MemRefType.
/// NOTE(review): body not visible here; presumably this is the memref type
/// used to represent the barrier group after conversion -- confirm the
/// element type, shape and memory space it uses.
MemRefType nvgpu::getMBarrierMemrefType(MLIRContext *context,
nvgpu::MBarrierGroupType barrierType) { … }
namespace {
/// Conversion pattern for nvgpu::LdMatrixOp, built on
/// ConvertOpToLLVMPattern<nvgpu::LdMatrixOp>.
/// NOTE(review): body not visible here; the name indicates the op is lowered
/// to the NVVM dialect -- confirm which NVVM op(s) are emitted.
struct MmaLdMatrixOpToNVVM : public ConvertOpToLLVMPattern<nvgpu::LdMatrixOp> { … };
/// Maps a Type `t` to an NVVM::MMATypes value; the FailureOr return type
/// allows the mapping to fail.
/// NOTE(review): body not visible here; confirm which types are accepted and
/// which yield failure.
/// NOTE(review): this appears to sit inside the anonymous namespace opened
/// above, in which case `static` is redundant (harmless).
static FailureOr<NVVM::MMATypes> getNvvmMmaType(Type t) { … }
/// Conversion pattern for nvgpu::MmaSyncOp, built on
/// ConvertOpToLLVMPattern<nvgpu::MmaSyncOp>.
/// NOTE(review): body not visible here; the name indicates the op is lowered
/// to the NVVM dialect -- confirm the emitted op(s).
/// NOTE(review): the name uses "Opto" where the sibling pattern
/// MmaLdMatrixOpToNVVM uses "OpTo".
struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern<nvgpu::MmaSyncOp> { … };
/// Pass implementation deriving (CRTP) from
/// impl::ConvertNVGPUToNVVMPassBase<ConvertNVGPUToNVVMPass>.
/// NOTE(review): the base class is presumably produced by the
/// GEN_PASS_DEF_CONVERTNVGPUTONVVMPASS / Passes.h.inc include near the top of
/// the file -- confirm. Body not visible here; confirm the conversion target,
/// type-converter setup and which patterns are applied.
struct ConvertNVGPUToNVVMPass
: public impl::ConvertNVGPUToNVVMPassBase<ConvertNVGPUToNVVMPass> { … };
/// Builds and returns a std::string from three unsigned sizes (`matASize`,
/// `matBSize`, `matCSize`).
/// NOTE(review): body not visible here; judging by the name the string is the
/// inline-asm constraint list for a sparse MMA instruction and the sizes are
/// operand counts for the A, B and C matrices -- confirm.
static std::string buildMmaSparseAsmConstraintString(unsigned matASize,
unsigned matBSize,
unsigned matCSize) { … }
/// Builds and returns a std::string from the MMA description passed in.
///
/// Parameters (from the signature):
///   shape              - three int64_t values.
///   matASize/B/C       - unsigned sizes for the A, B and C operands.
///   ptxTypeA/B/C/D     - NVVM::MMATypes tags for the four operands.
///   overflow           - optional NVVM::MMAIntOverflow setting.
///   metaDataSelector   - unsigned selector value.
///
/// NOTE(review): body not visible here; presumably the result is the PTX
/// inline-asm text for a sparse mma.sync and `shape` is (m, n, k) -- confirm.
static std::string buildMmaSparseAsmString(
const std::array<int64_t, 3> &shape, unsigned matASize, unsigned matBSize,
unsigned matCSize, NVVM::MMATypes ptxTypeA, NVVM::MMATypes ptxTypeB,
NVVM::MMATypes ptxTypeC, NVVM::MMATypes ptxTypeD,
std::optional<NVVM::MMAIntOverflow> overflow, unsigned metaDataSelector) { … }
/// Returns an LLVM::InlineAsmOp on success; the FailureOr return type allows
/// the emission to fail.
///
/// Parameters (from the signature):
///   b                   - builder with an implicit location.
///   ptxTypeA/B/C/D      - NVVM::MMATypes tags for the four operands.
///   overflow            - optional NVVM::MMAIntOverflow setting.
///   unpackedAData       - Values for the A operand.
///   unpackedB/unpackedC - Values for the B and C operands.
///   indexData           - a single Value.
///   metadataSelector    - int64_t selector.
///   shape               - three int64_t values.
///   intrinsicResultType - Type for the result.
///
/// NOTE(review): body not visible here; presumably `indexData` is the
/// sparsity metadata and the op wraps a sparse mma.sync instruction --
/// confirm, including the conditions under which failure is returned.
/// NOTE(review): the selector is `int64_t metadataSelector` here but
/// `unsigned metaDataSelector` in buildMmaSparseAsmString; check the
/// conversion between the two is intentional.
static FailureOr<LLVM::InlineAsmOp> emitMmaSparseSyncOpAsm(
ImplicitLocOpBuilder &b, NVVM::MMATypes ptxTypeA, NVVM::MMATypes ptxTypeB,
NVVM::MMATypes ptxTypeC, NVVM::MMATypes ptxTypeD,
std::optional<NVVM::MMAIntOverflow> overflow, ArrayRef<Value> unpackedAData,
ArrayRef<Value> unpackedB, ArrayRef<Value> unpackedC, Value indexData,
int64_t metadataSelector, const std::array<int64_t, 3> &shape,
Type intrinsicResultType) { … }
/// Conversion pattern for nvgpu::MmaSparseSyncOp, built on
/// ConvertOpToLLVMPattern<nvgpu::MmaSparseSyncOp>.
/// NOTE(review): body not visible here; presumably it uses the sparse-MMA
/// inline-asm helpers defined in this file -- confirm.
struct NVGPUMmaSparseSyncLowering
: public ConvertOpToLLVMPattern<nvgpu::MmaSparseSyncOp> { … };
/// Conversion pattern for nvgpu::DeviceAsyncCopyOp, built on
/// ConvertOpToLLVMPattern<nvgpu::DeviceAsyncCopyOp>.
/// NOTE(review): body not visible here; confirm the ops emitted and any
/// constraints on source/destination memory spaces.
struct NVGPUAsyncCopyLowering
: public ConvertOpToLLVMPattern<nvgpu::DeviceAsyncCopyOp> { … };
/// Conversion pattern for nvgpu::DeviceAsyncCreateGroupOp, built on
/// ConvertOpToLLVMPattern<nvgpu::DeviceAsyncCreateGroupOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUAsyncCreateGroupLowering
: public ConvertOpToLLVMPattern<nvgpu::DeviceAsyncCreateGroupOp> { … };
/// Conversion pattern for nvgpu::DeviceAsyncWaitOp, built on
/// ConvertOpToLLVMPattern<nvgpu::DeviceAsyncWaitOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s) and
/// how an absent group count, if the op allows one, is handled.
struct NVGPUAsyncWaitLowering
: public ConvertOpToLLVMPattern<nvgpu::DeviceAsyncWaitOp> { … };
/// Conversion pattern for nvgpu::MBarrierCreateOp, built on
/// ConvertOpToLLVMPattern<nvgpu::MBarrierCreateOp>.
/// NOTE(review): unlike the other mbarrier patterns in this file, this one
/// does not derive from MBarrierBasePattern. Body not visible here; confirm
/// how the barrier storage is materialised.
struct NVGPUMBarrierCreateLowering
: public ConvertOpToLLVMPattern<nvgpu::MBarrierCreateOp> { … };
/// Class template layered on ConvertOpToLLVMPattern<SourceOp>; several
/// patterns in this file use it as their base class instead of deriving from
/// ConvertOpToLLVMPattern directly.
/// NOTE(review): body not visible here; presumably it holds helpers shared by
/// the mbarrier-related lowerings (e.g. computing a barrier's address) --
/// confirm what members it provides.
template <typename SourceOp>
struct MBarrierBasePattern : public ConvertOpToLLVMPattern<SourceOp> { … };
/// Conversion pattern for nvgpu::MBarrierInitOp, derived from
/// MBarrierBasePattern<nvgpu::MBarrierInitOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUMBarrierInitLowering
: public MBarrierBasePattern<nvgpu::MBarrierInitOp> { … };
/// Conversion pattern for nvgpu::MBarrierArriveOp, derived from
/// MBarrierBasePattern<nvgpu::MBarrierArriveOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUMBarrierArriveLowering
: public MBarrierBasePattern<nvgpu::MBarrierArriveOp> { … };
/// Conversion pattern for nvgpu::MBarrierArriveNoCompleteOp, derived from
/// MBarrierBasePattern<nvgpu::MBarrierArriveNoCompleteOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUMBarrierArriveNoCompleteLowering
: public MBarrierBasePattern<nvgpu::MBarrierArriveNoCompleteOp> { … };
/// Conversion pattern for nvgpu::MBarrierTestWaitOp, derived from
/// MBarrierBasePattern<nvgpu::MBarrierTestWaitOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUMBarrierTestWaitLowering
: public MBarrierBasePattern<nvgpu::MBarrierTestWaitOp> { … };
/// Conversion pattern for nvgpu::MBarrierArriveExpectTxOp, derived from
/// MBarrierBasePattern<nvgpu::MBarrierArriveExpectTxOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUMBarrierArriveExpectTxLowering
: public MBarrierBasePattern<nvgpu::MBarrierArriveExpectTxOp> { … };
/// Conversion pattern for nvgpu::MBarrierTryWaitParityOp, derived from
/// MBarrierBasePattern<nvgpu::MBarrierTryWaitParityOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUMBarrierTryWaitParityLowering
: public MBarrierBasePattern<nvgpu::MBarrierTryWaitParityOp> { … };
/// Conversion pattern for nvgpu::TmaAsyncLoadOp. It derives from
/// MBarrierBasePattern<nvgpu::TmaAsyncLoadOp> rather than directly from
/// ConvertOpToLLVMPattern.
/// NOTE(review): body not visible here; presumably the base is used because
/// the load is tied to an mbarrier -- confirm, along with the emitted op(s).
struct NVGPUTmaAsyncLoadOpLowering
: public MBarrierBasePattern<nvgpu::TmaAsyncLoadOp> { … };
/// Conversion pattern for nvgpu::TmaAsyncStoreOp. It derives from
/// MBarrierBasePattern<nvgpu::TmaAsyncStoreOp> rather than directly from
/// ConvertOpToLLVMPattern.
/// NOTE(review): body not visible here; confirm whether any helper from the
/// base is actually needed for stores, and which op(s) are emitted.
struct NVGPUTmaAsyncStoreOpLowering
: public MBarrierBasePattern<nvgpu::TmaAsyncStoreOp> { … };
/// Conversion pattern for nvgpu::WarpgroupGenerateDescriptorOp, built on
/// ConvertOpToLLVMPattern<nvgpu::WarpgroupGenerateDescriptorOp>.
/// NOTE(review): body not visible here; confirm how the descriptor value is
/// assembled and which bit layout it follows.
struct NVGPUGenerateWarpgroupDescriptorLowering
: public ConvertOpToLLVMPattern<nvgpu::WarpgroupGenerateDescriptorOp> { … };
/// Takes a builder and an integer `index` and returns a Value.
/// NOTE(review): body not visible here; the name suggests the result is an
/// i64 constant holding `index` -- confirm.
/// NOTE(review): the parameter is `int32_t` although the name says I64, so
/// callers cannot pass values outside the 32-bit range without narrowing.
static Value makeI64Const(ImplicitLocOpBuilder &b, int32_t index) { … }
/// Takes a builder and a Type and returns a Value.
/// NOTE(review): body not visible here; the name suggests the Type is encoded
/// as a constant (e.g. an enum code identifying the element type) -- confirm
/// the encoding and what happens for unsupported types.
static Value elementTypeAsLLVMConstant(ImplicitLocOpBuilder &b, Type type) { … }
/// Conversion pattern for nvgpu::TmaCreateDescriptorOp, built on
/// ConvertOpToLLVMPattern<nvgpu::TmaCreateDescriptorOp>.
/// NOTE(review): body not visible here; confirm what the op is replaced with
/// (for example, whether a runtime function call is emitted).
struct NVGPUTmaCreateDescriptorOpLowering
: public ConvertOpToLLVMPattern<nvgpu::TmaCreateDescriptorOp> { … };
/// Conversion pattern for nvgpu::WarpgroupMmaOp, built on
/// ConvertOpToLLVMPattern<nvgpu::WarpgroupMmaOp>.
/// NOTE(review): body not visible here; confirm the emitted op sequence and
/// the supported shapes/element types.
struct NVGPUWarpgroupMmaOpLowering
: public ConvertOpToLLVMPattern<nvgpu::WarpgroupMmaOp> { … };
/// Conversion pattern for nvgpu::WarpgroupMmaStoreOp, built on
/// ConvertOpToLLVMPattern<nvgpu::WarpgroupMmaStoreOp>.
/// NOTE(review): body not visible here; confirm how accumulator values are
/// mapped to destination indices.
struct NVGPUWarpgroupMmaStoreOpLowering
: public ConvertOpToLLVMPattern<nvgpu::WarpgroupMmaStoreOp> { … };
/// Conversion pattern for nvgpu::WarpgroupMmaInitAccumulatorOp, built on
/// ConvertOpToLLVMPattern<nvgpu::WarpgroupMmaInitAccumulatorOp>.
/// NOTE(review): body not visible here; confirm the initial value used for
/// the accumulator and the type it is materialised as.
struct NVGPUWarpgroupMmaInitAccumulatorOpLowering
: public ConvertOpToLLVMPattern<nvgpu::WarpgroupMmaInitAccumulatorOp> { … };
/// Conversion pattern for nvgpu::TmaPrefetchOp, built on
/// ConvertOpToLLVMPattern<nvgpu::TmaPrefetchOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s).
struct NVGPUTmaPrefetchOpLowering
: public ConvertOpToLLVMPattern<nvgpu::TmaPrefetchOp> { … };
/// Conversion pattern for nvgpu::RcpOp, built on
/// ConvertOpToLLVMPattern<nvgpu::RcpOp>.
/// NOTE(review): body not visible here; confirm the replacement op(s), the
/// supported element types, and how vector operands are handled.
struct NVGPURcpOpLowering : public ConvertOpToLLVMPattern<nvgpu::RcpOp> { … };
}
/// Out-of-line definition of mlir::populateNVGPUToNVVMConversionPatterns
/// (declared in the mlir namespace elsewhere). Receives a const
/// LLVMTypeConverter and a mutable RewritePatternSet and returns nothing.
/// NOTE(review): body not visible here; presumably it adds the conversion
/// patterns defined in this file to `patterns`, constructed with `converter`
/// -- confirm that every pattern struct above is actually registered.
void mlir::populateNVGPUToNVVMConversionPatterns(
const LLVMTypeConverter &converter, RewritePatternSet &patterns) { … }