#include "mlir/Dialect/NVGPU/Transforms/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/NVGPU/Transforms/Transforms.h"
#include "mlir/Dialect/NVGPU/Transforms/Utils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MathExtras.h"
namespace mlir {
namespace nvgpu {
#define GEN_PASS_DEF_OPTIMIZESHAREDMEMORY
#include "mlir/Dialect/NVGPU/Transforms/Passes.h.inc"
}
}
usingnamespacemlir;
usingnamespacemlir::nvgpu;
constexpr int64_t kSharedMemoryLineSizeBytes = …;
constexpr int64_t kDefaultVectorSizeBits = …;
static Value permuteVectorOffset(OpBuilder &b, Location loc,
ArrayRef<Value> indices, MemRefType memrefTy,
int64_t srcDim, int64_t tgtDim) { … }
static void transformIndices(OpBuilder &builder, Location loc,
SmallVector<Value, 4> &indices,
MemRefType memrefTy, int64_t srcDim,
int64_t tgtDim) { … }
static LogicalResult
getShmReadAndWriteOps(Operation *parentOp, Value shmMemRef,
SmallVector<Operation *, 16> &readOps,
SmallVector<Operation *, 16> &writeOps) { … }
llvm::LogicalResult
mlir::nvgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp,
Value memrefValue) { … }
namespace {
class OptimizeSharedMemoryPass
: public nvgpu::impl::OptimizeSharedMemoryBase<OptimizeSharedMemoryPass> { … };
}
std::unique_ptr<Pass> mlir::nvgpu::createOptimizeSharedMemoryPass() { … }