GPUDialect.cpp | Explore in Territory

//===- GPUDialect.cpp - MLIR Dialect for GPU Kernels implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the GPU kernel-related dialect and its operations.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/GPUDialect.h"

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Interfaces/FunctionImplementation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/StringSaver.h"
#include <cassert>

usingnamespacemlir;
usingnamespacemlir::gpu;

#include "mlir/Dialect/GPU/IR/GPUOpsDialect.cpp.inc"

//===----------------------------------------------------------------------===//
// GPU Device Mapping Attributes
//===----------------------------------------------------------------------===//

int64_t GPUBlockMappingAttr::getMappingId() const { … }

bool GPUBlockMappingAttr::isLinearMapping() const { … }

int64_t GPUBlockMappingAttr::getRelativeIndex() const { … }

int64_t GPUWarpgroupMappingAttr::getMappingId() const { … }

bool GPUWarpgroupMappingAttr::isLinearMapping() const { … }

int64_t GPUWarpgroupMappingAttr::getRelativeIndex() const { … }

int64_t GPUWarpMappingAttr::getMappingId() const { … }

bool GPUWarpMappingAttr::isLinearMapping() const { … }

int64_t GPUWarpMappingAttr::getRelativeIndex() const { … }

int64_t GPUThreadMappingAttr::getMappingId() const { … }

bool GPUThreadMappingAttr::isLinearMapping() const { … }

int64_t GPUThreadMappingAttr::getRelativeIndex() const { … }

int64_t GPUMemorySpaceMappingAttr::getMappingId() const { … }

bool GPUMemorySpaceMappingAttr::isLinearMapping() const { … }

int64_t GPUMemorySpaceMappingAttr::getRelativeIndex() const { … }

//===----------------------------------------------------------------------===//
// MMAMatrixType
//===----------------------------------------------------------------------===//

MMAMatrixType MMAMatrixType::get(ArrayRef<int64_t> shape, Type elementType,
                                 StringRef operand) { … }

MMAMatrixType
MMAMatrixType::getChecked(function_ref<InFlightDiagnostic()> emitError,
                          ArrayRef<int64_t> shape, Type elementType,
                          StringRef operand) { … }

unsigned MMAMatrixType::getNumDims() const { … }

ArrayRef<int64_t> MMAMatrixType::getShape() const { … }

Type MMAMatrixType::getElementType() const { … }

StringRef MMAMatrixType::getOperand() const { … }

bool MMAMatrixType::isValidElementType(Type elementType) { … }

LogicalResult
MMAMatrixType::verifyInvariants(function_ref<InFlightDiagnostic()> emitError,
                                ArrayRef<int64_t> shape, Type elementType,
                                StringRef operand) { … }

//===----------------------------------------------------------------------===//
// GPUDialect
//===----------------------------------------------------------------------===//

bool GPUDialect::isWorkgroupMemoryAddressSpace(Attribute memorySpace) { … }

bool GPUDialect::hasWorkgroupMemoryAddressSpace(MemRefType type) { … }

bool GPUDialect::isKernel(Operation *op) { … }

namespace {
/// This class defines the interface for handling inlining with gpu
/// operations.
struct GPUInlinerInterface : public DialectInlinerInterface { … };
} // namespace

void GPUDialect::initialize() { … }

static std::string getSparseHandleKeyword(SparseHandleKind kind) { … }

Type GPUDialect::parseType(DialectAsmParser &parser) const { … }
// TODO: print refined type here. Notice that should be corresponding to the
// parser
void GPUDialect::printType(Type type, DialectAsmPrinter &os) const { … }

static LogicalResult verifyKnownLaunchSizeAttr(Operation *op,
                                               NamedAttribute attr) { … }

LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
                                                   NamedAttribute attr) { … }

/// Parses an optional list of async operands with an optional leading keyword.
/// (`async`)? (`[` ssa-id-list `]`)?
///
/// This method is used by the tablegen assembly format for async ops as well.
static ParseResult parseAsyncDependencies(
    OpAsmParser &parser, Type &asyncTokenType,
    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &asyncDependencies) { … }

/// Prints optional async dependencies with its leading keyword.
///   (`async`)? (`[` ssa-id-list `]`)?
// Used by the tablegen assembly format for several async ops.
static void printAsyncDependencies(OpAsmPrinter &printer, Operation *op,
                                   Type asyncTokenType,
                                   OperandRange asyncDependencies) { … }

// GPU Memory attributions functions shared by LaunchOp and GPUFuncOp.
/// Parses a GPU function memory attribution.
///
/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
///                        (`private` `(` ssa-id-and-type-list `)`)?
///
/// Note that this function parses only one of the two similar parts, with the
/// keyword provided as argument.
static ParseResult
parseAttributions(OpAsmParser &parser, StringRef keyword,
                  SmallVectorImpl<OpAsmParser::Argument> &args) { … }

/// Prints a GPU function memory attribution.
static void printAttributions(OpAsmPrinter &p, StringRef keyword,
                              ArrayRef<BlockArgument> values) { … }

/// Verifies a GPU function memory attribution.
static LogicalResult verifyAttributions(Operation *op,
                                        ArrayRef<BlockArgument> attributions,
                                        gpu::AddressSpace memorySpace) { … }

//===----------------------------------------------------------------------===//
// AllReduceOp
//===----------------------------------------------------------------------===//

static LogicalResult verifyReduceOpAndType(gpu::AllReduceOperation opName,
                                           Type resType) { … }

LogicalResult gpu::AllReduceOp::verifyRegions() { … }

static bool canMakeGroupOpUniform(Operation *op) { … }

OpFoldResult gpu::AllReduceOp::fold(FoldAdaptor /*adaptor*/) { … }

// TODO: Support optional custom attributes (without dialect prefix).
static ParseResult parseAllReduceOperation(AsmParser &parser,
                                           AllReduceOperationAttr &attr) { … }

static void printAllReduceOperation(AsmPrinter &printer, Operation *op,
                                    AllReduceOperationAttr attr) { … }

//===----------------------------------------------------------------------===//
// SubgroupReduceOp
//===----------------------------------------------------------------------===//

LogicalResult gpu::SubgroupReduceOp::verify() { … }

OpFoldResult gpu::SubgroupReduceOp::fold(FoldAdaptor /*adaptor*/) { … }

//===----------------------------------------------------------------------===//
// AsyncOpInterface
//===----------------------------------------------------------------------===//

void gpu::addAsyncDependency(Operation *op, Value token) { … }

//===----------------------------------------------------------------------===//
// LaunchOp
//===----------------------------------------------------------------------===//

void LaunchOp::build(OpBuilder &builder, OperationState &result,
                     Value gridSizeX, Value gridSizeY, Value gridSizeZ,
                     Value getBlockSizeX, Value getBlockSizeY,
                     Value getBlockSizeZ, Value dynamicSharedMemorySize,
                     Type asyncTokenType, ValueRange asyncDependencies,
                     TypeRange workgroupAttributions,
                     TypeRange privateAttributions, Value clusterSizeX,
                     Value clusterSizeY, Value clusterSizeZ) { … }

KernelDim3 LaunchOp::getBlockIds() { … }

KernelDim3 LaunchOp::getThreadIds() { … }

KernelDim3 LaunchOp::getGridSize() { … }

KernelDim3 LaunchOp::getBlockSize() { … }

std::optional<KernelDim3> LaunchOp::getClusterIds() { … }

std::optional<KernelDim3> LaunchOp::getClusterSize() { … }

KernelDim3 LaunchOp::getGridSizeOperandValues() { … }

KernelDim3 LaunchOp::getBlockSizeOperandValues() { … }

std::optional<KernelDim3> LaunchOp::getClusterSizeOperandValues() { … }

LogicalResult LaunchOp::verify() { … }

LogicalResult LaunchOp::verifyRegions() { … }

// Pretty-print the kernel grid/block size assignment as
//   (%iter-x, %iter-y, %iter-z) in
//   (%size-x = %ssa-use, %size-y = %ssa-use, %size-z = %ssa-use)
// where %size-* and %iter-* will correspond to the body region arguments.
static void printSizeAssignment(OpAsmPrinter &p, KernelDim3 size,
                                KernelDim3 operands, KernelDim3 ids) { … }

void LaunchOp::print(OpAsmPrinter &p) { … }

// Parse the size assignment blocks for blocks and threads.  These have the form
//   (%region_arg, %region_arg, %region_arg) in
//   (%region_arg = %operand, %region_arg = %operand, %region_arg = %operand)
// where %region_arg are percent-identifiers for the region arguments to be
// introduced further (SSA defs), and %operand are percent-identifiers for the
// SSA value uses.
static ParseResult
parseSizeAssignment(OpAsmParser &parser,
                    MutableArrayRef<OpAsmParser::UnresolvedOperand> sizes,
                    MutableArrayRef<OpAsmParser::UnresolvedOperand> regionSizes,
                    MutableArrayRef<OpAsmParser::UnresolvedOperand> indices) { … }

/// Parses a Launch operation.
/// operation ::= `gpu.launch` (`async` `[` ssa-id-list `]`)?
///       `clusters` `(` ssa-id-list `)` `in` ssa-reassignment (Optional)
///       `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
///       `threads` `(` ssa-id-list `)` `in` ssa-reassignment
///       memory-attribution
///       region attr-dict?
/// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) { … }

/// Simplify the gpu.launch when the range of a thread or block ID is
/// trivially known to be one.
struct FoldLaunchArguments : public OpRewritePattern<LaunchOp> { … };

void LaunchOp::getCanonicalizationPatterns(RewritePatternSet &rewrites,
                                           MLIRContext *context) { … }

/// Adds a new block argument that corresponds to buffers located in
/// workgroup memory.
BlockArgument LaunchOp::addWorkgroupAttribution(Type type, Location loc) { … }

/// Adds a new block argument that corresponds to buffers located in
/// private memory.
BlockArgument LaunchOp::addPrivateAttribution(Type type, Location loc) { … }

//===----------------------------------------------------------------------===//
// LaunchFuncOp
//===----------------------------------------------------------------------===//

void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
                         SymbolRefAttr kernelSymbol, KernelDim3 gridSize,
                         KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
                         ValueRange kernelOperands, Type asyncTokenType,
                         ValueRange asyncDependencies,
                         std::optional<KernelDim3> clusterSize) { … }

void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
                         KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
                         ValueRange kernelOperands, Type asyncTokenType,
                         ValueRange asyncDependencies,
                         std::optional<KernelDim3> clusterSize) { … }

void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
                         SymbolRefAttr kernel, KernelDim3 gridSize,
                         KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
                         ValueRange kernelOperands, Value asyncObject,
                         std::optional<KernelDim3> clusterSize) { … }

StringAttr LaunchFuncOp::getKernelModuleName() { … }

StringAttr LaunchFuncOp::getKernelName() { … }

unsigned LaunchFuncOp::getNumKernelOperands() { … }

Value LaunchFuncOp::getKernelOperand(unsigned i) { … }

KernelDim3 LaunchFuncOp::getGridSizeOperandValues() { … }

KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() { … }

KernelDim3 LaunchFuncOp::getClusterSizeOperandValues() { … }

LogicalResult LaunchFuncOp::verify() { … }

static ParseResult
parseLaunchDimType(OpAsmParser &parser, Type &dimTy,
                   std::optional<OpAsmParser::UnresolvedOperand> clusterValue,
                   Type &clusterXTy, Type &clusterYTy, Type &clusterZTy) { … }

static void printLaunchDimType(OpAsmPrinter &printer, Operation *op, Type dimTy,
                               Value clusterValue, Type clusterXTy,
                               Type clusterYTy, Type clusterZTy) { … }

static ParseResult parseLaunchFuncOperands(
    OpAsmParser &parser,
    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &argNames,
    SmallVectorImpl<Type> &argTypes) { … }

static void printLaunchFuncOperands(OpAsmPrinter &printer, Operation *,
                                    OperandRange operands, TypeRange types) { … }

//===----------------------------------------------------------------------===//
// ShuffleOp
//===----------------------------------------------------------------------===//

void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
                      int32_t offset, int32_t width, ShuffleMode mode) { … }

//===----------------------------------------------------------------------===//
// BarrierOp
//===----------------------------------------------------------------------===//

namespace {

/// Remove gpu.barrier after gpu.barrier, the threads are already synchronized!
LogicalResult eraseRedundantGpuBarrierOps(BarrierOp op,
                                          PatternRewriter &rewriter) { … }

} // end anonymous namespace

void BarrierOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) { … }

//===----------------------------------------------------------------------===//
// GPUFuncOp
//===----------------------------------------------------------------------===//

/// Adds a new block argument that corresponds to buffers located in
/// workgroup memory.
BlockArgument GPUFuncOp::addWorkgroupAttribution(Type type, Location loc) { … }

/// Adds a new block argument that corresponds to buffers located in
/// private memory.
BlockArgument GPUFuncOp::addPrivateAttribution(Type type, Location loc) { … }

void GPUFuncOp::build(OpBuilder &builder, OperationState &result,
                      StringRef name, FunctionType type,
                      TypeRange workgroupAttributions,
                      TypeRange privateAttributions,
                      ArrayRef<NamedAttribute> attrs) { … }

/// Parses a GPU function memory attribution.
///
/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
///                        (`private` `(` ssa-id-and-type-list `)`)?
///
/// Note that this function parses only one of the two similar parts, with the
/// keyword provided as argument.
static ParseResult
parseAttributions(OpAsmParser &parser, StringRef keyword,
                  SmallVectorImpl<OpAsmParser::Argument> &args,
                  Attribute &attributionAttrs) { … }

/// Parses a GPU function.
///
/// <operation> ::= `gpu.func` symbol-ref-id `(` argument-list `)`
///                 (`->` function-result-list)? memory-attribution `kernel`?
///                 function-attributes? region
ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) { … }

static void printAttributions(OpAsmPrinter &p, StringRef keyword,
                              ArrayRef<BlockArgument> values,
                              ArrayAttr attributes) { … }

void GPUFuncOp::print(OpAsmPrinter &p) { … }

static DictionaryAttr getAttributionAttrs(GPUFuncOp op, unsigned index,
                                          StringAttr attrName) { … }

DictionaryAttr GPUFuncOp::getworkgroupAttributionAttrs(unsigned index) { … }

DictionaryAttr GPUFuncOp::getPrivateAttributionAttrs(unsigned index) { … }

static void setAttributionAttrs(GPUFuncOp op, unsigned index,
                                DictionaryAttr value, StringAttr attrName) { … }

void GPUFuncOp::setworkgroupAttributionAttrs(unsigned index,
                                             DictionaryAttr value) { … }

void GPUFuncOp::setPrivateAttributionAttrs(unsigned int index,
                                           DictionaryAttr value) { … }

static Attribute getAttributionAttr(GPUFuncOp op, unsigned index,
                                    StringAttr name, StringAttr attrsName) { … }

Attribute GPUFuncOp::getWorkgroupAttributionAttr(unsigned index,
                                                 StringAttr name) { … }

Attribute GPUFuncOp::getPrivateAttributionAttr(unsigned index,
                                               StringAttr name) { … }

static void setAttributionAttr(GPUFuncOp op, unsigned index, StringAttr name,
                               Attribute value, StringAttr attrsName) { … }

void GPUFuncOp::setWorkgroupAttributionAttr(unsigned index, StringAttr name,
                                            Attribute value) { … }

void GPUFuncOp::setPrivateAttributionAttr(unsigned index, StringAttr name,
                                          Attribute value) { … }

LogicalResult GPUFuncOp::verifyType() { … }

/// Verifies the body of the function.
LogicalResult GPUFuncOp::verifyBody() { … }

//===----------------------------------------------------------------------===//
// ReturnOp
//===----------------------------------------------------------------------===//

LogicalResult gpu::ReturnOp::verify() { … }

//===----------------------------------------------------------------------===//
// GPUModuleOp
//===----------------------------------------------------------------------===//

void GPUModuleOp::build(OpBuilder &builder, OperationState &result,
                        StringRef name, ArrayAttr targets,
                        Attribute offloadingHandler) { … }

void GPUModuleOp::build(OpBuilder &builder, OperationState &result,
                        StringRef name, ArrayRef<Attribute> targets,
                        Attribute offloadingHandler) { … }

bool GPUModuleOp::hasTarget(Attribute target) { … }

void GPUModuleOp::setTargets(ArrayRef<TargetAttrInterface> targets) { … }

//===----------------------------------------------------------------------===//
// GPUBinaryOp
//===----------------------------------------------------------------------===//
void BinaryOp::build(OpBuilder &builder, OperationState &result, StringRef name,
                     Attribute offloadingHandler, ArrayAttr objects) { … }

void BinaryOp::build(OpBuilder &builder, OperationState &result, StringRef name,
                     Attribute offloadingHandler, ArrayRef<Attribute> objects) { … }

static ParseResult parseOffloadingHandler(OpAsmParser &parser,
                                          Attribute &offloadingHandler) { … }

static void printOffloadingHandler(OpAsmPrinter &printer, Operation *op,
                                   Attribute offloadingHandler) { … }

//===----------------------------------------------------------------------===//
// GPUMemcpyOp
//===----------------------------------------------------------------------===//

LogicalResult MemcpyOp::verify() { … }

namespace {

/// Erases a common case of copy ops where a destination value is used only by
/// the copy op, alloc and dealloc ops.
struct EraseTrivialCopyOp : public OpRewritePattern<MemcpyOp> { … };

} // end anonymous namespace

void MemcpyOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                           MLIRContext *context) { … }

//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaLoadMatrixOp
//===----------------------------------------------------------------------===//

LogicalResult SubgroupMmaLoadMatrixOp::verify() { … }

//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaStoreMatrixOp
//===----------------------------------------------------------------------===//

LogicalResult SubgroupMmaStoreMatrixOp::verify() { … }

//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaComputeOp
//===----------------------------------------------------------------------===//

LogicalResult SubgroupMmaComputeOp::verify() { … }

LogicalResult MemcpyOp::fold(FoldAdaptor adaptor,
                             SmallVectorImpl<::mlir::OpFoldResult> &results) { … }

LogicalResult MemsetOp::fold(FoldAdaptor adaptor,
                             SmallVectorImpl<::mlir::OpFoldResult> &results) { … }

//===----------------------------------------------------------------------===//
// GPU_WaitOp
//===----------------------------------------------------------------------===//

namespace {

/// Remove gpu.wait op use of gpu.wait op def without async dependencies.
/// %t = gpu.wait async []       // No async dependencies.
/// ...  gpu.wait ... [%t, ...]  // %t can be removed.
struct EraseRedundantGpuWaitOpPairs : public OpRewritePattern<WaitOp> { … };

/// Simplify trivial gpu.wait ops for the following patterns.
/// 1. %t = gpu.wait async ... ops, where %t has no uses (regardless of async
/// dependencies).
/// 2. %t1 = gpu.wait async [%t0], in this case, we can replace uses of %t1 with
/// %t0.
/// 3. gpu.wait [] ops, i.e gpu.wait ops that neither have any async
/// dependencies nor return any token.
struct SimplifyGpuWaitOp : public OpRewritePattern<WaitOp> { … };

} // end anonymous namespace

void WaitOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                         MLIRContext *context) { … }

//===----------------------------------------------------------------------===//
// GPU_AllocOp
//===----------------------------------------------------------------------===//

LogicalResult AllocOp::verify() { … }

namespace {

/// Folding of memref.dim(gpu.alloc(%size), %idx) -> %size similar to
/// `memref::AllocOp`.
struct SimplifyDimOfAllocOp : public OpRewritePattern<memref::DimOp> { … };

} // namespace

void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                          MLIRContext *context) { … }

//===----------------------------------------------------------------------===//
// GPU object attribute
//===----------------------------------------------------------------------===//

LogicalResult ObjectAttr::verify(function_ref<InFlightDiagnostic()> emitError,
                                 Attribute target, CompilationTarget format,
                                 StringAttr object, DictionaryAttr properties,
                                 KernelTableAttr kernels) { … }

namespace {
LogicalResult parseObject(AsmParser &odsParser, CompilationTarget &format,
                          StringAttr &object) { … }

void printObject(AsmPrinter &odsParser, CompilationTarget format,
                 StringAttr object) { … }
} // namespace

//===----------------------------------------------------------------------===//
// GPU select object attribute
//===----------------------------------------------------------------------===//

LogicalResult
gpu::SelectObjectAttr::verify(function_ref<InFlightDiagnostic()> emitError,
                              Attribute target) { … }

//===----------------------------------------------------------------------===//
// DynamicSharedMemoryOp
//===----------------------------------------------------------------------===//

LogicalResult gpu::DynamicSharedMemoryOp::verify() { … }

//===----------------------------------------------------------------------===//
// GPU KernelMetadataAttr
//===----------------------------------------------------------------------===//

KernelMetadataAttr KernelMetadataAttr::get(FunctionOpInterface kernel,
                                           DictionaryAttr metadata) { … }

KernelMetadataAttr
KernelMetadataAttr::getChecked(function_ref<InFlightDiagnostic()> emitError,
                               FunctionOpInterface kernel,
                               DictionaryAttr metadata) { … }

KernelMetadataAttr
KernelMetadataAttr::appendMetadata(ArrayRef<NamedAttribute> attrs) const { … }

LogicalResult
KernelMetadataAttr::verify(function_ref<InFlightDiagnostic()> emitError,
                           StringAttr name, Type functionType,
                           ArrayAttr argAttrs, DictionaryAttr metadata) { … }

//===----------------------------------------------------------------------===//
// GPU KernelTableAttr
//===----------------------------------------------------------------------===//

KernelTableAttr KernelTableAttr::get(MLIRContext *context,
                                     ArrayRef<KernelMetadataAttr> kernels,
                                     bool isSorted) { … }

KernelTableAttr KernelTableAttr::getChecked(
    function_ref<InFlightDiagnostic()> emitError, MLIRContext *context,
    ArrayRef<KernelMetadataAttr> kernels, bool isSorted) { … }

LogicalResult
KernelTableAttr::verify(function_ref<InFlightDiagnostic()> emitError,
                        ArrayRef<KernelMetadataAttr> kernels) { … }

KernelMetadataAttr KernelTableAttr::lookup(StringRef key) const { … }

KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const { … }

//===----------------------------------------------------------------------===//
// GPU target options
//===----------------------------------------------------------------------===//

TargetOptions::TargetOptions(
    StringRef toolkitPath, ArrayRef<std::string> linkFiles,
    StringRef cmdOptions, CompilationTarget compilationTarget,
    function_ref<SymbolTable *()> getSymbolTableCallback)
    : … { … }

TargetOptions::TargetOptions(
    TypeID typeID, StringRef toolkitPath, ArrayRef<std::string> linkFiles,
    StringRef cmdOptions, CompilationTarget compilationTarget,
    function_ref<SymbolTable *()> getSymbolTableCallback)
    : … { … }

TypeID TargetOptions::getTypeID() const { … }

StringRef TargetOptions::getToolkitPath() const { … }

ArrayRef<std::string> TargetOptions::getLinkFiles() const { … }

StringRef TargetOptions::getCmdOptions() const { … }

SymbolTable *TargetOptions::getSymbolTable() const { … }

CompilationTarget TargetOptions::getCompilationTarget() const { … }

CompilationTarget TargetOptions::getDefaultCompilationTarget() { … }

std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
TargetOptions::tokenizeCmdOptions() const { … }

MLIR_DEFINE_EXPLICIT_TYPE_ID(::mlir::gpu::TargetOptions)

#include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc"
#include "mlir/Dialect/GPU/IR/GPUOpsEnums.cpp.inc"

#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"

#define GET_OP_CLASSES
#include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc"

#include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.cpp.inc"
llvm/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp