llvm/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir

// RUN: mlir-opt %s \
// RUN:  -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_90a cubin-features=+ptx80 opt-level=3" \
// RUN:  | mlir-cpu-runner \
// RUN:   --shared-libs=%mlir_cuda_runtime \
// RUN:   --shared-libs=%mlir_runner_utils \
// RUN:   --shared-libs=%mlir_c_runner_utils \
// RUN:   --entry-point-result=void \
// RUN:  | FileCheck %s

// CHECK: clusterIdx: (1, 1, 0) in Cluster Dimension: (2, 2, 1) blockIdx: (3, 3, 0) 

module attributes {gpu.container_module} {
  // Host entry point. Launches @gpumodule::@kernel_cluster once with
  //   clusters in (2, 2, 1), blocks in (4, 4, 1), threads in (1, 1, 1)
  // and returns nothing.
  func.func @main() {
    %one = arith.constant 1 : index
    %two = arith.constant 2 : index
    %four = arith.constant 4 : index
    gpu.launch_func @gpumodule::@kernel_cluster
        clusters in (%two, %two, %one)
        blocks in (%four, %four, %one)
        threads in (%one, %one, %one)
    return
  }
  // Device code for the cluster-launch test.
  gpu.module @gpumodule {
    // Kernel that reads its cluster id, cluster dimension (in blocks) and
    // block id, and prints all nine values from the single block whose
    // block id is 3 in both x and y.
    //
    // The known_* attributes must describe the launch performed by @main:
    // threads in (1, 1, 1) and blocks in (4, 4, 1). known_grid_size is
    // therefore 4x4x1; the (2, 2, 1) tuple in @main is the `clusters in`
    // operand, not the grid size. A grid bound of 2 would contradict the
    // `block_id == 3` guards below and would permit a lowering that trusts
    // the attribute to fold them to false, dropping the printf.
    gpu.func @kernel_cluster() kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 4, 1>} {
      // Values printed under the "clusterIdx" label.
      %cidX = gpu.cluster_id  x
      %cidY = gpu.cluster_id  y
      %cidZ = gpu.cluster_id  z
      // Values printed under the "Cluster Dimension" label.
      %cdimX = gpu.cluster_dim_blocks  x
      %cdimY = gpu.cluster_dim_blocks  y
      %cdimZ = gpu.cluster_dim_blocks  z
      // Values printed under the "blockIdx" label.
      %bidX = gpu.block_id  x
      %bidY = gpu.block_id  y
      %bidZ = gpu.block_id  z

      // Cast every index value to i32 so it can be printed with "%d".
      %cidX_i32 = index.casts %cidX : index to i32
      %cidY_i32 = index.casts %cidY : index to i32
      %cidZ_i32 = index.casts %cidZ : index to i32
      %cdimX_i32 = index.casts %cdimX : index to i32
      %cdimY_i32 = index.casts %cdimY : index to i32
      %cdimZ_i32 = index.casts %cdimZ : index to i32
      %bidX_i32 = index.casts %bidX : index to i32
      %bidY_i32 = index.casts %bidY : index to i32
      %bidZ_i32 = index.casts %bidZ : index to i32

      // Only the block with block_id x == 3 and y == 3 prints, so the test
      // produces exactly one output line for FileCheck to match.
      %c3 = arith.constant 3 : index
      %cnd1 =  arith.cmpi eq, %bidX, %c3 : index
      %cnd2 =  arith.cmpi eq, %bidY, %c3 : index
      scf.if %cnd1 {
        scf.if %cnd2 {
          gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n"
            %cidX_i32,
            %cidY_i32,
            %cidZ_i32,
            %cdimX_i32,
            %cdimY_i32,
            %cdimZ_i32,
            %bidX_i32,
            %bidY_i32,
            %bidZ_i32
            :
            i32, i32, i32, i32, i32, i32, i32, i32, i32
        }
      }

      gpu.return
    }
  }
}