// RUN: mlir-opt %s \
// RUN: -test-lower-to-arm-sme -test-lower-to-llvm -verify-diagnostics | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=main -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
// RUN: -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils,%native_arm_sme_abi_shlib | \
// RUN: FileCheck %s
/// This function uses too many tiles! There's only two i16 tiles (ZA0.H and
/// ZA1.H), but this function uses five i16 tiles! Very expensive spills/reloads
/// will be inserted to emulate the extra three tiles. Note: This is only done
/// to avoid the compiler erroring out but is expected to have very poor
/// performance (hence the warning).
func.func @use_too_many_tiles(%a: memref<?x?xi16>, %b: memref<?x?xi16>, %c: memref<?x?xi16>) {
%c0 = arith.constant 0 : index
// expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
%tile_a = arith.constant dense<0> : vector<[8]x[8]xi16>
// expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
%tile_b = arith.constant dense<1> : vector<[8]x[8]xi16>
// expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
%tile_c = arm_sme.tile_load %a[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
%tile_d = arm_sme.tile_load %b[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
%tile_e = arm_sme.tile_load %c[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
// CHECK-LABEL: tile_a:
// CHECK-COUNT-8: ( 0, 0, 0, 0, 0, 0, 0, 0
vector.print str "tile_a:\n"
// expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
vector.print %tile_a : vector<[8]x[8]xi16>
// CHECK-LABEL: tile_b:
// CHECK-COUNT-8: ( 1, 1, 1, 1, 1, 1, 1, 1
vector.print str "tile_b:\n"
// expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
vector.print %tile_b : vector<[8]x[8]xi16>
// CHECK-LABEL: tile_c:
// CHECK-COUNT-8: ( 2, 2, 2, 2, 2, 2, 2, 2
vector.print str "tile_c:\n"
// expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
vector.print %tile_c : vector<[8]x[8]xi16>
// CHECK-LABEL: tile_d:
// CHECK-COUNT-8: ( 3, 3, 3, 3, 3, 3, 3, 3
vector.print str "tile_d:\n"
vector.print %tile_d : vector<[8]x[8]xi16>
// CHECK-LABEL: tile_e:
// CHECK-COUNT-8: ( 4, 4, 4, 4, 4, 4, 4, 4
vector.print str "tile_e:\n"
vector.print %tile_e : vector<[8]x[8]xi16>
return
}
func.func @main() {
%c16 = arith.constant 16 : index
%svl_h = arm_sme.streaming_vl <half>
%c2 = arith.constant 2 : i16
%c3 = arith.constant 3 : i16
%c4 = arith.constant 4 : i16
%memA = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
%memB = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
%memC = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
linalg.fill ins(%c2 : i16) outs(%memA : memref<?x?xi16>)
linalg.fill ins(%c3 : i16) outs(%memB : memref<?x?xi16>)
linalg.fill ins(%c4 : i16) outs(%memC : memref<?x?xi16>)
func.call @use_too_many_tiles(%memA, %memB, %memC) : (memref<?x?xi16>, memref<?x?xi16>, memref<?x?xi16>) -> ()
return
}