// RUN: mlir-opt %s -convert-scf-to-cf -test-arm-sme-tile-allocation -split-input-file -verify-diagnostics | FileCheck %s
// RUN: mlir-opt %s -convert-scf-to-cf -test-arm-sme-tile-allocation=dump-tile-live-ranges -mlir-disable-threading -split-input-file -verify-diagnostics 2>&1 >/dev/null | FileCheck %s --check-prefix=CHECK-LIVE-RANGE
// This file tests some simple aspects of using liveness in the SME tile allocator.
// Note: We use -convert-scf-to-cf first as the tile allocator expects CF, but
// some of these tests are written in SCF (to make things easier to follow).
// CHECK-LIVE-RANGE-LABEL: @constant_with_multiple_users
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE: S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: || arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |E test.some_use
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LABEL: @constant_with_multiple_users(
// CHECK-SAME: %[[VECTOR_A:.*]]: vector<[4]xf32>, %[[VECTOR_B:.*]]: vector<[4]xf32>
func.func @constant_with_multiple_users(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %index: index) {
// CHECK-NEXT: %[[ZERO_TILE_0:.*]] = arm_sme.zero {tile_id = 0 : i32} : vector<[4]x[4]xf32>
// CHECK-NEXT: %[[ZERO_TILE_1:.*]] = arm_sme.zero {tile_id = 1 : i32} : vector<[4]x[4]xf32>
// CHECK-NEXT: %[[INSERT_TILE_1:.*]] = arm_sme.insert_tile_slice %[[VECTOR_A]], %[[ZERO_TILE_1]][%{{.*}}] {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
// CHECK-NEXT: %[[INSERT_TILE_0:.*]] = arm_sme.insert_tile_slice %[[VECTOR_B]], %[[ZERO_TILE_0]][%{{.*}}] {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
%zero = arm_sme.zero : vector<[4]x[4]xf32>
%tile_a = arm_sme.insert_tile_slice %a, %zero[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
%tile_b = arm_sme.insert_tile_slice %b, %zero[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
"test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @value_with_multiple_users
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: || arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |E test.some_use
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// expected-note@below {{tile operand is: <block argument> of type 'vector<[4]x[4]xf32>'}}
func.func @value_with_multiple_users(%tile: vector<[4]x[4]xf32>, %a: vector<[4]xf32>, %b: vector<[4]xf32>, %index: index) {
// expected-error@below {{op tile operand allocated to different SME virtial tile (move required)}}
%tile_a = arm_sme.insert_tile_slice %a, %tile[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
%tile_b = arm_sme.insert_tile_slice %b, %tile[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
"test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @reuse_tiles_after_initial_use
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: ||S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: E||| test.some_use
// CHECK-LIVE-RANGE-NEXT: E|| test.some_use
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT: S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: ||S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: E||| test.some_use
// CHECK-LIVE-RANGE-NEXT: E|| test.some_use
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LABEL: @reuse_tiles_after_initial_use
func.func @reuse_tiles_after_initial_use() {
// CHECK: arm_sme.get_tile {tile_id = 0 : i32}
// CHECK: arm_sme.get_tile {tile_id = 1 : i32}
// CHECK: arm_sme.get_tile {tile_id = 2 : i32}
// CHECK: arm_sme.get_tile {tile_id = 3 : i32}
%tile_a = arm_sme.get_tile : vector<[4]x[4]xf32>
%tile_b = arm_sme.get_tile : vector<[4]x[4]xf32>
%tile_c = arm_sme.get_tile : vector<[4]x[4]xf32>
%tile_d = arm_sme.get_tile : vector<[4]x[4]xf32>
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_c) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_d) : (vector<[4]x[4]xf32>) -> ()
// CHECK: arm_sme.zero {tile_id = 0 : i32}
// CHECK: arm_sme.zero {tile_id = 1 : i32}
// CHECK: arm_sme.zero {tile_id = 2 : i32}
// CHECK: arm_sme.zero {tile_id = 3 : i32}
%tile_1 = arm_sme.zero : vector<[4]x[4]xf32>
%tile_2 = arm_sme.zero : vector<[4]x[4]xf32>
%tile_3 = arm_sme.zero : vector<[4]x[4]xf32>
%tile_4 = arm_sme.zero : vector<[4]x[4]xf32>
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.some_use"(%tile_1) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_2) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_3) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_4) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @tile_live_ins
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: EE cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: EE cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: EE cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb3:
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LABEL: @tile_live_ins
func.func @tile_live_ins()
{
// CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.zero {tile_id = 1 : i32} : vector<[4]x[4]xf32>
%tile_1 = arm_sme.get_tile : vector<[4]x[4]xf32>
%tile_2 = arm_sme.zero : vector<[4]x[4]xf32>
cf.br ^bb1
^bb1:
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
cf.br ^bb2
^bb2:
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
cf.br ^bb3
^bb3:
"test.some_use"(%tile_1) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_2) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// This is basically the same test as tile_live_ins but shows that the order of
// the blocks within the source does not relate to the liveness, which is based
// on successors and predecessors (not textual order).
//
// So %tile_1 is live on the path bb0 -> bb2 -> bb1 (and dies in bb1). The
// 'hole' when looking at the live range dump comes from the textual order
// (and would disappear if bb1 was moved before bb2 in the source).
//
// When looking at the live range dump (outside of straight-line code) it
// normally makes more sense to consider blocks in isolation (and how they
// relate to the CFG).
// CHECK-LIVE-RANGE-LABEL: @non_sequential_live_ins
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: EE cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb3:
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT: func.return
// CHECK-LABEL: @non_sequential_live_ins
func.func @non_sequential_live_ins(%cond: i1) {
// CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.zero {tile_id = 1 : i32} : vector<[4]x[4]xf32>
%tile_1 = arm_sme.get_tile : vector<[4]x[4]xf32>
"test.dummy"(): () -> ()
cf.br ^bb2
^bb1:
"test.some_use"(%tile_1) : (vector<[4]x[4]xf32>) -> ()
"test.dummy"(): () -> ()
cf.br ^bb3
^bb2:
%tile_2 = arm_sme.zero : vector<[4]x[4]xf32>
"test.dummy"(): () -> ()
cf.cond_br %cond, ^bb1, ^bb3
^bb3:
"test.dummy"(): () -> ()
"test.some_use"(%tile_2) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @non_overlapping_branches
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb1:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E cf.br
// CHECK-LABEL: @non_overlapping_branches
func.func @non_overlapping_branches(%cond: i1) {
// CHECK: arm_sme.zero {tile_id = 0 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
%tile = scf.if %cond -> vector<[4]x[4]xf32> {
// ^bb1:
%zero = arm_sme.zero : vector<[4]x[4]xf32>
scf.yield %zero : vector<[4]x[4]xf32>
} else {
// ^bb2:
%undef = arm_sme.get_tile : vector<[4]x[4]xf32>
scf.yield %undef : vector<[4]x[4]xf32>
}
"test.some_use"(%tile) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// Here %vecA and %vecB are not merged into the same live range (as they are unknown values).
// This means that %vecA and %vecB are both allocated to different tiles (which is not legal).
// expected-note@below {{tile operand is: <block argument> of type 'vector<[4]x[4]xf32>'}}
func.func @overlapping_branches(%cond: i1, %vecA: vector<[4]x[4]xf32>, %vecB: vector<[4]x[4]xf32>) {
// expected-error@below {{op tile operand allocated to different SME virtial tile (move required)}}
%tile = scf.if %cond -> vector<[4]x[4]xf32> {
scf.yield %vecA : vector<[4]x[4]xf32>
} else {
scf.yield %vecB : vector<[4]x[4]xf32>
}
"test.some_use"(%tile) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @run_out_of_tiles_but_avoid_spill
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb2:
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: ||S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: ||||S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: EEEEE cf.br
// Note in the live ranges (above) there is five tile values, but we only have four tiles.
// There is no 'real' spill as we spill the `arm_sme.zero` but are then able to clone it
// at each of its uses.
// CHECK-LABEL: @run_out_of_tiles_but_avoid_spill
func.func @run_out_of_tiles_but_avoid_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector<[4]xf32>, %d: vector<[4]xf32>) {
%init = arm_sme.zero : vector<[4]x[4]xf32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
// Live = %init
scf.for %i = %c0 to %c10 step %c1 {
// CHECK: arm_sme.zero {tile_id = 1 : i32}
// CHECK: arm_sme.zero {tile_id = 2 : i32}
// CHECK: arm_sme.zero {tile_id = 3 : i32}
// CHECK: arm_sme.zero {tile_id = 0 : i32}
%tile_a, %tile_b, %tile_c, %tile_d = scf.for %j = %c0 to %c10 step %c1
iter_args(%iter_a = %init, %iter_b = %init, %iter_c = %init, %iter_d = %init)
-> (vector<[4]x[4]xf32>, vector<[4]x[4]xf32> , vector<[4]x[4]xf32> , vector<[4]x[4]xf32>) {
// ^bb2:
// CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
// CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 2 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
// CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 3 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
// CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
%new_a = arm_sme.insert_tile_slice %a, %iter_a[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
%new_b = arm_sme.insert_tile_slice %b, %iter_b[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
%new_c = arm_sme.insert_tile_slice %c, %iter_c[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
%new_d = arm_sme.insert_tile_slice %d, %iter_d[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
scf.yield %new_a, %new_b, %new_c, %new_d : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>
}
// Live = %init, %tile_a, %tile_b, %tile_c, %tile_d (out of tiles!)
// This should be resolved by duplicating the arm_sme.zero (from folding
// arm_sme.copy_tile operations inserted by the tile allocator).
"test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_c) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_d) : (vector<[4]x[4]xf32>) -> ()
}
return
}
// -----
// We should be able to avoid spills like this, but logic handling this case is
// not implemented yet. Note tile ID >= 16 means a spill/in-memory tile.
// CHECK-LIVE-RANGE-LABEL: @avoidable_spill
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb2:
// CHECK-LIVE-RANGE-NEXT: || test.some_use
// CHECK-LIVE-RANGE-NEXT: ||S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: ||||S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |||||S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: ||E||| test.some_use
// CHECK-LIVE-RANGE-NEXT: || E|| test.some_use
// CHECK-LIVE-RANGE-NEXT: || E| test.some_use
// CHECK-LIVE-RANGE-NEXT: || E test.some_use
// CHECK-LIVE-RANGE-NEXT: || arith.addi
// CHECK-LIVE-RANGE-NEXT: EE cf.br
// Note in the live ranges (above) there is two constant live-ins (first two ranges),
// which gives six overlapping live ranges (at the point where %tile_d is defined).
// The allocator currently will spill the first constant (which results in a real
// spill at it's use), however, this could be avoided by using the knowledge that
// at the first "test.some_use" there's actually only two live ranges (so we can
// fix this be duplicating the constant).
// CHECK-LABEL: @avoidable_spill
func.func @avoidable_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector<[4]xf32>, %d: vector<[4]xf32>) {
// CHECK: arm_sme.zero {tile_id = 16 : i32} : vector<[4]x[4]xf32>
%zero = arm_sme.zero : vector<[4]x[4]xf32>
%tile = arm_sme.get_tile : vector<[4]x[4]xf32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
scf.for %i = %c0 to %c10 step %c1 {
// So spilled here (unnecessarily).
// The arm_sme.zero op could be moved into the loop to avoid this.
"test.some_use"(%zero) : (vector<[4]x[4]xf32>) -> ()
%tile_a = arm_sme.insert_tile_slice %a, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
%tile_b = arm_sme.insert_tile_slice %b, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
%tile_c = arm_sme.insert_tile_slice %c, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
%tile_d = arm_sme.insert_tile_slice %d, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
// %zero is still live here (due the the backedge)
"test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_c) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tile_d) : (vector<[4]x[4]xf32>) -> ()
}
return
}
// -----
// This test is a follow up to the test of the same name in `tile-allocation-copies.mlir`.
// This shows the live ranges (which are why we need to split the conditional branch).
// CHECK-LIVE-RANGE-LABEL: @cond_branch_with_backedge
// CHECK-LIVE-RANGE: ^bb1:
// CHECK-LIVE-RANGE-NEXT: ||| | arith.cmpi
// CHECK-LIVE-RANGE-NEXT: EEE E cf.cond_br
//
// CHECK-LIVE-RANGE-NEXT: ^[[BB3_COPIES:[[:alnum:]]+]]:
// CHECK-LIVE-RANGE-NEXT: ||| ES arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E|| |S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E| ||S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E |||S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: EEEE cf.br
//
// It is important to note that the first three live ranges in ^bb1 do not end
// at the `cf.cond_br` they are live-out via the backedge bb1 -> bb2 -> bb1.
// This means that if we placed the `arm_sme.tile_copies` before the `cf.cond_br`
// then those live ranges would not end at the copies, resulting in unwanted
// overlapping live ranges (and hence tile spills).
//
// With the conditional branch split and the copies placed in the BB3_COPIES
// block the first three live ranges end at the copy operations (as the
// BB3_COPIES block is on the path out of the loop and has no backedge). This
// means there is no overlaps and the live ranges all merge, as shown below.
//
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb1:
// CHECK-LIVE-RANGE-NEXT: |||| arith.cmpi
// CHECK-LIVE-RANGE-NEXT: EEEE cf.cond_br
//
// CHECK-LIVE-RANGE-NEXT: ^[[BB3_COPIES]]:
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: EEEE cf.br
// CHECK-LABEL: @cond_branch_with_backedge
// CHECK-NOT: tile_id = 16
// CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.get_tile {tile_id = 1 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.get_tile {tile_id = 2 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.get_tile {tile_id = 3 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
// CHECK-NOT: tile_id = 16
func.func @cond_branch_with_backedge(%slice: vector<[4]xf32>) {
%tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
%tileB = arm_sme.get_tile : vector<[4]x[4]xf32>
%tileC = arm_sme.get_tile : vector<[4]x[4]xf32>
%tileD = arm_sme.get_tile : vector<[4]x[4]xf32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
// Live here: %tileA, %tileB, %tileC, %tileD
cf.br ^bb1(%c0, %tileA : index, vector<[4]x[4]xf32>)
^bb1(%currentIndex: index, %iterTile: vector<[4]x[4]xf32>):
%continueLoop = arith.cmpi slt, %currentIndex, %c10 : index
// Live here: %iterTile, %tileB, %tileC, %tileD
cf.cond_br %continueLoop, ^bb2, ^bb3(%iterTile, %tileB, %tileC, %tileD : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>)
^bb2:
// Live here: %iterTile, %tileB, %tileC, %tileD
%nextTile = arm_sme.insert_tile_slice %slice, %iterTile[%currentIndex] : vector<[4]xf32> into vector<[4]x[4]xf32>
%nextIndex = arith.addi %currentIndex, %c1 : index
cf.br ^bb1(%nextIndex, %nextTile : index, vector<[4]x[4]xf32>)
^bb3(%finalTileA: vector<[4]x[4]xf32>, %finalTileB: vector<[4]x[4]xf32>, %finalTileC: vector<[4]x[4]xf32>, %finalTileD: vector<[4]x[4]xf32>):
// Live here: %finalTileA, %finalTileB, %finalTileC, %finalTileD
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @fill_holes_in_tile_liveness
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT: cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT: cf.br
// Here there's a 'hole' in the liveness of %tileA (in bb1) where another value
// can reuse the tile ID assigned to %tileA. The liveness for %tileB is
// entirely within the 'hole' in %tileA's live range, so %tileB should get the
// same tile ID as %tileA.
// CHECK-LABEL: @fill_holes_in_tile_liveness
func.func @fill_holes_in_tile_liveness(%cond: i1) {
// CHECK: arm_sme.get_tile {tile_id = [[TILE_ID_A:.*]] : i32}
%tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
cf.cond_br %cond, ^bb2, ^bb1
^bb1:
// CHECK: arm_sme.get_tile {tile_id = [[TILE_ID_A]] : i32}
%tileB = arm_sme.get_tile : vector<[4]x[4]xf32>
"test.dummy"(): () -> ()
"test.some_use"(%tileB) : (vector<[4]x[4]xf32>) -> ()
cf.br ^bb3
^bb2:
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.some_use"(%tileA) : (vector<[4]x[4]xf32>) -> ()
cf.br ^bb3
^bb3:
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @holes_in_tile_liveness_inactive_overlaps
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: | test.some_use
// CHECK-LIVE-RANGE-NEXT: | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT: | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb3:
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT: func.return
// This tests an edge case in inactive live ranges. The first live range is
// inactive at the start of ^bb1. If the tile allocator did not check if the
// second live range overlapped the first it would wrongly re-use tile ID 0
// (as the first live range is inactive so tile ID 0 is free). This would mean
// in ^bb2 two overlapping live ranges would have the same tile ID (bad!).
// CHECK-LABEL: @holes_in_tile_liveness_inactive_overlaps
func.func @holes_in_tile_liveness_inactive_overlaps(%cond: i1) {
// CHECK: arm_sme.get_tile {tile_id = 0 : i32}
%tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
cf.cond_br %cond, ^bb2, ^bb1
^bb1:
// CHECK: arm_sme.get_tile {tile_id = 1 : i32}
%tileB = arm_sme.get_tile : vector<[4]x[4]xf32>
"test.dummy"(): () -> ()
"test.some_use"(%tileB) : (vector<[4]x[4]xf32>) -> ()
cf.br ^bb3(%tileB: vector<[4]x[4]xf32>)
^bb2:
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
// CHECK: arm_sme.get_tile {tile_id = 1 : i32}
%tileC = arm_sme.get_tile : vector<[4]x[4]xf32>
"test.some_use"(%tileA) : (vector<[4]x[4]xf32>) -> ()
cf.br ^bb3(%tileC: vector<[4]x[4]xf32>)
^bb3(%tile: vector<[4]x[4]xf32>):
"test.some_use"(%tile) : (vector<[4]x[4]xf32>) -> ()
return
}
// -----
// This is the same as the previous example, but changes the tile types to
// vector<[16]x[16]xi8>. This means in bb1 the allocator will need to spill the
// first live range (which is inactive).
// Note: The live ranges are the same as the previous example (so are not checked).
// CHECK-LABEL: @spill_inactive_live_range
func.func @spill_inactive_live_range(%cond: i1) {
// CHECK: arm_sme.get_tile {tile_id = 16 : i32}
%tileA = arm_sme.get_tile : vector<[16]x[16]xi8>
cf.cond_br %cond, ^bb2, ^bb1
^bb1:
// CHECK: arm_sme.get_tile {tile_id = 0 : i32}
%tileB = arm_sme.get_tile : vector<[16]x[16]xi8>
"test.dummy"(): () -> ()
"test.some_use"(%tileB) : (vector<[16]x[16]xi8>) -> ()
cf.br ^bb3(%tileB: vector<[16]x[16]xi8>)
^bb2:
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
// CHECK: arm_sme.get_tile {tile_id = 0 : i32}
%tileC = arm_sme.get_tile : vector<[16]x[16]xi8>
"test.some_use"(%tileA) : (vector<[16]x[16]xi8>) -> ()
cf.br ^bb3(%tileC: vector<[16]x[16]xi8>)
^bb3(%tile: vector<[16]x[16]xi8>):
"test.some_use"(%tile) : (vector<[16]x[16]xi8>) -> ()
return
}
// -----
// CHECK-LIVE-RANGE-LABEL: @reactivate_inactive_live_range
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT: cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: | S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | | test.dummy
// CHECK-LIVE-RANGE-NEXT: | | test.dummy
// CHECK-LIVE-RANGE-NEXT: | E test.some_use
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT: cf.br
// Here the live range for %tileA becomes inactive in bb1 (so %tileB gets tile
// ID 0 too). Then in bb2 the live range for tileA is reactivated as it overlaps
// with the start of %tileC's live range (which means %tileC gets tile ID 1).
func.func @reactivate_inactive_live_range(%cond: i1) {
// CHECK: arm_sme.get_tile {tile_id = 0 : i32}
%tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
cf.cond_br %cond, ^bb2, ^bb1
^bb1:
// CHECK: arm_sme.get_tile {tile_id = 0 : i32}
%tileB = arm_sme.get_tile : vector<[16]x[16]xi8>
"test.dummy"(): () -> ()
"test.some_use"(%tileB) : (vector<[16]x[16]xi8>) -> ()
cf.br ^bb3
^bb2:
// CHECK: arm_sme.get_tile {tile_id = 1 : i32}
%tileC = arm_sme.get_tile : vector<[4]x[4]xf32>
"test.dummy"(): () -> ()
"test.dummy"(): () -> ()
"test.some_use"(%tileC) : (vector<[4]x[4]xf32>) -> ()
"test.some_use"(%tileA) : (vector<[4]x[4]xf32>) -> ()
cf.br ^bb3
^bb3:
return
}