// RUN: mlir-opt -allow-unregistered-dialect %s -test-loop-fusion=test-loop-fusion-dependence-check -split-input-file -verify-diagnostics | FileCheck %s
// -----
// CHECK-LABEL: func @cannot_fuse_would_create_cycle() {
func.func @cannot_fuse_would_create_cycle() {
// Three sibling loops over 10-element memrefs, chained through '%a', '%b'
// and '%c'. Remark annotations are attached to the first and last loop only
// (one per direction); the middle loop '%i1' carries none.
%a = memref.alloc() : memref<10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Set up the following dependences:
// 1) loop0 -> loop1 on memref '%a' (load in loop0, store in loop1)
// 2) loop0 -> loop2 on memref '%b' (store in loop0, load in loop2)
// 3) loop1 -> loop2 on memref '%c' (load in loop1, store in loop2)
// Fusing loop nest '%i0' and loop nest '%i2' would create a cycle.
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
%v0 = affine.load %a[%i0] : memref<10xf32>
affine.store %cf7, %b[%i0] : memref<10xf32>
}
// Middle loop: it is the target of dependence 1) and the source of 3).
affine.for %i1 = 0 to 10 {
affine.store %cf7, %a[%i1] : memref<10xf32>
%v1 = affine.load %c[%i1] : memref<10xf32>
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
%v2 = affine.load %b[%i2] : memref<10xf32>
affine.store %cf7, %c[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @can_fuse_rar_dependence() {
func.func @can_fuse_rar_dependence() {
// Variant of the three-loop chain in which the middle loop only reads '%a',
// so loop0 and loop1 are related on '%a' by two loads and no store.
%a = memref.alloc() : memref<10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Set up the following dependences:
// Make dependence from 0 to 1 on '%a' read-after-read.
// 1) loop0 -> loop1 on memref '%a' (load in loop0, load in loop1)
// 2) loop0 -> loop2 on memref '%b' (store in loop0, load in loop2)
// 3) loop1 -> loop2 on memref '%c' (load in loop1, store in loop2)
// Should fuse: no fusion preventing remarks should be emitted for this test.
affine.for %i0 = 0 to 10 {
%v0 = affine.load %a[%i0] : memref<10xf32>
affine.store %cf7, %b[%i0] : memref<10xf32>
}
// Both accesses in this loop are loads; '%a' is never written in this function.
affine.for %i1 = 0 to 10 {
%v1 = affine.load %a[%i1] : memref<10xf32>
%v2 = affine.load %c[%i1] : memref<10xf32>
}
affine.for %i2 = 0 to 10 {
%v3 = affine.load %b[%i2] : memref<10xf32>
affine.store %cf7, %c[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @can_fuse_different_memrefs() {
func.func @can_fuse_different_memrefs() {
// Variant of the three-loop chain in which the middle loop stores to a fourth
// memref '%d' instead of '%a', so loop0 and loop1 share no memref.
%a = memref.alloc() : memref<10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%d = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Set up the following dependences:
// The store in loop1 is redirected to the unrelated memref '%d', which
// removes the loop0 -> loop1 dependence on '%a' used by the cycle test.
// 1) (none) loop0 loads '%a'; loop1 stores '%d' and does not touch '%a'
// 2) loop0 -> loop2 on memref '%b' (store in loop0, load in loop2)
// 3) loop1 -> loop2 on memref '%c' (load in loop1, store in loop2)
// Should fuse: no fusion preventing remarks should be emitted for this test.
affine.for %i0 = 0 to 10 {
%v0 = affine.load %a[%i0] : memref<10xf32>
affine.store %cf7, %b[%i0] : memref<10xf32>
}
affine.for %i1 = 0 to 10 {
affine.store %cf7, %d[%i1] : memref<10xf32>
%v1 = affine.load %c[%i1] : memref<10xf32>
}
affine.for %i2 = 0 to 10 {
%v2 = affine.load %b[%i2] : memref<10xf32>
affine.store %cf7, %c[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_intermediate_store() {
func.func @should_not_fuse_across_intermediate_store() {
// Two loops that each load from '%0', separated by a top-level store to
// '%0[%c0]'. A remark is annotated on each loop, one per fusion direction.
%0 = memref.alloc() : memref<10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
%v0 = affine.load %0[%i0] : memref<10xf32>
// Unregistered op (the RUN line allows these) that consumes the loaded value.
"op0"(%v0) : (f32) -> ()
}
// Should not fuse loop nests '%i0' and '%i1' across top-level store.
affine.store %cf7, %0[%c0] : memref<10xf32>
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
%v1 = affine.load %0[%i1] : memref<10xf32>
"op1"(%v1) : (f32) -> ()
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_intermediate_load() {
func.func @should_not_fuse_across_intermediate_load() {
// Two loops that each store to '%0', separated by a top-level load from
// '%0[%c0]'. A remark is annotated on each loop, one per fusion direction.
%0 = memref.alloc() : memref<10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
affine.store %cf7, %0[%i0] : memref<10xf32>
}
// Should not fuse loop nests '%i0' and '%i1' across top-level load.
%v0 = affine.load %0[%c0] : memref<10xf32>
// Unregistered op that consumes the value loaded between the two loops.
"op0"(%v0) : (f32) -> ()
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
affine.store %cf7, %0[%i1] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def() {
func.func @should_not_fuse_across_ssa_value_def() {
// Two loops separated by (a) a top-level load from '%1', which the first loop
// writes, and (b) the definition of '%c2', which the second loop uses as a
// store index. A remark is annotated on each loop, one per fusion direction.
%0 = memref.alloc() : memref<10xf32>
%1 = memref.alloc() : memref<10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
%v0 = affine.load %0[%i0] : memref<10xf32>
affine.store %v0, %1[%i0] : memref<10xf32>
}
// Loop nest '%i0' cannot be fused past load from '%1' due to RAW dependence.
%v1 = affine.load %1[%c0] : memref<10xf32>
"op0"(%v1) : (f32) -> ()
// Loop nest '%i1' cannot be fused past SSA value def '%c2' which it uses.
%c2 = arith.constant 2 : index
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
// The store index is the constant '%c2' defined just above this loop,
// not the induction variable.
affine.store %cf7, %0[%c2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_store_before_load() {
func.func @should_not_fuse_store_before_load() {
// Three loops over the same memref '%0': the first and last each store to and
// then load from '%0', while the middle loop only loads from it. Remarks are
// annotated on the first and last loop (one per direction), none on '%i1'.
%0 = memref.alloc() : memref<10xf32>
// NOTE(review): '%c0' is not referenced anywhere in this function.
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
affine.store %cf7, %0[%i0] : memref<10xf32>
%v0 = affine.load %0[%i0] : memref<10xf32>
}
// Intermediate loop: reads '%0' between the two store+load loops.
affine.for %i1 = 0 to 10 {
%v1 = affine.load %0[%i1] : memref<10xf32>
}
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
affine.store %cf7, %0[%i2] : memref<10xf32>
%v2 = affine.load %0[%i2] : memref<10xf32>
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_load_at_depth1() {
func.func @should_not_fuse_across_load_at_depth1() {
// Depth-1 counterpart of the intermediate-load case: inside the outer loop
// '%i0', two inner loops store to row '%i0' of '%0' and are separated by a
// load from '%0[%i0, %c0]'. Both remarks name depth 1.
%0 = memref.alloc() : memref<10x10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
}
// Load placed between the two inner loops, in the body of '%i0'.
%v1 = affine.load %0[%i0, %c0] : memref<10x10xf32>
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_load_in_loop_at_depth1() {
func.func @should_not_fuse_across_load_in_loop_at_depth1() {
// Inside the outer loop '%i0', two inner loops that store to row '%i0' of
// '%0' are separated by a third inner loop '%i2' that loads from the same
// row. Remarks are annotated on the first and last inner loop, at depth 1.
%0 = memref.alloc() : memref<10x10xf32>
// NOTE(review): '%c0' is not referenced anywhere in this function.
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
}
// Intermediate inner loop: the load here sits between the two storing loops.
affine.for %i2 = 0 to 10 {
%v1 = affine.load %0[%i0, %i2] : memref<10x10xf32>
}
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_store_at_depth1() {
func.func @should_not_fuse_across_store_at_depth1() {
// Depth-1 counterpart of the intermediate-store case: inside the outer loop
// '%i0', two inner loops load from row '%i0' of '%0' and are separated by a
// store to '%0[%i0, %c0]'. Both remarks name depth 1.
%0 = memref.alloc() : memref<10x10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
%v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
}
// Store placed between the two inner loops, in the body of '%i0'.
affine.store %cf7, %0[%i0, %c0] : memref<10x10xf32>
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
%v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_store_in_loop_at_depth1() {
func.func @should_not_fuse_across_store_in_loop_at_depth1() {
// Inside the outer loop '%i0', two inner loops that load from row '%i0' of
// '%0' are separated by a third inner loop '%i2' that stores to the same
// row. Remarks are annotated on the first and last inner loop, at depth 1.
%0 = memref.alloc() : memref<10x10xf32>
// NOTE(review): '%c0' is not referenced anywhere in this function.
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
%v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
}
// Intermediate inner loop: the store here sits between the two loading loops.
affine.for %i2 = 0 to 10 {
affine.store %cf7, %0[%i0, %i2] : memref<10x10xf32>
}
affine.for %i3 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
%v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
}
}
return
}
// -----
// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def_at_depth1() {
func.func @should_not_fuse_across_ssa_value_def_at_depth1() {
// Depth-1 counterpart of the SSA-value-def case: inside the outer loop '%i0',
// two inner loops are separated by a load from '%1' (written by the first
// inner loop) and by the definition of '%c2' (used by the second inner loop).
%0 = memref.alloc() : memref<10x10xf32>
%1 = memref.alloc() : memref<10x10xf32>
%c0 = arith.constant 0 : index
%cf7 = arith.constant 7.0 : f32
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
%v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
affine.store %v0, %1[%i0, %i1] : memref<10x10xf32>
}
// RAW dependence from the store in loop nest '%i1' to the load from '%1'
// prevents fusing loop nest '%i1' into loops after the load.
%v1 = affine.load %1[%i0, %c0] : memref<10x10xf32>
"op0"(%v1) : (f32) -> ()
// Loop nest '%i2' cannot be fused past SSA value def '%c2' which it uses.
%c2 = arith.constant 2 : index
affine.for %i2 = 0 to 10 {
// expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
// The column index is the constant '%c2' defined just above this loop,
// not the induction variable '%i2'.
affine.store %cf7, %0[%i0, %c2] : memref<10x10xf32>
}
}
return
}