// llvm/mlir/test/Dialect/Affine/loop-fusion-dependence-check.mlir

// RUN: mlir-opt -allow-unregistered-dialect %s -test-loop-fusion=test-loop-fusion-dependence-check -split-input-file -verify-diagnostics | FileCheck %s

// -----

// CHECK-LABEL: func @cannot_fuse_would_create_cycle() {
func.func @cannot_fuse_would_create_cycle() {
  // Three sibling loop nests over three buffers. The diagnostic annotations
  // below require a fusion-preventing remark for the pair (loop nest 0,
  // loop nest 2) in both directions.
  %a = memref.alloc() : memref<10xf32>
  %b = memref.alloc() : memref<10xf32>
  %c = memref.alloc() : memref<10xf32>

  %cf7 = arith.constant 7.0 : f32

  // Set up the following dependences:
  // 1) loop0 -> loop1 on memref '%a' (loop0 loads '%a', loop1 stores to it)
  // 2) loop0 -> loop2 on memref '%b' (loop0 stores to '%b', loop2 loads it)
  // 3) loop1 -> loop2 on memref '%c' (loop1 loads '%c', loop2 stores to it)

  // Fusing loop nest '%i0' and loop nest '%i2' would create a cycle, because
  // loop nest '%i1' sits between them in the dependence chain (1) and (3).
  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %cf7, %b[%i0] : memref<10xf32>
  }
  // Intermediate loop nest: no remark is attached to it.
  affine.for %i1 = 0 to 10 {
    affine.store %cf7, %a[%i1] : memref<10xf32>
    %v1 = affine.load %c[%i1] : memref<10xf32>
  }
  affine.for %i2 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
    %v2 = affine.load %b[%i2] : memref<10xf32>
    affine.store %cf7, %c[%i2] : memref<10xf32>
  }
  return
}

// -----

// CHECK-LABEL: func @can_fuse_rar_dependence() {
func.func @can_fuse_rar_dependence() {
  // Three sibling loop nests where loop0 and loop1 both only read '%a'.
  // No diagnostic annotations appear in this function.
  %a = memref.alloc() : memref<10xf32>
  %b = memref.alloc() : memref<10xf32>
  %c = memref.alloc() : memref<10xf32>

  %cf7 = arith.constant 7.0 : f32

  // Set up the following access relations:
  // 1) loop0 -> loop1 on memref '%a' is read-after-read only (both loops
  //    load '%a', neither stores to it).
  // 2) loop0 -> loop2 on memref '%b' (loop0 stores to '%b', loop2 loads it)
  // 3) loop1 -> loop2 on memref '%c' (loop1 loads '%c', loop2 stores to it)

  // Should fuse: no fusion preventing remarks should be emitted for this test.
  affine.for %i0 = 0 to 10 {
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %cf7, %b[%i0] : memref<10xf32>
  }
  // Only loads in this loop nest: it never writes '%a' or '%c'.
  affine.for %i1 = 0 to 10 {
    %v1 = affine.load %a[%i1] : memref<10xf32>
    %v2 = affine.load %c[%i1] : memref<10xf32>
  }
  affine.for %i2 = 0 to 10 {
    %v3 = affine.load %b[%i2] : memref<10xf32>
    affine.store %cf7, %c[%i2] : memref<10xf32>
  }
  return
}

// -----

// CHECK-LABEL: func @can_fuse_different_memrefs() {
func.func @can_fuse_different_memrefs() {
  // Three sibling loop nests where loop1 writes '%d', a buffer no other loop
  // touches. No diagnostic annotations appear in this function.
  %a = memref.alloc() : memref<10xf32>
  %b = memref.alloc() : memref<10xf32>
  %c = memref.alloc() : memref<10xf32>
  %d = memref.alloc() : memref<10xf32>

  %cf7 = arith.constant 7.0 : f32

  // Set up the following access relations:
  // 1) loop0 and loop1 share no memref: loop0 loads '%a' while loop1 stores
  //    to the unrelated memref '%d'.
  // 2) loop0 -> loop2 on memref '%b' (loop0 stores to '%b', loop2 loads it)
  // 3) loop1 -> loop2 on memref '%c' (loop1 loads '%c', loop2 stores to it)

  // Should fuse: no fusion preventing remarks should be emitted for this test.
  affine.for %i0 = 0 to 10 {
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %cf7, %b[%i0] : memref<10xf32>
  }
  affine.for %i1 = 0 to 10 {
    // Store goes to '%d', not '%a'.
    affine.store %cf7, %d[%i1] : memref<10xf32>
    %v1 = affine.load %c[%i1] : memref<10xf32>
  }
  affine.for %i2 = 0 to 10 {
    %v2 = affine.load %b[%i2] : memref<10xf32>
    affine.store %cf7, %c[%i2] : memref<10xf32>
  }
  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_intermediate_store() {
func.func @should_not_fuse_across_intermediate_store() {
  // Two loop nests that only read '%0', separated by a top-level store to
  // '%0'. The annotations require a fusion-preventing remark in both
  // directions at depth 0.
  %0 = memref.alloc() : memref<10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
    %v0 = affine.load %0[%i0] : memref<10xf32>
    // Unregistered op that consumes the loaded value.
    "op0"(%v0) : (f32) -> ()
  }

  // Should not fuse loop nests '%i0' and '%i1' across top-level store.
  // The store writes element 0 of the same memref both loop nests read.
  affine.store %cf7, %0[%c0] : memref<10xf32>

  affine.for %i1 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
    %v1 = affine.load %0[%i1] : memref<10xf32>
    // Unregistered op that consumes the loaded value.
    "op1"(%v1) : (f32) -> ()
  }
  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_intermediate_load() {
func.func @should_not_fuse_across_intermediate_load() {
  // Two loop nests that only write '%0', separated by a top-level load from
  // '%0'. The annotations require a fusion-preventing remark in both
  // directions at depth 0.
  %0 = memref.alloc() : memref<10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
    affine.store %cf7, %0[%i0] : memref<10xf32>
  }

  // Should not fuse loop nests '%i0' and '%i1' across top-level load.
  // The load reads element 0 of the same memref both loop nests write, and
  // its result is consumed by the unregistered op "op0".
  %v0 = affine.load %0[%c0] : memref<10xf32>
  "op0"(%v0) : (f32) -> ()

  affine.for %i1 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
    affine.store %cf7, %0[%i1] : memref<10xf32>
  }

  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def() {
func.func @should_not_fuse_across_ssa_value_def() {
  // Two loop nests separated by (a) a top-level load from '%1', which the
  // first loop nest writes, and (b) the definition of '%c2', which the second
  // loop nest uses. The annotations require a fusion-preventing remark in
  // both directions at depth 0.
  %0 = memref.alloc() : memref<10xf32>
  %1 = memref.alloc() : memref<10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
    // Copies '%0' into '%1' element by element.
    %v0 = affine.load %0[%i0] : memref<10xf32>
    affine.store %v0, %1[%i0] : memref<10xf32>
  }

  // Loop nest '%i0' cannot be fused past load from '%1' due to RAW dependence.
  %v1 = affine.load %1[%c0] : memref<10xf32>
  "op0"(%v1) : (f32) -> ()

  // Loop nest '%i1' cannot be fused past SSA value def '%c2' which it uses.
  %c2 = arith.constant 2 : index

  affine.for %i1 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
    // The store index is the constant '%c2' defined just above this loop.
    affine.store %cf7, %0[%c2] : memref<10xf32>
  }

  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_store_before_load() {
func.func @should_not_fuse_store_before_load() {
  // Three sibling loop nests on the same memref: the first and third each
  // store to and then load from '%0', while the middle one only loads '%0'.
  // The annotations require a fusion-preventing remark between loop nest 0
  // and loop nest 2 in both directions at depth 0.
  %0 = memref.alloc() : memref<10xf32>
  // NOTE(review): '%c0' is not used anywhere in this function.
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
    affine.store %cf7, %0[%i0] : memref<10xf32>
    %v0 = affine.load %0[%i0] : memref<10xf32>
  }

  // Intermediate loop nest that reads '%0'; no remark is attached to it.
  affine.for %i1 = 0 to 10 {
    %v1 = affine.load %0[%i1] : memref<10xf32>
  }

  affine.for %i2 = 0 to 10 {
    // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
    affine.store %cf7, %0[%i2] : memref<10xf32>
    %v2 = affine.load %0[%i2] : memref<10xf32>
  }
  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_load_at_depth1() {
func.func @should_not_fuse_across_load_at_depth1() {
  // Same shape as the intermediate-load case, but nested inside an outer
  // loop '%i0': two inner loop nests store to row '%i0' of '%0', with a load
  // from that row between them. The annotations require a fusion-preventing
  // remark in both directions at depth 1.
  %0 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
      affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
    }

    // Load from column 0 of the row written by both inner loop nests.
    %v1 = affine.load %0[%i0, %c0] : memref<10x10xf32>

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
      affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_load_in_loop_at_depth1() {
func.func @should_not_fuse_across_load_in_loop_at_depth1() {
  // Inside outer loop '%i0', three inner loop nests touch row '%i0' of '%0':
  // the first and third store to it, the middle one loads from it. The
  // annotations require a fusion-preventing remark between loop nest 0 and
  // loop nest 2 in both directions at depth 1.
  %0 = memref.alloc() : memref<10x10xf32>
  // NOTE(review): '%c0' is not used anywhere in this function.
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
      affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
    }

    // Intermediate loop nest that reads the same row; no remark is attached
    // to it.
    affine.for %i2 = 0 to 10 {
      %v1 = affine.load %0[%i0, %i2] : memref<10x10xf32>
    }

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
      affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_store_at_depth1() {
func.func @should_not_fuse_across_store_at_depth1() {
  // Same shape as the intermediate-store case, but nested inside an outer
  // loop '%i0': two inner loop nests load from row '%i0' of '%0', with a
  // store to that row between them. The annotations require a
  // fusion-preventing remark in both directions at depth 1.
  %0 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
      %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
    }

    // Store to column 0 of the row read by both inner loop nests.
    affine.store %cf7, %0[%i0, %c0] : memref<10x10xf32>

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
      %v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_store_in_loop_at_depth1() {
func.func @should_not_fuse_across_store_in_loop_at_depth1() {
  // Inside outer loop '%i0', three inner loop nests touch row '%i0' of '%0':
  // the first and third load from it, the middle one stores to it. The
  // annotations require a fusion-preventing remark between loop nest 0 and
  // loop nest 2 in both directions at depth 1.
  %0 = memref.alloc() : memref<10x10xf32>
  // NOTE(review): '%c0' is not used anywhere in this function.
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
      %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
    }

    // Intermediate loop nest that writes the same row; no remark is attached
    // to it.
    affine.for %i2 = 0 to 10 {
      affine.store %cf7, %0[%i0, %i2] : memref<10x10xf32>
    }

    affine.for %i3 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
      %v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
    }
  }
  return
}

// -----

// CHECK-LABEL: func @should_not_fuse_across_ssa_value_def_at_depth1() {
func.func @should_not_fuse_across_ssa_value_def_at_depth1() {
  // Same shape as the SSA-value-def case, but nested inside an outer loop
  // '%i0': two inner loop nests separated by a load from '%1' (written by the
  // first) and by the definition of '%c2' (used by the second). The
  // annotations require a fusion-preventing remark in both directions at
  // depth 1.
  %0 = memref.alloc() : memref<10x10xf32>
  %1 = memref.alloc() : memref<10x10xf32>
  %c0 = arith.constant 0 : index
  %cf7 = arith.constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
      // Copies row '%i0' of '%0' into the same row of '%1'.
      %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
      affine.store %v0, %1[%i0, %i1] : memref<10x10xf32>
    }

    // RAW dependence from store in loop nest '%i1' to 'load %1' prevents
    // fusing loop nest '%i1' into loops after the load.
    %v1 = affine.load %1[%i0, %c0] : memref<10x10xf32>
    "op0"(%v1) : (f32) -> ()

    // Loop nest '%i2' cannot be fused past SSA value def '%c2' which it uses.
    %c2 = arith.constant 2 : index

    affine.for %i2 = 0 to 10 {
      // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
      // The column index is the constant '%c2' defined just above this loop.
      affine.store %cf7, %0[%i0, %c2] : memref<10x10xf32>
    }
  }
  return
}