llvm/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir

// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation \
// RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null

// RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file --verify-diagnostics > /dev/null

// Test Case: Nested regions - This test defines a BufferBasedOp inside the
// region of a RegionBufferBasedOp.
// BufferDeallocation expected behavior: The AllocOp for the BufferBasedOp
// should remain inside the region of the RegionBufferBasedOp and it should insert
// the missing DeallocOp in the same region. The missing DeallocOp should be
// inserted after CopyOp.

func.func @nested_regions_and_cond_branch(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloc() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @nested_regions_and_cond_branch
//  CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
//       CHECK: ^bb1:
//   CHECK-NOT:   bufferization.clone
//   CHECK-NOT:   bufferization.dealloc
//       CHECK:   cf.br ^bb3([[ARG1]], %false
//       CHECK: ^bb2:
//       CHECK:   [[ALLOC0:%.+]] = memref.alloc()
//       CHECK:   test.region_buffer_based
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:     test.buffer_based
//       CHECK:     bufferization.dealloc ([[ALLOC1]] : memref<2xf32>) if (%true
//  CHECK-NEXT:     test.region_yield
//   CHECK-NOT:   bufferization.clone
//   CHECK-NOT:   bufferization.dealloc
//       CHECK:   cf.br ^bb3([[ALLOC0]], %true
//       CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND0:%.+]]: i1):
//       CHECK:   test.copy
//  CHECK-NEXT:   [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
//  CHECK-NEXT:   bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND0]])
//       CHECK:   return

// -----

// Test Case: nested region control flow
// The alloc %1 flows through both if branches until it is finally returned.
// Hence, it does not require a specific dealloc operation. However, %3
// requires a dealloc.

func.func @nested_region_control_flow(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    "test.read_buffer"(%3) : (memref<?x?xf32>) -> ()
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nested_region_control_flow
//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
//       CHECK:   [[V0:%.+]]:2 = scf.if
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:     bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
//   CHECK-NOT: retain
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:     scf.yield [[V0]]#0
//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:     scf.yield [[CLONE]]
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] : {{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK:   return [[V1]]

// -----

// Test Case: nested region control flow with a nested buffer allocation in a
// divergent branch.
// Buffer deallocation places a copy for both  %1 and %3, since they are
// returned in the end.

func.func @nested_region_control_flow_div(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %3 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nested_region_control_flow_div
//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
//       CHECK:   [[V0:%.+]]:2 = scf.if
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:     scf.yield [[ALLOC1]], %true
//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:     scf.yield [[V0]]#0
//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:     scf.yield [[CLONE]]
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK:   return [[V1]]

// -----

// Test Case: nested region control flow within a region interface.
// No copies are required in this case since the allocation finally escapes
// the method.

func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
  %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %1 = test.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>) then {
    ^bb0(%arg1 : memref<?x?xf32>):
      test.region_if_yield %arg1 : memref<?x?xf32>
  } else {
    ^bb0(%arg1 : memref<?x?xf32>):
      test.region_if_yield %arg1 : memref<?x?xf32>
  } join {
    ^bb0(%arg1 : memref<?x?xf32>):
      test.region_if_yield %arg1 : memref<?x?xf32>
  }
  return %1 : memref<?x?xf32>
}

// CHECK-LABEL: func.func @inner_region_control_flow
//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
//       CHECK:   [[V0:%.+]]:2 = test.region_if [[ALLOC]], %false
//       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
//       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
//       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
//       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
//       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
//       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:     scf.yield [[V0]]#0
//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:     scf.yield [[CLONE]]
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK:   return [[V1]]

// -----

func.func @nestedRegionsAndCondBranchAlloca(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloca() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
//  CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
//       CHECK: ^bb1:
//       CHECK:   cf.br ^bb3([[ARG1]], %false
//       CHECK: ^bb2:
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   test.region_buffer_based
//       CHECK:     memref.alloca()
//       CHECK:     test.buffer_based
//   CHECK-NOT:     bufferization.dealloc
//   CHECK-NOT:     bufferization.clone
//       CHECK:     test.region_yield
//       CHECK:   }
//       CHECK:   cf.br ^bb3([[ALLOC]], %true
//       CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND:%.+]]: i1):
//       CHECK:   test.copy
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[A0]]
//       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[COND]])

// -----

func.func @nestedRegionControlFlowAlloca(
  %arg0 : index, %arg1 : index, %arg2: f32) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloca(%arg0, %arg1) : memref<?x?xf32>
    %c0 = arith.constant 0 : index
    memref.store %arg2, %3[%c0, %c0] : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nestedRegionControlFlowAlloca
//       CHECK: [[ALLOC:%.+]] = memref.alloc(
//       CHECK: [[V0:%.+]]:2 = scf.if
//       CHECK:   scf.yield [[ALLOC]], %false
//       CHECK:   memref.alloca(
//       CHECK:   scf.yield [[ALLOC]], %false
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK: return [[V1]]

// -----

// Test Case: structured control-flow loop using a nested alloc.
// The iteration argument %iterBuf has to be freed before yielding %3 to avoid
// memory leaks.

func.func @loop_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  "test.read_buffer"(%0) : (memref<2xf32>) -> ()
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = memref.alloc() : memref<2xf32>
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_alloc
//  CHECK-SAME: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: index, [[ARG2:%.+]]: index, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
//       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]])
//   CHECK-NOT:       retain
//       CHECK:   scf.yield [[ALLOC1]], %true
//       CHECK: test.copy
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
//   CHECK-NOT: retain
//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)
//   CHECK-NOT: retain

// -----

// Test Case: structured control-flow loop with a nested if operation.
// The loop yields buffers that have been defined outside of the loop and the
// backedges only use the iteration arguments (or one of its aliases).
// Therefore, we do not have to (and are not allowed to) free any buffers
// that are passed via the backedges.

func.func @loop_nested_if_no_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      scf.yield %0 : memref<2xf32>
    } else {
      scf.yield %iterBuf : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_nested_if_no_alloc
//  CHECK-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
//       CHECK:   [[V1:%.+]]:2 = scf.if
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:     scf.yield [[ARG6]], %false
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]]) retain ([[V1]]#0 :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
//       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
//       CHECK: test.copy
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1)

// TODO: we know statically that the inner dealloc will never deallocate
//       anything, i.e., we can optimize it away

// -----

// Test Case: structured control-flow loop with a nested if operation using
// a deeply nested buffer allocation.

func.func @loop_nested_if_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>) -> memref<2xf32> {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      %4 = memref.alloc() : memref<2xf32>
      scf.yield %4 : memref<2xf32>
    } else {
      scf.yield %0 : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  return %1 : memref<2xf32>
}

// CHECK-LABEL: func @loop_nested_if_alloc
//  CHECK-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG5:%.+]] = [[ARG3]], [[ARG6:%.+]] = %false
//       CHECK:   [[V1:%.+]]:2 = scf.if
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:     scf.yield [[ALLOC1]], %true
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG5]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG6]]) retain ([[V1]]#0 :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
//       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
//       CHECK: }
//       CHECK: [[V2:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V2]] :
//       CHECK: return [[V2]]

// -----

// Test Case: several nested structured control-flow loops with a deeply nested
// buffer allocation inside an if operation.

func.func @loop_nested_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  "test.read_buffer"(%0) : (memref<2xf32>) -> ()
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = scf.for %i2 = %lb to %ub step %step
      iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> {
      %3 = scf.for %i3 = %lb to %ub step %step
        iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> {
        %4 = memref.alloc() : memref<2xf32>
        "test.read_buffer"(%4) : (memref<2xf32>) -> ()
        %5 = arith.cmpi eq, %i, %ub : index
        %6 = scf.if %5 -> (memref<2xf32>) {
          %7 = memref.alloc() : memref<2xf32>
          scf.yield %7 : memref<2xf32>
        } else {
          scf.yield %iterBuf3 : memref<2xf32>
        }
        scf.yield %6 : memref<2xf32>
      }
      scf.yield %3 : memref<2xf32>
    }
    scf.yield %2 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_nested_alloc
//       CHECK: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>, {{.*}}: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
//       CHECK:   [[V1:%.+]]:2 = scf.for {{.*}} iter_args([[ARG9:%.+]] = [[ARG6]], [[ARG10:%.+]] = %false
//       CHECK:     [[V2:%.+]]:2 = scf.for {{.*}} iter_args([[ARG12:%.+]] = [[ARG9]], [[ARG13:%.+]] = %false
//       CHECK:       [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:       [[V3:%.+]]:2 = scf.if
//       CHECK:         [[ALLOC2:%.+]] = memref.alloc()
//       CHECK:         scf.yield [[ALLOC2]], %true
//       CHECK:       } else {
//       CHECK:         scf.yield [[ARG12]], %false
//       CHECK:       }
//       CHECK:       [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG12]]
//       CHECK:       [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG13]]) retain ([[V3]]#0 :
//       CHECK:       bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
//   CHECK-NOT: retain
//       CHECK:       [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V3]]#1
//       CHECK:       scf.yield [[V3]]#0, [[OWN_AGG]]
//       CHECK:     }
//       CHECK:     [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG9]]
//       CHECK:     [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG10]]) retain ([[V2]]#0 :
//       CHECK:     [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V2]]#1
//       CHECK:     scf.yield [[V2]]#0, [[OWN_AGG]]
//       CHECK:   }
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]]) retain ([[V1]]#0 :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
//       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
//       CHECK: }
//       CHECK: test.copy
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)

// TODO: all the retain operands could be removed by doing some more thorough analysis

// -----

func.func @affine_loop() -> f32 {
  %buffer = memref.alloc() : memref<1024xf32>
  %sum_init_0 = arith.constant 0.0 : f32
  %res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = arith.addf %sum_iter, %t : f32
    affine.yield %sum_next : f32
  }
  return %res : f32
}

// CHECK-LABEL: func @affine_loop
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: affine.for {{.*}} iter_args(%arg1 = %cst)
//       CHECK:   affine.yield
//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true

// -----

func.func @assumingOp(
  %arg0: !shape.witness,
  %arg2: memref<2xf32>,
  %arg3: memref<2xf32>) {
  // Confirm the alloc will be dealloc'ed in the block.
  %1 = shape.assuming %arg0 -> memref<2xf32> {
    %0 = memref.alloc() : memref<2xf32>
    "test.read_buffer"(%0) : (memref<2xf32>) -> ()
    shape.assuming_yield %arg2 : memref<2xf32>
  }
  // Confirm the alloc will be returned and dealloc'ed after its use.
  %3 = shape.assuming %arg0 -> memref<2xf32> {
    %2 = memref.alloc() : memref<2xf32>
    shape.assuming_yield %2 : memref<2xf32>
  }
  test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @assumingOp
//       CHECK: ({{.*}}, [[ARG1:%.+]]: memref<2xf32>, {{.*}}: memref<2xf32>)
//       CHECK: [[V0:%.+]]:2 = shape.assuming
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true{{[0-9_]*}})
//   CHECK-NOT: retain
//       CHECK:   shape.assuming_yield [[ARG1]], %false
//       CHECK: }
//       CHECK: [[V1:%.+]]:2 = shape.assuming
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   shape.assuming_yield [[ALLOC]], %true
//       CHECK: }
//       CHECK: test.copy
//       CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V1]]#0
//       CHECK: bufferization.dealloc ([[BASE1]] :{{.*}}) if ([[V1]]#1)
//   CHECK-NOT: retain
//       CHECK: bufferization.dealloc ([[BASE0]] :{{.*}}) if ([[V0]]#1)
//   CHECK-NOT: retain
//       CHECK: return

// -----

// Test Case: The op "test.one_region_with_recursive_memory_effects" does not
// implement the RegionBranchOpInterface. This is allowed during buffer
// deallocation because the operation's region does not deal with any MemRef
// values.

func.func @noRegionBranchOpInterface() {
  %0 = "test.one_region_with_recursive_memory_effects"() ({
    %1 = "test.one_region_with_recursive_memory_effects"() ({
      %2 = memref.alloc() : memref<2xi32>
      "test.read_buffer"(%2) : (memref<2xi32>) -> ()
      "test.return"() : () -> ()
    }) : () -> (i32)
    "test.return"() : () -> ()
  }) : () -> (i32)
  "test.return"() : () -> ()
}

// -----

// Test Case: The second op "test.one_region_with_recursive_memory_effects" does
// not implement the RegionBranchOpInterface but has buffer semantics. This is
// not allowed during buffer deallocation.

func.func @noRegionBranchOpInterface() {
  %0 = "test.one_region_with_recursive_memory_effects"() ({
    // expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
    %1 = "test.one_region_with_recursive_memory_effects"() ({
      %2 = memref.alloc() : memref<2xi32>
      "test.read_buffer"(%2) : (memref<2xi32>) -> ()
      "test.return"(%2) : (memref<2xi32>) -> ()
    }) : () -> (memref<2xi32>)
    "test.return"() : () -> ()
  }) : () -> (i32)
  "test.return"() : () -> ()
}

// -----

func.func @while_two_arg(%arg0: index) {
  %a = memref.alloc(%arg0) : memref<?xf32>
  scf.while (%arg1 = %a, %arg2 = %a) : (memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>) {
    // This op has a side effect, but it's not an allocate/free side effect.
    %0 = "test.side_effect_op"() {effects = [{effect="read"}]} : () -> i1
    scf.condition(%0) %arg1, %arg2 : memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>):
    %b = memref.alloc(%arg0) : memref<?xf32>
    scf.yield %arg1, %b : memref<?xf32>, memref<?xf32>
  }
  return
}

// CHECK-LABEL: func @while_two_arg
//       CHECK: [[ALLOC:%.+]] = memref.alloc(
//       CHECK: [[V0:%.+]]:4 = scf.while ({{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false{{[0-9_]*}})
//       CHECK:   scf.condition
//       CHECK: ^bb0([[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: i1, [[ARG4:%.+]]: i1):
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG2]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG4]]) retain ([[ARG1]] :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[ARG3]]
//       CHECK:   scf.yield [[ARG1]], [[ALLOC1]], [[OWN_AGG]], %true
//       CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#2, [[V0]]#3)

// -----

func.func @while_three_arg(%arg0: index) {
  %a = memref.alloc(%arg0) : memref<?xf32>
  scf.while (%arg1 = %a, %arg2 = %a, %arg3 = %a) : (memref<?xf32>, memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>, memref<?xf32>) {
    // This op has a side effect, but it's not an allocate/free side effect.
    %0 = "test.side_effect_op"() {effects = [{effect="read"}]} : () -> i1
    scf.condition(%0) %arg1, %arg2, %arg3 : memref<?xf32>, memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>, %arg3: memref<?xf32>):
    %b = memref.alloc(%arg0) : memref<?xf32>
    %q = memref.alloc(%arg0) : memref<?xf32>
    scf.yield %q, %b, %arg2: memref<?xf32>, memref<?xf32>, memref<?xf32>
  }
  return
}

// CHECK-LABEL: func @while_three_arg
//       CHECK: [[ALLOC:%.+]] = memref.alloc(
//       CHECK: [[V0:%.+]]:6 = scf.while ({{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false
//       CHECK:   scf.condition
//       CHECK: ^bb0([[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: memref<?xf32>, [[ARG4:%.+]]: i1, [[ARG5:%.+]]: i1, [[ARG6:%.+]]: i1):
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:   [[ALLOC2:%.+]] = memref.alloc(
//       CHECK:   [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG1]]
//       CHECK:   [[BASE2:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG3]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE0]], [[BASE2]] :{{.*}}) if ([[ARG4]], [[ARG6]]) retain ([[ARG2]] :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[ARG5]]
//       CHECK:   scf.yield [[ALLOC2]], [[ALLOC1]], [[ARG2]], %true{{[0-9_]*}}, %true{{[0-9_]*}}, [[OWN_AGG]] :
//       CHECK: }
//       CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
//       CHECK: [[BASE2:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#2
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]], [[BASE2]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#3, [[V0]]#4, [[V0]]#5)

// TODO: better alias analysis could simplify the dealloc inside the body further

// -----

// Memref allocated in `then` region and passed back to the parent if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_1(%arg0: memref<10xf32>) -> memref<10xf32> {
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %arg0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}

// CHECK-LABEL: func @test_affine_if_1
//  CHECK-SAME: ([[ARG0:%.*]]: memref<10xf32>)
//       CHECK: [[V0:%.+]]:2 = affine.if
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   affine.yield [[ALLOC]], %true
//       CHECK:   affine.yield [[ARG0]], %false
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1) retain ([[V1]] :
//       CHECK: return [[V1]]

// TODO: the dealloc could be optimized away since the memref to be deallocated
//       either aliases with V1 or the condition is false

// -----

// Memref allocated before parent IfOp and used in `then` region.
// Expected result: deallocation should happen after affine.if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_2() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    affine.yield %alloc0 : memref<10xf32>
  } else {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  }
  return %0 : memref<10xf32>
}
// CHECK-LABEL: func @test_affine_if_2
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = affine.if
//       CHECK:   affine.yield [[ALLOC]], %false
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:   affine.yield [[ALLOC1]], %true
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK: return [[V1]]

// -----

// Memref allocated before parent IfOp and used in `else` region.
// Expected result: deallocation should happen after affine.if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_3() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %alloc0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}

// CHECK-LABEL: func @test_affine_if_3
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = affine.if
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:   affine.yield [[ALLOC1]], %true
//       CHECK:   affine.yield [[ALLOC]], %false
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]]
//       CHECK: return [[V1]]