llvm/mlir/test/Dialect/SCF/buffer-deallocation.mlir

// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation \
// RUN:   -buffer-deallocation-simplification -split-input-file %s | FileCheck %s

func.func @parallel_insert_slice(%arg0: index) {
  %c0 = arith.constant 0 : index
  %alloc = memref.alloc() : memref<2xf32>
  scf.forall (%arg1) in (%arg0) {
    %alloc0 = memref.alloc() : memref<2xf32>
    %0 = memref.load %alloc[%c0] : memref<2xf32>
    linalg.fill ins(%0 : f32) outs(%alloc0 : memref<2xf32>)
  }
  return
}

// CHECK-LABEL: func @parallel_insert_slice
//  CHECK-SAME: (%arg0: index)
//       CHECK: [[ALLOC0:%.+]] = memref.alloc(
//       CHECK: scf.forall
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:   bufferization.dealloc ([[ALLOC1]] : memref<2xf32>) if (%true
//   CHECK-NOT: retain
//       CHECK: }
//       CHECK: bufferization.dealloc ([[ALLOC0]] : memref<2xf32>) if (%true
//   CHECK-NOT: retain

// -----

func.func @reduce(%buffer: memref<100xf32>) {
  %init = arith.constant 0.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  scf.parallel (%iv) = (%c0) to (%c1) step (%c1) init (%init) -> f32 {
    %elem_to_reduce = memref.load %buffer[%iv] : memref<100xf32>
    scf.reduce(%elem_to_reduce : f32) {
      ^bb0(%lhs : f32, %rhs: f32):
        %alloc = memref.alloc() : memref<2xf32>
        memref.store %lhs, %alloc [%c0] : memref<2xf32>
        memref.store %rhs, %alloc [%c1] : memref<2xf32>
        %0 = memref.load %alloc[%c0] : memref<2xf32>
        %1 = memref.load %alloc[%c1] : memref<2xf32>
        %res = arith.addf %0, %1 : f32
        scf.reduce.return %res : f32
    }
  }
  func.return
}

// CHECK-LABEL: func @reduce
//       CHECK: scf.reduce
//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
//       CHECK:   scf.reduce.return