// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation \
// RUN: -buffer-deallocation-simplification -split-input-file %s | FileCheck %s
// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null
// RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file > /dev/null
// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: bb2 contains an AllocOp which is
// passed to bb3. In the latter block, there should be a deallocation.
// Since bb1 does not contain an adequate alloc, the deallocation has to be
// made conditional on the branch taken in bb0.
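//
// As a rough sketch (not the exact IR checked below), the pass appends an
// ownership i1 to the joining block's arguments (predecessors pass %false for
// the function argument and %true for the fresh allocation), and the join
// block deallocates conditionally:
//
//   ^bb3(%buf: memref<2xf32>, %owned: i1):
//     test.copy(%buf, %arg2) : (memref<2xf32>, memref<2xf32>)
//     bufferization.dealloc (%buf : memref<2xf32>) if (%owned)
//     return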
func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb2(%arg1 : memref<2xf32>), ^bb1
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK-LABEL: func @condBranch
// CHECK-SAME: ([[ARG0:%.+]]: i1,
// CHECK-SAME: [[ARG1:%.+]]: memref<2xf32>,
// CHECK-SAME: [[ARG2:%.+]]: memref<2xf32>)
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.cond_br{{.*}}, ^bb2([[ARG1]], %false{{[0-9_]*}} :{{.*}}), ^bb1
// CHECK: ^bb1:
// CHECK: %[[ALLOC1:.*]] = memref.alloc
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2(%[[ALLOC1]], %true
// CHECK-NEXT: ^bb2([[ALLOC2:%.+]]: memref<2xf32>, [[COND1:%.+]]: i1):
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC2]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND1]])
// CHECK-NEXT: return
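// Note on the pattern checked above (an illustrative sketch, not additional
// expected output): bufferization.dealloc is given the allocated base buffer
// rather than a view, which the pass recovers first, roughly:
//
//   %base, %offset, %size, %stride = memref.extract_strided_metadata %buf
//       : memref<2xf32> -> memref<f32>, index, index, index
//   bufferization.dealloc (%base : memref<f32>) if (%owned)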
// -----
// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency on block argument %0 in bb2. Since the dynamically shaped buffer
// is passed to bb3 via the block argument %2, an ownership indicator has to be
// passed along with it so that %2 can be deallocated conditionally in bb3
// after its last use.
func.func @condBranchDynamicType(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb2(%arg1 : memref<?xf32>), ^bb1(%arg3: index)
^bb1(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  cf.br ^bb2(%1 : memref<?xf32>)
^bb2(%2: memref<?xf32>):
  test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}
// CHECK-LABEL: func @condBranchDynamicType
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: index)
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.cond_br{{.*}}^bb2(%arg1, %false{{[0-9_]*}} :{{.*}}), ^bb1
// CHECK: ^bb1([[IDX:%.*]]:{{.*}})
// CHECK: [[ALLOC1:%.*]] = memref.alloc([[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2([[ALLOC1]], %true
// CHECK-NEXT: ^bb2([[ALLOC3:%.*]]:{{.*}}, [[COND:%.+]]:{{.*}})
// CHECK: test.copy([[ALLOC3]],
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC3]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return
// -----
// Test case: See above.
func.func @condBranchUnrankedType(
  %arg0: i1,
  %arg1: memref<*xf32>,
  %arg2: memref<*xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb2(%arg1 : memref<*xf32>), ^bb1(%arg3: index)
^bb1(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  %2 = memref.cast %1 : memref<?xf32> to memref<*xf32>
  test.buffer_based in(%arg1: memref<*xf32>) out(%2: memref<*xf32>)
  cf.br ^bb2(%2 : memref<*xf32>)
^bb2(%3: memref<*xf32>):
  test.copy(%3, %arg2) : (memref<*xf32>, memref<*xf32>)
  return
}
// CHECK-LABEL: func @condBranchUnrankedType
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<*xf32>, [[ARG2:%.+]]: memref<*xf32>, [[ARG3:%.+]]: index)
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.cond_br{{.*}}^bb2([[ARG1]], %false{{[0-9_]*}} :{{.*}}), ^bb1
// CHECK: ^bb1([[IDX:%.*]]:{{.*}})
// CHECK: [[ALLOC1:%.*]] = memref.alloc([[IDX]])
// CHECK-NEXT: [[CAST:%.+]] = memref.cast [[ALLOC1]]
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2([[CAST]], %true
// CHECK-NEXT: ^bb2([[ALLOC3:%.*]]:{{.*}}, [[COND:%.+]]:{{.*}})
// CHECK: test.copy([[ALLOC3]],
// CHECK-NEXT: [[CAST:%.+]] = memref.reinterpret_cast [[ALLOC3]]
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[CAST]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return
// TODO: we can get rid of the first dealloc by doing some must-alias analysis
// -----
// Test Case:
//      bb0
//     /    \
//   bb1    bb2 <- Initial position of AllocOp
//    |    /   \
//    |  bb3   bb4
//    |    \   /
//     \    bb5
//      \   /
//       bb6
//        |
//       bb7
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency on block argument %0 in bb2. Since the dynamic type is passed to
// bb5 via the block argument %2 and to bb6 via block argument %3, it is
// currently required to pass along the condition under which the newly
// allocated buffer should be deallocated, since the path via bb1 does not
// allocate a buffer.
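//
// The expected output below also exercises the `retain` clause: a dealloc whose
// buffer still has live aliases keeps those aliases valid and yields an updated
// ownership bit per retained value. An illustrative sketch (the value names are
// not taken from the checks):
//
//   %new_own:2 = bufferization.dealloc (%base : memref<f32>) if (%own)
//                    retain (%alias, %out : memref<?xf32>, memref<?xf32>)
//   // %new_own#0 is forwarded as the ownership of %alias to the successor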
func.func @condBranchDynamicTypeNested(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  cf.br ^bb6(%arg1 : memref<?xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  cf.cond_br %arg0, ^bb3, ^bb4
^bb3:
  cf.br ^bb5(%1 : memref<?xf32>)
^bb4:
  cf.br ^bb5(%1 : memref<?xf32>)
^bb5(%2: memref<?xf32>):
  cf.br ^bb6(%2 : memref<?xf32>)
^bb6(%3: memref<?xf32>):
  cf.br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>):
  test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}
// CHECK-LABEL: func @condBranchDynamicTypeNested
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: index)
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb6([[ARG1]], %false{{[0-9_]*}} :
// CHECK: ^bb2([[IDX:%.*]]:{{.*}})
// CHECK: [[ALLOC1:%.*]] = memref.alloc([[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: [[OWN:%.+]] = arith.select [[ARG0]], [[ARG0]], [[NOT_ARG0]]
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.cond_br{{.*}}, ^bb3, ^bb4
// CHECK-NEXT: ^bb3:
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb5([[ALLOC1]], [[OWN]]
// CHECK-NEXT: ^bb4:
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb5([[ALLOC1]], [[OWN]]
// CHECK-NEXT: ^bb5([[ALLOC2:%.*]]:{{.*}}, [[COND1:%.+]]:{{.*}})
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb6([[ALLOC2]], [[COND1]]
// CHECK-NEXT: ^bb6([[ALLOC4:%.*]]:{{.*}}, [[COND2:%.+]]:{{.*}})
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC4]]
// CHECK-NEXT: [[OWN:%.+]]:2 = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[COND2]]) retain ([[ALLOC4]], [[ARG2]] :
// CHECK: cf.br ^bb7([[ALLOC4]], [[OWN]]#0
// CHECK-NEXT: ^bb7([[ALLOC5:%.*]]:{{.*}}, [[COND3:%.+]]:{{.*}})
// CHECK: test.copy
// CHECK: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC5]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND3]])
// CHECK-NEXT: return
// TODO: the dealloc in bb6 can be optimized away by adding another
// canonicalization pattern
// -----
// Test Case:
//    bb0
//   /   \
//  |    bb1 <- Initial position of AllocOp
//   \   /
//    bb2
// BufferDeallocation expected behavior: It should insert a DeallocOp in the
// exit block after the CopyOp since %1 is an alias for %0 and %arg1.
func.func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK-LABEL: func @criticalEdge
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.cond_br{{.*}}, ^bb1, ^bb2([[ARG1]], %false
// CHECK: [[ALLOC1:%.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2([[ALLOC1]], %true
// CHECK-NEXT: ^bb2([[ALLOC2:%.+]]:{{.*}}, [[COND:%.+]]: {{.*}})
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC2]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return
// -----
// Test Case:
//    bb0 <- Initial position of AllocOp
//   /   \
//  |    bb1
//   \   /
//    bb2
// BufferDeallocation expected behavior: The buffer allocated in bb0 is
// deallocated on the path that does not forward it (guarded by the negated
// branch condition); the buffer reaching the exit block is deallocated
// conditionally after the CopyOp since %1 is an alias for %0 and %arg1.
func.func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK-LABEL: func @invCriticalEdge
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: bufferization.dealloc ([[ALLOC]] : {{.*}}) if ([[NOT_ARG0]])
// CHECK-NEXT: cf.cond_br{{.*}}^bb1, ^bb2([[ARG1]], %false
// CHECK-NEXT: ^bb1:
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb2([[ALLOC]], [[ARG0]]
// CHECK-NEXT: ^bb2([[ALLOC1:%.+]]:{{.*}}, [[COND:%.+]]:{{.*}})
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC1]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return
// -----
// Test Case:
//    bb0 <- Initial position of the first AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: It only inserts two missing
// DeallocOps in the exit block. %5 is an alias for %0. Therefore, the
// DeallocOp for %0 should occur after the last BufferBasedOp. The Dealloc for
// %7 should happen after CopyOp.
func.func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  %7 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
  test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK-LABEL: func @ifElse
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC0:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: cf.cond_br {{.*}}^bb1([[ARG1]], [[ALLOC0]], %false{{[0-9_]*}}, [[ARG0]] : {{.*}}), ^bb2([[ALLOC0]], [[ARG1]], [[NOT_ARG0]], %false{{[0-9_]*}} : {{.*}})
// CHECK: ^bb3([[A0:%.+]]:{{.*}}, [[A1:%.+]]:{{.*}}, [[COND0:%.+]]: i1, [[COND1:%.+]]: i1):
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: test.copy
// CHECK-NEXT: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
// CHECK-NEXT: bufferization.dealloc ([[ALLOC1]] : {{.*}}) if (%true
// CHECK-NOT: retain
// CHECK-NEXT: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[COND0]], [[COND1]])
// CHECK-NOT: retain
// CHECK-NEXT: return
// TODO: Instead of deallocating the bbarg memrefs, a slightly better analysis
// could do an unconditional deallocation on ALLOC0 and move it before the
// test.copy (dealloc of ALLOC1 would remain after the copy)
// -----
// Test Case: No users for buffer in if-else CFG
//    bb0 <- Initial position of AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3
// BufferDeallocation expected behavior: It only inserts a missing DeallocOp
// in the exit block since %5 and %6 are the last aliases of %0.
func.func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK-LABEL: func @ifElseNoUsers
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: cf.cond_br {{.*}}^bb1([[ARG1]], [[ALLOC]], %false{{[0-9_]*}}, [[ARG0]] : {{.*}}), ^bb2([[ALLOC]], [[ARG1]], [[NOT_ARG0]], %false{{[0-9_]*}} : {{.*}})
// CHECK: ^bb3([[A0:%.+]]:{{.*}}, [[A1:%.+]]:{{.*}}, [[COND0:%.+]]: i1, [[COND1:%.+]]: i1):
// CHECK: test.copy
// CHECK-NEXT: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
// CHECK-NEXT: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[COND0]], [[COND1]])
// CHECK-NOT: retain
// CHECK-NEXT: return
// TODO: a slightly better analysis could just insert an unconditional dealloc on %0
// -----
// Test Case:
//      bb0 <- Initial position of the first AllocOp
//     /    \
//   bb1    bb2
//    |    /   \
//    |  bb3   bb4
//    |    \   /
//     \    \ /
//      \   /
//       bb5 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: Two missing DeallocOps should be
// inserted in the exit block.
func.func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
  %9 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
  test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK-LABEL: func @ifElseNested
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC0:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: cf.cond_br {{.*}}^bb1([[ARG1]], [[ALLOC0]], %false{{[0-9_]*}}, [[ARG0]] : {{.*}}), ^bb2([[ALLOC0]], [[ARG1]], [[NOT_ARG0]], %false{{[0-9_]*}} :
// CHECK: ^bb5([[A0:%.+]]: memref<2xf32>, [[A1:%.+]]: memref<2xf32>, [[COND0:%.+]]: i1, [[COND1:%.+]]: i1):
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: test.copy
// CHECK-NEXT: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
// CHECK-NEXT: bufferization.dealloc ([[ALLOC1]] : {{.*}}) if (%true
// CHECK-NOT: retain
// CHECK-NEXT: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[COND0]], [[COND1]])
// CHECK-NOT: retain
// CHECK-NEXT: return
// TODO: Instead of deallocating the bbarg memrefs, a slightly better analysis
// could do an unconditional deallocation on ALLOC0 and move it before the
// test.copy (dealloc of ALLOC1 would remain after the copy)
// -----
// Test Case:
//                                     bb0
//                                    /   \
// Initial pos of the 1st AllocOp -> bb1  bb2 <- Initial pos of the 2nd AllocOp
//                                    \   /
//                                     bb3
// BufferDeallocation expected behavior: Both allocations stay in their blocks
// and are passed to the exit block together with a 'true' ownership indicator;
// the buffer that reaches the exit block is deallocated there after the CopyOp,
// guarded by that indicator.
func.func @moving_alloc_and_inserting_missing_dealloc(
  %cond: i1,
  %arg0: memref<2xf32>,
  %arg1: memref<2xf32>) {
  cf.cond_br %cond, ^bb1, ^bb2
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^exit(%0 : memref<2xf32>)
^bb2:
  %1 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%1: memref<2xf32>) out(%arg0: memref<2xf32>)
  cf.br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
  test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: ^bb1:
// CHECK: [[ALLOC0:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb3([[ALLOC0]], %true
// CHECK: ^bb2:
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb3([[ALLOC1]], %true
// CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND0:%.+]]: i1):
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND0]])
// CHECK-NEXT: return
// -----
func.func @select_aliases(%arg0: index, %arg1: memref<?xi8>, %arg2: i1) {
  %0 = memref.alloc(%arg0) : memref<?xi8>
  %1 = memref.alloc(%arg0) : memref<?xi8>
  %2 = arith.select %arg2, %0, %1 : memref<?xi8>
  test.copy(%2, %arg1) : (memref<?xi8>, memref<?xi8>)
  return
}
// CHECK-LABEL: func @select_aliases
// CHECK: [[ALLOC0:%.+]] = memref.alloc(
// CHECK: [[ALLOC1:%.+]] = memref.alloc(
// CHECK: arith.select
// CHECK: test.copy
// CHECK: bufferization.dealloc ([[ALLOC0]] : {{.*}}) if (%true
// CHECK-NOT: retain
// CHECK: bufferization.dealloc ([[ALLOC1]] : {{.*}}) if (%true
// CHECK-NOT: retain
// -----
func.func @select_aliases_not_same_ownership(%arg0: index, %arg1: memref<?xi8>, %arg2: i1) {
  %0 = memref.alloc(%arg0) : memref<?xi8>
  %1 = memref.alloca(%arg0) : memref<?xi8>
  %2 = arith.select %arg2, %0, %1 : memref<?xi8>
  cf.br ^bb1(%2 : memref<?xi8>)
^bb1(%arg3: memref<?xi8>):
  test.copy(%arg3, %arg1) : (memref<?xi8>, memref<?xi8>)
  return
}
// CHECK-LABEL: func @select_aliases_not_same_ownership
// CHECK: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: memref<?xi8>, [[ARG2:%.+]]: i1)
// CHECK: [[ALLOC0:%.+]] = memref.alloc(
// CHECK: [[ALLOC1:%.+]] = memref.alloca(
// CHECK: [[SELECT:%.+]] = arith.select
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[ALLOC0]] :{{.*}}) if (%true{{[0-9_]*}}) retain ([[SELECT]] :
// CHECK: cf.br ^bb1([[SELECT]], [[OWN]] :
// CHECK: ^bb1([[A0:%.+]]: memref<?xi8>, [[COND:%.+]]: i1)
// CHECK: test.copy
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK: bufferization.dealloc ([[BASE0]] : {{.*}}) if ([[COND]])
// CHECK-NOT: retain
// -----
func.func @select_captured_in_next_block(%arg0: index, %arg1: memref<?xi8>, %arg2: i1, %arg3: i1) {
  %0 = memref.alloc(%arg0) : memref<?xi8>
  %1 = memref.alloca(%arg0) : memref<?xi8>
  %2 = arith.select %arg2, %0, %1 : memref<?xi8>
  cf.cond_br %arg3, ^bb1(%0 : memref<?xi8>), ^bb1(%arg1 : memref<?xi8>)
^bb1(%arg4: memref<?xi8>):
  test.copy(%arg4, %2) : (memref<?xi8>, memref<?xi8>)
  return
}
// CHECK-LABEL: func @select_captured_in_next_block
// CHECK: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: memref<?xi8>, [[ARG2:%.+]]: i1, [[ARG3:%.+]]: i1)
// CHECK: [[ALLOC0:%.+]] = memref.alloc(
// CHECK: [[ALLOC1:%.+]] = memref.alloca(
// CHECK: [[SELECT:%.+]] = arith.select
// CHECK: [[OWN0:%.+]]:2 = bufferization.dealloc ([[ALLOC0]] :{{.*}}) if ([[ARG3]]) retain ([[ALLOC0]], [[SELECT]] :
// CHECK: [[NOT_ARG3:%.+]] = arith.xori [[ARG3]], %true
// CHECK: [[OWN1:%.+]] = bufferization.dealloc ([[ALLOC0]] :{{.*}}) if ([[NOT_ARG3]]) retain ([[SELECT]] :
// CHECK: [[MERGED_OWN:%.+]] = arith.select [[ARG3]], [[OWN0]]#1, [[OWN1]]
// CHECK: cf.cond_br{{.*}}^bb1([[ALLOC0]], [[OWN0]]#0 :{{.*}}), ^bb1([[ARG1]], %false
// CHECK: ^bb1([[A0:%.+]]: memref<?xi8>, [[COND:%.+]]: i1)
// CHECK: test.copy
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[SELECT]]
// CHECK: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[MERGED_OWN]], [[COND]])
// There are two interesting parts here (a condensed sketch follows below):
// * The dealloc condition of %0 in the second block should be the corresponding
//   result of the dealloc operation of the first block, because %0 has unknown
//   ownership status and would otherwise require a clone in the first block.
// * The dealloc in the first block has to make sure that the branch condition
//   and the respective retained values are handled correctly, i.e., only the
//   values for the branch actually taken have to be retained.
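//
// Condensed sketch of that ownership merging (value names are made up for
// readability, not taken from the checks):
//
//   %own_then:2 = bufferization.dealloc (%0 : memref<?xi8>) if (%arg3)
//                     retain (%0, %2 : memref<?xi8>, memref<?xi8>)
//   %not_arg3 = arith.xori %arg3, %true : i1
//   %own_else = bufferization.dealloc (%0 : memref<?xi8>) if (%not_arg3)
//                   retain (%2 : memref<?xi8>)
//   %own_2 = arith.select %arg3, %own_then#1, %own_else : i1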
// -----
func.func @blocks_not_preordered_by_dominance() {
  cf.br ^bb1
^bb2:
  "test.read_buffer"(%alloc) : (memref<2xi32>) -> ()
  return
^bb1:
  %alloc = memref.alloc() : memref<2xi32>
  cf.br ^bb2
}
// CHECK-LABEL: func @blocks_not_preordered_by_dominance
// CHECK-NEXT: [[TRUE:%.+]] = arith.constant true
// CHECK-NEXT: cf.br [[BB1:\^.+]]
// CHECK-NEXT: [[BB2:\^[a-zA-Z0-9_]+]]:
// CHECK-NEXT: "test.read_buffer"([[ALLOC:%[a-zA-Z0-9_]+]])
// CHECK-NEXT: bufferization.dealloc ([[ALLOC]] : {{.*}}) if ([[TRUE]])
// CHECK-NOT: retain
// CHECK-NEXT: return
// CHECK-NEXT: [[BB1]]:
// CHECK-NEXT: [[ALLOC]] = memref.alloc()
// CHECK-NEXT: cf.br [[BB2]]
// CHECK-NEXT: }