// llvm/mlir/test/Analysis/test-match-reduction.mlir

// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-match-reduction))" -verify-diagnostics -split-input-file

// Verify that the generic reduction detection utility works on different
// dialects.

// Sum reduction written as a 1-D linalg.generic with a single "reduction"
// iterator: the region adds the input element to the output element and
// yields the sum. The utility should report a reduction for output #0, with
// the region's block argument at index 0 as the reduced value and the
// arith.addf as the combiner.
// expected-remark@below {{Testing function}}
func.func @linalg_red_add(%in0t : tensor<?xf32>, %out0t : tensor<1xf32>) {
  // NOTE: the annotations below attach to the next non-annotation line, so no
  // other comment may be placed between them and the linalg.generic op.
  // expected-remark@below {{Reduction found in output #0!}}
  // expected-remark@below {{Reduced Value: <block argument> of type 'f32' at index: 0}}
  // expected-remark@below {{Combiner Op: %1 = arith.addf }}
  %red = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                          affine_map<(d0) -> (0)>],
                                          iterator_types = ["reduction"]}
   ins(%in0t : tensor<?xf32>)
   outs(%out0t : tensor<1xf32>) {
    ^bb0(%in0: f32, %out0: f32):
      // %in0 comes from the `ins` operand, %out0 from the `outs` operand.
      %add = arith.addf %in0, %out0 : f32
      linalg.yield %add : f32
    } -> tensor<1xf32>
  return
}

// -----

// Row-wise sum written with affine loops: the outer loop walks the 256 rows,
// the inner loop carries a running sum (%red_iter, seeded with 0.0) over the
// 512 columns and the result is stored to %out[%i]. The utility should report
// a reduction on the inner loop's output #0, with the affine.load as the
// reduced value and the arith.addf as the combiner.
// expected-remark@below {{Testing function}}
func.func @affine_red_add(%in: memref<256x512xf32>, %out: memref<256xf32>) {
 %cst = arith.constant 0.000000e+00 : f32
 affine.for %i = 0 to 256 {
   // NOTE: the annotations below attach to the next non-annotation line (the
   // inner affine.for); do not insert other comments between them and the op.
   // expected-remark@below {{Reduction found in output #0!}}
   // expected-remark@below {{Reduced Value: %1 = affine.load }}
   // expected-remark@below {{Combiner Op: %2 = arith.addf }}
   %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
     %ld = affine.load %in[%i, %j] : memref<256x512xf32>
     // The loop-carried value has exactly one use: this addf.
     %add = arith.addf %red_iter, %ld : f32
     affine.yield %add : f32
   }
   affine.store %final_red, %out[%i] : memref<256xf32>
 }
 return
}

// -----

// Negative test: a cmpf/select "max"-style update over the second dimension of
// a 4x4 input. The iteration-carried value %out0 is used twice in the region
// (by arith.cmpf and by arith.select), so the utility reports no reduction.
// TODO: Iteration-carried values with multiple uses are not supported yet.
// expected-remark@below {{Testing function}}
func.func @linalg_red_max(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
  // NOTE: the annotation below attaches to the next non-annotation line, so
  // it must stay directly above the linalg.generic op.
  // expected-remark@below {{Reduction NOT found in output #0!}}
  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                          affine_map<(d0, d1) -> (d0)>],
   iterator_types = ["parallel", "reduction"]}
   ins(%in0t : tensor<4x4xf32>)
   outs(%out0t : tensor<4xf32>) {
    ^bb0(%in0: f32, %out0: f32):
      // Yield %in0 when it compares ordered-greater-than %out0, else %out0.
      %cmp = arith.cmpf ogt, %in0, %out0 : f32
      %sel = arith.select %cmp, %in0, %out0 : f32
      linalg.yield %sel : f32
    } -> tensor<4xf32>
  return
}

// -----

// Sum reduction with extra element-wise work in the same region: the body
// computes %in0 * %in0 - %in0 and then adds that to the output value. The
// utility should still report a reduction for output #0, with the arith.subf
// result as the reduced value and the arith.addf as the combiner.
// expected-remark@below {{Testing function}}
func.func @linalg_fused_red_add(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
  // NOTE: the annotations below attach to the next non-annotation line, so no
  // other comment may be placed between them and the linalg.generic op.
  // expected-remark@below {{Reduction found in output #0!}}
  // expected-remark@below {{Reduced Value: %2 = arith.subf}}
  // expected-remark@below {{Combiner Op: %3 = arith.addf}}
  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                          affine_map<(d0, d1) -> (d0)>],
   iterator_types = ["parallel", "reduction"]}
   ins(%in0t : tensor<4x4xf32>)
   outs(%out0t : tensor<4xf32>) {
    ^bb0(%in0: f32, %out0: f32):
      // Element-wise computation that only involves the input element.
      %mul = arith.mulf %in0, %in0 : f32
      %sub = arith.subf %mul, %in0 : f32
      // Only this op reads the output value %out0.
      %add = arith.addf %sub, %out0 : f32
      linalg.yield %add : f32
    } -> tensor<4xf32>
  return
}

// -----

// Negative test: the loop carries a value, but it is a recurrence rather than
// a reduction. The loop yields the freshly loaded %ld, not the result of the
// arith.addf, so the utility reports no reduction for output #0.
// expected-remark@below {{Testing function}}
func.func @affine_no_red_rec(%in: memref<512xf32>) {
 %cst = arith.constant 0.000000e+00 : f32
 // %rec is the value loaded in the previous iteration.
 // expected-remark@below {{Reduction NOT found in output #0!}}
 %final_val = affine.for %j = 0 to 512 iter_args(%rec = %cst) -> (f32) {
   %ld = affine.load %in[%j] : memref<512xf32>
   // %add reads %rec but is not the yielded value.
   %add = arith.addf %ld, %rec : f32
   affine.yield %ld : f32
 }
 return
}

// -----

// Negative test with two loop-carried values. Output #0 yields
// %dep + %red, so it reads the other carried value as well as its own;
// output #1 simply yields the freshly loaded %ld. The utility reports no
// reduction for either output.
// expected-remark@below {{Testing function}}
func.func @affine_output_dep(%in: memref<512xf32>) {
 %cst = arith.constant 0.000000e+00 : f32
 // Reduction %red is not supported because it depends on another
 // loop-carried dependence.
 // expected-remark@below {{Reduction NOT found in output #0!}}
 // expected-remark@below {{Reduction NOT found in output #1!}}
 %final_red, %final_dep = affine.for %j = 0 to 512
  iter_args(%red = %cst, %dep = %cst) -> (f32, f32) {
   %ld = affine.load %in[%j] : memref<512xf32>
   // Combines both loop-carried values; %ld is not an operand here.
   %add = arith.addf %dep, %red : f32
   // First yielded value feeds %red, second feeds %dep in the next iteration.
   affine.yield %add, %ld : f32, f32
 }
 return
}