// llvm/mlir/test/Dialect/Affine/access-analysis.mlir

// RUN: mlir-opt %s -split-input-file -test-affine-access-analysis -verify-diagnostics | FileCheck %s

// Exercises the access analysis remarks on loads nested in a 2-d loop nest
// over a dynamically shaped memref. The remarks number the enclosing loops
// from the outermost one: loop 0 is the %i loop and loop 1 is the %j loop.
// %B is not referenced in the body.
// CHECK-LABEL: func @loop_simple
func.func @loop_simple(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
   %c0 = arith.constant 0 : index
   // Both loops use the size of dimension 0 of %A as their upper bound.
   %M = memref.dim %A, %c0 : memref<?x?xf32>
   affine.for %i = 0 to %M {
     affine.for %j = 0 to %M {
       // The last subscript is %i and %j does not appear in the access.
       affine.load %A[%c0, %i] : memref<?x?xf32>
       // expected-remark@above {{contiguous along loop 0}}
       // expected-remark@above {{invariant along loop 1}}
       // The last subscript steps by 1 with %j but by 8 with %i.
       affine.load %A[%c0, 8 * %i + %j] : memref<?x?xf32>
       // expected-remark@above {{contiguous along loop 1}}
       // Note/FIXME: access stride isn't being checked.
       // expected-remark@-3 {{contiguous along loop 0}}

       // None of the remaining loads is truly contiguous along either loop:
       // the first has a constant last subscript, and the other two have last
       // subscripts that step by 8 and by 4. The remarks still expected below
       // are the invariance of the first load along loop 1 and, for the two
       // strided loads, contiguity remarks that presumably follow from the
       // stride not being checked (see the FIXME notes).
       affine.load %A[%i, %c0] : memref<?x?xf32>
       // expected-remark@above {{invariant along loop 1}}
       // Note/FIXME: access stride isn't being checked.
       affine.load %A[%i, 8 * %j] : memref<?x?xf32>
       // expected-remark@above {{contiguous along loop 1}}
       // Same shape of access with the loop roles swapped: the last subscript
       // is 4 * %i and the first one is %j.
       affine.load %A[%j, 4 * %i] : memref<?x?xf32>
       // expected-remark@above {{contiguous along loop 0}}
     }
   }
   return
}

// -----

// Exercises subscripts that are written in an unsimplified form but are
// identically 0 as functions of the loop IV; both loads are expected to be
// reported as invariant along the single enclosing loop.
// CHECK-LABEL: func @loop_unsimplified
func.func @loop_unsimplified(%A : memref<100xf32>) {
   affine.for %i = 0 to 100 {
     // 2 * %i - %i - %i cancels to 0.
     affine.load %A[2 * %i - %i - %i] : memref<100xf32>
     // expected-remark@above {{invariant along loop 0}}

     // %m is -2 * %i and %n is 2 * %i, so (%m + %n) floordiv 2 is 0 for every
     // value of %i even though the subscript goes through two affine.apply ops.
     %m = affine.apply affine_map<(d0) -> (-2 * d0)>(%i)
     %n = affine.apply affine_map<(d0) -> (2 * d0)>(%i)
     affine.load %A[(%m + %n) floordiv 2] : memref<100xf32>
     // expected-remark@above {{invariant along loop 0}}
   }
   return
}

// -----

// Loop bound maps used by @tiled. #map/#map1 bound a 16-wide range
// [16 * d0, 16 * d0 + 16); #map2/#map3 bound the single-iteration range
// [d0, d0 + 1).
#map = affine_map<(d0) -> (d0 * 16)>
#map1 = affine_map<(d0) -> (d0 * 16 + 16)>
#map2 = affine_map<(d0) -> (d0)>
#map3 = affine_map<(d0) -> (d0 + 1)>

// Exercises the access analysis on a tiled loop nest: %arg1, %arg2 and %arg3
// are the outer (tile-space) IVs, numbered loops 0, 1 and 2 in the remarks,
// and the inner loops take their bounds from those IVs through the maps above.
// CHECK-LABEL: func @tiled
func.func @tiled(%arg0: memref<*xf32>) {
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x224x224x64xf32>
  %cast = memref.cast %arg0 : memref<*xf32> to memref<64xf32>
  affine.for %arg1 = 0 to 4 {
    affine.for %arg2 = 0 to 224 {
      affine.for %arg3 = 0 to 14 {
        // Per-tile scratch buffer, released at the end of this iteration.
        %alloc_0 = memref.alloc() : memref<1x16x1x16xf32>
        // First inner nest: %arg4 is loop 3 and %arg5 is loop 4.
        affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
          affine.for %arg5 = #map(%arg3) to #map1(%arg3) {
            // TODO: here and below, the access isn't really invariant
            // along tile-space IVs where the intra-tile IVs' bounds
            // depend on them.
            %0 = affine.load %cast[%arg4] : memref<64xf32>
            // expected-remark@above {{contiguous along loop 3}}
            // expected-remark@above {{invariant along loop 0}}
            // expected-remark@above {{invariant along loop 1}}
            // expected-remark@above {{invariant along loop 2}}
            // expected-remark@above {{invariant along loop 4}}
            // The subscripts subtract the tile origin (16 * %arg1, 16 * %arg3)
            // from the intra-tile IVs; %arg2 does not appear in the access.
            affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32>
            // expected-remark@above {{contiguous along loop 4}}
            // expected-remark@above {{contiguous along loop 2}}
            // expected-remark@above {{invariant along loop 1}}
          }
        }
        // Second inner nest: %arg4 is loop 3, %arg5 is loop 4 (a single
        // iteration starting at %arg2) and %arg6 is loop 5.
        affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
          affine.for %arg5 = #map2(%arg2) to #map3(%arg2) {
            affine.for %arg6 = #map(%arg3) to #map1(%arg3) {
              %0 = affine.load %alloc_0[0, %arg1 * -16 + %arg4, -%arg2 + %arg5, %arg3 * -16 + %arg6] : memref<1x16x1x16xf32>
              // expected-remark@above {{contiguous along loop 5}}
              // expected-remark@above {{contiguous along loop 2}}
              // The last subscript of the store is %arg4 (loop 3); none of the
              // tile-space IVs appears in the access.
              affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32>
              // expected-remark@above {{contiguous along loop 3}}
              // expected-remark@above {{invariant along loop 0}}
              // expected-remark@above {{invariant along loop 1}}
              // expected-remark@above {{invariant along loop 2}}
            }
          }
        }
        memref.dealloc %alloc_0 : memref<1x16x1x16xf32>
      }
    }
  }
  return
}