// RUN: mlir-opt %s -split-input-file -canonicalize="test-convergence" | FileCheck %s
// CHECK-LABEL: func @known_oob_load
func.func @known_oob_load(%arg0: memref<4xf32>) -> f32 {
// CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: return %[[zero]]
%c4_i32 = arith.constant 4 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] : memref<4xf32>, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @known_oob_load_2d
func.func @known_oob_load_2d(%arg0: memref<4x4xf32>) -> f32 {
// CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: return %[[zero]]
%c0_i32 = arith.constant 0 : i32
%c4_i32 = arith.constant 4 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32, %c0_i32] : memref<4x4xf32>, i32, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @known_oob_load_2d_on_last
func.func @known_oob_load_2d_on_last(%arg0: memref<4x4xf32>) -> f32 {
// CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: return %[[zero]]
%c0_i32 = arith.constant 0 : i32
%c16_i32 = arith.constant 16 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c0_i32, %c16_i32] : memref<4x4xf32>, i32, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @known_oob_load_index
func.func @known_oob_load_index(%arg0: memref<4xf32>) -> f32 {
// CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: return %[[zero]]
%c0_i32 = arith.constant 0 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true, indexOffset = 4 : i32} %arg0[%c0_i32] : memref<4xf32>, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @known_oob_load_sgproffset
func.func @known_oob_load_sgproffset(%arg0: memref<4xf32>) -> f32 {
// CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: return %[[zero]]
%c2_i32 = arith.constant 2 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c2_i32] sgprOffset %c2_i32 : memref<4xf32>, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @unknown_load
func.func @unknown_load(%arg0: memref<4xf32>, %arg1: i32) -> f32 {
// CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
// CHECK: return %[[loaded]]
%c4_i32 = arith.constant 4 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%arg1] sgprOffset %c4_i32 : memref<4xf32>, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @unknown_load_sgproffset
func.func @unknown_load_sgproffset(%arg0: memref<4xf32>, %arg1: i32) -> f32 {
// CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
// CHECK: return %[[loaded]]
%c4_i32 = arith.constant 4 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] sgprOffset %arg1 : memref<4xf32>, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @unranked
func.func @unranked(%arg0: memref<?xf32>) -> f32 {
// CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
// CHECK: return %[[loaded]]
%c4_i32 = arith.constant 4 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] : memref<?xf32>, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @no_oob_check
func.func @no_oob_check(%arg0: memref<4xf32>) -> f32 {
// CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
// CHECK: return %[[loaded]]
%c4_i32 = arith.constant 4 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = false} %arg0[%c4_i32] : memref<4xf32>, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @in_bounds_overall
func.func @in_bounds_overall(%arg0: memref<4x4xf32>) -> f32 {
// CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
// CHECK: return %[[loaded]]
%c0_i32 = arith.constant 0 : i32
%c15_i32 = arith.constant 15 : i32
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c0_i32, %c15_i32] : memref<4x4xf32>, i32, i32 -> f32
func.return %0 : f32
}
// -----
// CHECK-LABEL: func @dead_store
func.func @dead_store(%arg0: memref<4xf32>, %arg1: f32) {
// CHECK-NOT: amdgpu.raw_buffer_store
%c4_i32 = arith.constant 4 : i32
amdgpu.raw_buffer_store {boundsCheck = true} %arg1 -> %arg0[%c4_i32] : f32 -> memref<4xf32>, i32
func.return
}
// -----
// CHECK-LABEL: func @dead_atomic_add
func.func @dead_atomic_add(%arg0: memref<4xf32>, %arg1: f32) {
// CHECK-NOT: amdgpu.raw_buffer_atomic_fadd
%c4_i32 = arith.constant 4 : i32
amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %arg1 -> %arg0[%c4_i32] : f32 -> memref<4xf32>, i32
func.return
}