; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S --passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Sink the GEP to make use of scalar+vector addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_gep(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_gep(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
; Input shape: the address GEP (scalar base, i32 vector index) is computed in
; the entry block, but its only user is the masked gather in cond.block.
; The expected output above leaves entry with just the branch and has the GEP
; recreated immediately before the gather.
entry:
%ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices
br i1 %cond, label %cond.block, label %exit
cond.block:
; Sole user of %ptrs.
%load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
br label %exit
exit:
; The expected output has no phi: cond.block returns the gathered value
; directly and exit returns zeroinitializer.
%ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
ret <vscale x 4 x float> %ret
}
; Sink sext to make use of scalar+sxtw(vector) addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_sext(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
; Input shape: only the i32 -> i64 sign extension of the indices lives in the
; entry block; the GEP that consumes it is already in cond.block next to the
; gather. The expected output above has the sext recreated in cond.block, in
; front of the GEP, leaving entry with just the branch.
entry:
%indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
br i1 %cond, label %cond.block, label %exit
cond.block:
; The GEP is the only user of %indices.sext; the gather is the only user of %ptrs.
%ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
%load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
br label %exit
exit:
; The expected output has no phi: cond.block returns the gathered value
; directly and exit returns zeroinitializer.
%ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
ret <vscale x 4 x float> %ret
}
; As above but ensure both the GEP and sext are sunk.
define <vscale x 4 x float> @gather_offsets_sink_sext_get(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext_get(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
; Input shape: both the i32 -> i64 sign extension and the GEP that consumes it
; are in the entry block, while the gather using the addresses is in
; cond.block. The expected output above has both instructions recreated in
; cond.block (sext first, then GEP), leaving entry with just the branch.
entry:
%indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
%ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
br i1 %cond, label %cond.block, label %exit
cond.block:
; Sole user of %ptrs.
%load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
br label %exit
exit:
; The expected output has no phi: cond.block returns the gathered value
; directly and exit returns zeroinitializer.
%ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
ret <vscale x 4 x float> %ret
}
; Don't sink GEPs that cannot benefit from SVE's scalar+vector addressing modes.
define <vscale x 4 x float> @gather_no_scalar_base(<vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_no_scalar_base(
; CHECK-SAME: <vscale x 4 x ptr> [[BASES:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr float, <vscale x 4 x ptr> [[BASES]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
; Negative test: unlike the functions that take a scalar `ptr %base`, the GEP
; base here is itself a vector of pointers (%bases). The expected output above
; keeps the GEP in the entry block; only the gather sits in cond.block.
entry:
%ptrs = getelementptr float, <vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices
br i1 %cond, label %cond.block, label %exit
cond.block:
; Sole user of %ptrs; it still refers to the entry-block GEP in the expected output.
%load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
br label %exit
exit:
; The expected output has no phi: cond.block returns the gathered value
; directly and exit returns zeroinitializer.
%ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
ret <vscale x 4 x float> %ret
}
; Don't sink extends whose result type is already favourable for SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_small(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_small(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i32>
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES_SEXT]]
; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
; Input shape: the indices are extended from i8 to i32 (not to i64) and both
; the sext and the GEP are in the entry block. The expected output above keeps
; the sext in entry but has the GEP recreated in cond.block, still indexing
; with the entry-block sext result.
entry:
%indices.sext = sext <vscale x 4 x i8> %indices to <vscale x 4 x i32>
%ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices.sext
br i1 %cond, label %cond.block, label %exit
cond.block:
; Sole user of %ptrs.
%load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
br label %exit
exit:
; The expected output has no phi: cond.block returns the gathered value
; directly and exit returns zeroinitializer.
%ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
ret <vscale x 4 x float> %ret
}
; Don't sink extends that cannot benefit from SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_big(ptr %base, <vscale x 4 x i48> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_big(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i48> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i48> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[INDICES_SEXT]]
; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
; Input shape: the extend's source element type is i48, i.e. wider than the
; i32 source used by the functions whose sext is expected to move. Both the
; sext and the GEP start in the entry block. The expected output above keeps
; the sext in entry but has the GEP recreated in cond.block.
entry:
%indices.sext = sext <vscale x 4 x i48> %indices to <vscale x 4 x i64>
%ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
br i1 %cond, label %cond.block, label %exit
cond.block:
; Sole user of %ptrs.
%load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
br label %exit
exit:
; The expected output has no phi: cond.block returns the gathered value
; directly and exit returns zeroinitializer.
%ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
ret <vscale x 4 x float> %ret
}
; Sink zext to make use of scalar+uxtw(vector) addressing modes.
; TODO: There's an argument here to split the extend into i8->i32 and i32->i64,
; which would be especially useful if the i8s are the result of a load because
; it would maintain the use of zero-extending loads.
define <vscale x 4 x float> @gather_offset_sink_zext(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_sink_zext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = zext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
; CHECK: exit:
; CHECK-NEXT: ret <vscale x 4 x float> zeroinitializer
;
; Input shape: a single zext takes the indices from i8 all the way to i64 in
; the entry block; the GEP consuming it is already in cond.block. The expected
; output above has the whole i8 -> i64 zext recreated in cond.block in front
; of the GEP, leaving entry with just the branch.
entry:
%indices.zext = zext <vscale x 4 x i8> %indices to <vscale x 4 x i64>
br i1 %cond, label %cond.block, label %exit
cond.block:
; The GEP is the only user of %indices.zext; the gather is the only user of %ptrs.
%ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.zext
%load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
br label %exit
exit:
; The expected output has no phi: cond.block returns the gathered value
; directly and exit returns zeroinitializer.
%ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
ret <vscale x 4 x float> %ret
}
; Ensure we support scatters as well as gathers.
define void @scatter_offsets_sink_sext_get(<vscale x 4 x float> %data, ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define void @scatter_offsets_sink_sext_get(
; CHECK-SAME: <vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK: cond.block:
; CHECK-NEXT: [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT: tail call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[DATA]], <vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT: ret void
; CHECK: exit:
; CHECK-NEXT: ret void
;
; Scatter counterpart of the gather case where both the sext and the GEP start
; in the entry block: here the addresses feed the pointer operand of a masked
; scatter in cond.block. The expected output above has both the sext and the
; GEP recreated in cond.block, leaving entry with just the branch.
entry:
%indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
%ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
br i1 %cond, label %cond.block, label %exit
cond.block:
; Sole user of %ptrs; stores %data under %mask.
tail call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask)
br label %exit
exit:
; In the expected output cond.block ends in its own `ret void` rather than
; branching here.
ret void
}
; Declarations of the masked gather/scatter intrinsics called by the test
; functions in this file. The names are written here without the
; pointer-vector type suffix; the expected output refers to the same
; intrinsics as @llvm.masked.gather.nxv4f32.nxv4p0 and
; @llvm.masked.scatter.nxv4f32.nxv4p0.
declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)